"""
critical_gap.py - 보고서 CRITICAL 이슈 미수정 검증 verifier

보고서에서 CRITICAL 이슈가 보고되었으나 수정이 확인되지 않으면 FAIL.
"""

import os
import re

# Fallback directory for reports: verify() reads <DEFAULT_REPORTS_DIR>/<task_id>.md
# when the caller does not pass an explicit report_path.
DEFAULT_REPORTS_DIR = "/home/jay/workspace/memory/reports"

# Keywords that flag a CRITICAL issue. Kept for backward compatibility only:
# the detection code visible in this file matches with the compiled regexes
# below and never reads this list. (The original note also states that nothing
# imports it externally; that cannot be verified from this file.)
CRITICAL_KEYWORDS = ["CRITICAL", "critical", "심각", "블로커"]

# Keywords that confirm a fix. Not read by the code visible in this file,
# which matches with _RESOLVED_PATTERNS instead.
RESOLVED_KEYWORDS = ["수정 완료", "해결", "fixed", "resolved"]

# Keyword detectors. The English pattern relies on \b word boundaries and is
# case-sensitive: only the spellings "CRITICAL" and "critical" match.
_CRITICAL_RE_EN = re.compile(r"\b(CRITICAL|critical)\b")
# The Korean keywords use an explicit delimiter class instead of \b: each
# keyword must be bounded on both sides by line start/end, whitespace, or one
# of , 。 、 ! ?
# NOTE(review): forms such as "심각한" or "심각:" therefore do not match;
# confirm that this is intended.
_CRITICAL_RE_KO = re.compile(r"(?:^|[\s,。、!?])(?:심각|블로커)(?:[\s,。、!?]|$)")

# Issue-marker patterns. A line that contains a critical keyword is only
# accepted as a CRITICAL issue when at least one of these also matches:
#   1. a Markdown header containing the keyword      (## CRITICAL 이슈)
#   2. a bullet item ("-" or "*") containing it       (- CRITICAL: ...)
#   3. a numbered list item containing it             (1. CRITICAL ...)
#   4. a "severity: critical" / "severity=critical" label (case-insensitive)
#   5. an image-style badge ![security-critical], ![critical] or ![high]
# Patterns 1-3 are case-sensitive for the English keyword.
ISSUE_MARKER_PATTERNS = [
    re.compile(r"^\s*#{1,6}\s+.*(?:CRITICAL|critical|심각|블로커)"),
    re.compile(r"^\s*[-*]\s+.*(?:CRITICAL|critical|심각|블로커)"),
    re.compile(r"^\s*\d+\.\s+.*(?:CRITICAL|critical|심각|블로커)"),
    re.compile(r"severity\s*[:=]\s*critical", re.IGNORECASE),
    re.compile(r"!\[(?:security-critical|critical|high)\]", re.IGNORECASE),
]

# Inline "already resolved" markers. When one of these appears on the same
# line as the CRITICAL keyword, the line is not reported as an issue:
#   - a bold bracketed tag containing "resolved", e.g. **[RESOLVED]**
#   - a check mark followed by RESOLVED (upper case only)
#   - strike-through text (~~...~~) containing "critical" or "심각"
RESOLVED_INLINE_PATTERNS = [
    re.compile(r"\*\*\[.*resolved.*\]\*\*", re.IGNORECASE),
    re.compile(r"✅\s*RESOLVED"),
    re.compile(r"~~.*(critical|심각).*~~"),
]

# Resolution markers, stricter than the plain RESOLVED_KEYWORDS list: English
# words use word boundaries, and the Korean word "해결" only counts when it is
# followed by one of the listed suffixes, so a bare or negated "해결" is ignored.
_RESOLVED_PATTERNS = [
    re.compile(r"수정\s*완료"),                       # "수정 완료" / "수정완료"
    re.compile(r"\bfixed\b", re.IGNORECASE),
    re.compile(r"\bresolved\b", re.IGNORECASE),
    re.compile(r"✅\s*RESOLVED", re.IGNORECASE),
    # "해결" followed by an affirmative suffix.
    # Matches:        "해결됨", "해결되었", "해결됐", "해결 완료", "해결책"
    # Does not match: "해결 미완료", "해결되지 않", "미해결", "해결 안 됨"
    # NOTE(review): "해결책" (a proposed solution) is treated as resolved;
    # confirm that this is intended.
    re.compile(r"해결(?:됨|되었|되었음|됐|책|책을|책이|\s*완료)"),
]

# Negative context. If any of these appears on a line, that line is not
# accepted as a resolution marker even when a _RESOLVED_PATTERNS entry matches.
_UNRESOLVED_HINTS = [
    re.compile(r"미\s*해결"),
    re.compile(r"미\s*완료"),
    re.compile(r"해결\s*되지\s*않"),
    re.compile(r"해결\s*안\s*됨"),
    re.compile(r"해결\s*못"),
    re.compile(r"unresolved", re.IGNORECASE),
    re.compile(r"not\s+(?:fixed|resolved)", re.IGNORECASE),
]

# Headers that open an "issue section"; CRITICAL detection is limited to the
# lines under such a header. The pattern is anchored at the start of the line
# (1-6 '#' then whitespace) but not at the end, so trailing text after the
# keyword is allowed (e.g. "## CRITICAL 이슈 1건 발견"). Case-insensitive.
# Recognised titles start with: "CRITICAL 이슈", "발견(된) 이슈" (found issues),
# "발견(된) 문제" (found problems), "미해결 항목" (unresolved items),
# "문제점" (problems), "이슈" (issues) or "issues".
_ISSUE_SECTION_HEADERS = re.compile(
    r"^#{1,6}\s+(?:"
    r"CRITICAL\s*이슈"
    r"|발견된?\s*이슈"
    r"|발견된?\s*문제"
    r"|미해결\s*항목"
    r"|문제점"
    r"|이슈"
    r"|issues"
    r")",
    re.IGNORECASE,
)

# Any Markdown ATX header; used as the signal that an issue section has ended.
_ANY_HEADER = re.compile(r"^#{1,6}\s+")

# Metadata-label whitelist: a line matching any of these is dropped from the
# CRITICAL candidates.
#   1. a line starting (optionally after "- ") with a label such as priority,
#      level, dispatch level or their Korean equivalents, followed by ':' or '：'
#   2. a line consisting only of "[CRITICAL]", optionally written as a header
#   3. a "--level critical" command-line flag anywhere in the line
_META_LABEL_PATTERNS = [
    re.compile(
        r"^\s*(?:-\s*)?(?:우선순위|priority|level|발사\s*우선순위|dispatch\s*level|작업\s*레벨)\s*[:：]",
        re.IGNORECASE,
    ),
    re.compile(r"^\s*(?:#{1,6}\s+)?\[CRITICAL\]\s*$", re.IGNORECASE),
    re.compile(r"--level\s+critical", re.IGNORECASE),
]

# Generic noun-phrase whitelist: when every "critical" on a line is part of
# one of these phrases (critical path, critical section, ...), the line is
# dropped from the candidates.
_GENERIC_PHRASE_PATTERNS = [
    re.compile(
        r"\bcritical\s+(?:path|chain|section|mass|period|thinking)\b",
        re.IGNORECASE,
    ),
]

# Keywords searched for in the neighbouring lines of a candidate. These are
# plain substring matches (no word boundaries), case-insensitive.
_METADATA_CONTEXT_KEYWORDS = re.compile(
    r"level|priority|우선순위|\[CRITICAL\]|--level|dispatch|발사|발행",
    re.IGNORECASE,
)


def _extract_issue_sections(content: str) -> list[tuple[int, str]]:
    """Collect the lines of a report that belong to issue sections.

    Collection starts at a header matching ``_ISSUE_SECTION_HEADERS`` and stops
    at the next Markdown header that is not itself an issue-section header.

    Args:
        content: Full text of the report.

    Returns:
        ``(line_number, line)`` tuples with 1-based line numbers and the lines
        unmodified. The issue-section header line itself IS included, so a
        header such as "## CRITICAL 이슈" can be a candidate. The header that
        closes a section is not included.
    """
    collected: list[tuple[int, str]] = []
    collecting = False

    for number, raw in enumerate(content.splitlines(), start=1):
        # NOTE(review): the opening check runs on the stripped line while the
        # closing check below runs on the raw line, so an indented non-issue
        # header does not close a section. Confirm whether that is intended.
        if _ISSUE_SECTION_HEADERS.match(raw.strip()) is not None:
            collecting = True
        elif not collecting:
            continue
        elif _ANY_HEADER.match(raw):
            # A different header ends the current issue section.
            collecting = False
            continue
        collected.append((number, raw))

    return collected


def _is_metadata_context(lines: list[str], line_num: int) -> bool:
    """Tell whether the lines around ``line_num`` look like task metadata.

    Args:
        lines: All lines of the report (0-indexed list).
        line_num: 1-based number of the line under inspection.

    Returns:
        True when any of the up-to-two lines before or after the inspected
        line matches ``_METADATA_CONTEXT_KEYWORDS``. The inspected line itself
        is never checked.
    """
    center = line_num - 1  # convert to a 0-based index
    first = max(0, center - 2)
    stop = min(len(lines), center + 3)  # exclusive bound, covers center + 2

    return any(
        _METADATA_CONTEXT_KEYWORDS.search(lines[pos])
        for pos in range(first, stop)
        if pos != center
    )


def _detect_critical_lines(lines: list[str], content: str = "") -> list[tuple[int, str]]:
    """Find the lines that report a CRITICAL issue.

    Args:
        lines: All lines of the report (0-indexed list). Always used for the
            neighbouring-line metadata check.
        content: Full report text. When non-empty, only lines inside issue
            sections (see ``_extract_issue_sections``) are examined, and a
            report without any issue section yields an empty list. When
            empty, every entry of ``lines`` is examined.

    Returns:
        ``(line_number, stripped_line)`` tuples, 1-based, in document order.

    A candidate line is reported only if all of the following hold:
      * it contains a critical keyword (English or Korean pattern);
      * it does not match a metadata-label pattern;
      * not every English "critical" on it is part of a generic phrase
        such as "critical path";
      * none of the two lines before or after it contains a metadata keyword;
      * it matches one of ``ISSUE_MARKER_PATTERNS``;
      * it carries no inline resolved marker.

    NOTE(review): the metadata keywords include the literal "[CRITICAL]", so
    adjacent list items that are each tagged "[CRITICAL]" appear to suppress
    one another through the neighbouring-line check. Confirm that this is
    intended.
    """
    if content:
        pool = _extract_issue_sections(content)
        if not pool:
            # No issue section at all: nothing to report.
            return []
    else:
        pool = list(enumerate(lines, start=1))

    hits: list[tuple[int, str]] = []
    for number, text in pool:
        # Start offsets of every English keyword occurrence on this line.
        keyword_starts = {m.start() for m in _CRITICAL_RE_EN.finditer(text)}
        if not keyword_starts and _CRITICAL_RE_KO.search(text) is None:
            continue

        if any(label.search(text) for label in _META_LABEL_PATTERNS):
            continue

        if keyword_starts:
            # Offsets of "critical" that belong to a whitelisted noun phrase.
            phrase_starts = set()
            for phrase in _GENERIC_PHRASE_PATTERNS:
                for hit in phrase.finditer(text):
                    offset = hit.group(0).lower().find("critical")
                    if offset >= 0:
                        phrase_starts.add(hit.start() + offset)
            # Skip only when every occurrence is accounted for by a phrase.
            if keyword_starts <= phrase_starts:
                continue

        if _is_metadata_context(lines, number):
            continue

        has_marker = any(marker.search(text) for marker in ISSUE_MARKER_PATTERNS)
        resolved_inline = any(done.search(text) for done in RESOLVED_INLINE_PATTERNS)
        if has_marker and not resolved_inline:
            hits.append((number, text.strip()))

    return hits


def _detect_resolved_after(lines: list[str], critical_line_num: int) -> bool:
    """Check whether a resolution marker appears after a CRITICAL line.

    Args:
        lines: All lines of the report (0-indexed list).
        critical_line_num: 1-based number of the line holding the CRITICAL
            issue. Because the list is 0-indexed, slicing from this value
            starts at the line immediately after it.

    Returns:
        True if any later line matches one of ``_RESOLVED_PATTERNS`` and, on
        that same line, none of ``_UNRESOLVED_HINTS``. The search runs to the
        end of the report.
    """

    def confirms_fix(text: str) -> bool:
        # A negative hint anywhere on the line disqualifies the whole line.
        if any(hint.search(text) for hint in _UNRESOLVED_HINTS):
            return False
        return any(marker.search(text) for marker in _RESOLVED_PATTERNS)

    return any(confirms_fix(later) for later in lines[critical_line_num:])


def verify(task_id: str, report_path: str = "") -> dict:
    """Check a report for CRITICAL issues and whether each one was resolved.

    Args:
        task_id: Task identifier. When ``report_path`` is empty, the report is
            read from ``<DEFAULT_REPORTS_DIR>/<task_id>.md``.
        report_path: Optional explicit path to the report file.

    Returns:
        ``{"status": ..., "details": [...]}`` where status is:

        * ``"SKIP"`` - no ``task_id``, the report file does not exist, or it
          cannot be read (I/O error or content that is not valid UTF-8);
        * ``"PASS"`` - the report is empty, contains no CRITICAL issue, or
          every CRITICAL issue has a resolution marker after it;
        * ``"FAIL"`` - at least one CRITICAL issue has no resolution marker.

        ``details`` holds a summary line followed by one line per issue.
    """
    if not task_id:
        return {"status": "SKIP", "details": ["No task_id provided"]}

    # An explicit path wins; otherwise derive it from the task id.
    path = report_path or os.path.join(DEFAULT_REPORTS_DIR, f"{task_id}.md")

    if not os.path.exists(path):
        return {
            "status": "SKIP",
            "details": [f"Report not found: {path}"],
        }

    try:
        with open(path, "r", encoding="utf-8") as f:
            content = f.read()
    except (OSError, UnicodeDecodeError) as e:
        # UnicodeDecodeError derives from ValueError, not OSError, so it must
        # be listed explicitly; otherwise a report that is not valid UTF-8
        # would crash the verifier instead of being skipped.
        return {
            "status": "SKIP",
            "details": [f"Failed to read report: {type(e).__name__}: {e}"],
        }

    lines = content.splitlines()

    # Empty or whitespace-only report: nothing to verify.
    if not lines or not content.strip():
        return {"status": "PASS", "details": ["Report is empty — no CRITICAL issues"]}

    critical_lines = _detect_critical_lines(lines, content)

    if not critical_lines:
        return {
            "status": "PASS",
            "details": [f"No CRITICAL issues found in report: {path}"],
        }

    # Classify every CRITICAL issue as resolved or unresolved.
    details: list[str] = []
    unresolved_count = 0

    for line_num, line_content in critical_lines:
        issue_desc = _extract_issue_description(line_content)
        if _detect_resolved_after(lines, line_num):
            details.append(f"CRITICAL (line {line_num}) RESOLVED: {issue_desc}")
        else:
            details.append(f"CRITICAL (line {line_num}) UNRESOLVED: {issue_desc}")
            unresolved_count += 1

    if unresolved_count:
        details.insert(0, f"FAIL — {unresolved_count} CRITICAL issue(s) not resolved")
        return {"status": "FAIL", "details": details}

    details.insert(0, f"PASS — {len(critical_lines)} CRITICAL issue(s) all resolved")
    return {"status": "PASS", "details": details}


def _extract_issue_description(line: str) -> str:
    """Extract a short description from a CRITICAL issue line.

    Leading Markdown markers are removed: header hashes ("## "), bullet marks
    ("- ", "* ", "+ ") and list numbers ("1. ", "1) "), including combinations
    such as "## 1. ". A marker is only removed when whitespace follows it, so
    digits or emphasis asterisks that belong to the description itself
    ("500 errors ...", "**CRITICAL** ...") are kept.

    Args:
        line: The issue line.

    Returns:
        The description, cut to 80 characters plus "…" when longer. If nothing
        remains after removing the markers, the first 80 characters of the
        original line are returned instead.
    """
    cleaned = re.sub(r"^\s*(?:(?:#{1,6}|[-*+]|\d+[.)])\s+)+", "", line).strip()
    if len(cleaned) > 80:
        return cleaned[:80] + "…"
    return cleaned if cleaned else line[:80]