"""
symbol_existence_check.py - 심볼 존재 여부 검증 verifier

보고서(.md)에서 "구현 완료"라고 한 핵심 심볼(함수명, 클래스명 등)이
실제 파일에 존재하는지 자동 검증.

이리스(Iris) dev1팀 프론트엔드 개발자 작성
task-1882
"""

import os
import re
import subprocess
from typing import Optional

def _build_file_path_patterns(workspace_root: str) -> list:
    """Build the regexes used to pull absolute file paths out of a report.

    Args:
        workspace_root: Absolute workspace root that every extracted path
            must start with. Trailing slashes are ignored.

    Returns:
        A list of three compiled patterns. Each captures the path in
        group 1: a path following a table pipe (``| /root/...``), a path
        following a list dash (``- /root/...``), and a bare path that is not
        preceded by a word character.
    """
    # Without this, "/ws/" would build the prefix "/ws//", which never
    # matches a real path and makes the caller find no files at all.
    escaped = re.escape(workspace_root.rstrip("/"))
    return [
        re.compile(r"\|\s*(" + escaped + r"/[^\s|]+)"),   # | /path/...
        re.compile(r"-\s*(" + escaped + r"/[^\s]+)"),      # - /path/...
        re.compile(r"(?<!\w)(" + escaped + r"/[^\s|)>`\]]+)"),  # plain /path/...
    ]

# Patterns that extract a defined symbol name (capture group 1) from report text or code blocks
_SYMBOL_PATTERNS = [
    re.compile(r"\bdef\s+(\w+)"),       # Python function
    re.compile(r"\bfunction\s+(\w+)"),  # JS/TS function
    re.compile(r"\bclass\s+(\w+)"),     # class
]

# "file:line" reference, e.g. /home/jay/workspace/path/to/file.py:123
# (group 1 = file path, group 2 = line number)
_FILE_LINE_PATTERN = re.compile(r"(/home/jay/workspace/[^\s:]+):(\d+)")

# Markdown heading (levels 1-4) that opens a "modified files" / "output files" section;
# accepts the Korean and English titles listed in the alternation, case-insensitively
_SECTION_PATTERN = re.compile(
    r"^#{1,4}\s*(수정\s*파일|산출물|변경\s*파일|Modified\s*Files?|Output\s*Files?)",
    re.IGNORECASE | re.MULTILINE,
)


def _find_report(task_id: str, workspace_root: str) -> Optional[str]:
    """Return the path of the report file for ``task_id``, or None.

    The lookup happens in ``<workspace_root>/memory/reports``:

    1. ``<task_id>.md`` and ``<task_id>_report.md`` are tried first.
    2. Otherwise the directory is scanned for any ``.md`` file whose name
       contains the task ID.

    Args:
        task_id: Task identifier, e.g. ``"task-1882"``.
        workspace_root: Workspace root that contains ``memory/reports``.

    Returns:
        The report path, or None when nothing matches.
    """
    reports_dir = os.path.join(workspace_root, "memory", "reports")

    for fname in (f"{task_id}.md", f"{task_id}_report.md"):
        path = os.path.join(reports_dir, fname)
        if os.path.isfile(path):
            return path

    # An empty ID would "match" every file name, so there is nothing
    # meaningful to search for in the fallback scan.
    if not task_id or not os.path.isdir(reports_dir):
        return None

    # The ID must not be embedded in a longer number; otherwise "task-188"
    # would pick up the report that belongs to "task-1882".
    id_pattern = re.compile(r"(?<!\d)" + re.escape(task_id) + r"(?!\d)")

    # os.listdir() order is arbitrary; sort so the same report is chosen on
    # every run when several files match.
    for fname in sorted(os.listdir(reports_dir)):
        if fname.endswith(".md") and id_pattern.search(fname):
            return os.path.join(reports_dir, fname)
    return None


def _extract_modified_files(content: str, workspace_root: str = "/home/jay/workspace") -> tuple:
    """Extract code-file paths from the report's modified-files/output sections.

    Every section whose heading matches ``_SECTION_PATTERN`` contributes its
    lines, even when unrelated sections sit between two matching ones. A
    section ends at the next level 1-4 markdown heading. When the report has
    no matching heading at all, the whole report is scanned instead.

    Args:
        content: Full markdown text of the report.
        workspace_root: Only absolute paths under this root are recognised.

    Returns:
        ``(files, has_section)``: ``files`` is a sorted list of unique paths
        that carry a recognised code-file extension, and ``has_section``
        tells whether any matching section heading was found.
    """
    # Only source/config files are of interest (Python/JS/TS/YAML/JSON/...).
    code_extensions = frozenset((
        ".py", ".js", ".ts", ".jsx", ".tsx",
        ".yaml", ".yml", ".json", ".sh", ".rb", ".go",
    ))

    has_section = bool(_SECTION_PATTERN.search(content))

    if has_section:
        section_lines = []
        in_section = False
        for line in content.split("\n"):
            if _SECTION_PATTERN.match(line):
                in_section = True
            elif re.match(r"^#{1,4}\s+\S", line):
                # Any other heading closes the current section, but keep
                # scanning: a later matching section must not be lost.
                in_section = False
            elif in_section:
                section_lines.append(line)
        section_content = "\n".join(section_lines)
    else:
        # No dedicated section: fall back to the whole report.
        section_content = content

    files = set()
    for pattern in _build_file_path_patterns(workspace_root):
        for match in pattern.finditer(section_content):
            path = match.group(1).strip().rstrip(".,;:)")
            # Accept "file.py:123" / "file.py:10-20" references; without this
            # the extension would read ".py:123" and the file be dropped.
            path = re.sub(r":\d+(?:-\d+)?$", "", path)
            if os.path.splitext(path)[1] in code_extensions:
                files.add(path)

    return sorted(files), has_section


def _extract_symbols_from_code_blocks(content: str) -> list:
    """Collect function/class names that appear inside fenced code blocks.

    Each ``` ... ``` block in ``content`` is scanned with every pattern in
    ``_SYMBOL_PATTERNS``; a handful of keyword-like names are discarded.

    Returns:
        The unique symbol names in sorted order.
    """
    # Names that can follow "def"/"class" in snippets but are not real symbols.
    ignored = {"self", "cls", "None", "True", "False", "pass", "return"}
    fence = re.compile(r"```[^\n]*\n(.*?)```", re.DOTALL)

    names = set()
    for fenced in fence.finditer(content):
        body = fenced.group(1)
        for matcher in _SYMBOL_PATTERNS:
            # Each pattern has exactly one group, so findall yields the names.
            names.update(matcher.findall(body))

    return sorted(names - ignored)


def _extract_file_line_refs(content: str, workspace_root: str = "/home/jay/workspace") -> list:
    """Extract ``path:line`` references from the report text.

    Args:
        content: Report text to scan.
        workspace_root: Only paths under this root are recognised. Trailing
            slashes are ignored. The default keeps the behaviour of callers
            that pass ``content`` only.

    Returns:
        A list of ``(filepath, lineno)`` tuples in order of appearance, with
        ``lineno`` converted to ``int``.
    """
    # Built from workspace_root so that a non-default workspace is handled
    # the same way as in the other path-extraction helpers.
    ref_pattern = re.compile(
        "(" + re.escape(workspace_root.rstrip("/")) + r"/[^\s:]+):(\d+)"
    )
    return [(m.group(1), int(m.group(2))) for m in ref_pattern.finditer(content)]


def _symbol_exists_in_file(filepath: str, symbol: str) -> bool:
    """Return True if ``filepath`` contains a definition of ``symbol``.

    A definition is ``def <symbol>``, ``class <symbol>`` or
    ``function <symbol>``; the ``async`` variants are covered because the
    keyword itself is still ``def``/``function``. The name is matched as a
    whole word, so ``get`` is not considered present just because
    ``get_user`` is defined.

    Args:
        filepath: File to scan.
        symbol: Function or class name to look for.

    Returns:
        True when a matching definition line exists. False when it does not,
        when the file is missing or unreadable, or when ``symbol`` is empty.
    """
    if not symbol or not os.path.isfile(filepath):
        return False

    # Word boundaries on both sides: the keyword must not be the tail of
    # another identifier and the name must not be a mere prefix.
    definition = re.compile(
        r"\b(?:def|class|function)\s+" + re.escape(symbol) + r"\b"
    )

    try:
        # errors="replace" keeps the scan going on files that are not valid
        # UTF-8 instead of treating them as "symbol missing".
        with open(filepath, "r", encoding="utf-8", errors="replace") as f:
            return any(definition.search(line) for line in f)
    except OSError:
        # Best effort: an unreadable file cannot confirm the symbol.
        return False


def verify(task_id: str, workspace_root: str = "/home/jay/workspace") -> dict:
    """Check that symbols shown in a task report exist in its modified files.

    Function and class names are taken from the report's fenced code blocks
    and looked up in the files listed in the report's modified-files/output
    section. A symbol passes when at least one of those files defines it.

    Args:
        task_id: ID of the task whose report is checked.
        workspace_root: Workspace root path.

    Returns:
        ``{"status": ..., "details": [...]}`` where status is

        * ``"SKIP"`` - the report is missing or unreadable, has no
          modified-files section, lists no code files, or shows no symbols;
        * ``"FAIL"`` - at least one symbol is defined in none of the files;
        * ``"PASS"`` - every symbol is defined in at least one file.
    """
    # 1. Locate and read the report.
    report_path = _find_report(task_id, workspace_root)
    if report_path is None:
        return {
            "status": "SKIP",
            "details": [f"Report not found for task_id='{task_id}'"],
        }

    try:
        with open(report_path, "r", encoding="utf-8") as f:
            content = f.read()
    except OSError as e:
        return {
            "status": "SKIP",
            "details": [f"Failed to read report: {type(e).__name__}: {e}"],
        }

    # 2. File paths from the modified-files section.
    modified_files, has_section = _extract_modified_files(content, workspace_root)

    if not has_section:
        return {
            "status": "SKIP",
            "details": ["보고서에 '수정 파일' 또는 '산출물' 섹션이 없습니다."],
        }

    if not modified_files:
        return {
            "status": "SKIP",
            "details": [
                "보고서에 '수정 파일' 섹션은 있으나 추출된 파일 경로가 없습니다.",
                f"Report: {report_path}",
            ],
        }

    # 3. Symbols from the report's code blocks.
    symbols = _extract_symbols_from_code_blocks(content)

    if not symbols:
        return {
            "status": "SKIP",
            "details": [
                "보고서 코드 블록에서 검증할 심볼(함수명/클래스명)을 추출하지 못했습니다.",
                f"Modified files: {len(modified_files)}",
            ],
        }

    # 4. Look every symbol up in every file that exists on disk.
    details = [
        f"Report: {report_path}",
        f"Modified files: {len(modified_files)}",
        f"Symbols to check: {', '.join(symbols)}",
    ]

    existing_files = []
    for filepath in modified_files:
        if os.path.isfile(filepath):
            existing_files.append(filepath)
        else:
            details.append(f"SKIP (file not found): {filepath}")

    # A single pass records both the per-file hits and the set of symbols
    # seen anywhere. A symbol absent from one file is not a failure by
    # itself; only a symbol found in no file at all is.
    found_symbols = set()
    for filepath in existing_files:
        for symbol in symbols:
            if _symbol_exists_in_file(filepath, symbol):
                details.append(f"OK: {symbol} in {filepath}")
                found_symbols.add(symbol)

    truly_missing = [symbol for symbol in symbols if symbol not in found_symbols]
    if truly_missing:
        for sym in truly_missing:
            details.append(f"MISSING: '{sym}' not found in any modified file")
        return {"status": "FAIL", "details": details}

    return {"status": "PASS", "details": details}


if __name__ == "__main__":
    # CLI entry point: symbol_existence_check.py <task_id> [workspace_root]
    # The outcome is always printed as pretty-printed JSON on stdout.
    import json
    import sys

    _cli_args = sys.argv[1:]

    if not _cli_args:
        # No task ID given: report usage as a SKIP result and exit cleanly.
        _usage = {
            "status": "SKIP",
            "details": ["Usage: symbol_existence_check.py <task_id> [workspace_root]"],
        }
        print(json.dumps(_usage, ensure_ascii=False, indent=2))
        sys.exit(0)

    _task_id = _cli_args[0]
    if len(_cli_args) > 1:
        _workspace_root = _cli_args[1]
    else:
        _workspace_root = "/home/jay/workspace"

    result = verify(_task_id, _workspace_root)
    print(json.dumps(result, ensure_ascii=False, indent=2))