"""
two_pass_audit.py - 2-pass 구조적 감사 verifier (gstack /review 패턴)

Pass 1 (CRITICAL): 자동 블록 — 하나라도 발견 시 FAIL
Pass 2 (INFORMATIONAL): 경고만 — 발견 시 WARN

출처: gstack /review 스킬 (MIT 라이선스, https://github.com/garrytan/gstack)
"""

import os
import re
from typing import Optional

# Pass 1 (CRITICAL) patterns: category name -> regexes that are tested against
# individual source lines. A single match in any category makes verify()
# report FAIL.
CRITICAL_PATTERNS = {
    "SQL & Data Safety": [
        r"execute\s*\(",  # direct SQL execution
        r"\.raw\s*\(",  # raw SQL
        r"DROP\s+TABLE",
        r"DELETE\s+FROM\s+\w+\s*$",  # DELETE with nothing after the table name (no WHERE)
        r"TRUNCATE\s+TABLE",
        # UPDATE with no WHERE later on the same line (approximation).
        # The negative lookahead has to be evaluated right after SET, before
        # the rest of the line is consumed: placed after a greedy `.*` it can
        # always be satisfied by backtracking and therefore rejects nothing.
        r"UPDATE\s+\w+\s+SET\s+(?!.*\bWHERE\b)",
    ],
    "Race Conditions": [
        r"global\s+\w+",  # use of a global variable
        # any threading.* attribute except the lock/synchronization primitives
        r"threading\.(?!Lock|RLock|Event|Semaphore|Condition|Barrier)\w+",
    ],
    "LLM Output Trust Boundary": [
        r"eval\s*\(",  # eval call
        r"exec\s*\(",  # exec call
        r"subprocess\..*shell\s*=\s*True",  # shell injection
        r"os\.system\s*\(",  # os.system call
    ],
    "Enum/Value Completeness": [
        r"#\s*TODO.*switch|#\s*TODO.*case|#\s*TODO.*enum",  # unfinished switch/enum
    ],
}

# Pass 2 (INFORMATIONAL) patterns: category name -> regexes that are tested
# against individual source lines. Matches only produce a WARN from verify().
INFORMATIONAL_PATTERNS = {
    "Magic Numbers": [
        r"(?<!\w)(?:0x[\da-fA-F]+|\d{3,})\b(?!\s*[=:]\s*['\"])",  # hex or 3+ digit numeric literal
    ],
    "Dead Code": [
        r"#\s*(?:UNUSED|DEPRECATED|DEAD|REMOVE)",
        # A line ending in the `pass` keyword (approximation of an empty body).
        # The leading \b keeps identifiers such as `bypass` from matching.
        r"\bpass\s*$",
    ],
    "Test Gaps": [
        r"def\s+\w+\(.*\).*:\s*$",  # function definition (test existence is checked separately)
    ],
    "Performance": [
        # Nested loop on two consecutive lines (O(n^2)).
        # NOTE(review): this pattern needs a "\n" inside the searched text, so
        # a scanner that tests one line at a time can never match it.
        r"for\s+\w+\s+in\s+.*:\s*\n\s*for\s+\w+\s+in",
    ],
}

# Directory holding the per-task report files; _get_check_files() reads
# "<task_id>.md" from here to discover which files to audit.
DEFAULT_REPORTS_DIR = "/home/jay/workspace/memory/reports"


def _scan_file(filepath: str, patterns: dict[str, list[str]]) -> list[dict]:
    """Scan one file line by line and return every pattern match.

    Lines are skipped (never matched) when, after stripping whitespace, they
    start with ``#`` and do not contain ``TODO``, or start with a quote /
    raw-string prefix (e.g. lines that merely define a regex or string).

    Args:
        filepath: Path of the file to scan. It is read as UTF-8 with
            undecodable bytes dropped.
        patterns: Mapping of category name to a list of regex source strings.
            Each regex is searched against one line at a time.

    Returns:
        A list of finding dicts with the keys ``category``, ``file``,
        ``line`` (1-based), ``pattern`` and ``content`` (the stripped line,
        truncated to 100 characters). Findings are ordered by category, then
        regex, then line number. An unreadable file yields an empty list, and
        a regex that fails to compile is ignored.
    """
    try:
        with open(filepath, "r", encoding="utf-8", errors="ignore") as f:
            content = f.read()
    except OSError:
        # Best-effort scan: a missing/unreadable file contributes no findings.
        return []

    # Decide once per line whether it is scannable, instead of repeating the
    # strip/prefix checks for every regex.
    skipped_prefixes = ('r"', "r'", '"', "'")
    candidates = []
    for lineno, line in enumerate(content.splitlines(), start=1):
        stripped = line.strip()
        if stripped.startswith("#") and "TODO" not in stripped:
            continue
        if stripped.startswith(skipped_prefixes):
            continue
        candidates.append((lineno, line, stripped))

    findings = []
    for category, regexes in patterns.items():
        for regex in regexes:
            # Compile once per regex; an invalid pattern is skipped as a whole
            # rather than failing again on every line.
            try:
                compiled = re.compile(regex)
            except re.error:
                continue
            for lineno, line, stripped in candidates:
                if compiled.search(line):
                    findings.append(
                        {
                            "category": category,
                            "file": filepath,
                            "line": lineno,
                            "pattern": regex,
                            "content": stripped[:100],
                        }
                    )
    return findings


def _get_check_files(
    task_id: str,
    *,
    reports_dir: Optional[str] = None,
    workspace_root: str = "/home/jay/workspace",
) -> list[str]:
    """Extract the source files mentioned in a task's report.

    Reads ``<reports_dir>/<task_id>.md`` and collects every path under
    ``workspace_root`` that ends in ``.py``, ``.js``, ``.ts`` or ``.tsx`` and
    exists as a regular file.

    Args:
        task_id: Task identifier; the report file name is ``<task_id>.md``.
        reports_dir: Directory containing the report. Defaults to
            ``DEFAULT_REPORTS_DIR``.
        workspace_root: Absolute directory prefix a path must start with to
            be picked up from the report text.

    Returns:
        Existing file paths in order of first appearance, without
        duplicates. Empty if the report is missing or unreadable.
    """
    if reports_dir is None:
        reports_dir = DEFAULT_REPORTS_DIR
    report_path = os.path.join(reports_dir, f"{task_id}.md")

    try:
        # errors="ignore" so a stray non-UTF-8 byte in a report does not
        # abort the audit.
        with open(report_path, "r", encoding="utf-8", errors="ignore") as f:
            content = f.read()
    except OSError:
        # Missing or unreadable report: nothing to audit.
        return []

    # `tsx?` plus the trailing \b makes the extension match completely:
    # without it "x.tsx" is captured as "x.ts" and "x.json" as "x.js".
    path_re = re.compile(
        re.escape(workspace_root.rstrip("/")) + r"/\S+\.(?:py|js|tsx?)\b"
    )
    # dict.fromkeys de-duplicates while keeping first-seen order, so a file
    # mentioned several times is audited only once.
    mentioned = dict.fromkeys(path_re.findall(content))
    return [path for path in mentioned if os.path.isfile(path)]


def _format_findings(title: str, tag: str, findings: list[dict], limit: int = 10) -> list[str]:
    """Render one pass's findings as detail lines, listing at most ``limit`` of them."""
    if not findings:
        return []
    lines = [f"{title}: {len(findings)} finding(s)"]
    lines.extend(
        f"  [{tag}] {f['category']} — {f['file']}:{f['line']} — {f['content']}"
        for f in findings[:limit]
    )
    if len(findings) > limit:
        lines.append(f"  ... and {len(findings) - limit} more")
    return lines


def verify(task_id: str, check_files: Optional[list[str]] = None, **kwargs) -> dict:
    """Run the 2-pass structural audit.

    Pass 1 (CRITICAL) findings      -> FAIL
    only Pass 2 (INFORMATIONAL)     -> WARN
    no findings                     -> PASS
    nothing that can be audited     -> SKIP

    Args:
        task_id: ID of the task to verify.
        check_files: Files to audit. When empty or omitted, the list is
            extracted from the task's report via ``_get_check_files``.
        **kwargs: Accepted and ignored.

    Returns:
        ``{"status": "PASS" | "WARN" | "FAIL" | "SKIP", "details": [...]}``
        where ``details`` is a list of human-readable lines.
    """
    if not task_id:
        return {"status": "SKIP", "details": ["No task_id provided"]}

    files = check_files or _get_check_files(task_id)
    if not files:
        return {
            "status": "SKIP",
            "details": ["No files to audit (no check_files and no files found in report)"],
        }

    # Only source files are scanned. Filter up front so the reported counts
    # reflect what was really audited, and so a list holding no source file
    # is not reported as a clean PASS.
    auditable = [path for path in files if path.endswith((".py", ".js", ".ts", ".tsx"))]
    if not auditable:
        return {
            "status": "SKIP",
            "details": [
                f"No auditable source files (.py/.js/.ts/.tsx) among {len(files)} candidate file(s)"
            ],
        }

    pass1_findings = []
    pass2_findings = []
    for filepath in auditable:
        pass1_findings.extend(_scan_file(filepath, CRITICAL_PATTERNS))
        pass2_findings.extend(_scan_file(filepath, INFORMATIONAL_PATTERNS))

    details = _format_findings("Pass 1 CRITICAL", "CRITICAL", pass1_findings)
    details += _format_findings("Pass 2 INFORMATIONAL", "INFO", pass2_findings)
    if not pass1_findings and not pass2_findings:
        details.append(f"2-pass audit clean: {len(auditable)} file(s) scanned, 0 findings")
    details.append(f"Files scanned: {len(auditable)}")

    # Any CRITICAL finding fails the audit; INFORMATIONAL findings only warn.
    if pass1_findings:
        status = "FAIL"
    elif pass2_findings:
        status = "WARN"
    else:
        status = "PASS"
    return {"status": status, "details": details}