#!/usr/bin/env python3
"""
pre_push_guard.py — push 직전 4가지 안전 점검 (Guard MVP Phase 1)

4가지 검사:
  B-1 working tree clean 검사      (working_tree_modified ∪ untracked 기준)
  B-2 main...origin/main ahead/behind 검사
  B-3 task scope 일치 검사         (head_diff 기준)
  B-4 qc-result JSON vs 보고서 일치 검사

모두 PASS → exit 0 / 한 건이라도 FAIL → exit 1
"""

import argparse
import fnmatch
import json
import os
import re
import subprocess
import sys
# qc_report_guard는 같은 디렉토리에 있으므로 경로 추가
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
import qc_report_guard

# task_scope도 import
import task_scope as _task_scope


# ── 시스템 자동 파일 무시 패턴 (task-scope-guard.sh:69 그대로) ──────────────
# task_id는 런타임에 치환하므로 함수로 생성

def _build_system_ignore(task_id: str) -> re.Pattern:
    """Compile the regex describing paths the guard treats as system noise.

    The result is one big alternation intended to be used with ``search()``
    on repo-relative paths. ``task_id`` is regex-escaped before being
    spliced in, so the current task's own metadata is ignored and its plan
    directory is excluded from the "other tasks" rule.

    Args:
        task_id: ID of the task being pushed (e.g. ``task-2434``).

    Returns:
        The compiled ignore pattern.
    """
    tid = re.escape(task_id)

    # 1. Directories ignored wholesale (auto-generated by the system).
    ignored_dirs = (
        "memory/heartbeats/", "memory/daily/", "memory/logs/", "memory/reports/",
        "memory/capabilities/", "memory/sessions/", "memory/meetings/",
        "memory/backups/", "memory/screenshots/", "memory/events/",
        "logs/", "whisper/", "memory/whisper/", "output/",
    )

    alternatives = [
        "^(" + "|".join(ignored_dirs) + ")",
        # 2. Individual files.
        r"^bot-activity\.json$",
        r"^token-ledger\.json$",
        r"^memory/token-ledger\.json$",
        # NOTE(review): no trailing "$" here, so this is a prefix match
        # (e.g. "memory/task-timers.json.lock" is ignored too) — confirm
        # whether that is intended.
        r"^memory/task-timers\.json",
        r"^memory/pipeline-status\.json$",
        r"^memory/preview-state\.json$",
        r"^memory/merge-log\.json$",
        r"^memory/bot_settings_sync\.json$",
        r"^memory/memory-check-log\.json$",
        r"^memory/canary-status\.json$",
        r"^\.heartbeat$",
        r"^memory/\.task-counter$",
        r"^config/constants\.json$",
        r"^scripts/gemini_rate_tracker\.json$",
        r"^tests/coverage-report\.txt$",
        # 3. Metadata belonging to this very task.
        rf"^memory/tasks/{tid}\.md$",
        rf"^memory/tasks/{tid}/",
        rf"^memory/reports/{tid}(?:-[\w-]+)?\.md$",
        # 4. Files of other tasks (plan dirs of any task except this one).
        r"^memory/tasks/(?:task-|dispatch-).*\.md$",
        rf"^memory/plans/tasks/(?!{tid}(?:/|$))[^/]+/.*$",
        # 5. pytest infrastructure.
        r"(?:^|/)conftest\.py$",
        # 6. Files produced by other tasks (teams/, unrelated test modules).
        r"^teams/",
        r"^tests/test_(?!pre_push_guard|task_scope|qc_report_guard)[^/]*\.py$",
    ]
    return re.compile("|".join(alternatives))


# ── glob 매칭 헬퍼 (task-scope-guard.sh:87 그대로) ───────────────────────────

def glob_match(pattern: str, path: str) -> bool:
    """Return True when ``path`` matches the glob ``pattern``.

    Matching is built on ``fnmatch`` (where ``*`` is not stopped by ``/``)
    with a few special cases for ``**``:

    * ``**`` alone matches everything.
    * ``<prefix>/**`` matches ``<prefix>`` itself and anything beneath it.
    * Any other pattern containing ``**`` is tried twice: once with every
      ``**`` collapsed to ``*`` against the full path, and once with the
      ``**/`` and ``/**`` pieces removed against the basename only.

    NOTE(review): with these rules ``src/**/*.py`` does not match
    ``src/b.py`` (zero intermediate directories) — confirm this is the
    intended behaviour.
    """
    if pattern == "**":
        return True

    if pattern.endswith("/**"):
        root = pattern[:-3]
        return path == root or path.startswith(root + "/")

    if "**" not in pattern:
        return fnmatch.fnmatch(path, pattern)

    # First attempt: treat "**" like "*" and match the full path.
    collapsed = pattern.replace("**", "*")
    if fnmatch.fnmatch(path, collapsed):
        return True

    # Second attempt: drop the "**" segments and match the file name alone.
    basename_glob = re.sub(r"\*\*/", "", pattern).replace("/**", "")
    return fnmatch.fnmatch(os.path.basename(path), basename_glob)


# ── capability snapshot resolver ─────────────────────────────────────────────

def _resolve_allowed_resources(task_id: str, workspace: str) -> tuple[dict | None, str]:
    """
    allowed_resources 해소.
    1) <workspace>/memory/capabilities/<task_id>.json 우선
    2) 없으면 <workspace>/memory/tasks/<task_id>.md ## allowed_resources YAML fallback
    반환: (allowed_resources_dict | None, 에러메시지|"")
    """
    cap_path = os.path.join(workspace, "memory", "capabilities", f"{task_id}.json")
    if os.path.exists(cap_path):
        try:
            with open(cap_path, encoding="utf-8") as f:
                snap = json.load(f)
            ar = snap.get("allowed_resources", {})
            # placeholder task-XXXX → 실제 task_id로 in-memory 치환
            ar = _substitute_placeholder(ar, task_id)
            return ar, ""
        except Exception as e:
            return None, f"capability snapshot 파싱 실패: {e}"

    # fallback: task 파일 YAML 블록
    task_path = os.path.join(workspace, "memory", "tasks", f"{task_id}.md")
    if not os.path.exists(task_path):
        return None, f"capability snapshot 없음, task 파일도 없음: {task_path}"

    try:
        with open(task_path, encoding="utf-8") as f:
            task_text = f.read()
        ar = _parse_allowed_resources_yaml(task_text)
        if ar is None:
            return None, "task 파일에 ## allowed_resources 블록 없음"
        ar = _substitute_placeholder(ar, task_id)
        return ar, ""
    except Exception as e:
        return None, f"task 파일 파싱 실패: {e}"


def _substitute_placeholder(ar: dict, task_id: str) -> dict:
    """Return a copy of ``ar`` with ``task-XXXX`` replaced by ``task_id``.

    Only the ``paths`` and ``forbidden_paths`` lists are rewritten; every
    other key is carried over untouched. The input mapping and its lists
    are not modified (the copy is shallow, but the two lists are rebuilt).
    """
    resolved = dict(ar)
    for key in ("paths", "forbidden_paths"):
        if key in resolved:
            resolved[key] = [
                entry.replace("task-XXXX", task_id) for entry in resolved[key]
            ]
    return resolved


def _parse_allowed_resources_yaml(text: str) -> dict | None:
    """## allowed_resources 섹션에서 YAML 블록 파싱 (표준 라이브러리만)."""
    # ```yaml ... ``` 블록 찾기
    m = re.search(
        r"^##\s+allowed_resources\s*\n```(?:yaml)?\n(.*?)```",
        text, re.MULTILINE | re.DOTALL
    )
    if not m:
        return None
    yaml_text = m.group(1)

    # 단순 YAML 파서: paths: / forbidden_paths: 리스트만 추출
    result: dict = {}
    current_key: str | None = None
    for line in yaml_text.splitlines():
        stripped = line.strip()
        if stripped.startswith("paths:"):
            current_key = "paths"
            result[current_key] = []
        elif stripped.startswith("forbidden_paths:"):
            current_key = "forbidden_paths"
            result[current_key] = []
        elif stripped.startswith("- ") and current_key in ("paths", "forbidden_paths"):
            result[current_key].append(stripped[2:].strip().strip('"'))
        elif stripped and not stripped.startswith("-") and ":" in stripped:
            current_key = None
    return result if result else None


# ── git 헬퍼 ─────────────────────────────────────────────────────────────────

def _run_git(args: list[str], cwd: str) -> tuple[int, str]:
    """Run ``git -C <cwd> <args...>`` without a shell.

    Returns:
        ``(returncode, stdout)`` with stdout stripped of surrounding
        whitespace. stderr is captured but not returned, and a non-zero
        exit status does not raise.
    """
    completed = subprocess.run(
        ["git", "-C", cwd, *args],
        capture_output=True,
        text=True,
        check=False,
    )
    return completed.returncode, completed.stdout.strip()


# ── B-1: working tree clean 검사 ─────────────────────────────────────────────

def check_b1_working_tree(
    diff_sets: dict,
    ignore_pattern: re.Pattern,
    allowed_resources: dict,
) -> tuple[bool, str]:
    """B-1: fail when the working tree holds changes outside the task scope.

    The candidate set is ``working_tree_modified ∪ untracked`` from
    ``diff_sets``. Paths matching ``ignore_pattern`` are dropped first; each
    remaining path must not match any ``forbidden_paths`` glob and must
    match at least one ``paths`` glob of ``allowed_resources``.

    Paths are processed in sorted order so that the (truncated) list in the
    failure message is the same on every run.

    Returns:
        ``(passed, detail)`` where ``detail`` is a human-readable message.
    """
    # git prints paths with special / non-ASCII characters wrapped in double
    # quotes ("\355\205..."); drop the surrounding quotes before matching.
    # The escape sequences inside are left as-is.
    def _strip_quotes(p: str) -> str:
        return p.strip('"') if p.startswith('"') and p.endswith('"') else p

    dirty = {
        _strip_quotes(p)
        for p in (
            set(diff_sets.get("working_tree_modified", []))
            | set(diff_sets.get("untracked", []))
        )
    }

    # Apply system-ignore; sort because set iteration order is not stable
    # across interpreter runs and the message only shows the first 5 hits.
    after_ignore = sorted(p for p in dirty if not ignore_pattern.search(p))

    if not after_ignore:
        return True, "변경 없음 (clean)"

    allowed_paths: list[str] = allowed_resources.get("paths", [])
    forbidden_paths: list[str] = allowed_resources.get("forbidden_paths", [])
    out_of_scope: list[str] = []

    for path in after_ignore:
        # Forbidden globs take precedence over allowed ones.
        if any(glob_match(fp, path) for fp in forbidden_paths):
            out_of_scope.append(f"{path} (forbidden)")
            continue
        if not any(glob_match(ap, path) for ap in allowed_paths):
            out_of_scope.append(path)

    if out_of_scope:
        detail = ", ".join(out_of_scope[:5])
        if len(out_of_scope) > 5:
            detail += f" ... ({len(out_of_scope)}건)"
        return False, (
            f"task scope 밖 변경 {len(out_of_scope)}건: {detail}"
            " — git stash push -u 로 격리 후 재시도"
        )

    return True, f"system-ignore 후 {len(after_ignore)}건 모두 scope 내"


# ── B-2: ahead/behind 검사 ───────────────────────────────────────────────────

def check_b2_ahead_behind(base_ref: str, cwd: str) -> tuple[bool, str]:
    """B-2: fail when HEAD is behind ``base_ref``.

    Runs ``git rev-list --left-right --count <base_ref>...HEAD``; the left
    count (commits only on ``base_ref``) is read as *behind* and the right
    count (commits only on HEAD) as *ahead*.

    The check is lenient: if git exits non-zero or prints something other
    than two fields, the result is PASS with an explanatory message.

    Returns:
        ``(passed, detail)``.
    """
    rc, out = _run_git(
        ["rev-list", "--left-right", "--count", f"{base_ref}...HEAD"], cwd
    )
    if rc != 0:
        # e.g. no origin/main in a local-only environment.
        return True, f"rev-list 실패 (rc={rc}) — 로컬 전용 환경으로 간주, PASS"

    counts = out.split()
    if len(counts) != 2:
        return True, f"rev-list 출력 파싱 실패 ({out!r}) — PASS (관대)"

    behind, ahead = map(int, counts)

    if behind > 0:
        return False, f"behind={behind} ahead={ahead} — rebase 권장: git pull --rebase"
    if not (ahead or behind):
        return True, "ahead=0 behind=0 — push 불필요"
    return True, f"ahead={ahead} behind={behind} — push 가능"


# ── B-3: task scope 일치 검사 ────────────────────────────────────────────────

def check_b3_task_scope(
    diff_sets: dict,
    ignore_pattern: re.Pattern,
    allowed_resources: dict,
) -> tuple[bool, str]:
    """B-3: verify that the files in ``head_diff`` stay inside the task scope.

    For every path in ``diff_sets["head_diff"]``:
      1. a match against any ``forbidden_paths`` glob is a violation, even
         if the path would otherwise be system-ignored;
      2. otherwise paths matching ``ignore_pattern`` are skipped;
      3. otherwise the path must match at least one ``paths`` glob.

    Surrounding double quotes (git's quoting of paths with special or
    non-ASCII characters) are removed before matching, the same
    normalisation ``check_b1_working_tree`` applies, so that a quoted path
    can still match the ignore / allowed patterns.

    Returns:
        ``(passed, detail)``. Forbidden hits are reported in preference to
        plain out-of-scope files.
    """
    head_diff: list[str] = diff_sets.get("head_diff", [])
    allowed_paths: list[str] = allowed_resources.get("paths", [])
    forbidden_paths: list[str] = allowed_resources.get("forbidden_paths", [])

    if not head_diff:
        return True, "head_diff 없음 (커밋 없음) — PASS"

    def _strip_quotes(p: str) -> str:
        return p.strip('"') if p.startswith('"') and p.endswith('"') else p

    forbidden_hits: list[str] = []
    out_of_scope: list[str] = []

    for raw_path in head_diff:
        path = _strip_quotes(raw_path)
        # Forbidden is checked before system-ignore on purpose.
        if any(glob_match(fp, path) for fp in forbidden_paths):
            forbidden_hits.append(path)
            continue
        if ignore_pattern.search(path):
            continue
        if not any(glob_match(ap, path) for ap in allowed_paths):
            out_of_scope.append(path)

    if forbidden_hits:
        detail = ", ".join(forbidden_hits[:5])
        if len(forbidden_hits) > 5:
            # Same truncation marker as the out-of-scope message below.
            detail += f" ... ({len(forbidden_hits)}건)"
        return False, f"forbidden_paths 침범 {len(forbidden_hits)}건: {detail}"

    if out_of_scope:
        detail = ", ".join(out_of_scope[:5])
        if len(out_of_scope) > 5:
            detail += f" ... ({len(out_of_scope)}건)"
        return False, f"task scope 밖 파일 {len(out_of_scope)}건: {detail}"

    return True, f"head_diff {len(head_diff)}건 모두 scope 내"


# ── B-4: qc-result ↔ 보고서 일치 검사 ───────────────────────────────────────

def check_b4_qc_report(
    task_id: str,
    workspace: str,
    strict: bool,
) -> tuple[bool, str]:
    """B-4: compare the qc-result JSON with the report via ``qc_report_guard``.

    Delegates to ``qc_report_guard.check()`` and interprets its result dict
    (keys read here: ``json_verdict``, ``ok``, ``violations``,
    ``report_verdict``).

    When ``json_verdict`` is ``"MISSING"``: with ``strict`` the check fails;
    otherwise it passes and a WARN line is written to stderr.

    Returns:
        ``(passed, detail)``; on a mismatch ``detail`` joins up to three
        violations.
    """
    outcome = qc_report_guard.check(task_id=task_id, workspace=workspace)

    if outcome["json_verdict"] != "MISSING":
        if outcome["ok"]:
            return True, (
                f"JSON={outcome['json_verdict']} / report={outcome['report_verdict']} — 일치"
            )
        return False, "; ".join(outcome["violations"][:3])

    # qc-result file is absent.
    if strict:
        return False, "qc-result 파일 없음 (--strict 모드: FAIL)"

    # Default mode: keep rc=0 but leave a notice on stderr.
    print(
        "[pre-push-guard] WARN: qc-result 없음 — --strict 미사용이므로 통과 (B-4)",
        file=sys.stderr,
    )
    return True, "qc-result 없음 — WARN (rc=0 유지, --strict 없음)"


# ── 메인 ─────────────────────────────────────────────────────────────────────

def main() -> None:
    """CLI entry point: run checks B-1 … B-4 and exit with the verdict.

    All progress and results are written to stderr. The process exits with
    0 when every check passes and with 1 when any check fails or when
    ``allowed_resources`` cannot be resolved. A failure reported while
    collecting the diff sets only produces a WARN line; the checks still
    run on whatever was collected.
    """
    default_root = "/home/jay/workspace"

    parser = argparse.ArgumentParser(
        description="pre_push_guard.py — push 직전 4가지 안전 점검 (Guard MVP Phase 1)"
    )
    parser.add_argument("--task-id", required=True, help="task ID (예: task-2434)")
    parser.add_argument(
        "--base-sha",
        default="origin/main",
        help="비교 기준 ref (기본: origin/main)",
    )
    parser.add_argument(
        "--cwd",
        default=default_root,
        help=f"git 저장소 루트 (기본: {default_root})",
    )
    parser.add_argument(
        "--workspace",
        default=default_root,
        help=f"capability/event 검색용 루트 (기본: {default_root})",
    )
    parser.add_argument(
        "--strict",
        action="store_true",
        help="qc-result 누락 시 FAIL (기본: WARN으로 통과)",
    )
    opts = parser.parse_args()

    def _log(message: str) -> None:
        print(message, file=sys.stderr)

    def _report(label: str, outcome: tuple[bool, str]) -> bool:
        """Print one result line and hand back the pass flag."""
        passed, detail = outcome
        verdict = "PASS" if passed else "FAIL"
        _log(f"{label}{verdict} — {detail}")
        return passed

    _log(f"[pre-push-guard] task={opts.task_id}")

    # Resolve the task's allowed_resources; nothing can be checked without it.
    allowed_resources, res_err = _resolve_allowed_resources(opts.task_id, opts.workspace)
    if allowed_resources is None:
        _log(f"[pre-push-guard] ERROR: {res_err}")
        sys.exit(1)

    ignore_pattern = _build_system_ignore(opts.task_id)

    # Collect the diff sets through the task_scope module.
    diff_sets, diff_err = _task_scope.get_diff_sets(opts.base_sha, opts.cwd)
    if diff_err:
        # Not treated as fatal: continue with whatever was collected.
        _log(f"[pre-push-guard] WARN: diff 수집 일부 실패: {diff_err}")

    # Each check runs and is reported before the next one starts.
    b1_pass = _report(
        "  B-1 working tree clean    : ",
        check_b1_working_tree(diff_sets, ignore_pattern, allowed_resources),
    )
    b2_pass = _report(
        "  B-2 ahead/behind          : ",
        check_b2_ahead_behind(opts.base_sha, opts.cwd),
    )
    b3_pass = _report(
        "  B-3 task scope 일치       : ",
        check_b3_task_scope(diff_sets, ignore_pattern, allowed_resources),
    )
    b4_pass = _report(
        "  B-4 보고서/qc-result 일치 : ",
        check_b4_qc_report(opts.task_id, opts.workspace, opts.strict),
    )

    if b1_pass and b2_pass and b3_pass and b4_pass:
        _log("[pre-push-guard] OVERALL: PASS (rc=0)")
        sys.exit(0)

    _log("[pre-push-guard] OVERALL: FAIL (rc=1)")
    sys.exit(1)


# Run the guard only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()