#!/usr/bin/env python3
"""divergence_guard.py — local main ↔ origin/main divergence 측정 + dispatch HOLD 판정(fail-closed).

task-2700 Phase 1 모듈 1.
"""
from __future__ import annotations

import argparse
import json
import os
import subprocess
import sys
from dataclasses import asdict, dataclass
from datetime import datetime, timezone
from pathlib import Path

# Task kinds subject to the divergence gate. should_hold() lets every kind
# that is not listed here pass without looking at the measurement.
DIVERGENCE_GATE_TASK_KINDS = {"coding", "security", "callback", "finish-task"}

# Lowercase keywords that classify_task_kind() treats as marking a read-only
# (non-gated) task, either as an explicit kind value or as a substring of the
# task text. "리서치" is Korean for "research" and "문서" for "document".
_READ_ONLY_KEYWORDS = {
    "read_only", "read-only", "readonly",
    "diagnosis", "diagnose",
    "document", "documentation", "docs",
    "research", "리서치", "문서",
    "watcher", "monitor",
    "report",
}


# ---------------------------------------------------------------------------
# Data classes
# ---------------------------------------------------------------------------

@dataclass
class DivergenceResult:
    """Outcome of comparing HEAD against a remote ref (origin/main by default).

    When the measurement fails, measure_divergence() reports ahead=-1,
    behind=-1, diverged=True (fail-closed), measured=False and an error text.
    """
    ahead: int           # number of commits on HEAD that are not on the remote ref (-1 if unmeasured)
    behind: int          # number of commits on the remote ref that are not on HEAD (-1 if unmeasured)
    local_sha: str       # SHA of HEAD, or "" when it could not be resolved
    origin_sha: str      # SHA of the remote ref, or "" when it could not be resolved
    diverged: bool       # ahead>0 or behind>0; forced to True when the measurement failed
    measured: bool       # whether the measurement succeeded
    error: str | None    # failure description, None on success


@dataclass
class HoldDecision:
    """Verdict produced by should_hold() for one task."""
    hold: bool            # True when the task must be held, False when it may pass
    reason: str           # human-readable explanation of the verdict
    classification: str   # "DIVERGENCE_HOLD" | "MEASUREMENT_FAILED_HOLD" | "CLEAN_PASS" | "NON_GATED_KIND_PASS"
    task_kind: str        # task kind the verdict was computed for
    divergence: DivergenceResult  # measurement the verdict is based on


# ---------------------------------------------------------------------------
# Core measurement functions
# ---------------------------------------------------------------------------

def resolve_sha(repo_root: str, ref: str) -> str | None:
    """Return the SHA that *ref* resolves to inside *repo_root*, or None on failure.

    "Failure" covers every way the lookup can go wrong, so callers never see
    an exception from it: a non-zero ``git rev-parse`` exit status, empty
    output, a timeout, git not being launchable (binary missing, *repo_root*
    not an existing directory), or an argument the OS rejects.

    Args:
        repo_root: Directory in which git is run.
        ref: Revision to resolve, e.g. "HEAD" or "origin/main".

    Returns:
        The stripped stdout of ``git rev-parse <ref>``, or None.
    """
    try:
        result = subprocess.run(
            ["git", "rev-parse", ref],
            cwd=repo_root,
            capture_output=True,
            text=True,
            timeout=15,
        )
    except (subprocess.SubprocessError, OSError, ValueError):
        # TimeoutExpired is a SubprocessError; a missing git binary or working
        # directory is an OSError; an embedded NUL in an argument is a ValueError.
        return None
    if result.returncode != 0:
        return None
    return result.stdout.strip() or None


def _fail_closed_result(local_sha: str, origin_sha: str, error: str) -> DivergenceResult:
    """Build the result reported when divergence could not be measured (treated as diverged)."""
    return DivergenceResult(
        ahead=-1,
        behind=-1,
        local_sha=local_sha,
        origin_sha=origin_sha,
        diverged=True,   # fail-closed
        measured=False,
        error=error,
    )


def measure_divergence(
    repo_root: str,
    remote_ref: str = "origin/main",
    do_fetch: bool = False,
) -> DivergenceResult:
    """Measure how far HEAD and *remote_ref* have diverged.

    Uses ``git rev-list --left-right --count <remote_ref>...HEAD``:
    the left count is "behind" (commits only on the remote side) and the
    right count is "ahead" (commits only on HEAD).

    Any failure to measure is reported fail-closed as ``measured=False,
    diverged=True`` with ``ahead=behind=-1``: a non-zero git exit status,
    unparsable output, a timeout, or git not being launchable at all.
    This function does not raise for those conditions.

    Args:
        repo_root: Directory in which git is run.
        remote_ref: Ref to compare HEAD against.
        do_fetch: When True, attempt ``git fetch origin`` first. The fetch is
            best effort; if it fails the comparison still runs against the
            remote-tracking ref as it currently exists locally.

    Returns:
        A DivergenceResult describing the measurement or its failure.
    """
    if do_fetch:
        try:
            subprocess.run(
                ["git", "fetch", "origin"],
                cwd=repo_root,
                capture_output=True,
                text=True,
                timeout=30,
            )
        except Exception:
            pass  # deliberate best effort: continue even when the fetch fails

    local_sha = ""
    origin_sha = ""
    try:
        local_sha = resolve_sha(repo_root, "HEAD") or ""
        origin_sha = resolve_sha(repo_root, remote_ref) or ""
        rev_result = subprocess.run(
            ["git", "rev-list", "--left-right", "--count", f"{remote_ref}...HEAD"],
            cwd=repo_root,
            capture_output=True,
            text=True,
            timeout=20,
        )
    except (subprocess.SubprocessError, OSError, ValueError) as exc:
        # A timeout or an unlaunchable git must yield a HOLD-able result,
        # not a traceback that bypasses the fail-closed contract.
        return _fail_closed_result(
            local_sha, origin_sha, f"{type(exc).__name__}: {exc}"
        )

    if rev_result.returncode != 0:
        # Strip before falling back so whitespace-only stderr still gets a message.
        err = (rev_result.stderr or "").strip() or "git rev-list failed"
        return _fail_closed_result(local_sha, origin_sha, err)

    out = rev_result.stdout.strip()
    try:
        parts = out.split()
        if len(parts) != 2:
            raise ValueError(f"unexpected output: {out!r}")
        behind, ahead = map(int, parts)  # left=behind(remote), right=ahead(HEAD)
    except ValueError as exc:
        return _fail_closed_result(local_sha, origin_sha, str(exc))

    return DivergenceResult(
        ahead=ahead,
        behind=behind,
        local_sha=local_sha,
        origin_sha=origin_sha,
        diverged=ahead > 0 or behind > 0,
        measured=True,
        error=None,
    )


# ---------------------------------------------------------------------------
# Hold decision
# ---------------------------------------------------------------------------

def should_hold(
    task_kind: str,
    divergence: DivergenceResult,
    *,
    fail_closed: bool = True,
) -> HoldDecision:
    """Decide whether a task must be held, given its kind and a divergence measurement.

    Checks are applied in this order:
      1. Kind not in DIVERGENCE_GATE_TASK_KINDS -> pass ("NON_GATED_KIND_PASS").
      2. Measurement failed -> hold ("MEASUREMENT_FAILED_HOLD") when
         *fail_closed* is True; otherwise pass (reported as "CLEAN_PASS").
      3. Diverged -> hold ("DIVERGENCE_HOLD").
      4. Otherwise -> pass ("CLEAN_PASS").

    The gate lookup ignores letter case and surrounding whitespace, so a kind
    spelled "Coding" or " coding " is gated like "coding" instead of slipping
    through as a non-gated kind. The returned decision keeps *task_kind*
    exactly as it was passed in.

    Args:
        task_kind: Kind of the task being dispatched.
        divergence: Result of measure_divergence().
        fail_closed: Whether a failed measurement results in a hold.

    Returns:
        The HoldDecision for this task.
    """
    def decide(hold: bool, reason: str, classification: str) -> HoldDecision:
        return HoldDecision(
            hold=hold,
            reason=reason,
            classification=classification,
            task_kind=task_kind,
            divergence=divergence,
        )

    # Normalize only for the membership test; non-string kinds are looked up as-is.
    gate_key = task_kind.strip().lower() if isinstance(task_kind, str) else task_kind

    # 1) kind that is not subject to the gate
    if gate_key not in DIVERGENCE_GATE_TASK_KINDS:
        return decide(
            False,
            f"task_kind={task_kind!r} is not in DIVERGENCE_GATE_TASK_KINDS — pass",
            "NON_GATED_KIND_PASS",
        )

    # 2) measurement failed
    if not divergence.measured:
        if fail_closed:
            return decide(
                True,
                f"divergence measurement failed (fail-closed): {divergence.error}",
                "MEASUREMENT_FAILED_HOLD",
            )
        return decide(
            False,
            "divergence measurement failed but fail_closed=False — pass",
            "CLEAN_PASS",
        )

    # 3) actually diverged
    if divergence.diverged:
        return decide(
            True,
            (
                f"local main is diverged from origin/main: "
                f"ahead={divergence.ahead}, behind={divergence.behind}"
            ),
            "DIVERGENCE_HOLD",
        )

    # 4) in sync
    return decide(
        False,
        "local main is in sync with origin/main — clean pass",
        "CLEAN_PASS",
    )


# ---------------------------------------------------------------------------
# Task kind classifier
# ---------------------------------------------------------------------------

def classify_task_kind(
    task_meta: dict | None = None,
    *,
    task_file: str | None = None,
) -> str:
    """Infer the task kind from task metadata and/or the text of a task file.

    Resolution order:
      1. Explicit ``kind`` / ``task_kind`` / ``type`` string fields of
         *task_meta*, lower-cased and stripped. A value found in
         DIVERGENCE_GATE_TASK_KINDS is returned as-is and wins over a
         read-only value in any other of these fields, so conflicting
         metadata resolves to the gated side. Failing that, a value found in
         _READ_ONLY_KEYWORDS yields "read_only".
      2. Substring scan for any _READ_ONLY_KEYWORDS entry over the lower-cased
         metadata strings (kind, task_kind, type, mode, task_mode, title,
         description) joined with the task file content -> "read_only".
      3. Default "coding", the gated (safe-side) kind.

    Args:
        task_meta: Optional mapping with task metadata.
        task_file: Optional path to a task file; an unreadable file simply
            contributes no text.

    Returns:
        A kind from DIVERGENCE_GATE_TASK_KINDS, or "read_only".
    """
    meta = task_meta or {}

    # Explicit kind/type fields take priority over free-text scanning.
    explicit_kinds = [
        value.lower().strip()
        for value in (meta.get(field, "") for field in ("kind", "task_kind", "type"))
        if isinstance(value, str)
    ]
    # Gated kinds are checked across all explicit fields first (fail-closed).
    for kind in explicit_kinds:
        if kind in DIVERGENCE_GATE_TASK_KINDS:
            return kind
    if any(kind in _READ_ONLY_KEYWORDS for kind in explicit_kinds):
        return "read_only"

    texts: list[str] = [
        value.lower()
        for value in (
            meta.get(key)
            for key in ("kind", "task_kind", "type", "mode", "task_mode", "title", "description")
        )
        if isinstance(value, str)
    ]

    if task_file:
        try:
            content = Path(task_file).read_text(encoding="utf-8", errors="replace")
            texts.append(content.lower())
        except Exception:
            pass  # best effort: classify from whatever text is available

    combined = " ".join(texts)

    # Keyword scan (plain substring match).
    if any(kw in combined for kw in _READ_ONLY_KEYWORDS):
        return "read_only"

    return "coding"  # safe-side default


# ---------------------------------------------------------------------------
# Hold marker writer
# ---------------------------------------------------------------------------

def write_hold_marker(events_dir: str, task_id: str, decision: HoldDecision) -> str:
    """Persist *decision* as JSON at ``<events_dir>/<task_id>.divergence-hold.json``.

    The marker's parent directory is created when missing. The JSON document
    holds the decision's fields plus a ``written_at`` UTC ISO-8601 timestamp.

    Returns:
        The marker file path as a string.
    """
    marker = Path(events_dir).joinpath(f"{task_id}.divergence-hold.json")
    marker.parent.mkdir(parents=True, exist_ok=True)

    record = {
        **asdict(decision),
        "written_at": datetime.now(timezone.utc).isoformat(),
    }
    serialized = json.dumps(record, indent=2, ensure_ascii=False)

    marker.write_text(serialized, encoding="utf-8")
    return str(marker)


# ---------------------------------------------------------------------------
# CLI
# ---------------------------------------------------------------------------

def _build_parser() -> argparse.ArgumentParser:
    """Create the command-line parser for the divergence guard."""
    parser = argparse.ArgumentParser(
        description="Measure local/origin/main divergence and decide HOLD.",
    )

    # (flag, add_argument keyword options), registered in this order.
    option_specs = [
        ("--repo-root", {"required": True, "help": "git repo root path"}),
        ("--task-id", {"required": True, "help": "task ID (e.g. task-2700)"}),
        ("--task-kind", {"default": None, "help": "task kind (default: auto-classify)"}),
        ("--remote-ref", {"default": "origin/main", "help": "remote ref to compare against"}),
        ("--fetch", {"action": "store_true", "help": "run git fetch origin before measuring"}),
        ("--events-dir", {"default": None, "help": "directory to write hold marker JSON"}),
    ]
    for flag, options in option_specs:
        parser.add_argument(flag, **options)

    # task-2700 doctrine: no bypass flag. The CLI is always fail-closed;
    # --fail-open has been removed.
    return parser


def main(argv: list[str] | None = None) -> None:
    """CLI entry point: measure divergence, decide HOLD, report, and exit.

    Prints a JSON summary to stdout and terminates the process with exit
    status 0 when the task may pass or 3 when it must be held. When
    ``--events-dir`` is given and the decision is HOLD, a hold marker file is
    written as well. A failure to write that marker is reported on stderr
    but does not change the printed decision or the exit status.

    Args:
        argv: Argument list to parse; None means ``sys.argv[1:]``.
    """
    parser = _build_parser()
    args = parser.parse_args(argv)

    repo_root = os.path.abspath(args.repo_root)

    # Measure divergence.
    div = measure_divergence(repo_root, remote_ref=args.remote_ref, do_fetch=args.fetch)

    # Task kind: without --task-kind, the classifier is called with no
    # metadata and therefore returns its default kind.
    task_kind = args.task_kind if args.task_kind else classify_task_kind()

    # Hold decision: the CLI is always fail-closed and offers no bypass.
    decision = should_hold(task_kind, div, fail_closed=True)

    # Record the hold marker.
    marker_path: str | None = None
    if args.events_dir and decision.hold:
        try:
            marker_path = write_hold_marker(args.events_dir, args.task_id, decision)
        except OSError as exc:
            # The marker is a side record. Losing it must not replace the
            # HOLD report and exit status 3 with a traceback and status 1.
            print(f"warning: could not write hold marker: {exc}", file=sys.stderr)

    output = {
        "task_id": args.task_id,
        "task_kind": task_kind,
        "hold": decision.hold,
        "classification": decision.classification,
        "reason": decision.reason,
        "divergence": asdict(div),
        "marker_path": marker_path,
    }
    print(json.dumps(output, indent=2, ensure_ascii=False))

    # exit 0 = pass, exit 3 = HOLD
    sys.exit(3 if decision.hold else 0)


# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
