#!/usr/bin/env python3
"""mixed_commit_detector.py — task 브랜치에 섞인 alien commit 탐지기.

task-2459 Phase 2-C 구현. spec: memory/specs/mixed-commit-detector-spec.md (v1.0).

핵심 정책:
  - read-only 분석기. git history mutation 절대 금지 (rebase/cherry-pick/reset 0줄).
  - LLM API 호출 0줄. 외부 네트워크 호출 0줄.
  - mixed 감지 시 .tasks/locks/<id>.frozen 마커 + evidence 저장 후 exit 1.
  - 마커 자동 제거 금지 (회장/아누 수동 처리).

Exit code:
  0 = clean / empty
  1 = mixed
  2 = internal error (git 실패, IO 실패 등)
"""
from __future__ import annotations

import argparse
import json
import os
import re
import subprocess
import sys
import tempfile
from datetime import datetime, timezone
from pathlib import Path
from typing import Any


# Version string stored under "detector_version" by _build_payload.
DETECTOR_VERSION = "1.0"
# Matches a bracketed task token such as "[task-2459]" anywhere in a string.
# Group 1 captures the id without the brackets: the literal "task-" followed by
# one or more word characters or dots.
TASK_RE = re.compile(r"\[(task-[\w.]+)\]")


# ---------------------------------------------------------------------------
# 유틸리티
# ---------------------------------------------------------------------------

def _now_iso() -> str:
    """UTC ISO 8601 (`2026-05-05T18:21:09Z`)."""
    return datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")


def _now_compact() -> str:
    """파일명용 compact 타임스탬프 (`20260505T182109Z`)."""
    return datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ")


def _atomic_write_json(path: Path, data: dict) -> None:
    """tempfile + os.replace 로 atomic write."""
    path.parent.mkdir(parents=True, exist_ok=True)
    fd, tmp_path = tempfile.mkstemp(
        dir=str(path.parent), prefix=path.name + ".", suffix=".tmp"
    )
    try:
        with os.fdopen(fd, "w", encoding="utf-8") as f:
            json.dump(data, f, ensure_ascii=False, indent=2)
            f.write("\n")
        os.replace(tmp_path, str(path))
    except Exception:
        try:
            os.unlink(tmp_path)
        except OSError:
            pass
        raise


def _git_run(args: list[str], cwd: Path, timeout: int = 30) -> subprocess.CompletedProcess:
    """git read-only 명령 실행. mutation 명령은 호출자에서 절대 사용 금지."""
    return subprocess.run(
        ["git"] + args,
        cwd=str(cwd),
        capture_output=True,
        text=True,
        timeout=timeout,
    )


# ---------------------------------------------------------------------------
# 코어 로직
# ---------------------------------------------------------------------------

def collect_commits(base_ref: str, head_ref: str, git_dir: Path) -> list[dict]:
    """Collect the commits in ``base_ref..head_ref`` via ``git log``.

    The command runs with ``git_dir`` as its working directory (the task
    worktree), which is independent of the workspace used for evidence/freeze
    files. Every field (SHA, subject, author name) is terminated by NUL, so a
    subject containing ``|`` or similar punctuation cannot break parsing.

    Args:
        base_ref: Exclusive lower bound of the revision range.
        head_ref: Inclusive upper bound of the revision range.
        git_dir: Directory in which git is executed.

    Returns:
        One dict per commit with ``sha``, ``subject`` and ``author`` keys, in
        the order git printed them; an empty list when the range is empty.

    Raises:
        RuntimeError: If a ref is empty or starts with ``-``, or if
            ``git log`` exits with a non-zero status.
    """
    # Reject refs that would silently change the meaning of the command:
    #  * an empty ref turns "<base>..<head>" into an implicit-HEAD range, which
    #    can yield zero commits and a false "clean" verdict;
    #  * a leading "-" would make git parse the range as an option.
    for label, ref in (("base", base_ref), ("head", head_ref)):
        if not ref or ref.startswith("-"):
            raise RuntimeError(f"invalid {label} ref: {ref!r}")

    fmt = "%H%x00%s%x00%an%x00"
    res = _git_run(
        [
            "log",
            f"--pretty=format:{fmt}",
            f"{base_ref}..{head_ref}",
            # Trailing "--" forces git to read the range as a revision, never
            # as a path.
            "--",
        ],
        cwd=git_dir,
    )
    if res.returncode != 0:
        raise RuntimeError(
            f"git log failed (exit {res.returncode}): {res.stderr.strip()}"
        )

    out = res.stdout
    if not out:
        return []

    # Each record ends with NUL, so the split ends with one leftover token;
    # zipping the three strided slices keeps only complete triples.
    fields = out.split("\x00")
    commits: list[dict] = []
    for raw_sha, subject, author in zip(fields[0::3], fields[1::3], fields[2::3]):
        # "format:" separates records with a newline, which ends up glued to
        # the front of every SHA after the first.
        sha = raw_sha.lstrip("\n")
        if sha:
            commits.append({"sha": sha, "subject": subject, "author": author})
    return commits


def extract_task_tokens(subject: str) -> list[str]:
    """Return every ``task-...`` id that appears in brackets inside ``subject``.

    The search is not anchored to the start of the string: tokens are picked up
    wherever they occur, in order of appearance, duplicates included. The
    surrounding brackets are not part of the returned ids.
    """
    return [hit.group(1) for hit in TASK_RE.finditer(subject)]


def detect_mixed(commits: list[dict], current_task_id: str) -> dict:
    """Classify commits by task token and decide whether the branch is mixed.

    Args:
        commits: Commit dicts carrying at least ``sha`` and ``subject``.
        current_task_id: Task id that legitimately owns the branch.

    Returns:
        A dict with:
          * ``mixed`` - True when any token other than ``current_task_id`` was seen;
          * ``alien_tasks`` - sorted foreign task ids;
          * ``mixed_tasks`` - sorted union of seen ids and ``current_task_id``
            (empty when no token was seen at all);
          * ``本_task_token_count`` - occurrences of ``current_task_id`` tokens;
          * ``alien_token_count`` - number of commits flagged alien (a commit
            count, despite the key name);
          * ``untagged_commit_count`` - commits without any token;
          * ``commits`` - per-commit ``sha`` / ``subject`` / ``tokens`` / ``alien``.
    """
    seen_tokens: set[str] = set()
    reports: list[dict] = []
    own_hits = 0
    untagged_total = 0

    for commit in commits:
        found = extract_task_tokens(commit["subject"])
        if found:
            seen_tokens.update(found)
            own_here = found.count(current_task_id)
            own_hits += own_here
            # Any token that is not ours makes the whole commit alien.
            has_alien = own_here < len(found)
        else:
            untagged_total += 1
            has_alien = False
        reports.append(
            {
                "sha": commit["sha"],
                "subject": commit["subject"],
                "tokens": found,
                "alien": has_alien,
            }
        )

    alien_tasks = sorted(tok for tok in seen_tokens if tok != current_task_id)
    if seen_tokens:
        mixed_tasks = sorted(seen_tokens | {current_task_id})
    else:
        mixed_tasks = []
    alien_commits = [report for report in reports if report["alien"]]

    return {
        "mixed": len(alien_tasks) > 0,
        "alien_tasks": alien_tasks,
        "mixed_tasks": mixed_tasks,
        "本_task_token_count": own_hits,
        "alien_token_count": len(alien_commits),
        "untagged_commit_count": untagged_total,
        "commits": reports,
    }


def write_freeze_marker(task_id: str, data: dict, workspace: Path) -> Path:
    """Atomically write the JSON freeze marker ``.tasks/locks/<id>.frozen``.

    Args:
        task_id: Task id used as the marker's file stem.
        data: JSON-serialisable content of the marker.
        workspace: Root directory under which ``.tasks`` lives.

    Returns:
        Path of the marker file that was written.
    """
    marker = workspace.joinpath(".tasks", "locks", f"{task_id}.frozen")
    _atomic_write_json(marker, data)
    return marker


def write_evidence(task_id: str, data: dict, workspace: Path) -> Path:
    """Atomically write ``.tasks/evidence/<id>/mixed-commit-<ts>.json``.

    ``<ts>`` is the compact UTC timestamp taken at call time.

    Args:
        task_id: Task id used as the evidence sub-directory name.
        data: JSON-serialisable evidence content.
        workspace: Root directory under which ``.tasks`` lives.

    Returns:
        Path of the evidence file that was written.
    """
    file_name = f"mixed-commit-{_now_compact()}.json"
    evidence = workspace.joinpath(".tasks", "evidence", task_id, file_name)
    _atomic_write_json(evidence, data)
    return evidence


def resolve_branch(git_dir: Path) -> str | None:
    """Return the short name of the branch HEAD points to.

    Runs ``git symbolic-ref --short -q HEAD`` in ``git_dir``. Returns ``None``
    when the command fails (for example on a detached HEAD) or prints nothing.
    """
    proc = _git_run(["symbolic-ref", "--short", "-q", "HEAD"], cwd=git_dir)
    if proc.returncode != 0:
        return None
    name = proc.stdout.strip()
    return name if name else None


def resolve_sha(ref: str, git_dir: Path) -> str | None:
    """Resolve ``ref`` to a single object name using ``git rev-parse --verify``.

    ``--verify`` makes git insist on exactly one valid object name. Without it
    a range or an option-like argument could succeed and print several lines,
    which would then be recorded as the "SHA".

    Args:
        ref: Revision to resolve (branch, tag, ``HEAD``, ...).
        git_dir: Directory in which git is executed.

    Returns:
        The resolved object name, or ``None`` when ``ref`` is empty, looks like
        a command-line option, or cannot be resolved by git.
    """
    # An empty or "-"-prefixed value is not a usable revision; skip the git
    # call so the value can never be interpreted as an option.
    if not ref or ref.startswith("-"):
        return None
    res = _git_run(["rev-parse", "--verify", ref], cwd=git_dir)
    if res.returncode != 0:
        return None
    return res.stdout.strip() or None


# ---------------------------------------------------------------------------
# 출력 / payload 빌드
# ---------------------------------------------------------------------------

def _build_payload(
    task_id: str,
    branch_ref: str,
    base_ref: str,
    git_dir: Path,
    analysis: dict,
) -> dict:
    """Assemble the JSON payload used for the freeze marker and evidence file.

    Args:
        task_id: Task id that owns the branch.
        branch_ref: Ref that was inspected, as given by the caller.
        base_ref: Base ref the branch was compared against.
        git_dir: Directory in which the git lookups run.
        analysis: Result dict produced by ``detect_mixed``.

    Returns:
        Payload dict holding the refs and their resolved SHAs, the analysis
        fields and an ``exit_code`` (1 when mixed, otherwise 0). A mixed payload
        additionally carries ``escalation_message`` and ``freeze_marker_path``.
    """
    # Only the symbolic "HEAD" is translated into a branch name; any other ref
    # is reported exactly as supplied.
    if branch_ref == "HEAD":
        resolved_branch = resolve_branch(git_dir)
    else:
        resolved_branch = branch_ref
    base_sha = resolve_sha(base_ref, git_dir)
    head_sha = resolve_sha(branch_ref, git_dir)

    payload: dict[str, Any] = {
        "task_id": task_id,
        "verified_at": _now_iso(),
        "detector_version": DETECTOR_VERSION,
        "branch_ref": branch_ref,
        "resolved_branch": resolved_branch,
        "base_ref": base_ref,
        "base_sha": base_sha,
        "head_sha": head_sha,
    }
    # Copy the analysis fields in a fixed order so the serialized JSON layout
    # stays stable.
    for key in (
        "mixed",
        "mixed_tasks",
        "alien_tasks",
        "本_task_token_count",
        "alien_token_count",
        "untagged_commit_count",
        "commits",
    ):
        payload[key] = analysis[key]

    if not analysis["mixed"]:
        payload["exit_code"] = 0
        return payload

    marker_rel = f".tasks/locks/{task_id}.frozen"
    joined_aliens = ", ".join(analysis["alien_tasks"])
    payload["escalation_message"] = (
        f"{task_id} 브랜치에서 alien commit "
        f"{analysis['alien_token_count']}건 감지 ({joined_aliens}). "
        f"{marker_rel} 생성됨. 회장/아누만 수동 처리."
    )
    payload["freeze_marker_path"] = marker_rel
    payload["exit_code"] = 1
    return payload


def _emit(obj: dict, quiet: bool) -> None:
    if quiet:
        return
    print(json.dumps(obj, ensure_ascii=False))


# ---------------------------------------------------------------------------
# main
# ---------------------------------------------------------------------------

def main(argv: list[str] | None = None) -> int:
    """Command-line entry point: analyse a task branch and report the verdict.

    Args:
        argv: Argument vector without the program name; ``None`` lets argparse
            read the process arguments.

    Returns:
        Process exit code: 0 for an empty or clean range, 1 when alien task
        tokens were found, 2 on an internal failure (missing directory, git
        error or timeout, payload build failure, marker/evidence write error).
    """
    tag = "[mixed-commit-detector]"

    def complain(text: str) -> None:
        # Diagnostics always go to stderr, independent of --quiet.
        print(f"{tag} {text}", file=sys.stderr)

    cli = argparse.ArgumentParser(
        description="Detect alien-task commits on the current task branch.",
    )
    cli.add_argument("task_id", help="현재 task id (예: task-2459)")
    cli.add_argument("--branch", default="HEAD", help="검사 대상 ref (기본 HEAD)")
    cli.add_argument(
        "--base", default="origin/main", help="비교 base ref (기본 origin/main)"
    )
    cli.add_argument(
        "--workspace",
        default=os.environ.get("WORKSPACE_ROOT") or os.getcwd(),
        help=(
            "워크스페이스 루트 — evidence/freeze 파일 저장 경로 결정용. "
            "git 조회는 --git-dir 또는 cwd 에서 수행한다."
        ),
    )
    cli.add_argument(
        "--git-dir",
        default=None,
        dest="git_dir",
        help=(
            "git 조회 디렉토리 (task worktree 경로). "
            "미지정 시 현재 cwd 를 사용. workspace 와 분리되어 운영 환경의 "
            "main repo root vs task worktree 혼동을 방지한다."
        ),
    )
    cli.add_argument(
        "--json",
        action="store_true",
        dest="json_only",
        help="dry-run: stdout JSON만, evidence/freeze 저장 안 함",
    )
    cli.add_argument(
        "--quiet", action="store_true", help="정상 stdout 메시지 억제"
    )
    opts = cli.parse_args(argv)

    # The workspace decides where marker/evidence files are stored.
    workspace = Path(opts.workspace).resolve()
    if not workspace.exists():
        complain(f"workspace not found: {workspace}")
        return 2

    # git_dir is the working directory for git lookups (the task worktree) and
    # is deliberately independent of the workspace.
    git_dir = (Path(opts.git_dir) if opts.git_dir else Path.cwd()).resolve()
    if not git_dir.exists():
        complain(f"git_dir not found: {git_dir}")
        return 2

    try:
        commits = collect_commits(opts.base, opts.branch, git_dir)
    except subprocess.TimeoutExpired:
        complain("git log timed out")
        return 2
    except RuntimeError as exc:
        complain(f"{exc}")
        return 2
    except Exception as exc:  # noqa: BLE001
        complain(f"unexpected error: {exc}")
        return 2

    # Fields shared by the "empty" and "clean" summaries.
    header = {
        "task_id": opts.task_id,
        "branch_ref": opts.branch,
        "base_ref": opts.base,
    }

    if not commits:
        # Nothing between base and branch: report "empty" and succeed.
        _emit(
            {"status": "empty", **header, "n_commits": 0, "exit_code": 0},
            opts.quiet,
        )
        return 0

    analysis = detect_mixed(commits, opts.task_id)

    try:
        payload = _build_payload(
            opts.task_id, opts.branch, opts.base, git_dir, analysis
        )
    except subprocess.TimeoutExpired:
        complain("git rev-parse timed out")
        return 2
    except Exception as exc:  # noqa: BLE001
        complain(f"payload build failed: {exc}")
        return 2

    untagged = analysis["untagged_commit_count"]
    if not analysis["mixed"]:
        # Clean result; untagged commits are reported but do not fail the run.
        if untagged > 0:
            status = "clean_with_untagged"
        else:
            status = "clean"
        _emit(
            {
                "status": status,
                **header,
                "n_commits": len(commits),
                "untagged_commit_count": untagged,
                "exit_code": 0,
            },
            opts.quiet,
        )
        return 0

    # From here on the branch is mixed.
    alien_tasks = analysis["alien_tasks"]

    if opts.json_only:
        # Dry-run: print the payload only, write nothing to disk.
        _emit(payload, opts.quiet)
        # The operator still gets a notice on stderr.
        complain("DRY-RUN mixed detected: " + ", ".join(alien_tasks))
        return 1

    try:
        freeze_path = write_freeze_marker(opts.task_id, payload, workspace)
        evidence_path = write_evidence(opts.task_id, payload, workspace)
    except OSError as exc:
        complain(f"failed to write marker/evidence: {exc}")
        # Still surface the payload on stdout so the finding is not lost.
        _emit(payload, opts.quiet)
        return 2

    _emit(
        {
            "status": "mixed",
            "task_id": opts.task_id,
            "alien_tasks": alien_tasks,
            "freeze": str(freeze_path.relative_to(workspace)),
            "evidence": str(evidence_path.relative_to(workspace)),
            "exit_code": 1,
        },
        opts.quiet,
    )

    # The escalation message is printed even with --quiet: operators must see it.
    complain(payload["escalation_message"])
    return 1


if __name__ == "__main__":
    # Script entry: main()'s return value becomes the process exit status.
    raise SystemExit(main())
