#!/usr/bin/env python3
"""dirty_registry.py — main dirty snapshot 기록(JSONL) + dirty 파일 owner 추정 +
EXTERNAL_DIRTY_BLOCKER vs OWN_DIRTY_FAIL 분류/분리.

task-2700 Phase 1 모듈 2.
"""
from __future__ import annotations

import argparse
import fnmatch
import json
import os
import subprocess
from dataclasses import asdict, dataclass
from datetime import datetime, timezone
from pathlib import Path

# Classification labels produced by classify_blocker().
EXTERNAL_DIRTY_BLOCKER = "EXTERNAL_DIRTY_BLOCKER"  # only paths outside the task's expected files are dirty
OWN_DIRTY_FAIL = "OWN_DIRTY_FAIL"  # at least one dirty path matches the task's expected files
CLEAN = "CLEAN"  # no dirty paths at all


# ---------------------------------------------------------------------------
# Data classes
# ---------------------------------------------------------------------------

@dataclass
class DirtyRecord:
    """One dirty working-tree entry parsed from ``git status --porcelain``."""

    path: str            # path as reported by git (joined onto the repo root for mtime lookup)
    status: str          # two-character git porcelain code (" M", "??", " D", "AM", ...)
    mtime: float | None  # file mtime (epoch seconds); None when the file is deleted/absent
    diff_summary: str    # short summary of the change
    owner_task: str | None  # estimated owning task id, or None when unknown/not estimated


# ---------------------------------------------------------------------------
# Glob matching (same semantics as task-scope-guard.sh)
# ---------------------------------------------------------------------------

def glob_match(pattern: str, path: str) -> bool:
    """Return True when *path* matches the glob *pattern*.

    Rules, applied in order:

    - ``"**"`` matches every path.
    - ``"a/**"`` matches ``a`` itself and anything under ``a/``.
    - Otherwise every ``**`` in the pattern is collapsed to ``*`` and the
      result is tried with ``fnmatch`` against the full path and then against
      the path's basename (so ``"*.py"`` matches ``"utils/foo.py"``).
    """
    if pattern == "**":
        return True

    # Directory-style pattern: literal prefix comparison first.
    if pattern.endswith("/**"):
        directory = pattern[: -len("/**")]
        if path == directory or path.startswith(f"{directory}/"):
            return True

    # Collapsing "**" is a no-op for patterns that do not contain it, so a
    # single code path serves both recursive and plain patterns.
    collapsed = pattern.replace("**", "*")
    candidates = (path, os.path.basename(path))
    return any(fnmatch.fnmatch(candidate, collapsed) for candidate in candidates)


# ---------------------------------------------------------------------------
# Expected files loader
# ---------------------------------------------------------------------------

def load_expected_files(capabilities_dir: str, task_id: str) -> list[str]:
    """Load the path patterns listed in a task's capability file.

    Reads ``<capabilities_dir>/<task_id>.json`` and returns the entries of
    ``allowed_resources.paths``, keeping only the first whitespace-delimited
    token of each entry so that trailing annotations are dropped, e.g.
    ``"scripts/finish-task.sh (GIT-GATE ...)"`` -> ``"scripts/finish-task.sh"``.

    Args:
        capabilities_dir: Directory holding the per-task capability files.
        task_id: Task identifier; also the stem of the JSON file name.

    Returns:
        The extracted patterns in file order. An empty list when the file is
        missing or unreadable, is not valid JSON, or does not have the
        ``{"allowed_resources": {"paths": [...]}}`` shape. Non-string and
        blank entries are skipped.
    """
    cap_file = Path(capabilities_dir) / f"{task_id}.json"
    try:
        # Read directly instead of checking exists() first: a missing file
        # surfaces as OSError and is handled the same as any other read error.
        data = json.loads(cap_file.read_text(encoding="utf-8", errors="replace"))
    except (OSError, ValueError, RecursionError):
        return []

    if not isinstance(data, dict):
        return []
    allowed = data.get("allowed_resources", {})
    if not isinstance(allowed, dict):
        return []
    paths_raw = allowed.get("paths", [])
    # Only a JSON array is valid here. A bare string would otherwise be
    # iterated character by character and every character returned as a
    # pattern.
    if not isinstance(paths_raw, list):
        return []

    patterns: list[str] = []
    for entry in paths_raw:
        if not isinstance(entry, str):
            continue
        parts = entry.split()
        if parts:
            patterns.append(parts[0])
    return patterns


# ---------------------------------------------------------------------------
# Owner estimation
# ---------------------------------------------------------------------------

def estimate_owner(path: str, capabilities_dir: str) -> str | None:
    """Guess which task owns *path* from the capability files.

    Every ``task-*.json`` in *capabilities_dir* is scanned in sorted file-name
    order; its patterns come from ``load_expected_files`` and are tested with
    ``glob_match``. The task whose matching pattern is longest wins; on a tie
    the match seen first is kept.

    Returns:
        The winning task id (the JSON file's stem, e.g. ``"task-2700"``), or
        None when *capabilities_dir* is not a directory or nothing matches.
    """
    cap_dir = Path(capabilities_dir)
    if not cap_dir.is_dir():
        return None

    # Lazily yields (pattern length, task id) for each matching pattern, in
    # sorted-file order and then pattern order.
    matches = (
        (len(pattern), cap_file.stem)
        for cap_file in sorted(cap_dir.glob("task-*.json"))
        for pattern in load_expected_files(str(cap_dir), cap_file.stem)
        if glob_match(pattern, path)
    )
    # max() returns the first of several equal maxima, so earlier matches win
    # ties.
    winner = max(matches, key=lambda match: match[0], default=None)
    return None if winner is None else winner[1]


# ---------------------------------------------------------------------------
# Dirty collection
# ---------------------------------------------------------------------------

def _git_diff_stat_summary(repo_root: str, path: str) -> str:
    """Return the last non-blank line of ``git diff --stat -- <path>``.

    Best effort: falls back to ``"modified"`` when the command prints nothing
    or when running it raises (git missing, timeout, bad *repo_root*, ...).
    """
    command = ["git", "diff", "--stat", "--", path]
    try:
        completed = subprocess.run(
            command,
            cwd=repo_root,
            capture_output=True,
            text=True,
            timeout=10,
        )
        stripped = map(str.strip, completed.stdout.strip().splitlines())
        non_blank = [text for text in stripped if text]
        if non_blank:
            # The final line of --stat output is the "N files changed" summary.
            return non_blank[-1]
    except Exception:
        # Deliberately swallowed: the summary is informational only.
        pass
    return "modified"


def _parse_porcelain_z(stdout: str) -> list[tuple[str, str]]:
    """Split ``git status --porcelain -z`` output into ``(status, path)`` pairs."""
    entries: list[tuple[str, str]] = []
    fields = iter(stdout.split("\0"))
    for field in fields:
        # Each entry is "XY <path>": two status chars, a space, then the path.
        # Anything shorter (including the empty tail after the last NUL) has
        # no path and is ignored.
        if len(field) < 4:
            continue
        status, path = field[:2], field[3:]
        if "R" in status or "C" in status:
            # With -z a rename/copy is emitted as "XY <new>\0<old>\0"; consume
            # the origin path so it is not parsed as an entry of its own.
            next(fields, None)
        entries.append((status, path))
    return entries


def collect_dirty(
    repo_root: str,
    capabilities_dir: str | None = None,
) -> list[DirtyRecord]:
    """Collect the repository's dirty files as ``DirtyRecord`` objects.

    Runs ``git status --porcelain -z`` in *repo_root*. The NUL-terminated
    format is used because git prints paths verbatim there; in the
    newline-terminated format paths with special or non-ASCII characters are
    C-quoted, and renames are printed as ``old -> new``, which cannot be told
    apart from a file name that contains `` -> ``.

    Args:
        repo_root: Directory in which git is run; also the base directory for
            mtime lookups.
        capabilities_dir: When given (non-empty), each record's ``owner_task``
            is filled in via ``estimate_owner``; otherwise it is None.

    Returns:
        One record per dirty entry (for renames/copies, the destination path).
        An empty list when git exits non-zero. NOTE: that case is
        indistinguishable from a clean tree for callers.

    Raises:
        Whatever ``subprocess.run`` raises for the status command itself
        (e.g. ``FileNotFoundError``, ``subprocess.TimeoutExpired``).
    """
    result = subprocess.run(
        ["git", "status", "--porcelain", "-z"],
        cwd=repo_root,
        capture_output=True,
        text=True,
        timeout=20,
    )
    if result.returncode != 0:
        return []

    records: list[DirtyRecord] = []
    for status, path in _parse_porcelain_z(result.stdout):
        # mtime is None for deleted or otherwise inaccessible paths.
        mtime: float | None
        try:
            mtime = os.path.getmtime(os.path.join(repo_root, path))
        except OSError:
            mtime = None

        if status == "??":
            diff_summary = "untracked"
        elif status.strip() == "D":
            # Covers " D" (deleted in the worktree) and "D " (deleted in the index).
            diff_summary = "deleted"
        else:
            diff_summary = _git_diff_stat_summary(repo_root, path)

        owner: str | None = None
        if capabilities_dir:
            owner = estimate_owner(path, capabilities_dir)

        records.append(DirtyRecord(
            path=path,
            status=status,
            mtime=mtime,
            diff_summary=diff_summary,
            owner_task=owner,
        ))

    return records


# ---------------------------------------------------------------------------
# Registry writer
# ---------------------------------------------------------------------------

def write_registry(
    registry_path: str,
    records: list[DirtyRecord],
    *,
    phase: str,
    task_id: str,
) -> int:
    """Append one JSON line per record to the registry file.

    Each line is an object with the keys ``ts``, ``task_id``, ``phase``,
    ``path``, ``status``, ``mtime``, ``diff_summary`` and ``owner_task``.
    All lines written by one call share the same UTC ISO-8601 timestamp.
    Missing parent directories are created, and the file is opened in append
    mode even when *records* is empty.

    Returns:
        The number of lines written.
    """
    target = Path(registry_path)
    target.parent.mkdir(parents=True, exist_ok=True)

    # Fields that are identical for every row of this batch.
    shared = {
        "ts": datetime.now(timezone.utc).isoformat(),
        "task_id": task_id,
        "phase": phase,
    }

    written = 0
    with target.open("a", encoding="utf-8") as out:
        for written, record in enumerate(records, start=1):
            row = {
                **shared,
                "path": record.path,
                "status": record.status,
                "mtime": record.mtime,
                "diff_summary": record.diff_summary,
                "owner_task": record.owner_task,
            }
            out.write(json.dumps(row, ensure_ascii=False) + "\n")
    return written


# ---------------------------------------------------------------------------
# Snapshot main dirty
# ---------------------------------------------------------------------------

def snapshot_main_dirty(
    repo_root: str,
    registry_path: str,
    *,
    phase: str,
    task_id: str,
    capabilities_dir: str | None = None,
) -> dict:
    """Collect the current dirty files and append them to the registry.

    Combines ``collect_dirty`` and ``write_registry``. *phase* is expected to
    be ``"dispatch"`` or ``"finish"``.

    Returns:
        A dict with ``count`` (lines written), ``registry_path`` (as passed
        in) and ``records`` (each record converted to a plain dict).
    """
    dirty = collect_dirty(repo_root, capabilities_dir=capabilities_dir)
    written = write_registry(registry_path, dirty, phase=phase, task_id=task_id)

    summary: dict = {"count": written, "registry_path": registry_path}
    summary["records"] = list(map(asdict, dirty))
    return summary


# ---------------------------------------------------------------------------
# Separate / classify
# ---------------------------------------------------------------------------

def separate_dirty(
    expected_files: list[str],
    dirty_paths: list[str],
) -> tuple[list[str], list[str]]:
    """Partition *dirty_paths* by whether they match any expected pattern.

    A path that ``glob_match``-es at least one pattern in *expected_files* is
    "own"; every other path is "unrelated". Input order is preserved in both
    lists.

    Returns:
        ``(own_dirty, unrelated_dirty)``.
    """
    own: list[str] = []
    unrelated: list[str] = []

    for candidate in dirty_paths:
        for pattern in expected_files:
            if glob_match(pattern, candidate):
                own.append(candidate)
                break
        else:
            # No pattern matched (or there were no patterns at all).
            unrelated.append(candidate)

    return own, unrelated


def classify_blocker(
    expected_files: list[str],
    dirty_paths: list[str],
) -> dict:
    """Classify the dirty state relative to a task's expected files.

    The paths are first split with ``separate_dirty``; then:

    - any own dirty path        -> ``OWN_DIRTY_FAIL`` (the task's responsibility)
    - only unrelated dirty paths -> ``EXTERNAL_DIRTY_BLOCKER`` (the environment's)
    - no dirty paths            -> ``CLEAN``

    Returns:
        A dict with ``classification``, ``own_dirty`` and ``unrelated_dirty``.
    """
    own, unrelated = separate_dirty(expected_files, dirty_paths)

    if own:
        verdict = OWN_DIRTY_FAIL
    elif unrelated:
        verdict = EXTERNAL_DIRTY_BLOCKER
    else:
        verdict = CLEAN

    # In the non-OWN cases the relevant lists are already empty, so returning
    # them as-is yields the same values for every verdict.
    return {
        "classification": verdict,
        "own_dirty": own,
        "unrelated_dirty": unrelated,
    }


# ---------------------------------------------------------------------------
# CLI
# ---------------------------------------------------------------------------

def _build_parser() -> argparse.ArgumentParser:
    """Build the command-line parser for the snapshot/classify CLI."""
    parser = argparse.ArgumentParser(
        description="Snapshot main dirty state and classify blocker.",
    )

    # Mandatory options, registered in the order they appear in --help.
    required_options = (
        ("--repo-root", "git repo root path"),
        ("--task-id", "task ID"),
        ("--registry", "JSONL registry file path"),
    )
    for flag, help_text in required_options:
        parser.add_argument(flag, required=True, help=help_text)

    parser.add_argument(
        "--phase",
        choices=["dispatch", "finish"],
        default="dispatch",
        help="phase: dispatch or finish",
    )
    parser.add_argument(
        "--capabilities-dir",
        default=None,
        help="capabilities JSON dir",
    )
    return parser


def main(argv: list[str] | None = None) -> None:
    """CLI entry point: snapshot the dirty state, classify it, print JSON.

    Args:
        argv: Argument list to parse; None lets argparse read ``sys.argv``.

    The report printed to stdout contains ``task_id``, ``phase``, the
    ``snapshot`` result and the ``blocker`` classification.
    """
    args = _build_parser().parse_args(argv)

    snapshot = snapshot_main_dirty(
        os.path.abspath(args.repo_root),
        args.registry,
        phase=args.phase,
        task_id=args.task_id,
        capabilities_dir=args.capabilities_dir,
    )

    # Without a capabilities dir there are no expected files, so every dirty
    # path counts as unrelated in the classification below.
    expected: list[str] = (
        load_expected_files(args.capabilities_dir, args.task_id)
        if args.capabilities_dir
        else []
    )
    dirty_paths = [record["path"] for record in snapshot["records"]]

    report = {
        "task_id": args.task_id,
        "phase": args.phase,
        "snapshot": snapshot,
        "blocker": classify_blocker(expected, dirty_paths),
    }
    print(json.dumps(report, indent=2, ensure_ascii=False))


# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
