#!/usr/bin/env python3
"""
stash_audit.py — Git stash 출처 추적 진단 도구 (read-only)

spec: memory/specs/stash-origin-audit.md
task: task-2570

절대 금지: git stash pop / drop / clear / push
허용: git stash list, git stash show
"""

import argparse
import json
import re
import subprocess
import sys
from datetime import datetime
from pathlib import Path

# ---------------------------------------------------------------------------
# 분류 패턴 (spec §5 — 순서 중요, 첫 매칭 우선)
# ---------------------------------------------------------------------------

# Ordered classification rules, compiled from (regex, flags, label) rows.
# _classify_source walks this list top to bottom and stops at the first regex
# found in the message, so specific rules must precede generic ones: the
# quarantine regex has to be listed before the plain finish-task regex, which
# would otherwise also match "finish-task-quarantine".
SOURCE_PATTERNS = [
    (re.compile(regex, flags), label)
    for regex, flags, label in (
        # 1. pre-task
        (r"pre[-_]task[-_\s]", re.IGNORECASE, "pre-task"),
        (r"WIP:\s*pre[-_]task", re.IGNORECASE, "pre-task"),
        # 2. quarantine
        (r"finish[-_]task[-_]quarantine", re.IGNORECASE, "quarantine"),
        # 3. other-files
        (r"other[-_]files[-_]stash", re.IGNORECASE, "other-files"),
        (r"other[-_]tasks?[-_]dirty", re.IGNORECASE, "other-files"),
        (r"non[-_]task[-_]scope[-_]dirty", re.IGNORECASE, "other-files"),
        # 4. finish-task
        (r"finish[-_]task", re.IGNORECASE, "finish-task"),
        (r"finish[-_]stash", re.IGNORECASE, "finish-task"),
        # 5. wip (case-sensitive, anchored at the start of the message)
        (r"^WIP on \w", 0, "wip"),
    )
]

# Caller description reported for each source label. parse_stash_list looks
# labels up here and falls back to "unknown" for anything missing.
CALLER_MAP = {
    # Three labels share the same originating script.
    **dict.fromkeys(("finish-task", "quarantine", "other-files"), "finish-task.sh"),
    "pre-task": "pre-task (수동 또는 pre_push_guard.py)",
    "wip": "수동 git stash",
    "unknown": "unknown",
}

# Matches a task identifier such as "task-2570" or "task-2570+1", optionally
# wrapped in square brackets; group 1 holds the identifier without the
# brackets. Matching is case-insensitive.
TASK_ID_RE = re.compile(r"\[?(task-\d+(?:\+\d+)?)\]?", re.IGNORECASE)

# ---------------------------------------------------------------------------
# git 명령 실행 (read-only 보장)
# ---------------------------------------------------------------------------

ALLOWED_GIT_STASH_SUBCOMMANDS = {"list", "show"}


def _run_git(args: list[str], cwd: str) -> str:
    """git 명령 실행. stash pop/drop/clear/push 호출 시 즉시 중단."""
    if args and args[0] == "stash":
        sub = args[1] if len(args) > 1 else ""
        if sub not in ALLOWED_GIT_STASH_SUBCOMMANDS:
            raise RuntimeError(
                f"[stash_audit] FORBIDDEN: git stash {sub} — read-only 원칙 위반"
            )
    result = subprocess.run(
        ["git"] + args,
        cwd=cwd,
        capture_output=True,
        text=True,
    )
    if result.returncode != 0:
        return ""
    return result.stdout.strip()


# ---------------------------------------------------------------------------
# 파싱 / 분류
# ---------------------------------------------------------------------------

def _classify_source(message: str) -> str:
    """Return the label of the first SOURCE_PATTERNS rule found in *message*.

    Rules are tried in list order; ``"unknown"`` is returned when none match.
    """
    hits = (label for rule, label in SOURCE_PATTERNS if rule.search(message))
    return next(hits, "unknown")


def _extract_task_id(message: str) -> str | None:
    """Return the first task id (``task-NNNN`` or ``task-NNNN+N``) in *message*.

    The id is returned as written in the message, without surrounding
    brackets; ``None`` is returned when TASK_ID_RE finds nothing.
    """
    found = TASK_ID_RE.search(message)
    if found is None:
        return None
    return found.group(1)


def _extract_reason(message: str) -> str:
    """
    stash 메시지에서 reason 텍스트 추출.
    - 'On main: ...' 에서 On main: 제거
    - 'WIP on main: SHA ...' 에서 SHA 이후 텍스트 추출
    - '[task-NNNN][source=X][reason=Y]' 포맷이면 reason 값 추출
    """
    # 표준 포맷: [reason=YYY]
    m = re.search(r"\[reason=([^\]]+)\]", message)
    if m:
        return m.group(1).strip()

    # WIP on branch: SHA description
    m = re.match(r"WIP on \S+:\s+[0-9a-f]+\s+(.*)", message)
    if m:
        return m.group(1).strip()

    # On main: ...
    m = re.match(r"(?:On \S+:\s*)(.*)", message)
    if m:
        return m.group(1).strip()

    return message.strip()


def _get_stash_timestamp(index: int, cwd: str) -> str | None:
    """Look up the ``%ci`` date of ``stash@{index}`` via ``git log -1``.

    Returns the date string as printed by git, or ``None`` when the git call
    produces no output (which includes the case where it fails).
    """
    stash_ref = f"stash@{{{index}}}"
    stamp = _run_git(["log", "--format=%ci", "-1", stash_ref], cwd=cwd)
    return stamp.strip() if stamp else None


def parse_stash_list(cwd: str, limit: int | None = None) -> list[dict]:
    """Parse ``git stash list`` output into structured stash entries.

    Read-only: runs ``git stash list`` once and, through
    _get_stash_timestamp, one ``git log -1`` per analysed entry. Nothing in
    the repository is modified.

    Args:
        cwd: Directory in which the git commands are executed.
        limit: Maximum number of stash lines to analyse, taken from the top
            of the list. ``None`` means no limit; negative values are
            treated as 0.

    Returns:
        One dict per parsed line, in list order, with the keys ``index``,
        ``task_id``, ``source``, ``reason``, ``caller_script``, ``timestamp``
        and ``raw_message``. Lines that do not look like
        ``stash@{N}: message`` are skipped. An empty list is returned when
        git prints nothing (no stashes, or the command failed).
    """
    raw = _run_git(["stash", "list"], cwd=cwd)
    if not raw:
        return []

    lines = raw.splitlines()
    if limit is not None:
        # Clamp to 0: a negative slice bound would count from the END of the
        # list and silently drop the oldest entries instead of limiting.
        lines = lines[: max(limit, 0)]

    entries = []
    for line in lines:
        # Expected shape: stash@{N}: message
        m = re.match(r"stash@\{(\d+)\}:\s*(.*)", line)
        if not m:
            continue
        index = int(m.group(1))
        raw_message = m.group(2).strip()

        source = _classify_source(raw_message)
        task_id = _extract_task_id(raw_message)
        reason = _extract_reason(raw_message)
        caller_script = CALLER_MAP.get(source, "unknown")
        timestamp = _get_stash_timestamp(index, cwd)

        entries.append(
            {
                "index": index,
                "task_id": task_id,
                "source": source,
                "reason": reason,
                "caller_script": caller_script,
                "timestamp": timestamp,
                "raw_message": raw_message,
            }
        )

    return entries


# ---------------------------------------------------------------------------
# 집계
# ---------------------------------------------------------------------------

def summarize(entries: list[dict]) -> dict:
    """Build the report summary for *entries*.

    Returns a dict with ``total`` (number of entries), ``count_by_source``
    (entries per ``source`` value, keyed in first-seen order) and
    ``generated_at`` (current local time, ISO format, second precision).
    """
    tally: dict[str, int] = {}
    for entry in entries:
        label = entry["source"]
        tally.setdefault(label, 0)
        tally[label] += 1

    stamp = datetime.now().isoformat(timespec="seconds")
    return {"total": len(entries), "count_by_source": tally, "generated_at": stamp}


# ---------------------------------------------------------------------------
# 출력 포맷터
# ---------------------------------------------------------------------------

def format_table(entries: list[dict], summary: dict) -> str:
    """Render the audit as a fixed-width plain-text report.

    The report has a header (generation time, total), a per-source tally
    sorted by descending count with a ``#`` bar, and one detail row per
    entry. Reasons longer than 45 characters are cut to 42 plus ``...``;
    a missing task id is shown as ``-``.
    """
    heavy_rule = "=" * 78
    light_rule = "-" * 78

    def shorten(text):
        return text[:42] + "..." if len(text) > 45 else text

    report = [
        heavy_rule,
        "Git Stash Origin Audit Report",
        f"생성 시각: {summary['generated_at']}",
        f"총 stash: {summary['total']}개",
        "",
        "[ 카테고리별 집계 ]",
    ]

    # Highest count first; the sort is stable, so ties keep dict order.
    ranking = sorted(summary["count_by_source"].items(), key=lambda pair: -pair[1])
    report.extend(
        f"  {label:<15} {count:>3}개  {'#' * count}" for label, count in ranking
    )

    report += [
        "",
        "[ 상세 목록 ]",
        light_rule,
        f"{'IDX':>4}  {'task_id':<14}  {'source':<14}  {'reason'}",
        light_rule,
    ]

    for item in entries:
        shown_id = item["task_id"] or "-"
        shown_reason = shorten(item["reason"])
        report.append(
            f"{item['index']:>4}  {shown_id:<14}  {item['source']:<14}  {shown_reason}"
        )

    report.append(heavy_rule)
    return "\n".join(report)


def format_json(entries: list[dict], summary: dict) -> str:
    """Serialize the audit as JSON with ``summary`` and ``entries`` keys.

    Output is indented by two spaces and non-ASCII text is written as-is
    rather than escaped.
    """
    return json.dumps(
        {"summary": summary, "entries": entries},
        indent=2,
        ensure_ascii=False,
    )


def format_markdown(entries: list[dict], summary: dict) -> str:
    """Render the audit as a Markdown report.

    The report contains a header (generation time, total), a per-source
    count table sorted by descending count, and a detail table with one row
    per entry.

    Args:
        entries: Stash entries; each needs ``index``, ``task_id``,
            ``source``, ``reason`` and ``timestamp``.
        summary: Dict with ``generated_at``, ``total`` and
            ``count_by_source``.

    Returns:
        The Markdown text, lines joined with ``\\n`` and no trailing newline.
        Reasons longer than 50 characters are cut to 47 plus ``...``; a
        missing task id or timestamp is shown as ``-``.
    """
    lines = []
    lines.append("# Git Stash Origin Audit Report")
    lines.append("")
    lines.append(f"- **생성 시각**: {summary['generated_at']}")
    lines.append(f"- **총 stash**: {summary['total']}개")
    lines.append("")
    lines.append("## 카테고리별 집계")
    lines.append("")
    lines.append("| source | count |")
    lines.append("|--------|------:|")
    for src, cnt in sorted(summary["count_by_source"].items(), key=lambda x: -x[1]):
        lines.append(f"| `{src}` | {cnt} |")
    lines.append("")
    lines.append("## 상세 목록")
    lines.append("")
    lines.append("| idx | task_id | source | reason | timestamp |")
    lines.append("|----:|---------|--------|--------|-----------|")
    for e in entries:
        task_id = e["task_id"] or "-"
        reason = e["reason"]
        if len(reason) > 50:
            reason = reason[:47] + "..."
        # The reason is free text from the stash message; an unescaped "|"
        # would be read as a cell separator and break the row. Escape after
        # truncating so the length limit applies to the visible text.
        reason = reason.replace("|", "\\|")
        ts = e["timestamp"] or "-"
        lines.append(
            f"| {e['index']} | `{task_id}` | `{e['source']}` | {reason} | {ts} |"
        )
    return "\n".join(lines)


# ---------------------------------------------------------------------------
# CLI
# ---------------------------------------------------------------------------

def build_parser() -> argparse.ArgumentParser:
    """Create the command-line parser for the audit tool.

    Options: ``--json`` and ``--markdown`` (boolean format switches),
    ``--out PATH`` and ``--workspace PATH`` (optional paths, default None)
    and ``--limit N`` (optional int, default None).
    """
    cli = argparse.ArgumentParser(
        description="Git stash 출처 추적 진단 도구 (read-only). pop/drop 절대 금지.",
        prog="stash_audit.py",
    )

    # Output-format switches.
    for flag, dest, help_text in (
        ("--json", "output_json", "JSON 형식으로 출력"),
        ("--markdown", "output_markdown", "Markdown 형식으로 출력 (보고서용)"),
    ):
        cli.add_argument(flag, dest=dest, action="store_true", help=help_text)

    # Optional path arguments.
    for flag, dest, help_text in (
        ("--out", "out_path", "파일에 저장 (지정 없으면 stdout)"),
        ("--workspace", "workspace", "git workspace 경로 (기본: 현재 디렉토리)"),
    ):
        cli.add_argument(
            flag, dest=dest, metavar="PATH", default=None, help=help_text
        )

    cli.add_argument(
        "--limit",
        dest="limit",
        metavar="N",
        type=int,
        default=None,
        help="최대 N개만 분석 (기본: 무제한)",
    )
    return cli


def main() -> None:
    """CLI entry point: parse arguments, audit the stash list, emit a report.

    The report goes to stdout unless ``--out`` is given, in which case it is
    written to that file as UTF-8 (parent directories are created) and a
    confirmation line is printed to stderr.
    """
    args = build_parser().parse_args()

    # Repository directory: the resolved --workspace, else the current one.
    workspace = (
        str(Path(args.workspace).resolve()) if args.workspace else str(Path.cwd())
    )

    entries = parse_stash_list(cwd=workspace, limit=args.limit)
    summary = summarize(entries)

    # --json takes precedence over --markdown; the plain table is the default.
    if args.output_json:
        render = format_json
    elif args.output_markdown:
        render = format_markdown
    else:
        render = format_table
    output = render(entries, summary)

    if not args.out_path:
        print(output)
        return

    target = Path(args.out_path)
    target.parent.mkdir(parents=True, exist_ok=True)
    target.write_text(output, encoding="utf-8")
    print(f"[stash_audit] 결과 저장: {target}", file=sys.stderr)

# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
