#!/usr/bin/env python3
"""
task-2700 regression test: local main ↔ origin/main divergence prevention.

벨레스 (QA/테스터 역할) — 8 검증 시나리오 + task-2699 fixture.
"""
from __future__ import annotations

import importlib.util
import json
import os
import subprocess
import sys
from pathlib import Path

import pytest

# ---------------------------------------------------------------------------
# sys.path 설정 — worktree 루트를 경로에 추가 (다른 regression 테스트 패턴 준수)
# ---------------------------------------------------------------------------
# Repository root: three directory levels above this file. It is moved to the
# very front of sys.path (dropping one earlier occurrence, if any) so that the
# `utils` imports below resolve against this checkout.
REPO = str(Path(__file__).resolve().parent.parent.parent)
try:
    sys.path.remove(REPO)
except ValueError:
    # REPO was not on sys.path yet; nothing to drop before inserting it.
    pass
sys.path.insert(0, REPO)

# ---------------------------------------------------------------------------
# 모듈 import
# ---------------------------------------------------------------------------
from utils.divergence_guard import (  # noqa: E402
    DivergenceResult,
    HoldDecision,
    measure_divergence,
    should_hold,
    DIVERGENCE_GATE_TASK_KINDS,
)
from utils.dirty_registry import (  # noqa: E402
    EXTERNAL_DIRTY_BLOCKER,
    OWN_DIRTY_FAIL,
    CLEAN,
    classify_blocker,
    collect_dirty,
    separate_dirty,
    snapshot_main_dirty,
    glob_match,
)
from utils.callback_cause_classifier import (  # noqa: E402
    NORMAL_CALLBACK_MISSING,
    FINISH_TASK_GIT_GATE_BLOCKED,
    FINISH_PROFILES,
    classify_callback_missing,
    resolve_finish_profile,
)


# worktree_manager는 scripts/__init__.py 없을 수 있으므로 importlib로 로드
def _load_worktree_manager():
    """Load ``scripts/worktree_manager.py`` under ``REPO`` as a module object.

    The file is executed directly from its path under the module name
    ``worktree_manager`` instead of going through a package import. Each call
    builds and executes a new module object; this function does not add it to
    ``sys.modules``.

    Returns:
        The executed module.
    """
    module_file = os.path.join(REPO, "scripts", "worktree_manager.py")
    module_spec = importlib.util.spec_from_file_location(
        "worktree_manager", module_file
    )
    loaded = importlib.util.module_from_spec(module_spec)
    module_spec.loader.exec_module(loaded)
    return loaded


# ---------------------------------------------------------------------------
# 헬퍼: hermetic divergent git repo 생성
# ---------------------------------------------------------------------------

# Git 환경을 완전히 격리하기 위한 공통 env
def _git_env() -> dict:
    """Build the environment used for the git subprocesses in this module.

    Starts from a copy of the current process environment and then:

    * removes repository-location overrides inherited from the caller
      (``GIT_DIR``, ``GIT_WORK_TREE``, ``GIT_INDEX_FILE``, ...). Git exports
      these while running hooks, and if they leaked through, commands issued
      with ``cwd=<fixture repo>`` would act on the caller's repository instead;
    * disables the global and the system configuration files;
    * pins author/committer identity so ``git commit`` needs no user config;
    * points ``HOME`` at ``/tmp`` to keep ``~/.gitconfig`` out of the picture.

    Returns:
        A new dict; ``os.environ`` itself is not modified.
    """
    env = os.environ.copy()

    # Inherited overrides that would redirect git away from the `cwd` repo.
    for inherited in (
        "GIT_DIR",
        "GIT_WORK_TREE",
        "GIT_INDEX_FILE",
        "GIT_COMMON_DIR",
        "GIT_OBJECT_DIRECTORY",
        "GIT_ALTERNATE_OBJECT_DIRECTORIES",
    ):
        env.pop(inherited, None)

    env.update(
        {
            "GIT_CONFIG_GLOBAL": "/dev/null",
            # System-wide gitconfig can change behavior just like the global one.
            "GIT_CONFIG_NOSYSTEM": "1",
            "GIT_AUTHOR_NAME": "tester",
            "GIT_AUTHOR_EMAIL": "t@t.test",
            "GIT_COMMITTER_NAME": "tester",
            "GIT_COMMITTER_EMAIL": "t@t.test",
            "HOME": "/tmp",
        }
    )
    return env


def _git(args: list[str], cwd: str, env: dict | None = None) -> str:
    """Run ``git <args>`` inside *cwd* and return its stripped stdout.

    Args:
        args: Arguments placed after the ``git`` executable name.
        cwd: Working directory for the subprocess.
        env: Environment for the subprocess. When omitted (or empty), the
            result of ``_git_env()`` is used.

    Returns:
        Captured stdout with surrounding whitespace removed.

    Raises:
        RuntimeError: The command exited with a non-zero status; the message
            carries the argument list and the stripped stderr.
    """
    if not env:
        env = _git_env()
    command = ["git"] + args
    completed = subprocess.run(
        command,
        cwd=cwd,
        env=env,
        text=True,
        capture_output=True,
        timeout=30,
    )
    if completed.returncode == 0:
        return completed.stdout.strip()
    raise RuntimeError(f"git {args} failed: {completed.stderr.strip()}")


def make_divergent_repo(
    tmp_path: Path,
    ahead: int = 0,
    behind: int = 0,
    extra_files: dict[str, str] | None = None,
) -> str:
    """Create a bare ``origin`` and a ``local`` clone with a chosen divergence.

    Three repositories are laid out under *tmp_path*: ``origin.git`` (bare),
    ``seed`` (used to push commits into origin) and ``local`` (the repository
    under test). After the call, ``local``'s ``main`` carries *ahead* commits
    that were never pushed, while ``origin/main`` carries *behind* commits
    that ``local`` has fetched but not merged.

    Args:
        tmp_path: Directory that will hold all three repositories.
        ahead: Number of unpushed commits to create in ``local``.
        behind: Number of commits pushed to origin after ``local`` was cloned.
        extra_files: ``{relative_path: content}`` written into the ``local``
            working tree without being added or committed.

    Returns:
        Path of the ``local`` repository as a string.
    """
    env = _git_env()
    workspace = str(tmp_path)

    def _commit_series(repo_dir: str, label: str, count: int) -> None:
        """Add ``count`` one-file commits named ``<label>_<i>.txt`` to repo_dir."""
        for idx in range(count):
            name = f"{label}_{idx}.txt"
            with open(os.path.join(repo_dir, name), "w") as fh:
                fh.write(f"{label} commit {idx}\n")
            _git(["add", name], cwd=repo_dir, env=env)
            _git(["commit", "-m", f"{label} commit {idx}"], cwd=repo_dir, env=env)

    # 1) Bare repository that plays the role of origin, initial branch "main".
    origin_dir = str(tmp_path / "origin.git")
    os.makedirs(origin_dir, exist_ok=True)
    _git(["init", "--bare", "-b", "main", origin_dir], cwd=workspace, env=env)

    # 2) Seed clone: create the base commit and publish it as origin/main.
    seed_dir = str(tmp_path / "seed")
    _git(["clone", origin_dir, seed_dir], cwd=workspace, env=env)
    # Force the branch name regardless of what the clone checked out.
    _git(["checkout", "-B", "main"], cwd=seed_dir, env=env)
    with open(os.path.join(seed_dir, "base.txt"), "w") as fh:
        fh.write("base commit\n")
    _git(["add", "base.txt"], cwd=seed_dir, env=env)
    _git(["commit", "-m", "base commit"], cwd=seed_dir, env=env)
    _git(["push", "origin", "main"], cwd=seed_dir, env=env)

    # 3) The repository under test is cloned while origin has only the base.
    local_dir = str(tmp_path / "local")
    _git(["clone", origin_dir, local_dir], cwd=workspace, env=env)
    _git(["checkout", "-B", "main"], cwd=local_dir, env=env)

    # 4) "behind": commits reach origin through seed after local was cloned.
    _commit_series(seed_dir, "behind", behind)
    if behind > 0:
        _git(["push", "origin", "main"], cwd=seed_dir, env=env)

    # 5) "ahead": commits made in local and deliberately not pushed.
    _commit_series(local_dir, "ahead", ahead)

    # 6) Refresh the origin/main tracking ref inside local.
    _git(["fetch", "origin"], cwd=local_dir, env=env)

    # 7) Uncommitted files that leave the local working tree dirty.
    for relpath, content in (extra_files or {}).items():
        target = os.path.join(local_dir, relpath)
        os.makedirs(os.path.dirname(target), exist_ok=True)
        with open(target, "w") as fh:
            fh.write(content)

    return local_dir


# ---------------------------------------------------------------------------
# 시나리오 1: DIVERGENCE_HOLD
# ---------------------------------------------------------------------------

def test_divergence_hold(tmp_path):
    """Scenario 1: ahead=2 / behind=1 must be measured as diverged and held.

    Expects ``measure_divergence`` to report the exact counts and
    ``should_hold("coding", ...)`` to answer hold=True with the
    ``DIVERGENCE_HOLD`` classification.
    """
    local_repo = make_divergent_repo(tmp_path, ahead=2, behind=1)

    # do_fetch=False: the fixture has already fetched origin.
    divergence = measure_divergence(
        local_repo, remote_ref="origin/main", do_fetch=False
    )

    assert divergence.measured is True, f"measured should be True, got error: {divergence.error}"
    assert divergence.ahead == 2, f"ahead expected 2, got {divergence.ahead}"
    assert divergence.behind == 1, f"behind expected 1, got {divergence.behind}"
    assert divergence.diverged is True

    verdict = should_hold("coding", divergence)
    assert verdict.hold is True
    assert verdict.classification == "DIVERGENCE_HOLD"


# ---------------------------------------------------------------------------
# 시나리오 2: CLEAN_PASS
# ---------------------------------------------------------------------------

def test_clean_pass(tmp_path):
    """Scenario 2: ahead=0 / behind=0 is not diverged and must pass.

    Expects ``should_hold("coding", ...)`` to answer hold=False with the
    ``CLEAN_PASS`` classification.
    """
    local_repo = make_divergent_repo(tmp_path, ahead=0, behind=0)

    divergence = measure_divergence(
        local_repo, remote_ref="origin/main", do_fetch=False
    )

    assert divergence.measured is True, f"measured should be True, got error: {divergence.error}"
    assert divergence.ahead == 0
    assert divergence.behind == 0
    assert divergence.diverged is False

    verdict = should_hold("coding", divergence)
    assert verdict.hold is False
    assert verdict.classification == "CLEAN_PASS"


# ---------------------------------------------------------------------------
# 시나리오 3a: spawn base stale fail
# ---------------------------------------------------------------------------

def test_spawn_base_stale_fail(tmp_path):
    """Scenario 3a: a worktree-base marker with a different SHA is stale.

    Writes ``task-2700.worktree-base.json`` containing a forged ``base_sha``
    and expects ``verify_spawn_base`` to return ok=False and stale=True.
    """
    wm = _load_worktree_manager()

    repo = make_divergent_repo(tmp_path, ahead=0, behind=0)

    # Real origin/main SHA, resolved through the shared helper (which applies
    # a timeout and raises on failure).
    actual_sha = _git(["rev-parse", "origin/main"], cwd=repo)

    # The forged marker SHA must really differ from the actual one; otherwise
    # this test would not be exercising the stale path at all.
    fake_sha = "a" * 40
    assert fake_sha != actual_sha, "forged SHA collides with the real origin/main SHA"

    events_dir = str(tmp_path / "events")
    os.makedirs(events_dir, exist_ok=True)
    marker_path = os.path.join(events_dir, "task-2700.worktree-base.json")
    with open(marker_path, "w") as f:
        json.dump({"base_sha": fake_sha}, f)

    result = wm.verify_spawn_base(
        repo, "task-2700", "dev6",
        base_ref="origin/main",
        events_dir=events_dir,
    )

    assert result["ok"] is False, f"Expected ok=False, got {result}"
    assert result["stale"] is True, f"Expected stale=True, got {result}"


# ---------------------------------------------------------------------------
# 시나리오 3b: spawn base match pass
# ---------------------------------------------------------------------------

def test_spawn_base_match_pass(tmp_path):
    """Scenario 3b: a worktree-base marker with the current SHA passes.

    Writes ``task-2700.worktree-base.json`` containing the real
    ``origin/main`` SHA and expects ``verify_spawn_base`` to return ok=True
    and stale=False.
    """
    wm = _load_worktree_manager()

    repo = make_divergent_repo(tmp_path, ahead=0, behind=0)

    # Resolve origin/main through the shared helper (timeout + raise on
    # failure) instead of a hand-rolled subprocess call.
    actual_sha = _git(["rev-parse", "origin/main"], cwd=repo)

    # Record a marker carrying exactly that SHA.
    events_dir = str(tmp_path / "events")
    os.makedirs(events_dir, exist_ok=True)
    marker_path = os.path.join(events_dir, "task-2700.worktree-base.json")
    with open(marker_path, "w") as f:
        json.dump({"base_sha": actual_sha}, f)

    result = wm.verify_spawn_base(
        repo, "task-2700", "dev6",
        base_ref="origin/main",
        events_dir=events_dir,
    )

    assert result["ok"] is True, f"Expected ok=True, got {result}"
    assert result["stale"] is False


# ---------------------------------------------------------------------------
# 시나리오 4: EXTERNAL_DIRTY_BLOCKER
# ---------------------------------------------------------------------------

def test_external_dirty_blocker():
    """Scenario 4: dirty paths disjoint from expected_files are external.

    Expects ``classify_blocker`` to return ``EXTERNAL_DIRTY_BLOCKER`` with an
    empty ``own_dirty`` list and both dirty paths under ``unrelated_dirty``.
    """
    outcome = classify_blocker(
        ["utils/divergence_guard.py"],
        ["scripts/other.py", "memory/x.json"],
    )

    assert outcome["classification"] == EXTERNAL_DIRTY_BLOCKER
    assert outcome["own_dirty"] == []
    assert len(outcome["unrelated_dirty"]) == 2
    for dirty in ("scripts/other.py", "memory/x.json"):
        assert dirty in outcome["unrelated_dirty"]


# ---------------------------------------------------------------------------
# 시나리오 5: OWN_DIRTY_FAIL
# ---------------------------------------------------------------------------

def test_own_dirty_fail():
    """Scenario 5: a dirty path matched by the ``utils/**`` pattern is "own".

    With ``utils/divergence_guard.py`` and ``scripts/other.py`` dirty, the
    test expects ``OWN_DIRTY_FAIL``, a ``utils/`` entry in ``own_dirty`` and
    ``scripts/other.py`` in ``unrelated_dirty``.
    """
    patterns = ["utils/**"]
    dirty = ["utils/divergence_guard.py", "scripts/other.py"]

    outcome = classify_blocker(patterns, dirty)

    assert outcome["classification"] == OWN_DIRTY_FAIL
    # The utils/ path must be attributed to this task ...
    assert any("utils/" in p for p in outcome["own_dirty"]), \
        f"own_dirty should contain utils/..., got {outcome['own_dirty']}"
    # ... and the scripts/ path must remain unrelated.
    assert "scripts/other.py" in outcome["unrelated_dirty"], \
        f"unrelated_dirty should contain scripts/other.py, got {outcome['unrelated_dirty']}"


# ---------------------------------------------------------------------------
# 시나리오 6a: FINISH_TASK_GIT_GATE_BLOCKED
# ---------------------------------------------------------------------------

def test_callback_cause_git_gate_blocked():
    """Scenario 6a: a blocked git gate with no done file is the gate's fault.

    Expects ``classify_callback_missing`` to return the
    ``FINISH_TASK_GIT_GATE_BLOCKED`` cause and to echo the blocker
    classification back as ``sub_cause``.
    """
    inputs = {
        "done_exists": False,
        "git_gate_blocked": True,
        "blocker_classification": "EXTERNAL_DIRTY_BLOCKER",
    }

    verdict = classify_callback_missing(**inputs)

    assert verdict["cause"] == FINISH_TASK_GIT_GATE_BLOCKED
    assert verdict["sub_cause"] == "EXTERNAL_DIRTY_BLOCKER"


# ---------------------------------------------------------------------------
# 시나리오 6b: NORMAL_CALLBACK_MISSING
# ---------------------------------------------------------------------------

def test_callback_cause_normal_missing():
    """Scenario 6b: done file present and gate not blocked is the normal case.

    Expects ``NORMAL_CALLBACK_MISSING``, and additionally checks two finish
    profiles: ``read_only_watcher`` has ``create_done`` False and ``code`` has
    ``git_gate`` True.
    """
    verdict = classify_callback_missing(done_exists=True, git_gate_blocked=False)
    assert verdict["cause"] == NORMAL_CALLBACK_MISSING

    # Finish-profile lookups.
    watcher_profile = resolve_finish_profile("read_only_watcher")
    assert watcher_profile["create_done"] is False

    coding_profile = resolve_finish_profile("code")
    assert coding_profile["git_gate"] is True


# ---------------------------------------------------------------------------
# 시나리오 7: task-2699 fixture — divergence HOLD + EXTERNAL_DIRTY_BLOCKER 동시 검증
# ---------------------------------------------------------------------------

def test_task_2699_fixture(tmp_path):
    """Scenario 7: replay of the task-2699 situation (diverged and dirty).

    The fixture is ahead=3 / behind=2 with three uncommitted files. Expected:

    - ``measure_divergence`` reports diverged=True with those counts;
    - ``should_hold("coding", ...)`` yields ``DIVERGENCE_HOLD``;
    - ``classify_blocker`` with task-2699's expected pattern yields
      ``EXTERNAL_DIRTY_BLOCKER`` because none of the dirty paths match it.
    """
    dirty_files = {
        "scripts/unrelated_a.py": "x\n",
        "memory/specs/unrelated_b.md": "y\n",
        "utils/replacement_pr_runner.py": "z\n",
    }
    local_repo = make_divergent_repo(
        tmp_path, ahead=3, behind=2, extra_files=dirty_files
    )

    # --- divergence ---
    divergence = measure_divergence(
        local_repo, remote_ref="origin/main", do_fetch=False
    )

    assert divergence.measured is True, f"measure failed: {divergence.error}"
    assert divergence.ahead == 3
    assert divergence.behind == 2
    assert divergence.diverged is True

    verdict = should_hold("coding", divergence)
    assert verdict.hold is True
    assert verdict.classification == "DIVERGENCE_HOLD", \
        f"Expected DIVERGENCE_HOLD, got {verdict.classification}"

    # --- external dirty blocker ---
    # task-2699 only expected anu_v2/owner_trigger*.py, which overlaps with
    # none of the dirty files created above.
    expected_patterns = ["anu_v2/owner_trigger*.py"]
    dirty_paths = list(dirty_files)

    blocker = classify_blocker(expected_patterns, dirty_paths)

    assert blocker["classification"] == EXTERNAL_DIRTY_BLOCKER, \
        (f"Expected EXTERNAL_DIRTY_BLOCKER, got {blocker['classification']}. "
         f"own_dirty={blocker['own_dirty']}, "
         f"unrelated_dirty={blocker['unrelated_dirty']}")
    assert blocker["own_dirty"] == []
    assert len(blocker["unrelated_dirty"]) == len(dirty_paths)


# ---------------------------------------------------------------------------
# 시나리오 8: JSONL registry
# ---------------------------------------------------------------------------

def test_registry_jsonl(tmp_path):
    """Scenario 8: ``snapshot_main_dirty`` writes one JSON record per line.

    With two uncommitted files in the working tree, the test expects at least
    one record, a line count equal to the reported ``count``, the required
    keys on every record, the phase/task values passed in, and at least one
    record whose ``mtime`` is a float.
    """
    dirty_files = {
        "scripts/foo.py": "a\n",
        "utils/bar.py": "b\n",
    }
    local_repo = make_divergent_repo(
        tmp_path, ahead=0, behind=0, extra_files=dirty_files
    )
    registry_path = str(tmp_path / "reg.jsonl")

    snap = snapshot_main_dirty(
        local_repo,
        registry_path,
        phase="dispatch",
        task_id="task-2700",
    )

    assert snap["count"] >= 1, "At least one dirty record expected"
    assert os.path.exists(registry_path), "registry JSONL should be created"

    # One JSON document per line.
    raw_lines = Path(registry_path).read_text(encoding="utf-8").strip().splitlines()
    assert len(raw_lines) == snap["count"], \
        f"Line count {len(raw_lines)} != snap count {snap['count']}"

    required_keys = {"path", "status", "mtime", "owner_task", "phase", "ts"}
    float_mtime_found = False

    for raw in raw_lines:
        record = json.loads(raw)
        for key in required_keys:
            assert key in record, f"Key '{key}' missing in registry record: {record}"
        assert record["phase"] == "dispatch"
        # NOTE(review): "task_id" is not listed in required_keys, so a record
        # lacking it fails here with KeyError rather than an assertion
        # message -- confirm the intended record schema.
        assert record["task_id"] == "task-2700"
        # None is not a float, so a single isinstance check is sufficient.
        float_mtime_found = float_mtime_found or isinstance(record["mtime"], float)

    assert float_mtime_found, \
        "At least one record should have mtime as float"


# ---------------------------------------------------------------------------
# 보너스 1: MEASUREMENT_FAILED_HOLD (fail-closed)
# ---------------------------------------------------------------------------

def test_measurement_failed_fail_closed(tmp_path):
    """Bonus 1: a failed measurement must hold when fail_closed=True.

    ``measure_divergence`` is pointed at a freshly created empty directory;
    the test expects measured=False and, from ``should_hold``, hold=True with
    the ``MEASUREMENT_FAILED_HOLD`` classification.
    """
    # A plain directory with no repository initialised in it.
    not_a_repo = str(tmp_path / "not_a_repo")
    os.makedirs(not_a_repo)

    divergence = measure_divergence(
        not_a_repo, remote_ref="origin/main", do_fetch=False
    )
    assert divergence.measured is False

    verdict = should_hold("coding", divergence, fail_closed=True)
    assert verdict.hold is True
    assert verdict.classification == "MEASUREMENT_FAILED_HOLD"


# ---------------------------------------------------------------------------
# 보너스 2: NON_GATED_KIND_PASS
# ---------------------------------------------------------------------------

def test_non_gated_kind_pass(tmp_path):
    """Bonus 2: task_kind ``docs`` is not held even on a diverged repo.

    Expects hold=False with the ``NON_GATED_KIND_PASS`` classification.
    """
    local_repo = make_divergent_repo(tmp_path, ahead=2, behind=1)
    divergence = measure_divergence(
        local_repo, remote_ref="origin/main", do_fetch=False
    )
    # The repository itself is diverged; only the task kind exempts it.
    assert divergence.diverged is True

    verdict = should_hold("docs", divergence)
    assert verdict.hold is False
    assert verdict.classification == "NON_GATED_KIND_PASS"


# ---------------------------------------------------------------------------
# 보너스 3: CLI bypass flag(--fail-open) 제거 확인 (task-2700 doctrine: bypass 금지)
# ---------------------------------------------------------------------------

def test_cli_rejects_fail_open_bypass_flag(tmp_path):
    """Bonus 3: the divergence_guard CLI must reject ``--fail-open``.

    task-2700 doctrine forbids any flag that bypasses the divergence HOLD, so
    the flag has to be refused as an unknown argument (exit status 2) rather
    than accepted.

    Exit status 2 on its own is ambiguous: the interpreter also exits with 2
    when the script file cannot be opened, and argparse uses 2 for every
    usage error. The test therefore also checks that the CLI file exists and
    that the error output names the rejected flag.
    """
    repo = make_divergent_repo(tmp_path, ahead=1, behind=1)
    cli = os.path.join(REPO, "utils", "divergence_guard.py")
    assert os.path.isfile(cli), f"CLI script not found: {cli}"

    proc = subprocess.run(
        [sys.executable, cli, "--repo-root", repo, "--task-id", "task-2700",
         "--task-kind", "coding", "--fail-open"],
        capture_output=True, text=True, env=_git_env(),
        timeout=60,  # a hung CLI must fail the test, not stall the suite
    )
    # argparse rejects the unknown argument -> returncode 2 (not 3 = HOLD).
    assert proc.returncode == 2, f"--fail-open 이 수용됨(bypass 가능) returncode={proc.returncode}"
    # The rejection must be about this flag, not some unrelated usage error.
    assert "--fail-open" in proc.stderr, \
        f"exit 2 was not caused by --fail-open being rejected: {proc.stderr}"


def test_cli_holds_on_diverged_repo(tmp_path):
    """The divergence_guard CLI holds (exit status 3) on a diverged repo.

    Also expects stdout to be a JSON document with ``hold`` true and the
    ``DIVERGENCE_HOLD`` classification.
    """
    local_repo = make_divergent_repo(tmp_path, ahead=2, behind=3)
    cli = os.path.join(REPO, "utils", "divergence_guard.py")
    command = [
        sys.executable,
        cli,
        "--repo-root", local_repo,
        "--task-id", "task-2700",
        "--task-kind", "coding",
    ]

    proc = subprocess.run(command, capture_output=True, text=True, env=_git_env())

    assert proc.returncode == 3, f"diverged repo HOLD(exit3) 기대, 실제 {proc.returncode}: {proc.stderr}"
    report = json.loads(proc.stdout)
    assert report["hold"] is True
    assert report["classification"] == "DIVERGENCE_HOLD"