"""회귀 테스트 — anu_v2.worktree_cleanup (task-2550+1 clean replacement).

PR #100 6 unresolved Gemini findings 의 corrected fix 회귀:
  - HIGH `task_id in headRefName` substring 오탐 차단 (boundary test 강제)
  - medium #1 `hashlib.sha256` 결정론 hash (Python `hash()` 비결정론 제거)
  - medium #2 `lsof +D` CWD 검사 추가 (pgrep -f argv-only 한계 보완)
  - medium #3 `is_safe_ignoring_apply` helper 회귀 (cleanup_candidates dry-run 가시성)

pytest 사용. 외부 부수효과(subprocess / file write / clock) 는 모두 fake callable 로 주입.

⚠️ 테스트 코드 내 "ghp_faketoken123abc" 등 raw token placeholder 는
   실제 토큰이 아닌 테스트 fake 값임 — leak detector 오탐 방지를 위해 명시.
"""

from __future__ import annotations

import json
import subprocess
import sys
from datetime import datetime, timezone
from pathlib import Path

# Put the workspace root on sys.path so the anu_v2 package can be imported absolutely.
# parents[2] is two directory levels above the directory that contains this file.
WORKSPACE_ROOT = Path(__file__).resolve().parents[2]
if str(WORKSPACE_ROOT) not in sys.path:
    # Insert at the front so this workspace copy is the first candidate during import lookup.
    sys.path.insert(0, str(WORKSPACE_ROOT))

from anu_v2.worktree_cleanup import (  # noqa: E402  # pyright: ignore[reportMissingImports]
    CleanupResult,
    SafetyConditionResult,
    WorktreeCandidate,
    WorktreeCleanup,
    _matches_task_id_strict,
    is_safe_ignoring_apply,
)

# ─── Shared constants / helpers ───────────────────────────────────────────────
# Default field values consumed by _make_candidate().
FAKE_SHA = "a" * 40  # 40-character placeholder used as the candidate's head_sha
FAKE_TASK_ID = "task-2550"
FAKE_BRANCH = "task/task-2550-dev5"
FAKE_WORKTREE_PATH = "/home/jay/workspace/.worktrees/task-2550-dev5"


def _fake_clock() -> datetime:
    """Return the fixed, timezone-aware instant 2026-05-12 12:00:00 UTC.

    Injected as the ``clock`` callable so tests never read the real time.
    """
    return datetime(year=2026, month=5, day=12, hour=12, minute=0, second=0, tzinfo=timezone.utc)


def _proc(returncode: int = 0, stdout: str = "", stderr: str = "") -> subprocess.CompletedProcess:
    """Build a ``CompletedProcess`` that stands in for a finished command.

    Args:
        returncode: Exit status to report.
        stdout: Captured standard output to report.
        stderr: Captured standard error to report.

    Returns:
        A ``subprocess.CompletedProcess`` whose ``args`` is an empty list.
    """
    return subprocess.CompletedProcess([], returncode, stdout, stderr)


def _noop_runner(*_args: object, **_kwargs: object) -> subprocess.CompletedProcess:
    """Subprocess-runner stand-in that ignores every argument and reports success.

    Returns:
        The result of ``_proc`` with return code 0 (empty stdout and stderr).
    """
    return _proc(returncode=0)


def _make_candidate(
    path: str = FAKE_WORKTREE_PATH,
    branch: str = FAKE_BRANCH,
    task_id: str | None = FAKE_TASK_ID,
    head_sha: str = FAKE_SHA,
) -> WorktreeCandidate:
    """Build a ``WorktreeCandidate``; omitted fields fall back to the module-level FAKE_* values.

    Args:
        path: Worktree path to store on the candidate.
        branch: Branch name to store on the candidate.
        task_id: Task identifier, or ``None``.
        head_sha: Commit SHA to store on the candidate.
    """
    candidate = WorktreeCandidate(
        head_sha=head_sha,
        task_id=task_id,
        branch=branch,
        path=path,
    )
    return candidate


# ═══════════════════════════════════════════════════════════════════════════
# HIGH fix: _matches_task_id_strict boundary 회귀 (PR #100 HIGH unresolved)
# ═══════════════════════════════════════════════════════════════════════════

def test_high_boundary_task25_does_not_match_task2550_headref() -> None:
    """The shorter id task-25 must not match a head ref naming task-2550 (core of the HIGH finding)."""
    head_ref = "task/task-2550-dev5"
    matched = _matches_task_id_strict(head_ref, "task-25")
    assert matched is False


def test_high_boundary_task2500_does_not_match_task25_headref() -> None:
    """The longer id task-2500 must not match a head ref naming task-25."""
    head_ref = "task/task-25-dev5"
    matched = _matches_task_id_strict(head_ref, "task-2500")
    assert matched is False


def test_high_boundary_task250_does_not_match_task2500_headref() -> None:
    """The id task-250 must not match a head ref naming task-2500."""
    head_ref = "task/task-2500-dev5"
    matched = _matches_task_id_strict(head_ref, "task-250")
    assert matched is False


def test_high_boundary_exact_match_passes() -> None:
    """Head refs containing exactly the requested task id still match (normal behaviour preserved)."""
    exact_pairs = (
        ("task/task-2550-dev5", "task-2550"),
        ("task/task-25-dev5", "task-25"),
        ("task/task-2500-dev5", "task-2500"),
    )
    for head_ref, task_id in exact_pairs:
        assert _matches_task_id_strict(head_ref, task_id) is True


def test_high_boundary_plus_suffix_match() -> None:
    """The id task-2550+1 matches the head ref task/task-2550+1-dev5."""
    head_ref = "task/task-2550+1-dev5"
    matched = _matches_task_id_strict(head_ref, "task-2550+1")
    assert matched is True


def test_high_boundary_task2550_does_not_match_task2550plus1_headref() -> None:
    """The id task-2550 must not match task/task-2550+1-dev5: ``+`` does not end the id.

    task-2550 and task-2550+1 are distinct tasks and must be kept strictly apart.
    """
    head_ref = "task/task-2550+1-dev5"
    matched = _matches_task_id_strict(head_ref, "task-2550")
    assert matched is False


def test_high_boundary_terminal_match_at_end_of_string() -> None:
    """The id still matches when it is the last thing in the head ref (no trailing suffix)."""
    task_id = "task-2550"
    for head_ref in ("refs/heads/task/task-2550", "task-2550"):
        assert _matches_task_id_strict(head_ref, task_id) is True


def test_high_boundary_invalid_task_id_format_returns_false() -> None:
    """Task ids without the ``task-`` prefix, including the empty string, never match."""
    head_ref = "task/task-2550-dev5"
    for malformed_id in ("2550", ""):
        assert _matches_task_id_strict(head_ref, malformed_id) is False


def test_high_boundary_empty_headref_returns_false() -> None:
    """An empty head ref never matches."""
    matched = _matches_task_id_strict("", "task-2550")
    assert matched is False


# ═══════════════════════════════════════════════════════════════════════════
# HIGH fix integration: check_safety_2_pr_merged substring 오탐 차단
# ═══════════════════════════════════════════════════════════════════════════

def test_safety_2_strict_match_rejects_substring_unrelated_pr(tmp_path: Path) -> None:
    """Checking task-25 must ignore a PR whose head ref is task/task-2550-dev5.

    Plain substring matching treats ``task-25`` as contained in
    ``task/task-2550-dev5``; that is the bug this test guards against.
    """
    listed_prs = [
        {"number": 100, "state": "MERGED", "headRefName": "task/task-2550-dev5"},
        {"number": 50, "state": "OPEN", "headRefName": "task/task-25-dev2"},
    ]
    pr_list_stdout = json.dumps(listed_prs)

    def runner_stub(args, **_kwargs):
        # Only a "pr ... list" command receives the canned JSON; anything else succeeds silently.
        is_pr_list = "pr" in args and "list" in args
        return _proc(0, pr_list_stdout if is_pr_list else "", "")

    cleanup = WorktreeCleanup(
        subprocess_runner=runner_stub,
        clock=_fake_clock,
        workspace_root=tmp_path,
    )
    # branch=None so that matching is driven by the task id
    # (per the original author's note this takes the --search path — not visible from here).
    outcome = cleanup.check_safety_2_pr_merged("task-25", branch=None)

    # The only PR that strictly matches task-25 is #50, which is OPEN, so the check must fail.
    # With substring matching, the MERGED task-2550 PR would have been picked up instead.
    assert outcome.passed is False
    # The state of the matching PR (#50, OPEN) is expected to show up in the detail text.
    assert "OPEN" in outcome.detail


def test_safety_2_strict_match_accepts_exact_task_id(tmp_path: Path) -> None:
    """A MERGED PR whose head ref names exactly the requested task id makes the check pass."""
    listed_prs = [
        {"number": 100, "state": "MERGED", "headRefName": "task/task-2550-dev5"},
    ]
    pr_list_stdout = json.dumps(listed_prs)

    def runner_stub(args, **_kwargs):
        # Only a "pr ... list" command receives the canned JSON; anything else succeeds silently.
        is_pr_list = "pr" in args and "list" in args
        return _proc(0, pr_list_stdout if is_pr_list else "", "")

    cleanup = WorktreeCleanup(
        subprocess_runner=runner_stub,
        clock=_fake_clock,
        workspace_root=tmp_path,
    )
    outcome = cleanup.check_safety_2_pr_merged("task-2550", branch=None)

    assert outcome.passed is True
    assert "MERGED" in outcome.detail


def test_safety_2_strict_match_isolates_task_2550_from_task_2550_plus_1(tmp_path: Path) -> None:
    """Checking task-2550 must not pick up the replacement PR task/task-2550+1-dev5.

    Replacement-chain rule: task-2550 and task-2550+1 have separate PRs.
    """
    listed_prs = [
        {"number": 200, "state": "MERGED", "headRefName": "task/task-2550+1-dev5"},
    ]
    pr_list_stdout = json.dumps(listed_prs)

    def runner_stub(args, **_kwargs):
        # Only a "pr ... list" command receives the canned JSON; anything else succeeds silently.
        is_pr_list = "pr" in args and "list" in args
        return _proc(0, pr_list_stdout if is_pr_list else "", "")

    cleanup = WorktreeCleanup(
        subprocess_runner=runner_stub,
        clock=_fake_clock,
        workspace_root=tmp_path,
    )
    # The only listed PR belongs to task-2550+1, so the task-2550 check has nothing merged to accept.
    outcome = cleanup.check_safety_2_pr_merged("task-2550", branch=None)
    assert outcome.passed is False


# ═══════════════════════════════════════════════════════════════════════════
# medium #1 fix: hashlib.sha256 결정론 hash 회귀
# ═══════════════════════════════════════════════════════════════════════════

def test_medium_1_log_filename_uses_deterministic_sha256(tmp_path: Path) -> None:
    """Logging the same path and timestamp twice must produce a single log file.

    Background from the original author: the filename hash used to be
    ``abs(hash(path)) % 10**8``, which depends on PYTHONHASHSEED, and was replaced
    by the first 8 hex characters of a SHA-256 digest of the path.

    NOTE(review): both calls run in this one process, and the ``[a-f0-9]{8}``
    pattern below also accepts eight decimal digits, so this test alone may not
    tell a SHA-256 prefix apart from an 8-digit ``hash()`` remainder.
    """
    import re as _re

    events_dir = tmp_path / "memory" / "events"
    ts = "2026-05-12T12:00:00+00:00"
    cleanup = WorktreeCleanup(
        subprocess_runner=_noop_runner,
        clock=_fake_clock,
        workspace_root=tmp_path,
    )
    cand = _make_candidate(path=str(tmp_path / "some-worktree"))

    # Same candidate, same reason, same timestamp: the second call should land on the same filename.
    for _ in range(2):
        cleanup._log_skipped(cand, "test_reason", ts)

    log_files = list(events_dir.glob("worktree-cleanup-skipped-*.json"))
    # One file is expected because the second write targets the same name as the first.
    assert len(log_files) == 1, (
        f"동일 path/ts 에서 결정론 hash 가 다르면 파일 충돌. got {[p.name for p in log_files]}"
    )

    # The filename must end in "-<8 lowercase hex characters>.json".
    name = log_files[0].name
    hash_suffix = _re.match(r"worktree-cleanup-skipped-.+-([a-f0-9]{8})\.json$", name)
    assert hash_suffix is not None, f"파일명이 sha256 hex 8자 hash 패턴이 아님: {name}"


def test_medium_1_different_paths_yield_different_hashes(tmp_path: Path) -> None:
    """Two different worktree paths logged at the same timestamp must yield two separate files."""
    events_dir = tmp_path / "memory" / "events"
    ts = "2026-05-12T12:00:00+00:00"
    cleanup = WorktreeCleanup(
        subprocess_runner=_noop_runner,
        clock=_fake_clock,
        workspace_root=tmp_path,
    )

    first = _make_candidate(path=str(tmp_path / "wt-a"))
    second = _make_candidate(path=str(tmp_path / "wt-b"))
    cleanup._log_skipped(first, "test_a", ts)
    cleanup._log_skipped(second, "test_b", ts)

    log_files = sorted(events_dir.glob("worktree-cleanup-skipped-*.json"))
    assert len(log_files) == 2, f"다른 path 는 다른 파일이어야 함: {[p.name for p in log_files]}"


# ═══════════════════════════════════════════════════════════════════════════
# medium #2 fix: safety_5 lsof +D CWD 검사 추가 회귀
# ═══════════════════════════════════════════════════════════════════════════

def test_medium_2_safety_5_lsof_match_fails(tmp_path: Path) -> None:
    """The in-use check must fail when lsof reports an open handle that pgrep did not see.

    Scenario: the pgrep command returns rc=1 (no match, the path is not in any argv),
    while lsof returns rc=0 with a row whose FD column is ``cwd`` for the worktree path.
    The worktree must be judged as in use.
    """
    lsof_header = "COMMAND  PID USER   FD   TYPE DEVICE SIZE/OFF NODE NAME\n"
    lsof_row = f"bash     999 jay  cwd    DIR   8,1     4096    1 {FAKE_WORKTREE_PATH}\n"
    lsof_output = lsof_header + lsof_row

    def runner_stub(args, **_kwargs):
        listing_worktrees = "worktree" in args and "list" in args
        if not listing_worktrees:
            if "pgrep" in args:
                return _proc(1, "", "")  # pgrep: nothing matched
            if args and args[0] == "lsof":
                return _proc(0, lsof_output, "")  # lsof: one open handle
        # "worktree list" and any other command succeed with empty output.
        return _proc(0, "", "")

    cleanup = WorktreeCleanup(
        subprocess_runner=runner_stub,
        clock=_fake_clock,
        workspace_root=tmp_path,
    )
    outcome = cleanup.check_safety_5_not_in_use(FAKE_WORKTREE_PATH)

    assert outcome.passed is False
    # The detail must mention lsof, an open file handle, or a cwd (case-insensitive).
    detail = outcome.detail.lower()
    assert any(marker in detail for marker in ("lsof", "file-handle", "cwd"))


def test_medium_2_safety_5_lsof_no_match_passes(tmp_path: Path) -> None:
    """The in-use check passes when neither pgrep nor lsof reports a match (the normal safe path)."""
    def runner_stub(args, **_kwargs):
        if "worktree" in args and "list" in args:
            return _proc(0, "", "")
        # pgrep and lsof both answer rc=1 ("no match"); every other command succeeds.
        reports_no_match = "pgrep" in args or (args and args[0] == "lsof")
        return _proc(1 if reports_no_match else 0, "", "")

    cleanup = WorktreeCleanup(
        subprocess_runner=runner_stub,
        clock=_fake_clock,
        workspace_root=tmp_path,
    )
    outcome = cleanup.check_safety_5_not_in_use(FAKE_WORKTREE_PATH)

    assert outcome.passed is True
    # The detail is expected to mention the lsof / file-handle stage among the passed checks.
    detail = outcome.detail.lower()
    assert any(marker in detail for marker in ("lsof", "fh"))


def test_medium_2_safety_5_lsof_unknown_rc_fails_conservatively(tmp_path: Path) -> None:
    """An lsof return code other than 0 or 1 (here rc=2, "permission denied") must fail the check."""
    def runner_stub(args, **_kwargs):
        listing_worktrees = "worktree" in args and "list" in args
        if not listing_worktrees:
            if "pgrep" in args:
                return _proc(1, "", "")  # pgrep: nothing matched
            if args and args[0] == "lsof":
                return _proc(2, "", "permission denied")  # lsof: unexpected return code
        # "worktree list" and any other command succeed with empty output.
        return _proc(0, "", "")

    cleanup = WorktreeCleanup(
        subprocess_runner=runner_stub,
        clock=_fake_clock,
        workspace_root=tmp_path,
    )
    outcome = cleanup.check_safety_5_not_in_use(FAKE_WORKTREE_PATH)

    assert outcome.passed is False
    assert "lsof rc=2" in outcome.detail


def test_medium_2_safety_5_pgrep_match_short_circuits_before_lsof(tmp_path: Path) -> None:
    """A pgrep match must fail the check immediately, without lsof ever being invoked."""
    lsof_invocations = []

    def runner_stub(args, **_kwargs):
        listing_worktrees = "worktree" in args and "list" in args
        if not listing_worktrees:
            if "pgrep" in args:
                return _proc(0, "1234\n", "")  # pgrep: one matching PID
            if args and args[0] == "lsof":
                # Record the call so the test can prove lsof was never reached.
                lsof_invocations.append(args)
                return _proc(1, "", "")
        # "worktree list" and any other command succeed with empty output.
        return _proc(0, "", "")

    cleanup = WorktreeCleanup(
        subprocess_runner=runner_stub,
        clock=_fake_clock,
        workspace_root=tmp_path,
    )
    outcome = cleanup.check_safety_5_not_in_use(FAKE_WORKTREE_PATH)

    assert outcome.passed is False
    assert len(lsof_invocations) == 0, "pgrep match 후에는 lsof 호출되면 안 됨"


# ═══════════════════════════════════════════════════════════════════════════
# medium #3 fix: is_safe_ignoring_apply helper 회귀 (cleanup_candidates 가시성)
# ═══════════════════════════════════════════════════════════════════════════

def test_medium_3_is_safe_ignoring_apply_true_when_only_apply_fails() -> None:
    """Conditions 1-5 pass and only condition 6 (apply_explicit) fails: the helper returns True.

    Per the original author, this is the exact boundary at which a dry-run should
    still surface the worktree in cleanup_candidates.
    """
    passing_checks = [
        SafetyConditionResult(condition=number, name=f"c{number}", passed=True, detail="ok")
        for number in range(1, 6)
    ]
    apply_check = SafetyConditionResult(
        condition=6, name="apply_explicit", passed=False, detail="dry-run"
    )
    dry_run_result = CleanupResult(
        worktree_path="/some/path",
        task_id="task-2550",
        safety_results=[*passing_checks, apply_check],
        all_safe=False,  # condition 6 failed, so the overall flag is False
        dirty=False,
        is_main=False,
        applied=False,
        skipped=True,
        skip_reason="dry-run",
        ts="2026-05-12T12:00:00+00:00",
    )
    assert is_safe_ignoring_apply(dry_run_result) is True


def test_medium_3_is_safe_ignoring_apply_false_when_1to5_fails() -> None:
    """If any of conditions 1-5 fails (here condition 2, pr_merged) the helper returns False."""
    # (condition number, name, passed, detail) for each of the six safety conditions.
    check_rows = (
        (1, "done_acked", True, "ok"),
        (2, "pr_merged", False, "not merged"),
        (3, "merge_done", True, "ok"),
        (4, "branch_in_main", True, "ok"),
        (5, "not_in_use", True, "ok"),
        (6, "apply_explicit", False, "dry-run"),
    )
    safety_results = [
        SafetyConditionResult(condition=number, name=label, passed=ok, detail=text)
        for number, label, ok, text in check_rows
    ]
    unsafe_result = CleanupResult(
        worktree_path="/some/path",
        task_id="task-2550",
        safety_results=safety_results,
        all_safe=False,
        dirty=False,
        is_main=False,
        applied=False,
        skipped=True,
        skip_reason="safety failed",
        ts="2026-05-12T12:00:00+00:00",
    )
    assert is_safe_ignoring_apply(unsafe_result) is False


def test_medium_3_is_safe_ignoring_apply_false_when_main_or_dirty() -> None:
    """A main or dirty worktree is never a candidate, even when all six conditions pass."""
    all_passed = [
        SafetyConditionResult(condition=number, name=f"c{number}", passed=True, detail="ok")
        for number in range(1, 7)
    ]

    def build_result(*, is_main: bool, dirty: bool, skip_reason: str) -> CleanupResult:
        # Both variants share the same all-passing safety list and differ only in the given flags.
        return CleanupResult(
            worktree_path="/some/path",
            task_id="task-2550",
            safety_results=all_passed,
            all_safe=True,
            dirty=dirty,
            is_main=is_main,
            applied=False,
            skipped=True,
            skip_reason=skip_reason,
            ts="2026-05-12",
        )

    main_result = build_result(is_main=True, dirty=False, skip_reason="main")
    assert is_safe_ignoring_apply(main_result) is False

    dirty_result = build_result(is_main=False, dirty=True, skip_reason="dirty")
    assert is_safe_ignoring_apply(dirty_result) is False


def test_medium_3_is_safe_ignoring_apply_false_when_safety_results_empty() -> None:
    """With no safety results at all (e.g. an early skip) the helper returns False."""
    no_checks = []
    skipped_result = CleanupResult(
        worktree_path="/some/path",
        task_id=None,
        safety_results=no_checks,
        all_safe=False,
        dirty=False,
        is_main=False,
        applied=False,
        skipped=True,
        skip_reason="task_id missing",
        ts="2026-05-12",
    )
    verdict = is_safe_ignoring_apply(skipped_result)
    assert verdict is False


# ═══════════════════════════════════════════════════════════════════════════
# concurrency / 동시 실행 (PYTHONHASHSEED 비결정론 회피 회귀)
# ═══════════════════════════════════════════════════════════════════════════

def test_concurrency_log_filename_stable_under_pythonhashseed(tmp_path: Path) -> None:
    """Check that two truncated SHA-256 digests of the same path string agree.

    Motivation from the original author: the built-in ``hash()`` depends on
    PYTHONHASHSEED, whereas SHA-256 does not.

    NOTE(review): both digests are computed with hashlib in this same process.
    WorktreeCleanup is not called and PYTHONHASHSEED is not varied, so this test
    does not by itself detect a regression of the log filename back to ``hash()``.
    """
    import hashlib as _hashlib

    path = str(tmp_path / "deterministic-wt")
    # Hash the identical UTF-8 bytes twice and keep the first 8 hex characters each time.
    prefixes = {
        _hashlib.sha256(path.encode("utf-8")).hexdigest()[:8]
        for _ in range(2)
    }
    assert len(prefixes) == 1, "sha256 은 동일 input 에 대해 항상 동일 output (결정론)"


# ═══════════════════════════════════════════════════════════════════════════
# 통합 회귀: HIGH 부분 일치 차단 — PR 매칭 BUG 재현/회복
# ═══════════════════════════════════════════════════════════════════════════

def test_pr100_high_unresolved_regression_bug_blocked(tmp_path: Path) -> None:
    """Reproduce the PR #100 HIGH scenario end to end.

    The PR list holds a MERGED PR for task-2550, an OPEN PR for task-25 and a
    MERGED PR for task-2500+1. With substring matching, a check for task-25
    would have latched onto the MERGED task-2550 PR and passed. With strict
    matching only PR #50 (OPEN) belongs to task-25, so the check must fail.
    """
    pr_response = json.dumps([
        {"number": 100, "state": "MERGED", "headRefName": "task/task-2550-dev5"},
        {"number": 50, "state": "OPEN", "headRefName": "task/task-25-dev3"},
        {"number": 99, "state": "MERGED", "headRefName": "task/task-2500+1-dev4"},
    ])

    def fake_runner(args, **_):
        # Only a "pr ... list" command receives the canned JSON; anything else succeeds silently.
        if "pr" in args and "list" in args:
            return _proc(0, pr_response, "")
        return _proc(0, "", "")

    cleanup = WorktreeCleanup(
        subprocess_runner=fake_runner,
        clock=_fake_clock,
        workspace_root=tmp_path,
    )
    # The only PR that strictly matches task-25 is #50 (OPEN), so the check must fail.
    result = cleanup.check_safety_2_pr_merged("task-25", branch=None)
    assert result.passed is False
    # Regression marker: the detail reports the OPEN state of the matching PR,
    # i.e. the MERGED state of the unrelated task-2550 PR was not adopted.
    assert "OPEN" in result.detail
    # NOTE(review): the former third assertion
    # (`"MERGED" not in result.detail or "OPEN" in result.detail`) was removed because it
    # could never fail once the assertion above holds. A real guard against the unrelated
    # PRs leaking into the detail needs the detail format, which is not visible from this
    # file (for example, asserting that PR numbers 100 and 99 are absent).