"""utils/lifecycle_reconciliation_manager.py — task-2518 P0.

회장 명시 요구사항:
  - bot session ↔ task lifecycle 결합 제거
  - GitHub/CI/smoke evidence를 source-of-truth로 사용하는 idempotent reconcile state machine
  - manual .done 위장 차단 (evidence 없으면 RuntimeError)
  - dry-run 기본 (--apply 없이 side effect 0)

7 Lifecycle States:
  RUNNING                 — task-timer running, 작업 진행 중
  PR_OPEN                 — PR 생성됨, 머지 대기
  MERGED_PENDING_RECONCILE — PR merged, finalize 누락 (회장 §1 사례)
  RECONCILING             — reconcile 진행 중
  FINALIZED               — .done.acked + .merge-done + timer end 모두 정상
  STUCK_NEEDS_RECONCILE   — stuck 자동 감지
  ESCALATED               — Critical 7종 발동

8 Stuck Cases (회장 §1~7 + Telegram cut-off):
  TIMER_RUNNING_BUT_PR_MERGED          — 회장 §2
  PR_MERGED_BUT_DONE_MISSING           — 회장 §1
  MERGE_COMMIT_BUT_MERGE_DONE_MISSING  — 회장 §1
  CI_PASS_BUT_NOT_FINALIZED
  TELEGRAM_REPLY_CUT_OFF               — cron history truncation
  BOT_SESSION_ENDED_BUT_TASK_OK        — 회장 §7
  FINISH_TASK_INTERRUPTED              — 회장 §3
  STALE_ESCALATE_MARKER                — 회장 §4

Evidence priority (회장 명시):
  PR state > mergeCommit > origin/main 포함 > CI > smoke > timer > file marker
"""
from __future__ import annotations

import argparse
import json
import logging
import subprocess
import sys
import uuid
from dataclasses import dataclass, field
from datetime import datetime, timezone
from enum import Enum
from pathlib import Path
from typing import Any, Callable, Optional

# CLI 직접 실행 시 패키지 루트를 sys.path에 추가
_HERE = Path(__file__).resolve().parent.parent  # utils/ → worktree root
if str(_HERE) not in sys.path:
    sys.path.insert(0, str(_HERE))

from utils.canonical_workspace_resolver import (  # noqa: E402  # pyright: ignore[reportMissingImports]
    CanonicalWorkspace,
    resolve_canonical_workspace,
)
from utils.automation_contracts import (  # noqa: E402  # pyright: ignore[reportMissingImports]
    AutomationDecision,
    CriticalEscalationType,
    EscalationPacket,
    SmokeResult,
)

logger = logging.getLogger(__name__)

# ---------------------------------------------------------------------------
# Type aliases
# ---------------------------------------------------------------------------
RunnerType = Callable[..., subprocess.CompletedProcess]

_DEFAULT_WORKSPACE = Path("/home/jay/workspace")
_DEFAULT_CRON_HISTORY_DIR = Path("/home/jay/.cokacdir/schedule_history")
_EVENTS_DIR_NAME = "memory/events"
_TIMERS_FILE_NAME = "memory/task-timers.json"

# Telegram cut-off heuristic: response near this byte count is suspicious
_TELEGRAM_TRUNCATION_THRESHOLD_BYTES = 4000


# ---------------------------------------------------------------------------
# Enums
# ---------------------------------------------------------------------------

class LifecycleState(str, Enum):
    """7 lifecycle states (회장 명시 정확 매칭)."""

    RUNNING = "RUNNING"
    PR_OPEN = "PR_OPEN"
    MERGED_PENDING_RECONCILE = "MERGED_PENDING_RECONCILE"
    RECONCILING = "RECONCILING"
    FINALIZED = "FINALIZED"
    STUCK_NEEDS_RECONCILE = "STUCK_NEEDS_RECONCILE"
    ESCALATED = "ESCALATED"


class StuckReason(str, Enum):
    """8 stuck cases (회장 §1~7 + Telegram cut-off)."""

    TIMER_RUNNING_BUT_PR_MERGED = "TIMER_RUNNING_BUT_PR_MERGED"
    PR_MERGED_BUT_DONE_MISSING = "PR_MERGED_BUT_DONE_MISSING"
    MERGE_COMMIT_BUT_MERGE_DONE_MISSING = "MERGE_COMMIT_BUT_MERGE_DONE_MISSING"
    CI_PASS_BUT_NOT_FINALIZED = "CI_PASS_BUT_NOT_FINALIZED"
    TELEGRAM_REPLY_CUT_OFF = "TELEGRAM_REPLY_CUT_OFF"
    BOT_SESSION_ENDED_BUT_TASK_OK = "BOT_SESSION_ENDED_BUT_TASK_OK"
    FINISH_TASK_INTERRUPTED = "FINISH_TASK_INTERRUPTED"
    STALE_ESCALATE_MARKER = "STALE_ESCALATE_MARKER"


# ---------------------------------------------------------------------------
# Dataclasses
# ---------------------------------------------------------------------------

@dataclass
class LifecycleEvidence:
    """모든 source-of-truth 증거를 모은 unified envelope."""

    task_id: str
    # GitHub upstream
    pr_number: Optional[int]
    pr_state: Optional[str]              # OPEN / CLOSED / MERGED
    merge_commit: Optional[str]
    merged_into_main: bool               # origin/main에 mergeCommit 포함 여부
    ci_status: Optional[str]             # SUCCESS / FAILURE / PENDING / null
    # 후속 evidence
    smoke_status: Optional[str]          # PASS / FAIL / SKIPPED / null
    # task-timer
    timer_status: Optional[str]          # running / completed / null
    timer_end_time: Optional[str]
    # file marker (derived; 우선순위 가장 낮음)
    has_done: bool
    has_done_acked: bool
    has_merge_done: bool
    has_qc_result: bool
    has_followup: bool
    has_escalate_marker: bool
    escalate_marker_age_minutes: Optional[float]
    # cron history 기반
    telegram_reply_truncated: bool       # cron history 마지막 레코드 truncate 의심
    bot_session_status: Optional[str]    # cron status: ok/cancelled/error/null
    # finish-task 흔적
    worktree_exists: bool
    branch_pushed_to_remote: bool


@dataclass
class StuckCase:
    reason: StuckReason
    detail: str


@dataclass
class LifecycleReport:
    """reconcile 결과 envelope."""

    task_id: str
    state: LifecycleState
    stuck_cases: list[StuckCase] = field(default_factory=list)
    evidence: Optional[LifecycleEvidence] = None
    actions_taken: list[str] = field(default_factory=list)
    actions_planned: list[str] = field(default_factory=list)
    dry_run: bool = True
    timestamp: str = ""
    reconcile_run_id: str = ""           # uuid4 hex
    backfill_metadata: dict[str, Any] = field(default_factory=dict)

    def evidence_to_dict(self) -> dict:
        """evidence를 dict로 직렬화 (EscalationPacket 연동 등에서 사용)."""
        if self.evidence is None:
            return {}
        return {k: v for k, v in self.evidence.__dict__.items()}

    def to_dict(self) -> dict:
        return {
            "task_id": self.task_id,
            "state": self.state.value,
            "stuck_cases": [
                {"reason": sc.reason.value, "detail": sc.detail}
                for sc in self.stuck_cases
            ],
            "evidence": self.evidence_to_dict(),
            "actions_taken": self.actions_taken,
            "actions_planned": self.actions_planned,
            "dry_run": self.dry_run,
            "timestamp": self.timestamp,
            "reconcile_run_id": self.reconcile_run_id,
            "backfill_metadata": self.backfill_metadata,
        }

    def to_json(self) -> str:
        return json.dumps(self.to_dict(), indent=2, ensure_ascii=False)


# ---------------------------------------------------------------------------
# Internal helpers
# ---------------------------------------------------------------------------

def _default_runner(
    args: list[str],
    *,
    cwd: Optional[str] = None,
) -> subprocess.CompletedProcess:
    return subprocess.run(
        args,
        cwd=cwd,
        capture_output=True,
        text=True,
        timeout=30,
    )


def _run(
    args: list[str],
    *,
    cwd: Optional[str | Path] = None,
    runner: Optional[RunnerType] = None,
) -> subprocess.CompletedProcess:
    fn = runner if runner is not None else _default_runner
    return fn(args, cwd=str(cwd) if cwd is not None else None)


def _now_iso() -> str:
    return datetime.now(timezone.utc).isoformat()


def _events_dir(workspace_root: Path) -> Path:
    return workspace_root / _EVENTS_DIR_NAME


def _marker_path(workspace_root: Path, task_id: str, suffix: str) -> Path:
    return _events_dir(workspace_root) / f"{task_id}.{suffix}"


def _default_workspace_root() -> Path:
    return _DEFAULT_WORKSPACE


def _resolve_workspace(task_id: str, *, runner: Optional[RunnerType] = None) -> CanonicalWorkspace:
    """Resolve canonical workspace for the task (delegates to canonical_workspace_resolver)."""
    try:
        return resolve_canonical_workspace(task_id, fetch=False, runner=runner)
    except Exception:
        # fallback: build minimal workspace stub pointing to default workspace
        return CanonicalWorkspace(
            task_id=task_id,
            workspace_root=_DEFAULT_WORKSPACE,
            worktree_path=_DEFAULT_WORKSPACE / ".worktrees" / task_id,
            branch_name=f"task/{task_id}",
            main_head_sha="",
            base_sha="",
            cwd=_DEFAULT_WORKSPACE,
            is_main=True,
            is_clean=True,
        )


# ---------------------------------------------------------------------------
# Evidence collection helpers
# ---------------------------------------------------------------------------

def _gather_pr_info(
    task_id: str,
    *,
    runner: Optional[RunnerType] = None,
    workspace_root: Optional[Path] = None,
    pr_lookup: Optional[Callable[[str], dict]] = None,
) -> dict:
    """Fetch PR info via gh CLI or injected pr_lookup."""
    if pr_lookup is not None:
        try:
            return pr_lookup(task_id)
        except Exception as exc:
            logger.warning("pr_lookup failed for %s: %s", task_id, exc)
            return {}

    cwd = str(workspace_root or _default_workspace_root())
    try:
        r = _run(
            [
                "gh", "pr", "list",
                "--search", f"head:task/{task_id}",
                "--state", "all",
                "--json", "number,state,mergeCommit",
                "--limit", "5",
            ],
            cwd=cwd,
            runner=runner,
        )
        if r.returncode != 0 or not r.stdout.strip():
            return {}
        items = json.loads(r.stdout)
        if not items:
            return {}
        # Prefer MERGED, then OPEN, then first
        for item in items:
            if item.get("state") == "MERGED":
                return item
        return items[0]
    except Exception as exc:
        logger.warning("gh pr list failed for %s: %s", task_id, exc)
        return {}


def _check_merged_into_main(
    merge_commit: str,
    *,
    runner: Optional[RunnerType] = None,
    workspace_root: Optional[Path] = None,
) -> bool:
    """Check if merge_commit is an ancestor of origin/main."""
    if not merge_commit:
        return False
    cwd = str(workspace_root or _default_workspace_root())
    try:
        r = _run(
            ["git", "merge-base", "--is-ancestor", merge_commit, "origin/main"],
            cwd=cwd,
            runner=runner,
        )
        return r.returncode == 0
    except Exception as exc:
        logger.warning("merge-base check failed for %s: %s", merge_commit, exc)
        return False


def _gather_ci_status(
    pr_number: int,
    *,
    runner: Optional[RunnerType] = None,
    workspace_root: Optional[Path] = None,
) -> Optional[str]:
    """Fetch CI rollup status via gh pr view --json statusCheckRollup."""
    cwd = str(workspace_root or _default_workspace_root())
    try:
        r = _run(
            [
                "gh", "pr", "view", str(pr_number),
                "--json", "statusCheckRollup",
            ],
            cwd=cwd,
            runner=runner,
        )
        if r.returncode != 0 or not r.stdout.strip():
            return None
        data = json.loads(r.stdout)
        rollup = data.get("statusCheckRollup") or []
        if not rollup:
            return None
        # Aggregate: all SUCCESS → SUCCESS, any FAILURE → FAILURE, else PENDING
        states = {item.get("state") or item.get("conclusion") or "" for item in rollup}
        if "FAILURE" in states or "FAILED" in states:
            return "FAILURE"
        if all(s in {"SUCCESS", "COMPLETED"} for s in states if s):
            return "SUCCESS"
        return "PENDING"
    except Exception as exc:
        logger.warning("CI status fetch failed for PR %s: %s", pr_number, exc)
        return None


def _gather_timer_info(
    task_id: str,
    *,
    workspace_root: Optional[Path] = None,
    timer_loader: Optional[Callable[[str], dict]] = None,
) -> dict:
    """Load timer info from task-timers.json or injected timer_loader."""
    if timer_loader is not None:
        try:
            return timer_loader(task_id)
        except Exception as exc:
            logger.warning("timer_loader failed for %s: %s", task_id, exc)
            return {}

    timers_file = (workspace_root or _default_workspace_root()) / _TIMERS_FILE_NAME
    try:
        if not timers_file.exists():
            return {}
        raw = timers_file.read_text(encoding="utf-8")
        data = json.loads(raw)
        tasks = data.get("tasks", {})
        return tasks.get(task_id, {})
    except Exception as exc:
        logger.warning("task-timers.json read failed for %s: %s", task_id, exc)
        return {}


def _gather_file_markers(
    task_id: str,
    *,
    workspace_root: Optional[Path] = None,
) -> dict:
    """Check file markers in memory/events/."""
    wd = workspace_root or _default_workspace_root()
    ev = _events_dir(wd)

    def exists(suffix: str) -> bool:
        return (ev / f"{task_id}.{suffix}").exists()

    has_escalate = exists("done.escalated")
    escalate_age: Optional[float] = None
    if has_escalate:
        try:
            mtime = (ev / f"{task_id}.done.escalated").stat().st_mtime
            age_secs = datetime.now(timezone.utc).timestamp() - mtime
            escalate_age = age_secs / 60.0
        except Exception:
            escalate_age = None

    return {
        "has_done": exists("done"),
        "has_done_acked": exists("done.acked"),
        "has_merge_done": exists("merge-done"),
        "has_qc_result": exists("qc-result"),
        "has_followup": exists("followup.txt"),
        "has_escalate_marker": has_escalate,
        "escalate_marker_age_minutes": escalate_age,
    }


def _is_truncated_response(text: str) -> bool:
    """Heuristic: response is likely truncated if it ends abruptly."""
    if not text:
        return False
    stripped = text.rstrip()
    # Near max bytes and ends with incomplete sentence / word
    if len(text.encode("utf-8")) >= _TELEGRAM_TRUNCATION_THRESHOLD_BYTES:
        return True
    # Ends with Korean syllable mid-word (hangul range U+AC00–U+D7A3)
    if stripped and "가" <= stripped[-1] <= "힣":
        return True
    # Ends with opening code block marker without closing
    if "```" in stripped:
        count = stripped.count("```")
        if count % 2 != 0:
            return True
    return False


def _gather_cron_history(
    task_id: str,
    *,
    cron_history_dir: Optional[Path] = None,
) -> dict:
    """Scan cron history logs to detect Telegram cut-off and bot session status."""
    history_dir = cron_history_dir or _DEFAULT_CRON_HISTORY_DIR
    result = {
        "telegram_reply_truncated": False,
        "bot_session_status": None,
    }

    try:
        if not history_dir.exists():
            return result

        # Find log files whose last record has task_id in prompt
        matching_last_record: Optional[dict] = None
        for log_file in sorted(history_dir.glob("*.log"), key=lambda p: p.stat().st_mtime, reverse=True):
            try:
                lines = log_file.read_text(encoding="utf-8", errors="replace").splitlines()
                # Search from end for a line containing task_id
                for line in reversed(lines):
                    line = line.strip()
                    if not line:
                        continue
                    try:
                        record = json.loads(line)
                    except json.JSONDecodeError:
                        continue
                    prompt = record.get("prompt", "") or ""
                    if task_id in prompt:
                        matching_last_record = record
                        break
                if matching_last_record is not None:
                    break
            except Exception:
                continue

        if matching_last_record is None:
            return result

        # Check truncation
        response = matching_last_record.get("response", "") or ""
        result["telegram_reply_truncated"] = _is_truncated_response(response)

        # Bot session status
        status = matching_last_record.get("status", None)
        if status is not None:
            result["bot_session_status"] = str(status)

    except Exception as exc:
        logger.warning("cron history scan failed for %s: %s", task_id, exc)

    return result


def _gather_worktree_info(
    task_id: str,
    *,
    runner: Optional[RunnerType] = None,
    workspace_root: Optional[Path] = None,
) -> dict:
    """Check if worktree exists and branch was pushed to remote."""
    wd = workspace_root or _default_workspace_root()
    worktrees_root = wd / ".worktrees"
    result = {
        "worktree_exists": False,
        "branch_pushed_to_remote": False,
    }

    # Check worktree existence
    try:
        import glob as _glob
        pattern = str(worktrees_root / f"{task_id}-*")
        candidates = _glob.glob(pattern)
        result["worktree_exists"] = bool(candidates)
    except Exception:
        pass

    # Check if branch exists on remote
    try:
        r = _run(
            ["git", "ls-remote", "--heads", "origin", f"task/{task_id}*"],
            cwd=str(wd),
            runner=runner,
        )
        if r.returncode == 0 and r.stdout.strip():
            result["branch_pushed_to_remote"] = True
    except Exception as exc:
        logger.warning("ls-remote failed for %s: %s", task_id, exc)

    return result


# ---------------------------------------------------------------------------
# Public API: gather_evidence
# ---------------------------------------------------------------------------

def gather_evidence(
    task_id: str,
    *,
    workspace_root: Optional[Path] = None,
    runner: Optional[RunnerType] = None,
    cron_history_dir: Optional[Path] = None,
    pr_lookup: Optional[Callable[[str], dict]] = None,
    timer_loader: Optional[Callable[[str], dict]] = None,
) -> LifecycleEvidence:
    """모든 evidence source 수집 — gh/git/file/timer/cron history.

    workspace_root가 None이면 resolve_canonical_workspace(task_id, fetch=False)로
    자동 결정 (§6 CanonicalWorkspace 연동). 실패 시 fallback Path('/home/jay/workspace').
    """
    # CanonicalWorkspace 연동 (회장 §6): workspace_root override가 없으면 canonical resolver 사용
    if workspace_root is None:
        try:
            cws: CanonicalWorkspace = _resolve_workspace(task_id, runner=runner)
            wd = cws.workspace_root
        except Exception:
            wd = _DEFAULT_WORKSPACE
    else:
        wd = workspace_root

    # 1. PR info
    pr_info = _gather_pr_info(
        task_id,
        runner=runner,
        workspace_root=wd,
        pr_lookup=pr_lookup,
    )
    pr_number: Optional[int] = pr_info.get("number")
    raw_state = pr_info.get("state")
    pr_state: Optional[str] = str(raw_state).upper() if raw_state else None

    # mergeCommit may be dict {"oid": "..."} or plain string
    raw_mc = pr_info.get("mergeCommit")
    if isinstance(raw_mc, dict):
        merge_commit: Optional[str] = raw_mc.get("oid") or raw_mc.get("sha")
    elif isinstance(raw_mc, str) and raw_mc:
        merge_commit = raw_mc
    else:
        merge_commit = None

    # 2. merged_into_main
    merged_into_main = False
    if merge_commit:
        merged_into_main = _check_merged_into_main(
            merge_commit,
            runner=runner,
            workspace_root=wd,
        )

    # 3. CI status
    ci_status: Optional[str] = None
    if pr_number is not None:
        ci_status = _gather_ci_status(pr_number, runner=runner, workspace_root=wd)

    # 4. smoke status — check qc-result file for PASS/FAIL marker
    smoke_status: Optional[str] = None
    ev_dir = _events_dir(wd)
    qc_file = ev_dir / f"{task_id}.qc-result"
    if qc_file.exists():
        try:
            content = qc_file.read_text(encoding="utf-8").upper()
            if "PASS" in content:
                smoke_status = "PASS"
            elif "FAIL" in content:
                smoke_status = "FAIL"
            else:
                smoke_status = "SKIPPED"
        except Exception:
            smoke_status = None

    # 5. Timer
    timer_info = _gather_timer_info(task_id, workspace_root=wd, timer_loader=timer_loader)
    timer_status: Optional[str] = timer_info.get("status")
    timer_end_time: Optional[str] = timer_info.get("end_time")

    # 6. File markers
    markers = _gather_file_markers(task_id, workspace_root=wd)

    # 7. Cron history
    cron = _gather_cron_history(task_id, cron_history_dir=cron_history_dir)

    # 8. Worktree / branch
    wt_info = _gather_worktree_info(task_id, runner=runner, workspace_root=wd)

    return LifecycleEvidence(
        task_id=task_id,
        pr_number=pr_number,
        pr_state=pr_state,
        merge_commit=merge_commit,
        merged_into_main=merged_into_main,
        ci_status=ci_status,
        smoke_status=smoke_status,
        timer_status=timer_status,
        timer_end_time=timer_end_time,
        has_done=markers["has_done"],
        has_done_acked=markers["has_done_acked"],
        has_merge_done=markers["has_merge_done"],
        has_qc_result=markers["has_qc_result"],
        has_followup=markers["has_followup"],
        has_escalate_marker=markers["has_escalate_marker"],
        escalate_marker_age_minutes=markers["escalate_marker_age_minutes"],
        telegram_reply_truncated=cron["telegram_reply_truncated"],
        bot_session_status=cron["bot_session_status"],
        worktree_exists=wt_info["worktree_exists"],
        branch_pushed_to_remote=wt_info["branch_pushed_to_remote"],
    )


# ---------------------------------------------------------------------------
# Public API: detect_stuck_cases
# ---------------------------------------------------------------------------

def detect_stuck_cases(evidence: LifecycleEvidence) -> list[StuckCase]:
    """8 케이스 자동 감지."""
    cases: list[StuckCase] = []

    # 1. TIMER_RUNNING_BUT_PR_MERGED
    if evidence.timer_status == "running" and evidence.pr_state == "MERGED":
        cases.append(StuckCase(
            reason=StuckReason.TIMER_RUNNING_BUT_PR_MERGED,
            detail=(
                f"timer_status=running but pr_state=MERGED "
                f"(pr_number={evidence.pr_number}, merge_commit={evidence.merge_commit})"
            ),
        ))

    # 2. PR_MERGED_BUT_DONE_MISSING
    if evidence.pr_state == "MERGED" and not evidence.has_done:
        cases.append(StuckCase(
            reason=StuckReason.PR_MERGED_BUT_DONE_MISSING,
            detail=(
                f"pr_state=MERGED but .done marker missing "
                f"(pr_number={evidence.pr_number})"
            ),
        ))

    # 3. MERGE_COMMIT_BUT_MERGE_DONE_MISSING
    if evidence.merge_commit and not evidence.has_merge_done:
        cases.append(StuckCase(
            reason=StuckReason.MERGE_COMMIT_BUT_MERGE_DONE_MISSING,
            detail=(
                f"merge_commit={evidence.merge_commit} present but .merge-done missing"
            ),
        ))

    # 4. CI_PASS_BUT_NOT_FINALIZED
    if (
        evidence.ci_status == "SUCCESS"
        and evidence.smoke_status == "PASS"
        and evidence.pr_state == "MERGED"
        and (not evidence.has_done_acked or not evidence.has_merge_done)
    ):
        cases.append(StuckCase(
            reason=StuckReason.CI_PASS_BUT_NOT_FINALIZED,
            detail=(
                f"ci=SUCCESS, smoke=PASS, pr=MERGED but "
                f"has_done_acked={evidence.has_done_acked}, has_merge_done={evidence.has_merge_done}"
            ),
        ))

    # 5. TELEGRAM_REPLY_CUT_OFF
    if evidence.telegram_reply_truncated:
        cases.append(StuckCase(
            reason=StuckReason.TELEGRAM_REPLY_CUT_OFF,
            detail=(
                f"cron history last response appears truncated "
                f"(bot_session_status={evidence.bot_session_status})"
            ),
        ))

    # 6. BOT_SESSION_ENDED_BUT_TASK_OK
    if evidence.bot_session_status in {"cancelled", "error"} and evidence.merged_into_main:
        cases.append(StuckCase(
            reason=StuckReason.BOT_SESSION_ENDED_BUT_TASK_OK,
            detail=(
                f"bot_session_status={evidence.bot_session_status} but "
                f"merged_into_main=True (task evidence is OK)"
            ),
        ))

    # 7. FINISH_TASK_INTERRUPTED
    if (
        evidence.worktree_exists
        and evidence.branch_pushed_to_remote
        and not evidence.pr_number
    ):
        cases.append(StuckCase(
            reason=StuckReason.FINISH_TASK_INTERRUPTED,
            detail=(
                "worktree exists + branch pushed to remote but no PR found — "
                "finish-task likely interrupted before PR creation"
            ),
        ))

    # 8. STALE_ESCALATE_MARKER
    if evidence.has_escalate_marker and (evidence.escalate_marker_age_minutes or 0) > 30:
        # Check: does any Critical escalation evidence match?
        # If there's no active Critical signal (pr_state not stuck due to CI, etc.) → stale
        has_active_critical = (
            evidence.ci_status in {"FAILURE"} or
            (evidence.pr_state == "MERGED" and not evidence.merged_into_main)
        )
        if not has_active_critical:
            cases.append(StuckCase(
                reason=StuckReason.STALE_ESCALATE_MARKER,
                detail=(
                    f"escalate marker exists for "
                    f"{evidence.escalate_marker_age_minutes:.1f} min but no Critical evidence found"
                ),
            ))

    return cases


# ---------------------------------------------------------------------------
# Public API: determine_state
# ---------------------------------------------------------------------------

def determine_state(evidence: LifecycleEvidence) -> tuple[LifecycleState, list[StuckCase]]:
    """evidence priority 규칙으로 state + stuck cases 결정.

    우선순위 (회장 명시):
    PR state > mergeCommit > origin/main 포함 > CI > smoke > timer > file marker
    """
    stuck_cases = detect_stuck_cases(evidence)

    # FINALIZED: merged + evidence 모두 충족
    if (
        evidence.pr_state == "MERGED"
        and evidence.merged_into_main
        and evidence.ci_status in {"SUCCESS", None}  # CI may not exist for old PRs
        and evidence.has_done_acked
        and evidence.has_merge_done
        and evidence.timer_status in {"completed", None}
    ):
        return LifecycleState.FINALIZED, []

    # MERGED_PENDING_RECONCILE: PR merged + origin/main confirmed, but finalize incomplete
    if evidence.pr_state == "MERGED" and evidence.merged_into_main:
        missing = []
        if not evidence.has_done_acked:
            missing.append(".done.acked")
        if not evidence.has_merge_done:
            missing.append(".merge-done")
        if evidence.timer_status == "running":
            missing.append("timer-end")
        if missing:
            return LifecycleState.MERGED_PENDING_RECONCILE, stuck_cases

    # PR merged (no origin/main confirmation yet) but .done/.merge-done missing → stuck
    if evidence.pr_state == "MERGED" and not evidence.merged_into_main:
        if stuck_cases:
            return LifecycleState.STUCK_NEEDS_RECONCILE, stuck_cases

    # RUNNING: PR open + timer running (normal in-progress)
    if evidence.pr_state == "OPEN" and evidence.timer_status == "running":
        if stuck_cases:
            return LifecycleState.STUCK_NEEDS_RECONCILE, stuck_cases
        return LifecycleState.RUNNING, []

    # PR_OPEN: PR open + timer not running
    if evidence.pr_state == "OPEN" and evidence.timer_status != "running":
        if stuck_cases:
            return LifecycleState.STUCK_NEEDS_RECONCILE, stuck_cases
        return LifecycleState.PR_OPEN, []

    # ESCALATED: escalate marker is active (non-stale)
    if evidence.has_escalate_marker:
        age = evidence.escalate_marker_age_minutes or 0
        if age <= 30:
            return LifecycleState.ESCALATED, stuck_cases

    # Any stuck cases → STUCK_NEEDS_RECONCILE
    if stuck_cases:
        return LifecycleState.STUCK_NEEDS_RECONCILE, stuck_cases

    # No PR info + no timer → unknown, default conservative: RUNNING
    if evidence.pr_state is None and evidence.timer_status is None:
        return LifecycleState.RUNNING, []

    # Timer running, no PR → RUNNING
    if evidence.timer_status == "running" and evidence.pr_state is None:
        return LifecycleState.RUNNING, []

    # Timer completed, no PR → assume FINALIZED if all markers present
    if evidence.timer_status == "completed" and evidence.has_done_acked and evidence.has_merge_done:
        return LifecycleState.FINALIZED, []

    # Fallback
    return LifecycleState.RUNNING, []


# ---------------------------------------------------------------------------
# Public API: assert_no_manual_done_forgery
# ---------------------------------------------------------------------------

def assert_no_manual_done_forgery(
    task_id: str,
    evidence: LifecycleEvidence,
    *,
    workspace_root: Optional[Path] = None,
) -> None:
    """evidence가 부족한데 강제로 .done을 만들려고 하면 RuntimeError.

    충분한 evidence:
    - pr_state == MERGED and merged_into_main, OR
    - merge_commit not None and merged_into_main and (ci_status == SUCCESS or smoke_status == PASS)
    """
    wd = workspace_root or _default_workspace_root()
    done_path = _marker_path(wd, task_id, "done")

    cond_a = evidence.pr_state == "MERGED" and evidence.merged_into_main
    cond_b = (
        bool(evidence.merge_commit)
        and evidence.merged_into_main
        and (evidence.ci_status == "SUCCESS" or evidence.smoke_status == "PASS")
    )
    has_sufficient = cond_a or cond_b

    if not has_sufficient:
        raise RuntimeError(
            f"MANUAL_DONE_FORGERY_BLOCKED: task={task_id} — insufficient evidence to create .done. "
            f"Target path would be: {done_path}. "
            f"Requires (pr_state=MERGED + merged_into_main) OR "
            f"(merge_commit + merged_into_main + ci/smoke PASS). "
            f"Got: pr_state={evidence.pr_state}, merged_into_main={evidence.merged_into_main}, "
            f"merge_commit={evidence.merge_commit}, ci_status={evidence.ci_status}, "
            f"smoke_status={evidence.smoke_status}"
        )


# ---------------------------------------------------------------------------
# Backfill helpers
# ---------------------------------------------------------------------------

def _build_backfill_metadata(
    task_id: str,
    reconcile_run_id: str,
    evidence: LifecycleEvidence,
) -> dict:
    """Build JSON body for backfilled marker files."""
    evidence_source: list[str] = []
    if evidence.pr_state:
        evidence_source.append(f"pr_state={evidence.pr_state}")
    if evidence.merge_commit:
        evidence_source.append(f"merge_commit={evidence.merge_commit}")
    if evidence.ci_status:
        evidence_source.append(f"ci_status={evidence.ci_status}")
    if evidence.smoke_status:
        evidence_source.append(f"smoke_status={evidence.smoke_status}")
    if evidence.merged_into_main:
        evidence_source.append("merged_into_main=True")

    return {
        "task_id": task_id,
        "reconciled_by": "lifecycle_reconciliation_manager",
        "reconcile_run_id": reconcile_run_id,
        "timestamp": _now_iso(),
        "evidence_source": evidence_source,
        "merge_commit": evidence.merge_commit,
        "note": "evidence-based backfill (not manual forgery)",
    }


def _default_file_writer(path: Path, content: str) -> None:
    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text(content, encoding="utf-8")


def _default_timer_writer(task_id: str, timer_data: dict) -> None:
    """Write updated timer data back to task-timers.json."""
    timers_file = _default_workspace_root() / _TIMERS_FILE_NAME
    try:
        raw = timers_file.read_text(encoding="utf-8") if timers_file.exists() else "{}"
        data = json.loads(raw)
    except Exception:
        data = {}
    tasks = data.setdefault("tasks", {})
    tasks[task_id] = timer_data
    timers_file.write_text(json.dumps(data, indent=2, ensure_ascii=False), encoding="utf-8")


def _backfill_markers(
    task_id: str,
    reconcile_run_id: str,
    evidence: LifecycleEvidence,
    state: LifecycleState,
    *,
    workspace_root: Optional[Path] = None,
    apply: bool = False,
    file_writer: Optional[Callable[[Path, str], None]] = None,
    timer_writer: Optional[Callable[[str, dict], None]] = None,
    actions_taken: list[str],
    actions_planned: list[str],
) -> dict:
    """Backfill missing markers based on evidence. Returns backfill_metadata.

    `state` is recorded in metadata so reconcile audit trails preserve which
    LifecycleState authorised the backfill.
    """

    wd = workspace_root or _default_workspace_root()
    fw = file_writer if file_writer is not None else _default_file_writer
    tw = timer_writer if timer_writer is not None else _default_timer_writer

    meta = _build_backfill_metadata(task_id, reconcile_run_id, evidence)
    meta["authorising_state"] = state.value if isinstance(state, LifecycleState) else str(state)
    meta_json = json.dumps(meta, indent=2, ensure_ascii=False)

    def do_or_plan(action_name: str, fn: Callable[[], None]) -> None:
        if apply:
            try:
                fn()
                actions_taken.append(action_name)
            except Exception as exc:
                logger.error("backfill action %s failed: %s", action_name, exc)
        else:
            actions_planned.append(action_name)

    # Guard: must have sufficient evidence before any .done backfill
    try:
        assert_no_manual_done_forgery(task_id, evidence, workspace_root=wd)
    except RuntimeError as exc:
        logger.warning("Backfill blocked: %s", exc)
        if apply:
            actions_taken.append(f"BLOCKED:{exc}")
        else:
            actions_planned.append(f"BLOCKED (insufficient evidence)")
        return meta

    # .done
    if not evidence.has_done:
        path = _marker_path(wd, task_id, "done")
        do_or_plan("created_done", lambda p=path: fw(p, meta_json))

    # .done.acked
    if not evidence.has_done_acked:
        path = _marker_path(wd, task_id, "done.acked")
        do_or_plan("created_done_acked", lambda p=path: fw(p, meta_json))

    # .merge-done
    if not evidence.has_merge_done:
        path = _marker_path(wd, task_id, "merge-done")
        do_or_plan("wrote_merge_done", lambda p=path: fw(p, meta_json))

    # timer end
    if evidence.timer_status == "running":
        timer_end_iso = _now_iso()
        timer_payload = {
            "status": "completed",
            "end_time": timer_end_iso,
            "ended_by": "lifecycle_reconciliation_manager",
            "reconcile_run_id": reconcile_run_id,
        }
        do_or_plan("ended_timer", lambda tp=timer_payload: tw(task_id, tp))

    return meta


# ---------------------------------------------------------------------------
# Public API: reconcile
# ---------------------------------------------------------------------------

def reconcile(
    task_id: str,
    *,
    apply: bool = False,
    workspace_root: Optional[Path] = None,
    runner: Optional[RunnerType] = None,
    cron_history_dir: Optional[Path] = None,
    pr_lookup: Optional[Callable[[str], dict]] = None,
    timer_loader: Optional[Callable[[str], dict]] = None,
    timer_writer: Optional[Callable[[str, dict], None]] = None,
    file_writer: Optional[Callable[[Path, str], None]] = None,
) -> LifecycleReport:
    """idempotent reconcile.

    - state == FINALIZED → no-op
    - state == MERGED_PENDING_RECONCILE + apply=True → backfill with evidence metadata
    - state == STUCK_NEEDS_RECONCILE + apply=True → reason별 backfill
    - apply=False → actions_planned에만 기록, 실제 변경 없음
    - 동일 reconcile 반복 호출 → 동일 state, no-op (멱등)
    """
    reconcile_run_id = uuid.uuid4().hex
    timestamp = _now_iso()

    evidence = gather_evidence(
        task_id,
        workspace_root=workspace_root,
        runner=runner,
        cron_history_dir=cron_history_dir,
        pr_lookup=pr_lookup,
        timer_loader=timer_loader,
    )

    state, stuck_cases = determine_state(evidence)

    actions_taken: list[str] = []
    actions_planned: list[str] = []
    backfill_metadata: dict[str, Any] = {}

    if state == LifecycleState.FINALIZED:
        # Already finalized — pure no-op
        logger.info("task=%s already FINALIZED, no-op", task_id)
    elif state in {
        LifecycleState.MERGED_PENDING_RECONCILE,
        LifecycleState.STUCK_NEEDS_RECONCILE,
    }:
        backfill_metadata = _backfill_markers(
            task_id,
            reconcile_run_id,
            evidence,
            state,
            workspace_root=workspace_root,
            apply=apply,
            file_writer=file_writer,
            timer_writer=timer_writer,
            actions_taken=actions_taken,
            actions_planned=actions_planned,
        )
    else:
        logger.debug("task=%s state=%s, no backfill needed", task_id, state.value)

    return LifecycleReport(
        task_id=task_id,
        state=state,
        stuck_cases=stuck_cases,
        evidence=evidence,
        actions_taken=actions_taken,
        actions_planned=actions_planned,
        dry_run=not apply,
        timestamp=timestamp,
        reconcile_run_id=reconcile_run_id,
        backfill_metadata=backfill_metadata,
    )


# ---------------------------------------------------------------------------
# Public API: scan_stuck
# ---------------------------------------------------------------------------

def scan_stuck(
    *,
    workspace_root: Optional[Path] = None,
    runner: Optional[RunnerType] = None,
    apply: bool = False,
    timer_loader: Optional[Callable[[], dict]] = None,
) -> list[LifecycleReport]:
    """task-timers.json 전체 순회 → STUCK인 것만 보고."""
    wd = workspace_root or _default_workspace_root()
    timers_file = wd / _TIMERS_FILE_NAME

    task_ids: list[str] = []
    try:
        if timers_file.exists():
            raw = timers_file.read_text(encoding="utf-8")
            data = json.loads(raw)
            task_ids = list(data.get("tasks", {}).keys())
    except Exception as exc:
        logger.warning("scan_stuck: failed to load task-timers.json: %s", exc)

    if not task_ids:
        logger.info("scan_stuck: no tasks found in task-timers.json")
        return []

    stuck_reports: list[LifecycleReport] = []

    for tid in task_ids:
        # Adapt timer_loader signature (scan_stuck provides Callable[[], dict], reconcile expects Callable[[str], dict])
        adapted_loader: Optional[Callable[[str], dict]] = None
        if timer_loader is not None:
            def _make_adapter(tl: Callable[[], dict]) -> Callable[[str], dict]:
                def _adapter(task_id: str) -> dict:
                    all_timers = tl()
                    return all_timers.get("tasks", {}).get(task_id, {})
                return _adapter
            adapted_loader = _make_adapter(timer_loader)

        try:
            report = reconcile(
                tid,
                apply=apply,
                workspace_root=wd,
                runner=runner,
                timer_loader=adapted_loader,
            )
            if report.state in {
                LifecycleState.STUCK_NEEDS_RECONCILE,
                LifecycleState.MERGED_PENDING_RECONCILE,
                LifecycleState.ESCALATED,
            }:
                stuck_reports.append(report)
        except Exception as exc:
            logger.error("scan_stuck: reconcile failed for %s: %s", tid, exc)

    return stuck_reports


# ---------------------------------------------------------------------------
# automation_contracts 연동 헬퍼 (회장 §6)
# ---------------------------------------------------------------------------

def smoke_result_to_status(sr: SmokeResult) -> str:
    """automation_contracts.SmokeResult → lifecycle smoke_status string.

    expects post_merge_smoke_runner.SmokeResult.passed →
    smoke_status='PASS' if True else 'FAIL'
    """
    return "PASS" if sr.passed else "FAIL"


def build_automation_decision(report: LifecycleReport) -> AutomationDecision:
    """LifecycleReport를 AutomationDecision으로 매핑 (자동화 의사결정 contract 연동).

    회장 §6: CanonicalWorkspace + automation_contracts 연동 필수.
    """
    if report.state == LifecycleState.FINALIZED:
        decision_str = "NO_OP"
        requires_chair = False
        critical: Optional[CriticalEscalationType] = None
    elif report.state == LifecycleState.ESCALATED:
        decision_str = "ESCALATE"
        requires_chair = True
        critical = CriticalEscalationType.POST_MERGE_SMOKE_FAILED
    elif report.state == LifecycleState.MERGED_PENDING_RECONCILE:
        decision_str = "BACKFILL"
        requires_chair = False
        critical = None
    else:
        decision_str = "MONITOR"
        requires_chair = False
        critical = None

    return AutomationDecision(
        decision=decision_str,
        reason_codes=[c.reason.value for c in report.stuck_cases],
        critical_escalation_type=critical,
        auto_handled=(not requires_chair),
        requires_chair=requires_chair,
        audit={
            "task_id": report.task_id,
            "reconcile_run_id": report.reconcile_run_id,
            "timestamp": report.timestamp,
        },
    )


def build_escalation_packet(
    report: LifecycleReport, *, pr_number: int = 0
) -> Optional[EscalationPacket]:
    """ESCALATED state에 대해 EscalationPacket 생성. 그 외 None.

    ⚠️ lifecycle은 일반적으로 ESCALATED를 직접 발동하지 않으므로,
    이 헬퍼는 explicit caller가 호출해야 함.
    """
    if report.state != LifecycleState.ESCALATED:
        return None

    stuck_details = "; ".join(
        f"{sc.reason.value}: {sc.detail}" for sc in report.stuck_cases
    ) or "lifecycle ESCALATED state detected"

    return EscalationPacket(
        task_id=report.task_id,
        pr_number=pr_number,
        escalation_type=CriticalEscalationType.POST_MERGE_SMOKE_FAILED,
        reason=stuck_details,
        why_auto_cannot_continue=(
            "lifecycle state is ESCALATED — human review required before proceeding"
        ),
        safe_options=[
            "Review stuck_cases and resolve manually",
            "Run reconcile --apply after resolving root cause",
            "Escalate to chair if unresolvable",
        ],
        recommended_option="Review stuck_cases and resolve manually",
        evidence=report.evidence_to_dict(),
    )


# ---------------------------------------------------------------------------
# CLI entrypoint
# ---------------------------------------------------------------------------

def _build_cli_parser() -> argparse.ArgumentParser:
    p = argparse.ArgumentParser(
        description=(
            "lifecycle_reconciliation_manager — "
            "idempotent task lifecycle reconcile (task-2518 P0)"
        )
    )
    group = p.add_mutually_exclusive_group(required=True)
    group.add_argument(
        "--reconcile",
        action="store_true",
        help="Reconcile a single task (requires --task-id)",
    )
    group.add_argument(
        "--scan-stuck",
        action="store_true",
        help="Scan all tasks in task-timers.json and report stuck ones",
    )
    p.add_argument("--task-id", help="task-NNNN identifier (required for --reconcile)")
    p.add_argument(
        "--apply",
        action="store_true",
        default=False,
        help="Actually apply backfill (default: dry-run)",
    )
    p.add_argument(
        "--json",
        action="store_true",
        dest="output_json",
        default=True,
        help="Output as JSON (default: True)",
    )
    p.add_argument(
        "--workspace-root",
        default=None,
        help="Override workspace root path",
    )
    return p


def _cli_main(argv: Optional[list[str]] = None) -> None:
    parser = _build_cli_parser()
    args = parser.parse_args(argv)

    workspace_root: Optional[Path] = None
    if args.workspace_root:
        workspace_root = Path(args.workspace_root)

    if args.reconcile:
        if not args.task_id:
            parser.error("--reconcile requires --task-id")
        try:
            report = reconcile(
                args.task_id,
                apply=args.apply,
                workspace_root=workspace_root,
            )
            print(report.to_json())
        except Exception as exc:
            print(json.dumps({"error": str(exc), "task_id": args.task_id}), file=sys.stderr)
            sys.exit(1)

    elif args.scan_stuck:
        try:
            reports = scan_stuck(
                workspace_root=workspace_root,
                apply=args.apply,
            )
            output = [r.to_dict() for r in reports]
            print(json.dumps(output, indent=2, ensure_ascii=False))
        except Exception as exc:
            print(json.dumps({"error": str(exc)}), file=sys.stderr)
            sys.exit(1)


if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO, format="%(levelname)s %(name)s: %(message)s")
    _cli_main()
