#!/usr/bin/env python3
"""failure_envelope_writer.py — task-2712 FAILURE_CALLBACK_BEFORE_EXIT_GUARD.

Failure callback envelope 박제 + exactly-one-terminal-marker rule + fallback
chain (stderr emit / syslog journal).

회장 verbatim doctrine (task-2712 §4 / §3.2): 봇이 9 failure terminal state
중 어떤 state 로 종료되든 exit 전에 disk terminal marker 1개를 박제한다.
.done 은 SUCCESS 전용. envelope 은 UTF-8 ≤ 3900 bytes hard.

본 모듈은 §3.2.1 6 marker path · §3.2.4 marker count verification · §4.1 11
mandatory field · §4.2 byte limit · §4.3 fallback rule 을 1:1 mirror 한다.
"""

from __future__ import annotations

import argparse
import json
import os
import subprocess
import sys
from datetime import datetime, timezone
from typing import Optional

try:  # 패키지/직접 실행 양쪽 지원
    from .terminal_state_classifier import (
        ANU_KEY,
        UNCLASSIFIED_TERMINAL_STATE,
        classify_terminal_state,
        is_terminal_state,
    )
except Exception:  # pragma: no cover - 직접 실행 fallback
    sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
    from terminal_state_classifier import (  # type: ignore
        ANU_KEY,
        UNCLASSIFIED_TERMINAL_STATE,
        classify_terminal_state,
        is_terminal_state,
    )

# ── §4.1: the 11 mandatory envelope fields ────────────────────────────────
# Key names only; build_envelope() below emits each of these keys.
MANDATORY_FIELDS = (
    "task_id",
    "team",
    "bot",
    "session",
    "phase",
    "exit_code",
    "failure_kind",
    "artifact_paths",
    "critical7_match",
    "terminal_state",
    "chair_authorization_id",
)

# ── §4.2: envelope size limit in UTF-8 bytes (as counted by `wc -c`) ──────
BYTE_LIMIT = 3900

# Default value of build_envelope()'s ``chair_authorization_id`` parameter.
CHAIR_AUTHORIZATION_ID = (
    "CHAIR-AUTH-TASK-2712-FAILURE-CALLBACK-BEFORE-EXIT-GUARD-IMPLEMENTATION-260530"
)

# ── §3.2.1: filename suffix appended to the task id, keyed by marker type ──
# All four entries are treated as terminal markers by the lookups below.
MARKER_SUFFIX = {
    "failure_envelope": ".failure-envelope.json",
    "failure_handoff": ".failure-handoff-marker.json",
    "supervisor_crash": ".supervisor-crash-marker.json",
    "done": ".done",
}
# Tag used with `logger -t` (write) and `journalctl -t` (read-back).
SYSLOG_TAG = "failure_callback_2712"


def _now_iso() -> str:
    """Return the current UTC time as ``YYYY-MM-DDTHH:MM:SSZ`` (second precision)."""
    utc_now = datetime.now(tz=timezone.utc)
    return utc_now.strftime("%Y-%m-%dT%H:%M:%SZ")


def build_envelope(
    task_id: str,
    terminal_state: str,
    *,
    team: str = "dev4-team",
    bot: str = "vishnu",
    session: str = "",
    phase: str = "",
    exit_code: int = 1,
    failure_kind: str = "",
    artifact_paths: Optional[list] = None,
    critical7_match: bool = False,
    chair_authorization_id: str = CHAIR_AUTHORIZATION_ID,
    registration_mode: str = "failure_callback_before_exit_guard",
    residual_pid: Optional[int] = None,
    summary: str = "",
) -> dict:
    """Assemble a failure envelope dict (§4.1 mandatory fields + collector fields).

    The result always carries the 11 mandatory keys, followed by the
    collector-strict keys (``collector_role``, ``collector_key``,
    ``owner_key``, ``self_key_used``), ``registration_mode`` and a UTC
    ``recorded_at`` timestamp.  ``residual_pid`` is added only when it is not
    ``None`` and ``summary`` only when it is non-empty.

    An empty ``session`` falls back to the ``COKACDIR_SESSION_ID`` environment
    variable (or ``""`` when that is unset).  ``exit_code``,
    ``critical7_match`` and ``residual_pid`` are coerced with ``int``/``bool``,
    and ``artifact_paths`` is copied into a fresh list.
    """
    if session:
        resolved_session = session
    else:
        resolved_session = os.environ.get("COKACDIR_SESSION_ID", "")

    # §4.1 mandatory fields, in their documented order.
    mandatory_part = {
        "task_id": task_id,
        "team": team,
        "bot": bot,
        "session": resolved_session,
        "phase": phase,
        "exit_code": int(exit_code),
        "failure_kind": failure_kind,
        "artifact_paths": list(artifact_paths or []),
        "critical7_match": bool(critical7_match),
        "terminal_state": terminal_state,
        "chair_authorization_id": chair_authorization_id,
    }
    # Collector-strict fields (§6.3.1 SELF_COLLECTOR_FORBIDDEN) + bookkeeping.
    collector_part = {
        "collector_role": "ANU",
        "collector_key": ANU_KEY,
        "owner_key": ANU_KEY,
        "self_key_used": False,
        "registration_mode": registration_mode,
        "recorded_at": _now_iso(),
    }
    envelope = {**mandatory_part, **collector_part}

    # Optional extras are appended last so the mandatory layout stays stable.
    if residual_pid is not None:
        envelope["residual_pid"] = int(residual_pid)
    if summary:
        envelope["summary"] = summary
    return envelope


def _utf8_len(obj: dict) -> int:
    """Return the size in bytes of *obj* serialized as compact, non-ASCII-escaped JSON.

    "Compact" here means ``json.dumps`` defaults (no ``indent``); non-ASCII
    characters are kept verbatim and counted at their UTF-8 width.
    """
    serialized = json.dumps(obj, ensure_ascii=False)
    encoded = serialized.encode("utf-8")
    return len(encoded)


def enforce_byte_limit(envelope: dict, limit: int = BYTE_LIMIT) -> dict:
    """Shrink *envelope* until its serialized size is at most *limit* bytes (§4.2 / §4.3).

    Size is whatever ``_utf8_len`` reports.  An envelope that already fits is
    returned unchanged (same object).  Otherwise a shallow copy is degraded
    step by step, stopping as soon as it fits; the caller's dict and its
    ``artifact_paths`` list are never mutated, and no key other than the
    optional ``summary`` is ever removed:

    1. blank ``summary``, then drop the ``summary`` key;
    2. truncate ``failure_kind`` to 32 characters;
    3. drop trailing ``artifact_paths`` entries, keeping at least one;
    4. truncate oversized free-text fields (``phase``, ``session``, ``team``,
       ``bot``) to 128 characters;
    5. as a last resort empty ``artifact_paths`` (the key is kept).

    Steps 4 and 5 exist because stopping after step 3 could silently return
    an envelope that still violated the hard limit (for example a single very
    long artifact path).  The result can still exceed *limit* only when the
    remaining fields (such as ``task_id``) are themselves oversized.

    Args:
        envelope: envelope dict, typically produced by ``build_envelope``.
        limit: maximum allowed size in bytes.

    Returns:
        ``envelope`` itself when it fits, otherwise a reduced shallow copy.
    """
    if _utf8_len(envelope) <= limit:
        return envelope

    env = dict(envelope)

    def fits() -> bool:
        return _utf8_len(env) <= limit

    # 1) summary: blank it first, then remove the key altogether.
    if "summary" in env:
        env["summary"] = ""
        if fits():
            return env
        env.pop("summary", None)
        if fits():
            return env

    # 2) failure_kind: keep a short prefix (non-string values are left alone).
    failure_kind = env.get("failure_kind")
    if isinstance(failure_kind, str) and len(failure_kind) > 32:
        env["failure_kind"] = failure_kind[:32]
        if fits():
            return env

    # 3) artifact_paths: drop from the tail but keep one entry for now.
    #    Work on a private copy so the caller's list is untouched.
    raw_paths = env.get("artifact_paths")
    paths = list(raw_paths) if isinstance(raw_paths, (list, tuple)) else []
    if paths:
        env["artifact_paths"] = paths
        while len(paths) > 1 and not fits():
            paths.pop()
        if fits():
            return env

    # 4) oversized free-text fields: 128 chars is far above any sane value,
    #    so this only bites when one of them is the actual culprit.
    for key in ("phase", "session", "team", "bot"):
        value = env.get(key)
        if isinstance(value, str) and len(value) > 128:
            env[key] = value[:128]
            if fits():
                return env

    # 5) last resort: a single remaining path may itself be too long.
    if paths:
        paths.clear()
    return env


def emit_stderr_line(
    task_id: str, terminal_state: str, exit_code: int, failure_kind: str, phase: str
) -> str:
    """Emit the §4.3.1 fallback record as exactly one line on stderr (fd 2).

    This is the fallback used when the envelope file cannot be written.
    Every field is rendered with non-printable characters (newlines, other
    control characters, lone surrogates, ...) backslash-escaped, so a garbled
    field can neither split the record across lines nor make the write fail
    with an encoding error.  Printable text, including non-ASCII, is emitted
    unchanged.

    The write itself is best-effort: if stderr is closed or unwritable the
    error is suppressed so the caller can continue with the remaining
    fallbacks using the returned line.

    Returns:
        The single-line record (without trailing newline).
    """

    def _flatten(value: object) -> str:
        # unicode_escape turns "\n" into the two characters "\" "n", etc.
        return "".join(
            ch if ch.isprintable() else ch.encode("unicode_escape").decode("ascii")
            for ch in str(value)
        )

    line = (
        f"FAILURE_CALLBACK_2712 task_id={_flatten(task_id)} "
        f"terminal_state={_flatten(terminal_state)} "
        f"exit_code={_flatten(exit_code)} failure_kind={_flatten(failure_kind)} "
        f"phase={_flatten(phase)}"
    )
    try:
        print(line, file=sys.stderr, flush=True)
    except (OSError, ValueError):
        # Closed/broken stderr: nothing more can be done on this channel.
        pass
    return line


def emit_syslog(task_id: str, terminal_state: str, exit_code: int, failure_kind: str) -> bool:
    """Send the §4.3.2 last-resort record to syslog via the ``logger`` command.

    Runs ``logger -t <SYSLOG_TAG> -p user.warning <message>`` with a 5 second
    timeout.  Never raises.

    Returns:
        ``True`` only when ``logger`` ran and exited with status 0.  ``False``
        when it is missing, timed out, could not be started, or exited
        non-zero; previously a non-zero exit was still reported as success.
    """
    message = (
        f"task_id={task_id} terminal_state={terminal_state} "
        f"exit_code={exit_code} failure_kind={failure_kind}"
    )
    try:
        completed = subprocess.run(
            ["logger", "-t", SYSLOG_TAG, "-p", "user.warning", message],
            check=False,
            timeout=5,
        )
    except (OSError, ValueError, subprocess.SubprocessError):
        # OSError: logger not installed / not executable.
        # ValueError: argument not passable to exec (e.g. embedded NUL).
        # SubprocessError: includes TimeoutExpired.
        return False
    return completed.returncode == 0


def _existing_terminal_markers(task_id: str, events_dir: str) -> list:
    """List the terminal marker files that already exist for *task_id*.

    Checks the four terminal marker classes in a fixed order (done, failure
    envelope, failure handoff, supervisor crash) and returns the paths under
    *events_dir* that exist on disk, in that order.
    """
    terminal_kinds = ("done", "failure_envelope", "failure_handoff", "supervisor_crash")
    candidates = [
        os.path.join(events_dir, f"{task_id}{MARKER_SUFFIX[kind]}")
        for kind in terminal_kinds
    ]
    return [candidate for candidate in candidates if os.path.exists(candidate)]


def write_envelope(
    envelope: dict,
    events_dir: str,
    marker_type: str = "failure_envelope",
) -> dict:
    """Persist *envelope* as the task's terminal marker, at most once.

    Exactly-one-terminal-marker rule (§3.2, idempotent): if any terminal
    marker for ``envelope["task_id"]`` already exists in *events_dir*
    (including the success ``.done`` marker) nothing is written.

    The marker is written to a temporary sibling file and then moved into
    place with ``os.replace``, so a failed write (e.g. disk full) never
    leaves a truncated file at the marker path that later lookups would
    mistake for a valid terminal marker.

    The envelope is first reduced with ``enforce_byte_limit``.  It is written
    indented for readability unless the indented form would exceed
    ``BYTE_LIMIT`` bytes on disk, in which case the compact form (the one the
    limit is measured against) is written instead.

    Any failure while preparing or writing the marker (directory creation,
    serialization, I/O) degrades to the §4.3 fallback chain instead of
    raising: one stderr line, a best-effort append to
    ``<task_id>.stderr-emit.log`` in *events_dir*, and a syslog entry.  In
    the fallback records an empty ``failure_kind`` is reported as
    ``disk_full_or_io_error``.

    Args:
        envelope: envelope dict; must contain ``"task_id"``.
        events_dir: directory holding marker files (created if missing).
        marker_type: key of ``MARKER_SUFFIX``; unknown values use the
            ``failure_envelope`` suffix.

    Returns:
        A dict whose ``"status"`` is one of:

        - ``WRITTEN``: new marker written; also ``path``, ``bytes`` (size of
          the file as written) and ``marker_type``.
        - ``SKIPPED_ALREADY_TERMINAL``: a terminal marker already existed;
          also ``existing`` (list of paths).
        - ``FALLBACK_STDERR``: write failed, and the log file and/or syslog
          recorded the event; also ``stderr_line`` and ``syslog``.
        - ``FALLBACK_STDERR_SYSLOG_FAIL``: write failed and neither the log
          file nor syslog succeeded; also ``stderr_line``.

    Raises:
        KeyError: if *envelope* has no ``"task_id"``.
    """
    task_id = envelope["task_id"]
    existing = _existing_terminal_markers(task_id, events_dir)
    if existing:
        return {"status": "SKIPPED_ALREADY_TERMINAL", "existing": existing}

    suffix = MARKER_SUFFIX.get(marker_type, MARKER_SUFFIX["failure_envelope"])
    path = os.path.join(events_dir, f"{task_id}{suffix}")
    tmp_path = f"{path}.tmp.{os.getpid()}"
    try:
        os.makedirs(events_dir, exist_ok=True)
        envelope = enforce_byte_limit(envelope)
        payload = json.dumps(envelope, ensure_ascii=False, indent=2).encode("utf-8")
        if len(payload) > BYTE_LIMIT:
            # Indentation overhead must not push the file past the limit.
            payload = json.dumps(envelope, ensure_ascii=False).encode("utf-8")
        with open(tmp_path, "wb") as f:
            f.write(payload)
        os.replace(tmp_path, path)  # the marker appears only when complete
    except (OSError, TypeError, ValueError):
        # OSError: directory/file I/O; TypeError/ValueError: the envelope
        # could not be serialized or encoded.  Either way no marker exists.
        try:
            os.remove(tmp_path)
        except OSError:
            pass  # temp file was never created, or is already gone
    else:
        return {
            "status": "WRITTEN",
            "path": path,
            "bytes": len(payload),
            "marker_type": marker_type,
        }

    # §4.3 envelope write failed -> stderr one-line emit.
    exit_code = envelope.get("exit_code", -1)
    failure_kind = envelope.get("failure_kind") or "disk_full_or_io_error"
    line = emit_stderr_line(
        task_id,
        UNCLASSIFIED_TERMINAL_STATE,
        exit_code,
        failure_kind,
        envelope.get("phase", ""),
    )
    # Best-effort on-disk evidence of the stderr emit.
    wrote_log = False
    try:
        log_path = os.path.join(events_dir, f"{task_id}.stderr-emit.log")
        with open(log_path, "a", encoding="utf-8") as lf:
            lf.write(line + "\n")
        wrote_log = True
    except (OSError, ValueError):
        pass  # recorded via wrote_log; syslog below is the remaining channel
    syslog_ok = emit_syslog(task_id, UNCLASSIFIED_TERMINAL_STATE, exit_code, failure_kind)
    if wrote_log or syslog_ok:
        return {"status": "FALLBACK_STDERR", "stderr_line": line, "syslog": syslog_ok}
    return {"status": "FALLBACK_STDERR_SYSLOG_FAIL", "stderr_line": line}


def _syslog_journal_has_entry(task_id: str, tag: str = SYSLOG_TAG) -> bool:
    """Report whether recent journal entries for *tag* mention *task_id* (best-effort).

    Reads the output of ``journalctl -t <tag> --no-pager -n 200`` (5 second
    timeout) and does a plain substring test for *task_id*.  Any error, such
    as ``journalctl`` being unavailable or timing out, yields ``False``.
    """
    command = ["journalctl", "-t", tag, "--no-pager", "-n", "200"]
    try:
        completed = subprocess.run(
            command,
            capture_output=True,
            text=True,
            timeout=5,
        )
        journal_text = completed.stdout or ""
        found = task_id in journal_text
    except Exception:
        found = False
    return found


def verify_exactly_one_terminal_marker(task_id: str, events_dir: str = "memory/events") -> dict:
    """Count the terminal markers of *task_id* and classify the result (§3.2.4).

    Returns a dict with ``status`` and ``fallback_evidence``.  ``status`` is:

    - ``OK``: exactly one terminal marker exists;
    - ``MULTI_FIRE_VIOLATION``: two or more exist;
    - ``ZERO_FIRE_BUT_FALLBACK_RECOVERABLE``: none exist but fallback
      evidence was found;
    - ``ZERO_FIRE``: none exist and there is no fallback evidence.

    Fallback evidence is a non-empty ``<task_id>.stderr-emit.log`` in
    *events_dir* (§4.3.1) or, only when that is absent, a matching syslog
    journal entry.
    """
    marker_names = (
        f"{task_id}.done",
        f"{task_id}.failure-envelope.json",
        f"{task_id}.failure-handoff-marker.json",
        f"{task_id}.supervisor-crash-marker.json",
    )
    terminal_count = sum(
        1 for name in marker_names if os.path.exists(os.path.join(events_dir, name))
    )

    # An empty stderr-emit.log does not count as evidence.
    emit_log = os.path.join(events_dir, f"{task_id}.stderr-emit.log")
    has_fallback = os.path.exists(emit_log) and os.path.getsize(emit_log) > 0
    if not has_fallback:
        # Journal lookup is consulted only when the log file gave no evidence.
        has_fallback = _syslog_journal_has_entry(task_id, tag=SYSLOG_TAG)

    if terminal_count == 1:
        return {"status": "OK", "fallback_evidence": has_fallback}
    if terminal_count >= 2:
        return {
            "status": "MULTI_FIRE_VIOLATION",
            "fallback_evidence": has_fallback,
        }
    if has_fallback:
        return {
            "status": "ZERO_FIRE_BUT_FALLBACK_RECOVERABLE",
            "fallback_evidence": True,
        }
    return {"status": "ZERO_FIRE", "fallback_evidence": False}


# ── CLI (entry point invoked by before_exit_guard_hook.sh) ─────────────────
def _main(argv=None) -> int:
    """Parse CLI arguments, build a failure envelope and write its marker.

    ``--terminal-state`` values that ``is_terminal_state`` rejects are passed
    to ``classify_terminal_state`` together with the exit code as a defensive
    re-classification.  ``--artifact-paths`` is a comma-separated list; blank
    entries are dropped and surrounding whitespace is stripped from each
    entry.  The events directory defaults to the
    ``FAILURE_CALLBACK_2712_EVENTS_DIR`` environment variable, then to a
    fixed workspace path.

    The ``write_envelope`` result is printed to stdout as one JSON object.

    Args:
        argv: argument list; ``None`` means ``sys.argv[1:]`` (argparse default).

    Returns:
        Always ``0``: written, skipped-because-already-terminal and fallback
        outcomes are all treated as a normal exit of the writer itself.
    """
    ap = argparse.ArgumentParser(description="task-2712 failure envelope writer")
    ap.add_argument("--task-id", required=True)
    ap.add_argument("--terminal-state", required=True)
    ap.add_argument("--exit-code", type=int, default=1)
    ap.add_argument("--failure-kind", default="")
    ap.add_argument("--phase", default="")
    ap.add_argument("--team", default="dev4-team")
    ap.add_argument("--bot", default="vishnu")
    ap.add_argument("--session", default="")
    ap.add_argument("--marker-type", default="failure_envelope")
    ap.add_argument("--artifact-paths", default="")  # comma-separated
    ap.add_argument("--critical7", action="store_true")
    ap.add_argument("--residual-pid", type=int, default=None)
    ap.add_argument(
        "--events-dir",
        default=os.environ.get(
            "FAILURE_CALLBACK_2712_EVENTS_DIR", "/home/jay/workspace/memory/events"
        ),
    )
    args = ap.parse_args(argv)

    terminal_state = args.terminal_state
    if not is_terminal_state(terminal_state):
        # Defensive: treat the given value as a hint and re-classify.
        terminal_state = classify_terminal_state(args.exit_code, terminal_state)

    # "a, b" must yield ["a", "b"], not ["a", " b"]: strip each kept entry,
    # not just the emptiness test.
    artifacts = [p.strip() for p in args.artifact_paths.split(",") if p.strip()]
    envelope = build_envelope(
        args.task_id,
        terminal_state,
        team=args.team,
        bot=args.bot,
        session=args.session,
        phase=args.phase,
        exit_code=args.exit_code,
        failure_kind=args.failure_kind,
        artifact_paths=artifacts,
        critical7_match=args.critical7,
        residual_pid=args.residual_pid,
    )
    result = write_envelope(envelope, args.events_dir, marker_type=args.marker_type)
    print(json.dumps(result, ensure_ascii=False))
    # Exactly-one rule: skipped (already present) and written are both a
    # normal exit; a fallback outcome also exits 0.
    return 0


if __name__ == "__main__":
    raise SystemExit(_main())
