# -*- coding: utf-8 -*-
"""anu_v3.callback_4tuple_registry — durable append-only callback ledger.

task-2553+44 (implementation goal B). Standalone, pure stdlib.

Problem (Chairman §2.1 / diagnosis report_items.6): +42/+43 normal completion
callbacks DID fire and reach ANU, but the one-shot cron auto-deleted, the
collector ran in a separate spawn session, and there was NO durable 4-tuple
ledger — so the dispatching session saw them "as if absent"
(session-discontinuity observability gap).

This module is a durable append-only JSONL registry. Once a callback
4-tuple is recorded, a later session can confirm
``NORMAL_CALLBACK_COMPLETED`` for a task even though the one-shot cron and
the spawn session are long gone (regression 7/8/9).

Identity 4-tuple (matches dispatch.executor_completion_contract.Callback4Tuple
and schemas/callback_4tuple.schema.json — single definition, cardinality
identical):

    (task_id, dispatch_cron_id, normal_collector_cron_id,
     fallback_callback_cron_id)

The record additionally carries the §3.A observability fields
``dispatch_id / executor / chat_id / role / status``.

Layer A / NO-CRON (9-R.1): append-only ledger WRITE + read-only query only.
ZERO cron register/remove, ZERO dispatch, ZERO merge, ZERO ``cokacdir`` /
``subprocess`` exec. The ledger never adds or cancels a cron; it only
*records* that an executor's (externally fired) lifecycle callback existed.
"""
from __future__ import annotations

import json
import os
from dataclasses import dataclass, asdict
from pathlib import Path
from typing import Dict, List, Optional, Tuple

# Schema identifier for the registry as a whole. It is not referenced by any
# other definition in this module.
REGISTRY_SCHEMA = "anu_v3.callback_4tuple_registry.v1"
# Schema identifier that make_record() stamps on each record; it is also the
# value assumed on read for ledger lines that carry no "schema" key.
RECORD_SCHEMA = "callback_4tuple_ledger_record.v1"

# ── classifications ──────────────────────────────────────────────────────────
# Verdict strings returned by Callback4TupleRegistry.classify():
#   * COMPLETED  - latest record for the task has status "COMPLETED".
#   * PENDING    - a matching record exists but is not (yet) COMPLETED.
#   * NO_LEDGER_RECORD - the ledger holds no record for the task.
#   * TRACK_MISMATCH   - a record exists but differs from the caller's
#     expectation (also returned by validate_identity()).
NORMAL_CALLBACK_COMPLETED = "NORMAL_CALLBACK_COMPLETED"
NORMAL_CALLBACK_PENDING = "NORMAL_CALLBACK_PENDING"
NO_LEDGER_RECORD = "NO_LEDGER_RECORD"
TRACK_MISMATCH = "TRACK_MISMATCH"

# Default durable ledger path, relative to the canonical workspace root
# (append-only JSONL). default_ledger_path() joins it onto a root directory.
DEFAULT_LEDGER_RELPATH = "memory/events/callback_4tuple_index.jsonl"

# Names of the four identity fields, in tuple order. validate_identity()
# compares exactly these keys (track-ownership comparison, 9-R.6
# TRACK_MISMATCH).
IDENTITY_FIELDS: Tuple[str, ...] = (
    "task_id",
    "dispatch_cron_id",
    "normal_collector_cron_id",
    "fallback_callback_cron_id",
)


@dataclass(frozen=True)
class Callback4TupleRecord:
    """One immutable line of the callback ledger.

    The identity 4-tuple is ``(task_id, dispatch_cron_id,
    normal_collector_cron_id, fallback_callback_cron_id)``; the remaining
    fields are observability metadata.

    ``normal_collector_cron_id`` names the executor's own normal completion
    callback cron (the MANDATORY lifecycle signal). Setting ``no_fallback``
    declares an explicit no-fallback contract (§3.A) that stands in for a
    ``fallback_callback_cron_id``.
    """

    # Field order is part of the constructor signature - do not reorder.
    schema: str
    task_id: str
    dispatch_id: str
    dispatch_cron_id: str
    executor: str
    chat_id: str
    normal_collector_cron_id: Optional[str]
    fallback_callback_cron_id: Optional[str]
    role: str
    status: str = "REGISTERED"  # lifecycle: REGISTERED -> COMPLETED
    no_fallback: bool = False
    ts_kst: str = ""

    def identity(self) -> Dict[str, Optional[str]]:
        """Return the identity 4-tuple as a dict keyed by field name."""
        identity_names = (
            "task_id",
            "dispatch_cron_id",
            "normal_collector_cron_id",
            "fallback_callback_cron_id",
        )
        return {name: getattr(self, name) for name in identity_names}

    def to_json(self) -> Dict[str, object]:
        """Return every field as a plain dict, ready for ``json.dumps``."""
        return asdict(self)


def make_record(
    *,
    task_id: str,
    dispatch_id: str,
    dispatch_cron_id: str,
    executor: str,
    chat_id: str,
    normal_collector_cron_id: Optional[str],
    fallback_callback_cron_id: Optional[str],
    role: str,
    status: str = "REGISTERED",
    no_fallback: bool = False,
    ts_kst: str = "",
) -> Callback4TupleRecord:
    """Build a ledger record stamped with the current ``RECORD_SCHEMA``.

    All arguments are keyword-only and are copied onto the record as given,
    except ``chat_id``, which is coerced with ``str()``. No validation is
    performed here.
    """
    payload = {
        "schema": RECORD_SCHEMA,
        "task_id": task_id,
        "dispatch_id": dispatch_id,
        "dispatch_cron_id": dispatch_cron_id,
        "executor": executor,
        # Stored as text even when the caller passes a numeric chat id.
        "chat_id": str(chat_id),
        "normal_collector_cron_id": normal_collector_cron_id,
        "fallback_callback_cron_id": fallback_callback_cron_id,
        "role": role,
        "status": status,
        "no_fallback": no_fallback,
        "ts_kst": ts_kst,
    }
    return Callback4TupleRecord(**payload)


def validate_record(rec: Callback4TupleRecord) -> List[str]:
    """Collect the FAIL reasons for a ledger record (§3.A).

    Checks, in order:

    * ``task_id``, ``dispatch_id`` and ``chat_id`` must be non-empty.
    * ``normal_collector_cron_id`` must be present (MANDATORY lifecycle
      signal).
    * ``fallback_callback_cron_id`` must be present UNLESS
      ``no_fallback=True`` declares an explicit no-fallback contract.

    Returns:
        The list of human-readable reasons; empty when the record is valid.
    """
    required = (
        (rec.task_id, "task_id empty"),
        (rec.dispatch_id, "dispatch_id empty"),
        (rec.chat_id, "chat_id empty"),
    )
    problems: List[str] = [message for value, message in required if not value]

    if not rec.normal_collector_cron_id:
        problems.append(
            "normal_collector_cron_id missing — MANDATORY executor normal "
            "completion callback lifecycle signal (§3.A). NO-CRON does NOT "
            "exempt this (registry/checkpoint cron-ban only)."
        )

    # Either a fallback cron id or an explicit no-fallback declaration is
    # enough to satisfy the safety-path requirement.
    has_safety_path = bool(rec.fallback_callback_cron_id) or rec.no_fallback
    if not has_safety_path:
        problems.append(
            "fallback_callback_cron_id missing and no explicit no_fallback "
            "contract declared (§3.A safety path)."
        )
    return problems


def record_is_valid(rec: Callback4TupleRecord) -> bool:
    """Return ``True`` when ``validate_record`` reports no FAIL reasons."""
    return len(validate_record(rec)) == 0


class Callback4TupleRegistry:
    """Durable append-only JSONL ledger of callback 4-tuple records.

    WRITE appends one JSON line per record; READ scans the whole file.
    Historical lines are never rewritten in place: a COMPLETED transition is
    a new line appended for the same ``task_id``, and readers treat the last
    line for a task as authoritative (last-write-wins).
    """

    def __init__(self, ledger_path: os.PathLike) -> None:
        """Bind the registry to ``ledger_path``; nothing is opened or created."""
        self.ledger_path = Path(ledger_path)

    # -- durable WRITE (append-only) -----------------------------------
    def append(self, rec: Callback4TupleRecord) -> None:
        """Append one record as a single JSON line.

        Creates the parent directory if needed, then writes, flushes and
        fsyncs the line so it is handed to the OS for durable storage before
        returning.

        Raises:
            OSError: if the directory or file cannot be created or written.
        """
        self.ledger_path.parent.mkdir(parents=True, exist_ok=True)
        line = json.dumps(rec.to_json(), ensure_ascii=False, sort_keys=True)
        with open(self.ledger_path, "a", encoding="utf-8") as fh:
            fh.write(line + "\n")
            fh.flush()
            os.fsync(fh.fileno())

    def mark_completed(
        self, task_id: str, *, ts_kst: str = ""
    ) -> Optional[Callback4TupleRecord]:
        """Append a COMPLETED line derived from the latest record of ``task_id``.

        Append-only: the historical REGISTERED line is preserved (regression
        7 — durable history survives one-shot cron auto-delete). The new line
        copies every field of the latest record and overrides ``status`` and
        ``ts_kst`` (``ts_kst`` is overwritten even when left at its default).

        Returns:
            The appended record, or ``None`` when the ledger holds no record
            for ``task_id`` (in which case nothing is written).
        """
        rec = self.latest_for(task_id)
        if rec is None:
            return None
        completed = Callback4TupleRecord(
            **{**rec.to_json(), "status": "COMPLETED", "ts_kst": ts_kst}
        )
        self.append(completed)
        return completed

    # -- read-only QUERY -----------------------------------------------
    @staticmethod
    def _parse_line(raw: bytes) -> Optional[Callback4TupleRecord]:
        """Turn one raw ledger line into a record, or ``None`` if unusable.

        A line is unusable when it is blank, is not valid UTF-8, is not valid
        JSON, is valid JSON but not an object, or does not match the record's
        fields. Unusable lines are skipped rather than raised so that one bad
        line can never take down a reader.
        """
        try:
            text = raw.decode("utf-8").strip()
        except UnicodeDecodeError:
            return None  # fail-safe: undecodable bytes spoil only this line
        if not text:
            return None
        try:
            payload = json.loads(text)
        except json.JSONDecodeError:
            return None  # fail-safe: skip a corrupt line, never crash
        if not isinstance(payload, dict):
            # Valid JSON that is not an object (list, number, null, ...)
            # cannot be a record and has no setdefault().
            return None
        # Fill in fields that older/partial lines may lack.
        payload.setdefault("schema", RECORD_SCHEMA)
        payload.setdefault("status", "REGISTERED")
        payload.setdefault("no_fallback", False)
        payload.setdefault("ts_kst", "")
        try:
            return Callback4TupleRecord(**payload)
        except TypeError:
            return None  # fail-safe on unknown/extra or missing keys

    def _iter_records(self) -> List[Callback4TupleRecord]:
        """Return every parseable ledger record, in file (append) order.

        Returns an empty list when the ledger does not exist or cannot be
        read. Individual unusable lines are skipped.
        """
        records: List[Callback4TupleRecord] = []
        try:
            if not self.ledger_path.is_file():
                return []
            # Stream line by line instead of reading the whole file at once:
            # an append-only ledger grows over time, and this keeps the raw
            # file text out of memory (the parsed records are still collected
            # in the returned list). Binary mode lets a line with invalid
            # UTF-8 be skipped on its own instead of aborting the scan.
            with self.ledger_path.open("rb") as fh:
                for raw in fh:
                    rec = self._parse_line(raw)
                    if rec is not None:
                        records.append(rec)
        except OSError:
            # fail-safe: an unreadable/locked ledger must never crash the
            # collector classification — defer to schedule_history/artifact.
            return []
        return records

    def history_for(self, task_id: str) -> List[Callback4TupleRecord]:
        """All ledger lines for a task_id, in append order (durable history)."""
        return [r for r in self._iter_records() if r.task_id == task_id]

    def latest_for(self, task_id: str) -> Optional[Callback4TupleRecord]:
        """Return the last-appended record for ``task_id``, or ``None``."""
        history = self.history_for(task_id)
        return history[-1] if history else None

    # -- classification (registry-first, §3.B) -------------------------
    def classify(
        self,
        *,
        task_id: str,
        expected_task_id: Optional[str] = None,
        expected_dispatch_id: Optional[str] = None,
        expected_chat_id: Optional[str] = None,
        expected_dispatch_cron_id: Optional[str] = None,
    ) -> str:
        """Registry-first verdict (§3.B).

        Fallback / dead-man collectors call this FIRST. If the ledger's
        latest record for ``task_id`` is COMPLETED and matches every supplied
        expectation -> ``NORMAL_CALLBACK_COMPLETED``, even though the one-shot
        cron and the spawn session are gone.

        Mismatch semantics (§3.B / regression 13/14/15): a record EXISTS but
        its stored task_id / dispatch_id / chat_id / dispatch_cron_id differs
        from an explicit expectation -> ``TRACK_MISMATCH`` (an unrelated
        task's callback is never cited). An ABSENT record ->
        ``NO_LEDGER_RECORD`` (this is the correct fail-safe: the caller must
        defer to schedule_history + canonical artifact, NOT misjudge it as
        completed or as a mismatch — §3.B).

        Expectations left as ``None`` are not checked. ``chat_id`` is
        compared as text on both sides, so a numeric chat id stored in the
        ledger still matches its string form.

        Returns:
            One of ``NO_LEDGER_RECORD``, ``TRACK_MISMATCH``,
            ``NORMAL_CALLBACK_COMPLETED`` or ``NORMAL_CALLBACK_PENDING``.
        """
        rec = self.latest_for(task_id)
        if rec is None:
            return NO_LEDGER_RECORD
        if expected_task_id is not None and rec.task_id != expected_task_id:
            return TRACK_MISMATCH
        if expected_dispatch_id is not None and (
            rec.dispatch_id != expected_dispatch_id
        ):
            return TRACK_MISMATCH
        if expected_chat_id is not None and (
            str(rec.chat_id) != str(expected_chat_id)
        ):
            return TRACK_MISMATCH
        if expected_dispatch_cron_id is not None and (
            rec.dispatch_cron_id != expected_dispatch_cron_id
        ):
            return TRACK_MISMATCH
        if rec.status == "COMPLETED":
            return NORMAL_CALLBACK_COMPLETED
        return NORMAL_CALLBACK_PENDING

    def validate_identity(
        self, task_id: str, observed_identity: Dict[str, Optional[str]]
    ) -> Tuple[bool, str]:
        """Compare an observed identity 4-tuple against the latest record.

        Every name in ``IDENTITY_FIELDS`` is compared; a key missing from
        ``observed_identity`` is treated as ``None``.

        Returns:
            ``(True, "IDENTITY_MATCH")`` when all four fields agree, otherwise
            ``(False, TRACK_MISMATCH)`` — including when the ledger holds no
            record for ``task_id``.
        """
        rec = self.latest_for(task_id)
        if rec is None:
            # NOTE(review): classify() reports an absent record as
            # NO_LEDGER_RECORD, whereas this returns TRACK_MISMATCH. Kept
            # as-is for existing callers — confirm which is intended.
            return False, TRACK_MISMATCH
        ident = rec.identity()
        for field_name in IDENTITY_FIELDS:
            if observed_identity.get(field_name) != ident.get(field_name):
                return False, TRACK_MISMATCH
        return True, "IDENTITY_MATCH"


def default_ledger_path(canonical_root: os.PathLike) -> Path:
    """Return the default ledger location under ``canonical_root``.

    The result is ``canonical_root`` joined with ``DEFAULT_LEDGER_RELPATH``;
    nothing is created on disk.
    """
    root = Path(canonical_root)
    return root.joinpath(DEFAULT_LEDGER_RELPATH)
