# -*- coding: utf-8 -*-
"""anu_v3.collector_artifact_lookup — canonical-root, registry-first lookup.

task-2553+44/+46 (implementation goal B fail-safe + implementation goal C
integration). Standalone, read-only, pure stdlib.

The normal / fallback / dead-man collector verdict pipeline:

  1. registry-first (§3.B): consult the durable callback_4tuple ledger. A
     COMPLETED record => NORMAL_COLLECTOR_COMPLETED even if the one-shot
     cron + spawn session are gone (regression 8/9).
  2. canonical-root artifact re-check (§3.C): if the ledger is absent /
     incomplete, look for result/collector-result/.done UNDER THE CANONICAL
     ROOT (not the autoset cwd) via artifact_root_resolver. An autoset-cwd
     false-miss must NEVER alone yield RESULT_MISSING (regression 10/11).
  3. schedule_history + stale fail-safe (§3.B): only when the canonical
     root has nothing AND schedule_history shows nothing for this
     chat_id AND the dispatch is stale -> RESULT_MISSING / BOT_STALE
     candidate (regression 12). Chat isolation enforced (chat_id).

Layer A / NO-CRON (9-R.1): ZERO write, ZERO cron register/remove, ZERO
dispatch, ZERO merge, ZERO ``cokacdir``/``subprocess`` exec. Pure
stat/read classification.
"""
from __future__ import annotations

import json
from dataclasses import dataclass, field
from pathlib import Path
from typing import List, Optional

from anu_v3.artifact_root_resolver import (
    RESOLVER_SCHEMA,
    resolve_roots,
)
from anu_v3.callback_4tuple_registry import (
    NORMAL_CALLBACK_COMPLETED,
    TRACK_MISMATCH,
    Callback4TupleRegistry,
    default_ledger_path,
)

# Schema identifier that classify() stamps into CollectorLookupResult.schema.
LOOKUP_SCHEMA = "anu_v3.collector_artifact_lookup.v1"

# ── verdicts ─────────────────────────────────────────────────────────────────
# Emitted by classify() when the ledger is not a TRACK_MISMATCH and either the
# ledger reports a completed callback or the canonical
# <task_id>.collector-result.json file exists.
NORMAL_COLLECTOR_COMPLETED = "NORMAL_COLLECTOR_COMPLETED"
# Emitted when only the canonical result file or a .done / .done.acked marker
# exists (no ledger completion, no collector-result file).
RESULT_PRESENT = "RESULT_PRESENT"
# Emitted only when no canonical artifact exists, no schedule_history record
# matches the chat, and the caller flagged the dispatch as stale.
RESULT_MISSING = "RESULT_MISSING"
# Emitted when no canonical artifact exists and the dispatch is stale, but
# schedule_history does contain a record for the chat.
BOT_STALE = "BOT_STALE"
# Forbidden verdicts when the canonical root actually HAS the artifact
# (§3.C / regression 10/11).
# NOTE(review): not referenced by the code visible in this module — presumably
# consumed by callers or regression tests; confirm.
FORBIDDEN_WHEN_CANONICAL_PRESENT = frozenset({RESULT_MISSING, "DISPATCH_NOT_RECEIVED"})


@dataclass(frozen=True)
class CollectorLookupResult:
    """Immutable record of one collector lookup, as built by ``classify``.

    Carries the final verdict together with the evidence that produced it
    (ledger verdict, canonical-root artifact presence, schedule-history and
    staleness flags) and the human-readable reasons.
    """

    # Schema tag of this record (classify() passes LOOKUP_SCHEMA).
    schema: str
    # Task the lookup was performed for.
    task_id: str
    # Final verdict after registry, artifact and fail-safe resolution.
    verdict: str
    # Raw verdict returned by the durable callback ledger.
    registry_verdict: str
    # Canonical root and its events directory, as reported by the resolver.
    canonical_root: str
    events_dir: str
    # Whether the result / collector-result / done marker files were found
    # in the canonical events directory.
    result_present_canonical: bool
    collector_result_present_canonical: bool
    done_present_canonical: bool
    # Autoset cwd root, as reported by the resolver.
    autoset_cwd_root: str
    # True when the resolver reported no events dir under the autoset cwd.
    autoset_only_miss_blocked: bool
    # True when the schedule-history scan found a record for the chat;
    # classify() leaves it False when the scan was not needed.
    schedule_history_seen: bool
    # Caller-supplied staleness flag, echoed back.
    dispatch_stale: bool
    # Human-readable explanations, in the order they were recorded.
    reasons: List[str] = field(default_factory=list)

    def to_json(self) -> dict:
        """Return a plain ``dict`` view of this record.

        Every field is emitted under its own name, in declaration order.
        ``resolver_schema`` (the resolver's schema constant) is inserted
        just before ``reasons``, and ``reasons`` is copied so the caller
        cannot mutate the list held by this record.
        """
        leading_fields = (
            "schema",
            "task_id",
            "verdict",
            "registry_verdict",
            "canonical_root",
            "events_dir",
            "result_present_canonical",
            "collector_result_present_canonical",
            "done_present_canonical",
            "autoset_cwd_root",
            "autoset_only_miss_blocked",
            "schedule_history_seen",
            "dispatch_stale",
        )
        payload = {name: getattr(self, name) for name in leading_fields}
        payload["resolver_schema"] = RESOLVER_SCHEMA
        payload["reasons"] = list(self.reasons)
        return payload


def _isfile(p: Path) -> bool:
    try:
        return p.is_file()
    except OSError:
        return False


def _schedule_history_seen_for_chat(
    schedule_history_dir: Path, chat_id: str
) -> bool:
    """Read-only: is there ANY history line for this chat_id?

    Chat isolation: lines whose ``chat_id`` differs are skipped and never
    cited (shared cross-chat folder)."""
    try:
        if not schedule_history_dir.is_dir():
            return False
        logs = list(schedule_history_dir.glob("*.log"))
    except OSError:
        # fail-safe: a failed history-dir scan must never escape — treat as
        # "no history seen" so the verdict defers rather than crashes (§3.B).
        return False
    want = str(chat_id)
    for log in logs:
        try:
            text = log.read_text(encoding="utf-8", errors="ignore")
        except OSError:
            continue
        for raw in text.splitlines():
            raw = raw.strip()
            if not raw:
                continue
            try:
                rec = json.loads(raw)
            except json.JSONDecodeError:
                continue
            if str(rec.get("chat_id")) == want:
                return True
    return False


def classify(
    *,
    task_id: str,
    chat_id: str,
    expected_dispatch_id: Optional[str] = None,
    expected_dispatch_cron_id: Optional[str] = None,
    autoset_cwd: Optional[Path] = None,
    dispatch_stale: bool = False,
    result_basename: Optional[str] = None,
    schedule_history_dir: Optional[Path] = None,
) -> CollectorLookupResult:
    """Classify a collector run: ledger first, then canonical-root artifacts.

    Resolution order (first match wins):

    1. ledger verdict is ``TRACK_MISMATCH``      -> ``TRACK_MISMATCH``
    2. ledger verdict is a completed callback    -> ``NORMAL_COLLECTOR_COMPLETED``
    3. ``<task_id>.collector-result.json`` exists -> ``NORMAL_COLLECTOR_COMPLETED``
    4. result file or ``.done`` / ``.done.acked`` exists -> ``RESULT_PRESENT``
    5. otherwise the schedule history is scanned for ``chat_id``:
       stale dispatch and no history -> ``RESULT_MISSING``;
       stale dispatch with history   -> ``BOT_STALE``;
       dispatch not stale            -> ``"PENDING_FAILSAFE"``.

    All artifact checks are made against the events directory reported by
    the root resolver, not against ``autoset_cwd``.

    Args:
        task_id: task whose artifacts and ledger record are looked up.
        chat_id: chat the dispatch belongs to; used for the ledger check
            and the schedule-history scan.
        expected_dispatch_id: forwarded to the ledger classification.
        expected_dispatch_cron_id: forwarded to the ledger classification.
        autoset_cwd: forwarded to the root resolver.
        dispatch_stale: caller's judgement that the dispatch is stale.
        result_basename: overrides the default ``<task_id>.result.json``
            (the +44/+46 result is ``task-2553+44_46.result.json``; +39 is
            ``task-2553+39.result.json``).
        schedule_history_dir: overrides the default shared history dir
            (hermetic regression 12).

    Returns:
        A ``CollectorLookupResult`` holding the verdict, the evidence flags
        and the accumulated reasons.
    """
    roots = resolve_roots(
        autoset_cwd=autoset_cwd, schedule_history_dir=schedule_history_dir
    )
    events_dir = Path(roots.events_dir)
    notes: List[str] = []

    # ── step 1: registry-first (§3.B) ────────────────────────────────────
    registry = Callback4TupleRegistry(default_ledger_path(roots.canonical_root))
    reg_verdict = registry.classify(
        task_id=task_id,
        expected_dispatch_id=expected_dispatch_id,
        expected_chat_id=chat_id,
        expected_dispatch_cron_id=expected_dispatch_cron_id,
    )
    identity_mismatch = reg_verdict == TRACK_MISMATCH
    if identity_mismatch:
        notes.append(
            "durable ledger identity mismatch (task/dispatch/chat) "
            "-> TRACK_MISMATCH; unrelated-task callback not cited (§3.B)."
        )

    # ── step 2: canonical-root artifact re-check (§3.C) ──────────────────
    result_name = result_basename if result_basename else f"{task_id}.result.json"
    collector_p = events_dir / f"{task_id}.collector-result.json"
    result_present = _isfile(events_dir / result_name)
    collector_present = _isfile(collector_p)
    # Either the plain marker or its acknowledged variant counts as "done".
    done_present = any(
        _isfile(events_dir / marker)
        for marker in (f"{task_id}.done", f"{task_id}.done.acked")
    )

    # §3.C / regression 11 — the canonical root has already been checked
    # above, so a missing memory/events under the autoset cwd can never be
    # the sole cause of a missing verdict.
    autoset_only_miss_blocked = not roots.autoset_events_dir_present
    if autoset_only_miss_blocked:
        notes.append(
            "autoset/current cwd has no memory/events; canonical root "
            "re-checked FIRST before any missing verdict (§3.C, "
            "regression 11)."
        )

    # ── verdict resolution ───────────────────────────────────────────────
    # The history scan only runs in the final fallback; everywhere else the
    # result reports it as not seen.
    history_seen = False
    if identity_mismatch:
        verdict = TRACK_MISMATCH
    elif reg_verdict == NORMAL_CALLBACK_COMPLETED:
        verdict = NORMAL_COLLECTOR_COMPLETED
        notes.append(
            "durable ledger COMPLETED record present — normal callback "
            "completed survives one-shot cron auto-delete (§3.B)."
        )
    elif collector_present:
        verdict = NORMAL_COLLECTOR_COMPLETED
        notes.append(
            f"canonical {collector_p.name} present (§3.C re-check)."
        )
    elif result_present or done_present:
        verdict = RESULT_PRESENT
        notes.append(
            f"canonical artifact present under {events_dir} (§3.C); "
            "autoset-cwd false-miss overridden (regression 10)."
        )
    else:
        # step 3: schedule_history + stale fail-safe (§3.B / regression 12)
        history_seen = _schedule_history_seen_for_chat(
            Path(roots.schedule_history_dir), chat_id
        )
        if history_seen or not dispatch_stale:
            # fail-safe: do NOT misjudge — defer rather than false-miss.
            if dispatch_stale:
                verdict = BOT_STALE
            else:
                verdict = "PENDING_FAILSAFE"
            notes.append(
                "canonical artifact absent but schedule_history/registry "
                "inconclusive — fail-safe defer, no false RESULT_MISSING "
                "(§3.B)."
            )
        else:
            verdict = RESULT_MISSING
            notes.append(
                "canonical root absent + no schedule_history for chat + "
                "stale dispatch -> RESULT_MISSING / BOT_STALE candidate "
                "(§3.B, regression 12)."
            )

    return CollectorLookupResult(
        schema=LOOKUP_SCHEMA,
        task_id=task_id,
        verdict=verdict,
        registry_verdict=reg_verdict,
        canonical_root=roots.canonical_root,
        events_dir=roots.events_dir,
        result_present_canonical=result_present,
        collector_result_present_canonical=collector_present,
        done_present_canonical=done_present,
        autoset_cwd_root=roots.autoset_cwd_root,
        autoset_only_miss_blocked=autoset_only_miss_blocked,
        schedule_history_seen=history_seen,
        dispatch_stale=dispatch_stale,
        reasons=notes,
    )