# -*- coding: utf-8 -*-
"""utils.callback_source_cross_checker — 4 source cross-check for callback
authority verification (task-2646 CALLBACK_REGISTRATION_AUTHORITY_GATE).

ANCHOR-3: 4 source cross-check:
  1. schedule_history_records  — /home/jay/.cokacdir/schedule_history/<id>.log
  2. cron_history_records      — ANU key + suspect self-key cron-history
  3. envelope                  — callback prompt original text (collector_key)
  4. result_artifact           — result.json / report / ledger entry

ANCHOR-4: one-shot cron fire after cron-list 0 → CRON_LIST_AUTODELETED_FIRED
  (never conclude CALLBACK_MISSING from cron-list alone)

Layer A / NO-CRON / NO-WRITE / NO-SUBPROCESS: pure function, zero IO.
All inputs are pre-parsed dicts/lists supplied by the caller.
"""
from __future__ import annotations

from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional, Sequence

# Constants inlined (copied) from dispatch.callback_owner_enforcer so this
# module does not have to import it; importing runs into dispatch package
# resolution issues (dispatch/ dir vs dispatch.py shim, task-2646).
# These values are pinned by the Chairman's §10 and must match exactly.
_ANU_KEY_2553 = "c119085addb0f8b7"
# Default set of independent ANU keys; used as the default for the
# ``anu_keys`` parameter of cross_check_sources.
DEFAULT_ANU_KEYS: frozenset = frozenset({_ANU_KEY_2553})


def is_anu_key(key, anu_keys) -> bool:
    """Report whether *key* is one of the configured independent ANU keys.

    A falsy *key* (empty string, ``None``, ...) is never an ANU key; in that
    case *anu_keys* is not inspected at all.
    """
    if not key:
        return False
    configured = set(anu_keys)
    return key in configured


# ── module constants ──────────────────────────────────────────────────────────
# Schema identifier stamped on every CrossCheckResult built by cross_check_sources.
SCHEMA = "utils.callback_source_cross_checker.v1"

# ── verdict ───────────────────────────────────────────────────────────────────
# Values of CrossCheckResult.verdict; CrossCheckResult.ok is True only for PASS.
VERDICT_PASS = "PASS"
VERDICT_FAIL = "FAIL"

# ── state enum subset used by this module ─────────────────────────────────────
# FAIL: all 4 sources absent and cron_list_present is False.
STATE_CALLBACK_MISSING = "CALLBACK_MISSING"
# PASS: cron_list_present is False but schedule_history has records (ANCHOR-4).
STATE_CRON_LIST_AUTODELETED_FIRED = "CRON_LIST_AUTODELETED_FIRED"
# PASS (hold): a schedule_history record has status pending/scheduled/queued.
STATE_SCHEDULE_HISTORY_PENDING = "SCHEDULE_HISTORY_PENDING"
# PASS (informational): fallback state when no other classification applies.
STATE_SOURCE_CROSS_CHECK_PARTIAL = "SOURCE_CROSS_CHECK_PARTIAL"
# PASS: all 4 sources present and an owner key was extracted from cron history.
STATE_OWNER_KEY_VERIFIED = "OWNER_KEY_VERIFIED"
# FAIL: result_artifact present while schedule_history and cron_history are absent.
STATE_RESULT_ARTIFACT_SELF_ATTESTED = "RESULT_ARTIFACT_SELF_ATTESTED"


@dataclass
class CrossCheckResult:
    """Outcome record of one 4-source cross-check.

    Holds the verdict/state pair, the per-source presence flags and counts,
    the keys seen in cron history, and the human-readable reasons that led
    to the classification.
    """

    schema: str
    # PASS | FAIL
    verdict: str
    # one of the state enum values
    state: str
    source_count_present: int
    source_count_absent: int
    schedule_history_present: bool
    # keys found in cron history records
    cron_history_owner_keys: List[str]
    envelope_collector_key: Optional[str]
    result_artifact_present: bool
    # None=unknown, False=absent, True=present
    cron_list_present: Optional[bool]
    # extracted from cron_history if found
    actual_owner_key: Optional[str]
    reasons: List[str] = field(default_factory=list)

    @property
    def ok(self) -> bool:
        """True when the verdict equals VERDICT_PASS."""
        passed = self.verdict == VERDICT_PASS
        return passed

    def to_json(self) -> dict:
        """Return a plain dict of all fields, in declaration order.

        The two list fields are shallow-copied so the returned dict does not
        share list objects with this instance.
        """
        return dict(
            schema=self.schema,
            verdict=self.verdict,
            state=self.state,
            source_count_present=self.source_count_present,
            source_count_absent=self.source_count_absent,
            schedule_history_present=self.schedule_history_present,
            cron_history_owner_keys=[*self.cron_history_owner_keys],
            envelope_collector_key=self.envelope_collector_key,
            result_artifact_present=self.result_artifact_present,
            cron_list_present=self.cron_list_present,
            actual_owner_key=self.actual_owner_key,
            reasons=[*self.reasons],
        )


def _extract_owner_key_from_cron_history(
    cron_history_records: Dict[str, List[Dict[str, Any]]],
    anu_keys: Sequence[str],
) -> Optional[str]:
    """Extract actual owner key from cron-history records.

    Priority:
    1. ANU key found → return that key
    2. suspect self-key found → return that key
    3. Neither found → return None

    cron_history_records format: {key: [record, ...]}
    A non-empty list means access was granted (key is owner).
    An empty list or missing key means access denied (not that key).
    """
    for key, records in cron_history_records.items():
        if records and is_anu_key(key, anu_keys):
            return key

    # Fallback: any non-empty record (suspect self-key)
    for key, records in cron_history_records.items():
        if records:
            return key

    return None


def _extract_envelope_collector_key(envelope: Optional[Dict[str, Any]]) -> Optional[str]:
    """Extract collector_key from envelope dict."""
    if not envelope:
        return None
    return envelope.get("collector_key") or envelope.get("owner_key")


# Lower-cased schedule_history status values that mean "not fired yet".
_PENDING_STATUSES = frozenset({"pending", "scheduled", "queued"})


def _record_is_pending(record: Any) -> bool:
    """True iff *record* is a dict whose string ``status`` is a pending state."""
    if not isinstance(record, dict):
        return False
    # dict.get only applies its default when the key is missing, so a record
    # with ``"status": null`` yields None here; guard before calling .lower().
    status = record.get("status")
    return isinstance(status, str) and status.lower() in _PENDING_STATUSES


def cross_check_sources(
    *,
    cron_id: str,
    schedule_history_records: List[Dict[str, Any]],
    cron_history_records: Dict[str, List[Dict[str, Any]]],
    envelope: Optional[Dict[str, Any]],
    result_artifact: Optional[Dict[str, Any]],
    cron_list_present: Optional[bool] = None,
    anu_keys: Sequence[str] = tuple(DEFAULT_ANU_KEYS),
) -> CrossCheckResult:
    """Cross-check 4 sources to determine callback authority.

    Pure function: all inputs are pre-parsed data and nothing is read or
    written.

    Args:
        cron_id: Identifier of the cron; only used in the reason text.
        schedule_history_records: JSONL-parsed records from
            /home/jay/.cokacdir/schedule_history/<id>.log. ``None`` is
            treated as empty.
        cron_history_records: Mapping key → list of records (ANU key and
            suspect self-key, each queried separately). A non-empty list
            counts as that key being present. ``None`` is treated as empty.
        envelope: Dict carrying ``collector_key`` / ``owner_key`` from the
            cron prompt, or ``None``. A blank key counts as absent.
        result_artifact: Dict from result.json / report / ledger, or ``None``.
        cron_list_present: ``None`` = unknown, ``True`` = in cron-list,
            ``False`` = absent from cron-list.
        anu_keys: Configured independent ANU keys.

    Returns:
        A CrossCheckResult. Classification, in priority order
        (ANCHOR-3, ANCHOR-4):

        1. schedule_history has a pending/scheduled/queued record
           → SCHEDULE_HISTORY_PENDING (PASS, hold)
        2. cron_list False + schedule_history present
           → CRON_LIST_AUTODELETED_FIRED (PASS)
        3. all 4 sources absent + cron_list False → CALLBACK_MISSING (FAIL)
        4. result_artifact present, schedule_history and cron_history absent
           → RESULT_ARTIFACT_SELF_ATTESTED (FAIL)
        5. all 4 sources present + owner key extracted
           → OWNER_KEY_VERIFIED (PASS)
        6. anything else → SOURCE_CROSS_CHECK_PARTIAL (PASS, informational)
    """
    # Tolerate None for the two collection inputs instead of crashing.
    schedule_history_records = schedule_history_records or []
    cron_history_records = cron_history_records or {}

    # ── Evaluate presence of each source ─────────────────────────────────────
    schedule_history_present = bool(schedule_history_records)
    cron_history_owner_keys: List[str] = [
        key for key, records in cron_history_records.items() if records
    ]
    cron_history_present = bool(cron_history_owner_keys)
    # Normalise a blank key to None so it is not counted as a present source.
    envelope_collector_key = _extract_envelope_collector_key(envelope) or None
    envelope_present = envelope_collector_key is not None
    result_artifact_present = bool(result_artifact)

    source_count_present = sum([
        schedule_history_present,
        cron_history_present,
        envelope_present,
        result_artifact_present,
    ])
    source_count_absent = 4 - source_count_present

    # Extract actual owner key from cron history
    actual_owner_key = _extract_owner_key_from_cron_history(
        cron_history_records, anu_keys
    )

    # ── Check for pending status in schedule_history ──────────────────────────
    history_has_pending = any(
        _record_is_pending(record) for record in schedule_history_records
    )

    def _result(verdict: str, state: str, reason: str) -> CrossCheckResult:
        """Build the result from the computed source facts plus one reason."""
        return CrossCheckResult(
            schema=SCHEMA,
            verdict=verdict,
            state=state,
            source_count_present=source_count_present,
            source_count_absent=source_count_absent,
            schedule_history_present=schedule_history_present,
            cron_history_owner_keys=cron_history_owner_keys,
            envelope_collector_key=envelope_collector_key,
            result_artifact_present=result_artifact_present,
            cron_list_present=cron_list_present,
            actual_owner_key=actual_owner_key,
            reasons=[reason],
        )

    # ── Classification (ordered by priority) ──────────────────────────────────

    # Case 1: schedule_history pending → hold (ANCHOR-4: never conclude missing prematurely)
    if schedule_history_present and history_has_pending:
        return _result(
            VERDICT_PASS,
            STATE_SCHEDULE_HISTORY_PENDING,
            f"schedule_history for cron_id={cron_id!r} has pending records "
            "→ SCHEDULE_HISTORY_PENDING (hold, do not conclude missing).",
        )

    # Case 2: cron_list absent + schedule_history present
    # → one-shot cron fired and auto-deleted (ANCHOR-4)
    if cron_list_present is False and schedule_history_present:
        return _result(
            VERDICT_PASS,
            STATE_CRON_LIST_AUTODELETED_FIRED,
            f"cron_list_present=False but schedule_history present for "
            f"cron_id={cron_id!r} → CRON_LIST_AUTODELETED_FIRED "
            "(one-shot cron fired and auto-deleted; not CALLBACK_MISSING). "
            "ANCHOR-4: cron-list alone cannot determine missing.",
        )

    # Case 3: All 4 sources absent + cron_list confirmed False → CALLBACK_MISSING
    if source_count_present == 0 and cron_list_present is False:
        return _result(
            VERDICT_FAIL,
            STATE_CALLBACK_MISSING,
            f"All 4 sources absent for cron_id={cron_id!r} AND "
            "cron_list_present=False → CALLBACK_MISSING (FAIL). "
            "Note: cron-list alone is not sufficient; confirmed only after "
            "4 source cross-check (ANCHOR-3/ANCHOR-4).",
        )

    # Case 4: result_artifact without schedule_history or cron_history
    # → RESULT_ARTIFACT_SELF_ATTESTED.
    # Bot self-attestation alone cannot grant PASS (Chairman policy).
    if result_artifact_present and not schedule_history_present and not cron_history_present:
        return _result(
            VERDICT_FAIL,
            STATE_RESULT_ARTIFACT_SELF_ATTESTED,
            "result_artifact present but schedule_history and cron_history both absent "
            f"for cron_id={cron_id!r} → RESULT_ARTIFACT_SELF_ATTESTED (FAIL). "
            "Bot self-attestation alone cannot confirm authority "
            "(cross-verification required, ANCHOR-3).",
        )

    # Case 5: All 4 sources present + actual owner key extracted → OWNER_KEY_VERIFIED
    # NOTE(review): this does not require actual_owner_key to be an ANU key,
    # nor to match envelope_collector_key — confirm that is intended.
    if source_count_present == 4 and actual_owner_key is not None:
        return _result(
            VERDICT_PASS,
            STATE_OWNER_KEY_VERIFIED,
            f"All 4 sources present for cron_id={cron_id!r}, "
            f"actual_owner_key={actual_owner_key!r} extracted from cron_history "
            "→ OWNER_KEY_VERIFIED (PASS, ANCHOR-3 cross-check complete).",
        )

    # Case 6: Partial sources → SOURCE_CROSS_CHECK_PARTIAL
    # (informational PASS; not enough evidence to conclude FAIL)
    return _result(
        VERDICT_PASS,
        STATE_SOURCE_CROSS_CHECK_PARTIAL,
        f"Partial sources for cron_id={cron_id!r}: "
        f"{source_count_present}/4 present "
        f"(schedule_history={schedule_history_present}, "
        f"cron_history={cron_history_present}, "
        f"envelope={envelope_present}, "
        f"result_artifact={result_artifact_present}) "
        "→ SOURCE_CROSS_CHECK_PARTIAL (insufficient for definitive verdict; "
        "gather more sources before concluding missing, ANCHOR-3/ANCHOR-4).",
    )


# Names exported by ``from utils.callback_source_cross_checker import *``.
# NOTE(review): the STATE_* constants, DEFAULT_ANU_KEYS and is_anu_key are not
# listed here, so a star-import does not provide them — confirm that callers
# comparing ``CrossCheckResult.state`` import those names explicitly.
__all__ = [
    "SCHEMA",
    "VERDICT_PASS",
    "VERDICT_FAIL",
    "CrossCheckResult",
    "cross_check_sources",
]
