# -*- coding: utf-8 -*-
"""utils.canonical_root_resolver — ANU collector canonical-root resolver.

task-2636 — callback collector canonical-root resolver (Core hardening).

Spec: memory/specs/system_callback_collector_canonical_root_spec_260523.md
sha256: 6f0b04810cc458ea4cb12f3c1c9c511d14b1439917b7ef4f1ef91982e32d92c1

회장 verbatim (task md §3):
    CANONICAL_ROOT_RESOLUTION_ORDER:
      1. envelope.canonical_root (explicit override)
      2. CANONICAL_ROOT_DEFAULT = "/home/jay/workspace"
      3. ★ cwd-based lookup 사용 금지 (primary path 에서 0)

    PATH_RESOLUTION_RULE:
      absolute_path → 그대로 사용 (canonical_root 무시)
      relative_path → os.path.join(canonical_root_resolved, relative_path)
      empty_or_none → MISSING classification

    CONTEXT_MISMATCH_DETECTION:
      if os.getcwd() != canonical_root_resolved:
          log CALLBACK_COLLECTOR_CONTEXT_MISMATCH event
          proceed with canonical_root_resolved (do not use cwd)

ANCHOR-1 (spec §14): envelope.canonical_root 우선 · default /home/jay/workspace
· cwd-based primary lookup 코드 경로 0.
ANCHOR-2: cwd != canonical_root 시 CALLBACK_COLLECTOR_CONTEXT_MISMATCH 기록
+ canonical_root 사용 (cwd 결정 권한 0).
"""
from __future__ import annotations

import json
import os
import uuid
from datetime import datetime, timedelta, timezone
from pathlib import Path
from typing import Any, Dict, List, Optional

# task-2636 round-1 (Gemini medium): single source of truth — import CANONICAL_ROOT_DEFAULT
# from schema module (don't re-define here).
from utils.callback_envelope_schema import CANONICAL_ROOT_DEFAULT

# Schema identifier for this resolver module (not referenced by the other
# code visible in this file).
RESOLVER_SCHEMA = "utils.canonical_root_resolver.v1"

# Context-mismatch event constants, consumed by detect_context_mismatch() and
# _append_mismatch_event():
#   * MISMATCH_EVENT_SCHEMA  - value of the "schema" key in each logged record.
#   * MISMATCH_EVENT_LOG_REL - JSONL log location, relative to the log root.
#   * MISMATCH_DECISION      - "decision" value reported whether or not the
#                              cwd matched the canonical root.
MISMATCH_EVENT_SCHEMA = "callback_collector_context_mismatch.v1"
MISMATCH_EVENT_LOG_REL = "memory/events/callback_collector_context_mismatch.jsonl"
MISMATCH_DECISION = "PROCEED_WITH_CANONICAL_ROOT"

# Lookup classification labels. find_artifact() reports CLASS_FOUND,
# CLASS_MISSING_PATH_FIELD and CLASS_NOT_FOUND. CLASS_MISSING_BOTH_PATHS is
# not used by the code visible in this file (presumably for callers that
# check both path fields - TODO confirm).
CLASS_FOUND = "FOUND"
CLASS_MISSING_PATH_FIELD = "MISSING_PATH_FIELD"
CLASS_MISSING_BOTH_PATHS = "MISSING_BOTH_PATHS"
CLASS_NOT_FOUND = "NOT_FOUND"


def _now_iso_kst() -> str:
    """Return the current time as a KST (UTC+9) ISO-8601 string, second precision.

    A fixed +09:00 offset is used instead of a named zone so that no
    time-zone database is required at runtime.
    """
    kst = timezone(timedelta(hours=9))
    now_utc = datetime.now(timezone.utc)
    # The offset suffix is a literal in the format string; it always matches
    # the fixed zone applied on this line.
    return now_utc.astimezone(kst).strftime("%Y-%m-%dT%H:%M:%S+09:00")


def resolve_canonical_root(
    envelope: Any,
    default: str = CANONICAL_ROOT_DEFAULT,
) -> str:
    """Pick the canonical root directory for ``envelope``.

    Precedence:
      1. a truthy ``envelope["canonical_root"]`` (explicit override)
      2. ``default``

    The process working directory is never consulted.

    Args:
        envelope: Callback envelope. Anything that is not a ``dict`` is
            treated as carrying no override.
        default: Fallback root used when no override is present.

    Returns:
        The override if present, otherwise ``default`` (returned unchanged,
        without normalisation).

    Raises:
        TypeError: The override is truthy but not a ``str``.
        ValueError: The selected root (override or default) is not an
            absolute path.
    """
    override = envelope.get("canonical_root") if isinstance(envelope, dict) else None

    if override:
        if not isinstance(override, str):
            raise TypeError(
                f"envelope.canonical_root must be str, got {type(override).__name__}"
            )
        if not os.path.isabs(override):
            raise ValueError(
                f"envelope.canonical_root must be absolute path: {override!r}"
            )
        return override

    # No usable override (missing, empty, or non-dict envelope): fall back.
    if os.path.isabs(default):
        return default
    raise ValueError(
        f"canonical_root default must be absolute path: {default!r}"
    )


def resolve_path(
    envelope: Any,
    path_field: str,
    canonical_root: Optional[str] = None,
) -> str:
    """Resolve one path-valued envelope field against the canonical root.

    Args:
        envelope: Callback envelope; a non-``dict`` yields ``""``.
        path_field: Key of the path to resolve.
        canonical_root: Root to join relative paths onto. When ``None`` it is
            derived with ``resolve_canonical_root(envelope)``.

    Returns:
        * ``""`` when the field is absent, ``None`` or empty (the caller
          classifies this as missing),
        * the value unchanged when it is already absolute,
        * otherwise the normalised join of the root and the relative value.

    Raises:
        ValueError: The root in effect is not an absolute path.
        TypeError: The field holds a non-empty value that is not a ``str``.
    """
    root = resolve_canonical_root(envelope) if canonical_root is None else canonical_root
    # The root is validated before the envelope is inspected, so a bad root
    # is reported even when there turns out to be nothing to resolve.
    if not os.path.isabs(root):
        raise ValueError(
            f"canonical_root must be absolute: {root!r}"
        )

    value = envelope.get(path_field) if isinstance(envelope, dict) else None
    if value is None or value == "":
        return ""
    if not isinstance(value, str):
        raise TypeError(
            f"envelope[{path_field!r}] must be str, got {type(value).__name__}"
        )

    # Absolute paths pass through untouched; only relative ones are anchored.
    return value if os.path.isabs(value) else os.path.normpath(os.path.join(root, value))


def detect_context_mismatch(
    envelope: Any,
    current_cwd: Optional[str] = None,
    *,
    log_root: Optional[str] = None,
    write_event: bool = True,
    clock: Optional[Any] = None,
) -> Dict[str, Any]:
    """Report whether the working directory differs from the canonical root.

    The comparison is made on ``os.path.normpath`` forms of both paths. On a
    mismatch (and when ``write_event`` is true) one record is appended to
    ``<log_root>/memory/events/callback_collector_context_mismatch.jsonl``.
    The decision is always ``PROCEED_WITH_CANONICAL_ROOT``; the working
    directory never influences it.

    Args:
        envelope: Callback envelope; ``task_id`` and ``canonical_root`` are
            read from it when it is a ``dict``.
        current_cwd: Working directory to compare. When ``None`` it is read
            with ``os.getcwd()``.
        log_root: Parent directory of the event log (passed through to
            ``_append_mismatch_event``).
        write_event: When false, a mismatch is reported but nothing is logged
            and no event id is generated.
        clock: Optional zero-argument callable whose return value is stored
            as ``ts_kst``; defaults to ``_now_iso_kst``.

    Returns:
        {
            "mismatch": bool,
            "cwd": str,
            "canonical_root": str,
            "recorded_event_id": Optional[str],
            "decision": "PROCEED_WITH_CANONICAL_ROOT",
        }
        ``cwd`` is ``""`` when the working directory could not be determined;
        that case is reported as a mismatch.

    Raises:
        TypeError, ValueError: Propagated from ``resolve_canonical_root`` when
            the envelope override (or the default root) is invalid.
    """
    canonical_root = resolve_canonical_root(envelope)
    if current_cwd is None:
        try:
            current_cwd = os.getcwd()
        except OSError:
            # The working directory may have been deleted or become
            # unreadable. cwd has no authority over the collector decision,
            # so an unavailable cwd must not abort detection: record it as
            # "" (which never equals an absolute root) and carry on.
            current_cwd = ""

    if os.path.normpath(current_cwd) == os.path.normpath(canonical_root):
        return {
            "mismatch": False,
            "cwd": current_cwd,
            "canonical_root": canonical_root,
            "recorded_event_id": None,
            "decision": MISMATCH_DECISION,
        }

    # An event id exists only when an event is actually going to be logged.
    event_id = uuid.uuid4().hex[:16] if write_event else None
    if write_event:
        has_fields = isinstance(envelope, dict)
        ts = clock() if clock else _now_iso_kst()
        record = {
            "schema": MISMATCH_EVENT_SCHEMA,
            "event_id": event_id,
            "ts_kst": ts,
            "task_id": envelope.get("task_id") if has_fields else None,
            "cwd": current_cwd,
            # Raw override as supplied (may be None) next to the root that
            # was actually selected.
            "canonical_root_envelope": (
                envelope.get("canonical_root") if has_fields else None
            ),
            "canonical_root_resolved": canonical_root,
            "delta_paths": _delta_paths(envelope, canonical_root),
            "decision": MISMATCH_DECISION,
        }
        _append_mismatch_event(record, log_root=log_root)

    return {
        "mismatch": True,
        "cwd": current_cwd,
        "canonical_root": canonical_root,
        "recorded_event_id": event_id,
        "decision": MISMATCH_DECISION,
    }


def _delta_paths(envelope: Any, canonical_root: str) -> List[str]:
    """List the canonical-root resolutions of the envelope's relative paths.

    Only ``result_path`` and ``report_path`` are examined, in that order.
    Values that are missing, empty, non-string or already absolute are
    skipped, since their location does not depend on the root. A non-``dict``
    envelope yields an empty list.
    """
    if not isinstance(envelope, dict):
        return []
    values = (envelope.get(key) for key in ("result_path", "report_path"))
    return [
        os.path.normpath(os.path.join(canonical_root, rel))
        for rel in values
        if isinstance(rel, str) and rel and not os.path.isabs(rel)
    ]


def _append_mismatch_event(record: Dict[str, Any], log_root: Optional[str]) -> None:
    """Append one JSONL record to the mismatch event log, best effort.

    The log lives at ``<base>/memory/events/callback_collector_context_mismatch.jsonl``
    where ``<base>`` is ``log_root`` when given (so tests can point it at a
    temporary directory) and ``CANONICAL_ROOT_DEFAULT`` otherwise. Missing
    parent directories are created.

    This log is purely diagnostic, so no failure here may stop the collector
    from proceeding with the canonical root. Filesystem errors (read-only
    filesystem, permissions, full disk, ...), serialisation errors and text
    encoding errors are all swallowed.

    Args:
        record: Event payload. Values that JSON cannot represent natively are
            written as ``str(value)`` rather than dropping the whole event.
        log_root: Optional override for the log's base directory.
    """
    try:
        # Serialise before touching the filesystem. ``default=str`` is a
        # deliberate fallback for this diagnostic log: an odd ``task_id`` or
        # clock value is stringified so the event is still recorded. Output
        # for already-serialisable records is unchanged.
        line = json.dumps(record, ensure_ascii=False, sort_keys=True, default=str)
        base = Path(log_root) if log_root else Path(CANONICAL_ROOT_DEFAULT)
        log_path = base / MISMATCH_EVENT_LOG_REL
        log_path.parent.mkdir(parents=True, exist_ok=True)
        with log_path.open("a", encoding="utf-8") as fh:
            fh.write(line + "\n")
    except (OSError, TypeError, ValueError):
        # OSError: filesystem failure. TypeError / ValueError: record that
        # cannot be serialised (unsupported keys, circular references) or
        # text that cannot be encoded as UTF-8 (UnicodeEncodeError is a
        # ValueError). A diagnostic logging failure must not block the
        # collector decision, so the event is dropped silently.
        pass


def find_artifact(
    envelope: Any,
    path_field: str,
    *,
    fs_exists: Optional[Any] = None,
) -> Dict[str, Any]:
    """Locate the artifact named by ``envelope[path_field]`` and report how.

    Candidates are probed in this order, stopping at the first that exists:
      1. the path resolved against the envelope's canonical root (an
         absolute value is used as-is);
      2. for relative values only, and only when the envelope's root differs
         from ``CANONICAL_ROOT_DEFAULT``: the path resolved against
         ``CANONICAL_ROOT_DEFAULT``.

    Args:
        envelope: Callback envelope. A non-``dict`` is treated like a missing
            field.
        path_field: Key of the path to look up.
        fs_exists: Optional existence predicate taking a path string;
            defaults to ``os.path.exists``.

    Returns:
        {
            "found": bool,
            "resolved_path": str,
            "lookup_attempts": [paths tried, in order],
            "classification": FOUND / MISSING_PATH_FIELD / NOT_FOUND,
        }
        When nothing is found, ``resolved_path`` is the first path tried
        (``""`` if the field was missing).
    """

    def _report(found: bool, resolved: str, tried: List[str], label: str) -> Dict[str, Any]:
        # Single place that fixes the shape and key order of the result.
        return {
            "found": found,
            "resolved_path": resolved,
            "lookup_attempts": tried,
            "classification": label,
        }

    value = envelope.get(path_field) if isinstance(envelope, dict) else None
    if value is None or value == "":
        return _report(False, "", [], CLASS_MISSING_PATH_FIELD)

    probe = os.path.exists if fs_exists is None else fs_exists
    root = resolve_canonical_root(envelope)
    tried: List[str] = []

    first = resolve_path(envelope, path_field, canonical_root=root)
    if first:
        tried.append(first)
        if probe(first):
            return _report(True, first, tried, CLASS_FOUND)

    # Absolute values carry their own location, and an envelope already on
    # the default root would just repeat the first probe, so neither case
    # gets a fallback attempt. The fallback is resolved lazily, only after
    # the primary candidate has been ruled out.
    skip_fallback = os.path.isabs(value) or root == CANONICAL_ROOT_DEFAULT
    if not skip_fallback:
        second = resolve_path(
            envelope, path_field, canonical_root=CANONICAL_ROOT_DEFAULT
        )
        if second and second != first:
            tried.append(second)
            if probe(second):
                return _report(True, second, tried, CLASS_FOUND)

    return _report(False, tried[0] if tried else "", tried, CLASS_NOT_FOUND)
