"""anu_v3.parallel_batch_coordinator — top-level parallel batch coordinator.

Authority: task-2553+17.md §1 / §2 / §4 / §9 + §12 (9-R.1..9-R.7).

Single entry that, given a parallel batch plan:
  1. derives a deterministic batch_id
  2. registers per-track callback_track_record (5-field) into a registry
  3. builds the dependency matrix + runs overlap / contamination checkers
  4. drives a 13-state track loop machine per track
  5. selects per-task final authority packets (normal > fallback, ts)
  6. classifies duplicate / pending / mismatch callbacks
  7. resolves a single batch-level next action
  8. writes parallel-batch-state.json + a chair-only consolidated summary
  9. (Track3) folds goal-driven loop plans, splitting conflicting
     sub-features to follow-ups (no Track2 silent drop)

Pure stdlib. Persists ONLY new deliverable files; never mutates existing
tracked files (9-R.1).
"""

from __future__ import annotations

import hashlib
import json
import subprocess
from dataclasses import dataclass, field
from pathlib import Path
from typing import Dict, List, Optional, Sequence

from anu_v3.batch_dependency_matrix import BatchDependencyMatrix, TrackSpec
from anu_v3.batch_join_policy import (
    CallbackEvent,
    PacketCandidate,
    TrackArtifacts,
    TrackJoinView,
    batch_next_action,
    classify_callback,
    cross_track_contamination,
    final_authority_packet_selector,
    pending_blocks_chair_decision,
)
from anu_v3.callback_track_registry import (
    CallbackTrackRecord,
    CallbackTrackRegistry,
)
from anu_v3.goal_loop_planner import GoalRequest, LoopPlan, generate_loop_plan
from anu_v3.track_loop_state import TrackLoopState, is_terminal


# HIGH-2: single normalized callback-class key scheme. classify_callbacks()
# emits one class per track_id (the exact key batch_next_action() looks up);
# when several events land on one track the most-severe class wins.
_CLASS_SEVERITY: Dict[str, int] = {
    "TRACK_MISMATCH": 4,
    "CONTAMINATION_HOLD": 3,
    "CALLBACK_PENDING": 2,
    "DUPLICATE_CALLBACK_IGNORED": 1,
    "NORMAL_COLLECTOR_ACCEPTED": 0,
    "": -1,
}


class TrackedPathWriteRefused(RuntimeError):
    """Raised when the coordinator's write-guard rejects a target path.

    9-R.1/9-R.2: the coordinator may only create NEW git-untracked
    deliverables. A path that is already git-tracked is treated as byte-0
    immutable for this task, so any attempt to write it is refused with
    this error.
    """


def _is_git_tracked(p: Path) -> bool:
    """True iff *p* is git-tracked (``git ls-files --error-unmatch`` succeeds
    in the repo that owns it). Outside any git repo -> False (untracked)."""
    try:
        top = subprocess.run(
            ["git", "-C", str(p.parent), "rev-parse", "--show-toplevel"],
            capture_output=True,
            text=True,
        )
        if top.returncode != 0:
            return False
        repo = top.stdout.strip()
        rc = subprocess.run(
            ["git", "-C", repo, "ls-files", "--error-unmatch", str(p.resolve())],
            capture_output=True,
            text=True,
        )
        return rc.returncode == 0
    except Exception:
        return False


@dataclass
class TrackPlan:
    """Plan for a single track of a parallel batch.

    The first five fields are required and positional; they are the values
    the coordinator copies into the track's ``CallbackTrackRecord``. The
    remaining fields are optional and default to empty lists / the initial
    loop state.
    """

    # -- identity (copied into the callback track registry) --------------
    track_id: str
    task_id: str
    dispatch_cron_id: str
    normal_collector_cron_id: str
    fallback_callback_cron_id: str
    # -- dependency-matrix inputs (passed to TrackSpec) ------------------
    expected_files: List[str] = field(default_factory=list)
    forbidden_write_targets: List[str] = field(default_factory=list)
    depends_on: List[str] = field(default_factory=list)
    own_artifacts: List[str] = field(default_factory=list)
    # -- contamination input (own_artifacts + cited_artifacts) -----------
    cited_artifacts: List[str] = field(default_factory=list)
    # -- loop-state seed (passed to TrackLoopState) ----------------------
    initial_state: str = "PLANNED"
    retry_ceiling: int = 2


@dataclass
class BatchPlan:
    """A parallel batch: a label, its tracks and optional goal requests."""

    # Human-readable label; also part of the derived batch_id.
    batch_label: str
    # One TrackPlan per parallel track.
    tracks: List[TrackPlan]
    # Goal requests consumed by the coordinator's goal-loop fold-in;
    # empty unless the caller supplies them.
    goal_requests: List[GoalRequest] = field(default_factory=list)


@dataclass
class CoordinatorResult:
    """Return value of ``ParallelBatchCoordinator.run()``."""

    # Deterministic id derived from the batch plan.
    batch_id: str
    # The batch state mapping (the structure serialised to the state JSON).
    state: Dict[str, object]
    # The chair-only consolidated summary, as markdown text.
    summary_md: str


class ParallelBatchCoordinator:
    """Top-level coordinator for one parallel batch plan.

    Construction derives the deterministic ``batch_id``, registers one
    ``CallbackTrackRecord`` per track, creates one ``TrackLoopState`` per
    track and builds the ``BatchDependencyMatrix``. Later calls classify
    callbacks, resolve the batch-level next action and persist the state
    JSON plus the chair-only summary through a write-guard that refuses
    git-tracked paths and unrelated pre-existing files.
    """

    def __init__(self, plan: BatchPlan) -> None:
        """Register every track of *plan* and seed its loop state.

        Args:
            plan: batch label, track plans and optional goal requests.
        """
        self.plan = plan
        self.batch_id = self._derive_batch_id(plan)
        self.registry = CallbackTrackRegistry()
        self._track_states: Dict[str, TrackLoopState] = {}
        self._loop_plans: Dict[str, LoopPlan] = {}
        # HIGH-2/HIGH-3: the last classify_callbacks() pass — class map and
        # per-track chair-blocking verdict — so build_state()/run() persist a
        # batch_next_action that reflects classified callbacks (not a blank
        # default that silently drops them).
        self._callback_classes: Dict[str, str] = {}
        self._callback_blocks: Dict[str, bool] = {}
        for tp in plan.tracks:
            self.registry.register(
                CallbackTrackRecord(
                    track_id=tp.track_id,
                    task_id=tp.task_id,
                    dispatch_cron_id=tp.dispatch_cron_id,
                    normal_collector_cron_id=tp.normal_collector_cron_id,
                    fallback_callback_cron_id=tp.fallback_callback_cron_id,
                )
            )
            self._track_states[tp.track_id] = TrackLoopState(
                track_id=tp.track_id,
                state=tp.initial_state,
                retry_ceiling=tp.retry_ceiling,
            )
        self.depmatrix = BatchDependencyMatrix(
            [
                TrackSpec(
                    track_id=tp.track_id,
                    expected_files=tp.expected_files,
                    forbidden_write_targets=tp.forbidden_write_targets,
                    depends_on=tp.depends_on,
                    own_artifacts=tp.own_artifacts,
                )
                for tp in plan.tracks
            ]
        )

    # -- §4(1) deterministic batch_id ------------------------------------
    @staticmethod
    def _derive_batch_id(plan: BatchPlan) -> str:
        """Return ``batch-<label>-<digest>`` for *plan*.

        The digest is the first 12 hex characters of the SHA-256 of the
        label joined with the *sorted* ``track_id:task_id`` pairs, so the id
        does not depend on the order in which tracks are listed.
        """
        basis = plan.batch_label + "|" + "|".join(
            sorted(f"{t.track_id}:{t.task_id}" for t in plan.tracks)
        )
        digest = hashlib.sha256(basis.encode("utf-8")).hexdigest()[:12]
        return f"batch-{plan.batch_label}-{digest}"

    # -- track loop machine access --------------------------------------
    def track_state(self, track_id: str) -> TrackLoopState:
        """Return the loop state of *track_id* (``KeyError`` if unknown)."""
        return self._track_states[track_id]

    def advance(self, track_id: str, dst: str) -> None:
        """Ask the loop state of *track_id* to transition to *dst*."""
        self._track_states[track_id].transition(dst)

    # -- Track3 fold-in --------------------------------------------------
    def fold_goal_loops(
        self, conflicts: Optional[Dict[str, Sequence[str]]] = None
    ) -> Dict[str, LoopPlan]:
        """Generate a loop plan for every track matched by a goal request.

        A track matches a goal request when its ``track_id`` ends with the
        request's ``goal_id`` or its ``task_id`` equals it. If several goal
        requests match the same track, the last one wins.

        Args:
            conflicts: optional map ``track_id -> conflicting items`` passed
                through to ``generate_loop_plan`` (empty tuple when absent).

        Returns:
            The coordinator's internal ``track_id -> LoopPlan`` map (the same
            object on every call, not a copy).
        """
        conflicts = conflicts or {}
        for gr in self.plan.goal_requests:
            for tp in self.plan.tracks:
                # NOTE(review): suffix matching means an empty goal_id matches
                # every track — confirm goal_ids are always non-empty.
                if tp.track_id.endswith(gr.goal_id) or tp.task_id == gr.goal_id:
                    binding = gr.bind_track(tp.track_id)
                    self._loop_plans[tp.track_id] = generate_loop_plan(
                        binding, conflicts.get(tp.track_id, ())
                    )
        return self._loop_plans

    # -- §4(7,8) authority packets + callbacks --------------------------
    def select_authority_packets(
        self, candidates: Sequence[PacketCandidate]
    ) -> Dict[str, PacketCandidate]:
        """Delegate to ``final_authority_packet_selector`` for *candidates*."""
        return final_authority_packet_selector(candidates)

    def classify_callbacks(
        self, events: Sequence[CallbackEvent]
    ) -> Dict[str, str]:
        """Classify observed callbacks (HIGH-1/HIGH-2/HIGH-3).

        HIGH-1: the 4-tuple is validated against the cron IDs *observed on
        the event* (``observed_*_cron_id``), never reconstructed from the
        registry — a real cron-ID drift now surfaces as TRACK_MISMATCH
        instead of a self-fulfilling pass.
        HIGH-2: the returned map is keyed by bare ``track_id`` (the exact
        key ``batch_next_action()`` looks up); multiple events on one track
        collapse to the most-severe class. A class that is not listed in
        ``_CLASS_SEVERITY`` outranks every listed class, so an unrecognised
        verdict is never masked by a known one (and no longer raises
        ``KeyError`` when a second event lands on the same track).
        HIGH-3: per-track chair-blocking is recorded via
        ``pending_blocks_chair_decision`` for ``batch_next_action()``.

        Each event's ``identity_ok`` flag is updated in place (AND-ed with
        the registry validation result) before it is classified.

        The results are stored on the coordinator only after every event
        has been processed; if classification raises part-way, the previous
        pass stays in effect instead of a half-populated map.

        Returns:
            ``track_id -> callback class``; the same dict that is kept as
            the coordinator's last classification.
        """
        unknown_rank = max(_CLASS_SEVERITY.values()) + 1

        def _rank(name: str) -> int:
            return _CLASS_SEVERITY.get(name, unknown_rank)

        classes: Dict[str, str] = {}
        blocks_by_track: Dict[str, bool] = {}
        for ev in events:
            observed_identity = {
                "task_id": ev.task_id,
                "dispatch_cron_id": ev.observed_dispatch_cron_id,
                "normal_collector_cron_id": ev.observed_normal_collector_cron_id,
                "fallback_callback_cron_id": ev.observed_fallback_callback_cron_id,
            }
            ok, _ = self.registry.validate_callback(
                ev.track_id, observed_identity
            )
            ev.identity_ok = ev.identity_ok and ok
            verdict = classify_callback(ev)
            prev = classes.get(ev.track_id)
            if prev is None or _rank(verdict) > _rank(prev):
                classes[ev.track_id] = verdict
            blocks = pending_blocks_chair_decision(ev)
            blocks_by_track[ev.track_id] = (
                blocks_by_track.get(ev.track_id, False) or blocks
            )
        # Commit only a complete pass.
        self._callback_classes = classes
        self._callback_blocks = blocks_by_track
        return classes

    # -- §4(6) contamination --------------------------------------------
    def contamination(self) -> List[tuple]:
        """Run ``cross_track_contamination`` over every track's artifacts.

        Returns:
            The checker's findings; ``build_state()`` unpacks each one as a
            ``(citing, owner, artifact)`` triple.
        """
        return cross_track_contamination(
            [
                TrackArtifacts(
                    track_id=tp.track_id,
                    own_artifacts=tp.own_artifacts,
                    cited_artifacts=tp.cited_artifacts,
                )
                for tp in self.plan.tracks
            ]
        )

    # -- §4(10) batch next action ---------------------------------------
    def batch_next_action(
        self,
        callback_classes: Optional[Dict[str, str]] = None,
        callback_blocks: Optional[Dict[str, bool]] = None,
    ) -> Dict[str, object]:
        """Resolve the single batch-level next action.

        HIGH-3: a ``CALLBACK_PENDING`` track blocks the chair decision only
        when ``pending_blocks_chair_decision`` said so (9-R.6 non-blocking
        rule) — recorded by the last ``classify_callbacks()`` pass or passed
        in explicitly. With no callbacks classified nothing blocks (the
        no-arg path is unchanged).

        Args:
            callback_classes: ``track_id -> class``; defaults to the last
                ``classify_callbacks()`` result.
            callback_blocks: ``track_id -> blocks chair``; defaults to the
                last ``classify_callbacks()`` result.

        Returns:
            Whatever the policy-level ``batch_next_action`` returns for the
            per-track join views and the contamination flag.
        """
        if callback_classes is None:
            callback_classes = self._callback_classes
        if callback_blocks is None:
            callback_blocks = self._callback_blocks
        contaminated = bool(self.contamination())
        views: List[TrackJoinView] = []
        for tp in self.plan.tracks:
            ts = self._track_states[tp.track_id]
            # Tracks without a classified callback carry the empty class.
            cb = callback_classes.get(tp.track_id, "")
            blocks = cb == "CALLBACK_PENDING" and bool(
                callback_blocks.get(tp.track_id, False)
            )
            views.append(
                TrackJoinView(
                    track_id=tp.track_id,
                    state=ts.state,
                    terminal=is_terminal(ts.state),
                    callback_class=cb,
                    blocks_chair=blocks,
                )
            )
        return batch_next_action(views, contaminated)

    # -- §4(11) consolidated chair-only summary -------------------------
    def build_state(self) -> Dict[str, object]:
        """Assemble the batch state mapping that ``write_state()`` persists.

        The ``batch_next_action`` entry reflects the last
        ``classify_callbacks()`` pass (no-arg ``batch_next_action()``).
        """
        bna = self.batch_next_action()
        return {
            "schema": "anu_v3.parallel_batch_state.v1",
            "batch_id": self.batch_id,
            "batch_label": self.plan.batch_label,
            "callback_track_registry": self.registry.to_dict(),
            "dependency_matrix": self.depmatrix.to_dict(),
            "track_states": {
                tid: st.to_dict() for tid, st in self._track_states.items()
            },
            "goal_loop_plans": {
                tid: lp.to_dict() for tid, lp in self._loop_plans.items()
            },
            "contamination": [
                {"citing": a, "owner": b, "artifact": c}
                for (a, b, c) in self.contamination()
            ],
            "batch_next_action": bna["batch_next_action"],
        }

    def build_consolidated_summary(self) -> str:
        """Render the chair-only markdown summary of the current state.

        Sections: header and batch-level bullets, one line per track, and —
        only when loop plans exist — the Track3 goal-loop fold-in list.
        """
        state = self.build_state()
        lines: List[str] = []
        lines.append(f"# Parallel Batch Consolidated Summary — {self.batch_id}")
        lines.append("")
        lines.append(
            "> Chair-only: this summary compresses to the chair's final "
            "decision points only (task-2553+17.md §4(11) / §6⑧)."
        )
        lines.append("")
        lines.append(f"- batch_label: `{self.plan.batch_label}`")
        lines.append(
            f"- batch_next_action: **{state['batch_next_action']}**"
        )
        lines.append(
            f"- tracks: {len(self.plan.tracks)} | "
            f"goal_loop_plans: {len(self._loop_plans)}"
        )
        contam = state["contamination"]
        lines.append(
            f"- cross_track_contamination: "
            f"{'NONE' if not contam else contam}"
        )
        lines.append("")
        lines.append("## Per-track chair decision view")
        for tid, st in self._track_states.items():
            terminal = "TERMINAL" if is_terminal(st.state) else "in-loop"
            lines.append(f"- `{tid}` → **{st.state}** ({terminal})")
        if self._loop_plans:
            lines.append("")
            lines.append("## Track3 goal-loop fold-in")
            for tid, lp in self._loop_plans.items():
                lines.append(
                    f"- `{tid}` → status **{lp.status}**"
                    + (
                        f", follow-up split: {lp.split_followups}"
                        if lp.split_followups
                        else ""
                    )
                )
        lines.append("")
        return "\n".join(lines) + "\n"

    # -- persistence (NEW untracked deliverables only — 9-R.1/9-R.2) ----
    # Markers that identify a path as a sanctioned coordinator deliverable,
    # so an *existing untracked* file may be idempotently overwritten only
    # when it is one of the coordinator's own outputs (9-R.2 idempotent
    # overwrite-by-design) — never an arbitrary unrelated untracked file.
    _DELIVERABLE = {
        "state": {
            "name_suffix": ".parallel-batch-state.json",
            "content_marker": '"schema": "anu_v3.parallel_batch_state.v1"',
        },
        "summary": {
            "name_suffix": ".parallel-batch-summary.md",
            "content_marker": "# Parallel Batch Consolidated Summary",
        },
    }

    @classmethod
    def _check_writable(cls, p: Path, kind: str) -> None:
        """Apply the HIGH-4 / 9-R.2 write-guard to *p* without writing.

          * git-tracked path                      -> REFUSE (byte-0 immutable)
          * git-untracked & non-existent          -> ALLOW  (NEW deliverable)
          * git-untracked & existing, and the path
            is a sanctioned coordinator deliverable
            (own state/summary)                   -> ALLOW  (idempotent
                                                     overwrite-by-design)
          * git-untracked & existing, NOT a
            sanctioned deliverable                -> REFUSE (never clobber an
                                                     unrelated untracked file)

        An existing file counts as sanctioned when its name ends with the
        *kind*'s ``name_suffix`` or, failing that, when its current content
        contains the *kind*'s ``content_marker``.

        Args:
            p: target path.
            kind: key into ``_DELIVERABLE`` (``"state"`` or ``"summary"``).

        Raises:
            TrackedPathWriteRefused: when the envelope above says REFUSE.
        """
        if _is_git_tracked(p):
            raise TrackedPathWriteRefused(
                f"refusing to write git-tracked path {p} "
                "(9-R.1/9-R.2: tracked files are byte-0 immutable)"
            )
        if p.exists():
            spec = cls._DELIVERABLE[kind]
            sanctioned = p.name.endswith(spec["name_suffix"])
            if not sanctioned:
                try:
                    existing = p.read_text(encoding="utf-8")
                    sanctioned = spec["content_marker"] in existing
                except (OSError, UnicodeDecodeError):
                    # Unreadable / non-text file: cannot be one of ours.
                    sanctioned = False
            if not sanctioned:
                raise TrackedPathWriteRefused(
                    f"refusing to overwrite existing untracked non-deliverable "
                    f"{p} (9-R.2: only NEW untracked paths or the coordinator's "
                    f"own {kind} deliverable may be written)"
                )

    @classmethod
    def _guarded_write(cls, p: Path, content: str, kind: str) -> None:
        """Write *content* to *p* as UTF-8 if the write-guard allows it.

        Runs ``_check_writable(p, kind)`` first, then creates missing parent
        directories and writes the file.

        Raises:
            TrackedPathWriteRefused: when the guard refuses *p*.
        """
        cls._check_writable(p, kind)
        p.parent.mkdir(parents=True, exist_ok=True)
        p.write_text(content, encoding="utf-8")

    def write_state(self, path: str | Path) -> Path:
        """Persist ``build_state()`` as indented JSON to *path*; return it."""
        p = Path(path)
        self._guarded_write(
            p,
            json.dumps(self.build_state(), indent=2, ensure_ascii=False),
            "state",
        )
        return p

    def write_summary(self, path: str | Path) -> Path:
        """Persist ``build_consolidated_summary()`` to *path*; return it."""
        p = Path(path)
        self._guarded_write(p, self.build_consolidated_summary(), "summary")
        return p

    def run(
        self,
        state_path: str | Path,
        summary_path: str | Path,
    ) -> CoordinatorResult:
        """Persist the state JSON and the summary, then return both.

        Both targets are checked against the write-guard *before* either
        file is written, so a refused summary path no longer leaves an
        already-written state file behind. The state and summary are built
        once; the returned result carries exactly what was persisted.

        Raises:
            TrackedPathWriteRefused: when either target is refused.
        """
        state_p = Path(state_path)
        summary_p = Path(summary_path)
        # Pre-flight: refuse up front if either target is not writable.
        self._check_writable(state_p, "state")
        self._check_writable(summary_p, "summary")
        state = self.build_state()
        summary_md = self.build_consolidated_summary()
        # _guarded_write re-checks immediately before each write.
        self._guarded_write(
            state_p,
            json.dumps(state, indent=2, ensure_ascii=False),
            "state",
        )
        self._guarded_write(summary_p, summary_md, "summary")
        return CoordinatorResult(
            batch_id=self.batch_id,
            state=state,
            summary_md=summary_md,
        )


def load_plan_from_fixture(fixture_path: str | Path) -> BatchPlan:
    """Read a parallel-batch fixture JSON and turn it into a ``BatchPlan``.

    The fixture is only read, never written. Each entry of its ``"tracks"``
    list must provide the five identity keys; the list-valued keys default
    to ``[]``, ``initial_state`` to ``"PLANNED"`` and ``retry_ceiling`` to
    ``2``. The returned plan carries no goal requests.

    Raises:
        KeyError: when ``"tracks"``, ``"batch_label"`` or a required
            per-track key is missing.
    """
    payload = json.loads(Path(fixture_path).read_text(encoding="utf-8"))

    required_keys = (
        "track_id",
        "task_id",
        "dispatch_cron_id",
        "normal_collector_cron_id",
        "fallback_callback_cron_id",
    )
    list_keys = (
        "expected_files",
        "forbidden_write_targets",
        "depends_on",
        "own_artifacts",
        "cited_artifacts",
    )

    track_plans = []
    for entry in payload["tracks"]:
        kwargs = {}
        for key in required_keys:
            kwargs[key] = entry[key]
        for key in list_keys:
            # A fresh list per lookup, so defaults are never shared.
            kwargs[key] = entry.get(key, [])
        kwargs["initial_state"] = entry.get("initial_state", "PLANNED")
        kwargs["retry_ceiling"] = entry.get("retry_ceiling", 2)
        track_plans.append(TrackPlan(**kwargs))

    return BatchPlan(batch_label=payload["batch_label"], tracks=track_plans)