"""v3.6 Runtime Harness — finish-task profile judge (P1-B §10, 192-cell matrix).

chair_authorization_id=CHAIR-AUTH-TASK-2706-V36-FINISH-TASK-PROFILE-LAYER-P1B-260529

Implements §10 eight gate evaluation + §11 four-grade decision for
each task_mode per the 192-cell matrix (6 modes × 8 gates × 4 grades).

Public API
----------
- ``evaluate_gates(task_mode, gate_inputs)`` → dict (full profile gates section)
- ``compute_overall(gates_result)``            → tuple[str, str] (overall_result, rationale)
- ``SEVERITY``                                  — grade-to-int mapping

Gate inputs expected keys (all optional; how a missing input is graded depends on the gate and its weight — it is not always N/A):
    G1: commit_info(dict), lock_sha(str|None), diff_paths(list[str]),
        expected_paths(list[str])
    G2: smoke_result(dict|None)
    G3: scope_diff_paths(list[str]), expected_paths(list[str]),
        forbidden_paths(list[str]), lock_sha(str|None),
        main_diff_paths(list[str]|None)
    G4: dirty_count(int), dirty_classification(str)
    G5: qc_result(dict|None)
    G6: lineage_sha_changed(bool|None)
    G7: (schema only, not yet implemented — P1-C)
    G8: callback_registered(bool|None), envelope_bytes(int|None)

Safe-fail: never raises; any exception produces ESCALATE with error note.
"""
from __future__ import annotations

from .finish_task_profile_schema import (
    GATE_KEYS,
)

# ---------------------------------------------------------------------------
# Severity ordering
# ---------------------------------------------------------------------------
# Grades listed from mildest to most severe; a grade's position in the tuple
# is its severity. "N/A" sits below every real grade.
SEVERITY: dict[str, int] = {
    grade: rank
    for rank, grade in enumerate(("PASS", "WARN", "FAIL", "ESCALATE"))
}
SEVERITY["N/A"] = -1

# ---------------------------------------------------------------------------
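# Gate weight matrix: for each (task_mode, gate) → strictness level
# "strict"      = full enforcement; violations → FAIL/ESCALATE
# "medium"      = partial enforcement; violations → WARN
# "lenient"     = minor check; violations → WARN at worst
# "na"          = not applicable; the gate evaluator returns N/A
# "schema_only" = G7 only; its evaluator does not read the weight (always WARN)
# These are summaries: the exact grade for each condition is decided by the
# individual _evaluate_G* functions and does not always follow them (e.g. a
# FAIL/ERROR smoke or QC status is graded FAIL at every applicable weight).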
# ---------------------------------------------------------------------------
# One row per task_mode; the eight whitespace-separated levels in each row line
# up, in order, with the eight gate keys listed in the zip() below.
_WEIGHT_MATRIX: dict[str, dict[str, str]] = {
    mode: dict(
        zip(
            (
                "G1_git_evidence",
                "G2_L1_smoke",
                "G3_scope_guard",
                "G4_dirty_workspace",
                "G5_qc_verification",
                "G6_lineage_preservation",
                "G7_actor_attribution",
                "G8_callback_enforcement",
            ),
            levels.split(),
        )
    )
    for mode, levels in (
        # task_mode               G1       G2       G3      G4       G5      G6      G7           G8
        ("code",                 "strict   strict   strict  medium   strict  strict  schema_only  strict"),
        ("system_hook",          "lenient  medium   strict  medium   medium  strict  schema_only  medium"),
        ("local_runtime",        "lenient  lenient  medium  medium   medium  medium  schema_only  lenient"),
        ("read_only",            "na       na       na      lenient  strict  na      schema_only  na"),
        ("callback_only",        "na       na       na      na       na      na      schema_only  strict"),
        ("closeout_marker_only", "na       na       na      na       na      na      schema_only  lenient"),
        ("UNKNOWN",              "na       na       na      na       na      na      schema_only  na"),
    )
}


# ---------------------------------------------------------------------------
# Individual gate evaluators
# ---------------------------------------------------------------------------

def _gate_result(result: str, evidence: str, rationale: str) -> dict:
    """Bundle one gate verdict into the record shape shared by every gate.

    Args:
        result: Grade string (the evaluators in this module pass "PASS",
            "WARN", "FAIL", "ESCALATE" or "N/A").
        evidence: Short summary of what was observed.
        rationale: Explanation of why the grade was given.

    Returns:
        Dict with the keys ``result``, ``evidence`` and ``rationale``.
    """
    return dict(result=result, evidence=evidence, rationale=rationale)


def _evaluate_G1(task_mode: str, inputs: dict) -> dict:
    """Grade gate G1 (git evidence) for one task.

    Reads ``commit_info``, ``lock_sha``, ``diff_paths`` and ``expected_paths``
    from *inputs*.

    Grading:
        - weight "na" (or a task_mode missing from the matrix) -> N/A
        - neither commit info nor diff paths -> FAIL when strict, else WARN
        - strict, and a diff path matches no expected prefix -> WARN
        - everything else -> PASS

    An expected path is used as a plain string prefix after trailing ``*``
    characters are stripped; no glob matching is performed.

    Returns:
        A gate result dict. An exception raised during evaluation is
        reported as an ESCALATE result instead of propagating.
    """
    try:
        strictness = _WEIGHT_MATRIX.get(task_mode, {}).get("G1_git_evidence", "na")
        if strictness == "na":
            return _gate_result("N/A", "not_applicable", "G1 not required for this task_mode")
        is_strict = strictness == "strict"

        commit = inputs.get("commit_info")
        base_sha = inputs.get("lock_sha")
        changed = inputs.get("diff_paths") or []
        allowed = inputs.get("expected_paths") or []

        # Nothing to inspect at all: grade by strictness and stop.
        if not (commit or changed):
            if is_strict:
                return _gate_result(
                    "FAIL",
                    "no_commit_info_no_diff",
                    "G1 strict: no commit info and no diff paths provided",
                )
            return _gate_result(
                "WARN",
                "no_commit_info",
                "G1: missing commit info — cannot verify git evidence",
            )

        summary = []
        if commit:
            # Truncated so a large commit record cannot bloat the evidence.
            summary.append(f"commit_info={str(commit)[:80]}")
        if base_sha:
            summary.append(f"lock_sha={base_sha[:16]}")
        if changed:
            summary.append(f"diff_paths={len(changed)}")
        evidence = "; ".join(summary)

        # Collect diff paths that fall under none of the expected prefixes.
        if allowed and changed:
            stray = []
            for path in changed:
                for pattern in allowed:
                    if path.startswith(pattern.rstrip("*")):
                        break
                else:
                    stray.append(path)
            if stray and is_strict:
                return _gate_result(
                    "WARN",
                    evidence,
                    f"G1: {len(stray)} diff paths outside expected scope",
                )

        return _gate_result("PASS", evidence or "commit_present", "G1 evidence verified")

    except Exception as exc:
        return _gate_result("ESCALATE", f"G1_exception: {exc}", "G1 internal error — safe escalate")


def _evaluate_G2(task_mode: str, inputs: dict) -> dict:
    """G2: L1_smoke — grade the L1 smoke-test outcome.

    Reads ``smoke_result`` from *inputs* and, when it is a dict, its
    ``status`` entry.

    Grading:
        - weight "na" (or a task_mode missing from the matrix) -> N/A
        - ``smoke_result`` is None -> WARN when strict, else N/A
        - status "PASS" -> PASS; status "WARN" -> WARN
        - status "FAIL" or "ERROR" -> FAIL
        - any other status, or a non-dict ``smoke_result`` -> WARN

    Returns:
        A gate result dict. An exception raised during evaluation is
        reported as an ESCALATE result instead of propagating.
    """
    try:
        weight = _WEIGHT_MATRIX.get(task_mode, {}).get("G2_L1_smoke", "na")
        if weight == "na":
            return _gate_result("N/A", "not_applicable", "G2 not required for this task_mode")

        smoke_result = inputs.get("smoke_result")

        if smoke_result is None:
            if weight == "strict":
                return _gate_result("WARN", "smoke_not_run", "G2 strict: L1 smoke not executed")
            return _gate_result("N/A", "smoke_not_run", "G2: smoke not available for this task")

        if isinstance(smoke_result, dict):
            status = smoke_result.get("status", "UNKNOWN")
            if status == "PASS":
                return _gate_result("PASS", f"smoke_status={status}", "G2 L1 smoke PASS")
            if status == "WARN":
                return _gate_result("WARN", f"smoke_status={status}", "G2 L1 smoke WARN")
            if status in ("FAIL", "ERROR"):
                # A failed smoke run is FAIL at every applicable weight. This
                # used to be a ternary on `weight` whose two branches were both
                # "FAIL"; it is written out plainly here with the same result.
                # NOTE(review): if non-strict modes were meant to downgrade
                # this to WARN, that policy still needs to be implemented.
                return _gate_result(
                    "FAIL",
                    f"smoke_status={status}",
                    f"G2 L1 smoke {status}",
                )

        return _gate_result("WARN", "smoke_result_ambiguous", "G2: smoke result could not be parsed")

    except Exception as exc:
        return _gate_result("ESCALATE", f"G2_exception: {exc}", "G2 internal error — safe escalate")


def _evaluate_G3(task_mode: str, inputs: dict) -> dict:
    """G3: scope_guard — lock_sha..HEAD diff within expected_files scope.

    Reads ``scope_diff_paths``, ``expected_paths``, ``forbidden_paths``,
    ``lock_sha`` and ``main_diff_paths`` from *inputs*.

    Three kinds of violation are detected:
        - misfire: no ``lock_sha`` and ``main_diff_paths`` is more than five
          entries longer than ``scope_diff_paths`` (recorded only when the
          weight is strict)
        - forbidden: a diff path starts with a forbidden prefix
        - out of scope: a diff path starts with no expected prefix; 20 or
          more such paths count as a large violation

    Patterns are used as plain string prefixes after trailing ``*`` (and, for
    forbidden patterns, trailing ``/``) are stripped; no glob matching is
    performed.

    Grading:
        - weight "na" (or a task_mode missing from the matrix) -> N/A
        - no violations -> PASS
        - strict with a forbidden hit or misfire -> ESCALATE
        - large violation -> ESCALATE when strict, FAIL when medium
        - any other violation -> FAIL when strict, else WARN

    Returns:
        A gate result dict. An exception raised during evaluation is
        reported as an ESCALATE result instead of propagating.
    """
    try:
        weight = _WEIGHT_MATRIX.get(task_mode, {}).get("G3_scope_guard", "na")
        if weight == "na":
            return _gate_result("N/A", "not_applicable", "G3 not required for this task_mode")

        scope_diff_paths = inputs.get("scope_diff_paths") or []
        expected_paths = inputs.get("expected_paths") or []
        forbidden_paths = inputs.get("forbidden_paths") or []
        lock_sha = inputs.get("lock_sha")
        main_diff_paths = inputs.get("main_diff_paths")  # signals main..HEAD misfire

        evidence_parts = []
        violations = []
        # Violation kinds are tracked as flags rather than re-derived by
        # searching the violation messages: those messages embed file paths,
        # so a path containing e.g. "misfire" could otherwise change the grade.
        has_misfire = False

        if lock_sha:
            evidence_parts.append(f"lock_sha={lock_sha[:16]}")
        else:
            evidence_parts.append("lock_sha=None(main..HEAD_fallback)")
            # main..HEAD fallback is a caveat
            if main_diff_paths is not None and len(main_diff_paths) > len(scope_diff_paths) + 5:
                evidence_parts.append(f"main_diff={len(main_diff_paths)}_vs_scope_diff={len(scope_diff_paths)}")
                # This is the task-2705+3 misfire pattern
                if weight == "strict":
                    violations.append("main..HEAD_misfire_detected")
                    has_misfire = True

        # Check forbidden path violations. Each offending path is listed once,
        # even when it matches several forbidden patterns, so the reported
        # count equals the number of offending files.
        forbidden_hits = []
        if scope_diff_paths and forbidden_paths:
            forbidden_prefixes = tuple(
                fp.rstrip("*").rstrip("/") for fp in forbidden_paths
            )
            forbidden_hits = [
                path for path in scope_diff_paths
                if path.startswith(forbidden_prefixes)
            ]
        has_forbidden = bool(forbidden_hits)

        if has_forbidden:
            violations.append(f"forbidden_path_violation: {forbidden_hits}")
            evidence_parts.append(f"forbidden_hits={len(forbidden_hits)}")

        # Check scope violations (skipped when no expected paths are given).
        out_of_scope = []
        if expected_paths and scope_diff_paths:
            expected_prefixes = tuple(ep.rstrip("*") for ep in expected_paths)
            out_of_scope = [
                path for path in scope_diff_paths
                if not path.startswith(expected_prefixes)
            ]

        has_large = len(out_of_scope) >= 20
        if out_of_scope:
            evidence_parts.append(f"out_of_scope={len(out_of_scope)}")
            label = "large_scope_violation" if has_large else "scope_violation"
            violations.append(f"{label}:{len(out_of_scope)}_paths")

        evidence = "; ".join(evidence_parts)

        if not violations:
            return _gate_result(
                "PASS",
                evidence or "scope_clean",
                "G3 scope guard PASS",
            )

        # Determine grade based on severity
        details = "; ".join(violations)
        if (has_forbidden or has_misfire) and weight == "strict":
            return _gate_result("ESCALATE", evidence, f"G3 scope_guard ESCALATE: {details}")
        if has_large and weight in ("strict", "medium"):
            return _gate_result(
                "ESCALATE" if weight == "strict" else "FAIL",
                evidence,
                f"G3 scope_guard: {details}",
            )
        if weight == "strict":
            return _gate_result("FAIL", evidence, f"G3 scope_guard FAIL: {details}")
        return _gate_result("WARN", evidence, f"G3 scope_guard WARN: {details}")

    except Exception as exc:
        return _gate_result("ESCALATE", f"G3_exception: {exc}", "G3 internal error — safe escalate")


def _evaluate_G4(task_mode: str, inputs: dict) -> dict:
    """Grade gate G4 (dirty workspace) for one task.

    Reads ``dirty_count`` (default 0) and ``dirty_classification``
    (default "UNKNOWN_DIRTY") from *inputs*.

    Grading:
        - weight "na" (or a task_mode missing from the matrix) -> N/A
        - count equal to 0, or classification "CLEAN" -> PASS
        - "OWN_DIRTY" -> PASS
        - "INHERITED_DIRTY" or "DAEMON_RECURRENCE" -> WARN
        - "EXTERNAL_DIRTY" -> FAIL at 1000+ files; at 50+ files FAIL when
          strict, else WARN; below 50 WARN
        - any other classification -> WARN below 100 files, else FAIL

    Returns:
        A gate result dict. An exception raised during evaluation is
        reported as an ESCALATE result instead of propagating.
    """
    try:
        strictness = _WEIGHT_MATRIX.get(task_mode, {}).get("G4_dirty_workspace", "na")
        if strictness == "na":
            return _gate_result("N/A", "not_applicable", "G4 not required for this task_mode")

        count = inputs.get("dirty_count", 0)
        label = inputs.get("dirty_classification", "UNKNOWN_DIRTY")
        evidence = f"dirty_count={count}, classification={label}"

        if count == 0 or label == "CLEAN":
            return _gate_result("PASS", evidence, "G4 workspace clean")

        # Classifications whose grade does not depend on the file count.
        fixed_grades = (
            ("OWN_DIRTY", "PASS", "G4 OWN_DIRTY: task's own changes only"),
            ("INHERITED_DIRTY", "WARN", "G4 INHERITED_DIRTY: prior task lineage residue detected"),
            ("DAEMON_RECURRENCE", "WARN", "G4 DAEMON_RECURRENCE: daemon-generated files detected"),
        )
        for name, grade, why in fixed_grades:
            if label == name:
                return _gate_result(grade, evidence, why)

        if label == "EXTERNAL_DIRTY":
            # External contamination: the grade scales with the file count.
            if count >= 1000:
                grade = "FAIL"
                why = f"G4 EXTERNAL_DIRTY: {count}+ files contaminated"
            elif count >= 50:
                grade = "FAIL" if strictness == "strict" else "WARN"
                why = f"G4 EXTERNAL_DIRTY: {count} files contaminated"
            else:
                grade = "WARN"
                why = f"G4 EXTERNAL_DIRTY: {count} files"
            return _gate_result(grade, evidence, why)

        # Unrecognised classification (including the UNKNOWN_DIRTY default).
        if count < 100:
            grade = "WARN"
        else:
            grade = "FAIL"
        return _gate_result(
            grade,
            evidence,
            f"G4 {label}: {count} dirty files, classification uncertain",
        )

    except Exception as exc:
        return _gate_result("ESCALATE", f"G4_exception: {exc}", "G4 internal error — safe escalate")


def _evaluate_G5(task_mode: str, inputs: dict) -> dict:
    """Grade gate G5 (QC verification) for one task.

    Reads ``qc_result`` from *inputs* and, when it is a dict, its ``status``,
    ``passed`` and ``reason`` entries.

    Grading:
        - weight "na" (or a task_mode missing from the matrix) -> N/A
        - ``qc_result`` is None -> WARN when strict, else N/A
        - status "PASS", or ``passed`` is True -> PASS
        - status "WARN" -> WARN
        - status "FAIL" or "ERROR" -> FAIL (rationale includes ``reason``)
        - anything else, including a non-dict ``qc_result`` -> WARN

    Returns:
        A gate result dict. An exception raised during evaluation is
        reported as an ESCALATE result instead of propagating.
    """
    try:
        strictness = _WEIGHT_MATRIX.get(task_mode, {}).get("G5_qc_verification", "na")
        if strictness == "na":
            return _gate_result("N/A", "not_applicable", "G5 not required for this task_mode")

        qc = inputs.get("qc_result")

        if qc is None:
            if strictness != "strict":
                return _gate_result("N/A", "qc_not_run", "G5: qc not available")
            return _gate_result(
                "WARN",
                "qc_not_run",
                "G5 strict: qc_verify not executed",
            )

        ambiguous = _gate_result("WARN", "qc_result_ambiguous", "G5: qc result ambiguous")
        if not isinstance(qc, dict):
            return ambiguous

        status = qc.get("status", "UNKNOWN")
        passed = qc.get("passed", None)
        evidence = f"qc_status={status}"

        # An explicit passed=True counts as a pass whatever the status says.
        if status == "PASS" or passed is True:
            return _gate_result("PASS", evidence, "G5 qc verification PASS")
        if status == "WARN":
            return _gate_result("WARN", evidence, "G5 qc verification WARN")
        if status == "FAIL" or status == "ERROR":
            return _gate_result(
                "FAIL",
                evidence,
                f"G5 qc verification FAIL: {qc.get('reason', '')}",
            )
        return ambiguous

    except Exception as exc:
        return _gate_result("ESCALATE", f"G5_exception: {exc}", "G5 internal error — safe escalate")


def _evaluate_G6(task_mode: str, inputs: dict) -> dict:
    """Grade gate G6 (lineage preservation) for one task.

    Reads ``lineage_sha_changed`` from *inputs*. Only the exact objects
    ``None``, ``False`` and ``True`` are recognised.

    Grading:
        - weight "na" (or a task_mode missing from the matrix) -> N/A
        - None -> N/A (lineage was not checked)
        - False -> PASS
        - True -> FAIL when strict, else WARN
        - any other value -> WARN (indeterminate)

    Returns:
        A gate result dict. An exception raised during evaluation is
        reported as an ESCALATE result instead of propagating.
    """
    try:
        strictness = _WEIGHT_MATRIX.get(task_mode, {}).get("G6_lineage_preservation", "na")
        if strictness == "na":
            return _gate_result("N/A", "not_applicable", "G6 not required for this task_mode")

        changed = inputs.get("lineage_sha_changed")

        if changed is None:
            return _gate_result("N/A", "lineage_not_checked", "G6: lineage sha not checked")
        if changed is False:
            return _gate_result("PASS", "lineage_sha_unchanged", "G6 lineage preservation PASS")
        if changed is not True:
            return _gate_result("WARN", "lineage_indeterminate", "G6: lineage sha state indeterminate")

        # The parent lineage sha changed: strictness decides how hard this lands.
        grade = "FAIL" if strictness == "strict" else "WARN"
        return _gate_result(
            grade,
            "lineage_sha_changed=True",
            f"G6 lineage preservation {grade}: parent lineage sha changed",
        )

    except Exception as exc:
        return _gate_result("ESCALATE", f"G6_exception: {exc}", "G6 internal error — safe escalate")


def _evaluate_G7(task_mode: str, inputs: dict) -> dict:
    """Report gate G7 (actor attribution) as pending.

    Per §18 this gate is schema-only in this task and real enforcement is
    left to P1-C, so nothing is inspected: the result is always the same
    WARN record, whatever the task_mode or inputs.
    """
    # Both parameters exist only so that every gate evaluator can be called
    # with the same arguments.
    del inputs, task_mode
    pending_evidence = "P1_C_NOT_YET_IMPLEMENTED"
    pending_rationale = (
        "G7 actor_attribution: schema defined, "
        "enforcement pending P1-C implementation"
    )
    return _gate_result("WARN", pending_evidence, pending_rationale)


def _evaluate_G8(task_mode: str, inputs: dict) -> dict:
    """G8: callback_enforcement — ANU-key cron + envelope ≤3,900 bytes.

    Reads ``callback_registered`` and ``envelope_bytes`` from *inputs*.

    Issues detected:
        - ``callback_registered`` is exactly ``False``
        - ``envelope_bytes`` is greater than 3900

    Grading:
        - weight "na" (or a task_mode missing from the matrix) -> N/A
        - no issue and both inputs are None -> WARN when strict, else N/A
        - no issue otherwise -> PASS
        - any issue -> FAIL when strict, else WARN

    Every FAIL/WARN caused by an issue carries the observation behind it as
    evidence (``callback_registered=False`` and/or ``envelope_bytes=N``).

    Returns:
        A gate result dict. An exception raised during evaluation is
        reported as an ESCALATE result instead of propagating.
    """
    try:
        weight = _WEIGHT_MATRIX.get(task_mode, {}).get("G8_callback_enforcement", "na")
        if weight == "na":
            return _gate_result("N/A", "not_applicable", "G8 not required for this task_mode")

        callback_registered = inputs.get("callback_registered")
        envelope_bytes = inputs.get("envelope_bytes")
        max_envelope_bytes = 3900

        issues = []
        evidence_parts = []

        if callback_registered is False:
            issues.append("callback_not_registered")
            # Record the observation as evidence too; otherwise a result
            # caused only by a missing callback has an empty evidence string.
            evidence_parts.append("callback_registered=False")
        elif callback_registered is True:
            evidence_parts.append("callback_registered=True")

        if envelope_bytes is not None:
            evidence_parts.append(f"envelope_bytes={envelope_bytes}")
            if envelope_bytes > max_envelope_bytes:
                issues.append(f"envelope_too_large:{envelope_bytes}>{max_envelope_bytes}")

        if not issues:
            if callback_registered is None and envelope_bytes is None:
                # Nothing was supplied, so there is nothing to confirm.
                if weight == "strict":
                    return _gate_result(
                        "WARN",
                        "callback_not_checked",
                        "G8 strict: callback enforcement not verified",
                    )
                return _gate_result("N/A", "not_checked", "G8: not checked")
            return _gate_result(
                "PASS",
                "; ".join(evidence_parts) or "callback_ok",
                "G8 callback enforcement PASS",
            )

        grade = "FAIL" if weight == "strict" else "WARN"
        return _gate_result(
            grade,
            "; ".join(evidence_parts),
            f"G8 callback enforcement {grade}: {'; '.join(issues)}",
        )

    except Exception as exc:
        return _gate_result("ESCALATE", f"G8_exception: {exc}", "G8 internal error — safe escalate")


# ---------------------------------------------------------------------------
# Gate evaluator dispatch
# ---------------------------------------------------------------------------
# Gate key -> evaluator. Every evaluator is called as
# evaluator(task_mode, gate_inputs) and returns a gate result dict.
_GATE_EVALUATORS = dict(
    G1_git_evidence=_evaluate_G1,
    G2_L1_smoke=_evaluate_G2,
    G3_scope_guard=_evaluate_G3,
    G4_dirty_workspace=_evaluate_G4,
    G5_qc_verification=_evaluate_G5,
    G6_lineage_preservation=_evaluate_G6,
    G7_actor_attribution=_evaluate_G7,
    G8_callback_enforcement=_evaluate_G8,
)


def evaluate_gates(task_mode: str, gate_inputs: dict) -> dict:
    """Run every gate evaluator for one task and collect the results.

    Args:
        task_mode: Task mode name. A value that is not a key of the weight
            matrix is evaluated as "UNKNOWN".
        gate_inputs: Dict of gate input values (see module docstring); it is
            passed unchanged to every evaluator.

    Returns:
        Dict mapping each key in ``GATE_KEYS`` to its gate result dict. A
        key with no registered evaluator gets an N/A "evaluator_missing"
        result; an evaluator that raises gets an ESCALATE result.

    Safe-fail: an unexpected error outside the per-gate handling yields an
    ESCALATE result for every gate instead of an exception.
    """
    try:
        mode = task_mode if task_mode in _WEIGHT_MATRIX else "UNKNOWN"

        outcome = {}
        for gate_key in GATE_KEYS:
            run_gate = _GATE_EVALUATORS.get(gate_key)
            if run_gate is None:
                outcome[gate_key] = _gate_result(
                    "N/A", "evaluator_missing", f"{gate_key}: no evaluator"
                )
                continue
            # Evaluators already guard themselves; this is a second net so
            # one misbehaving gate cannot take down the others.
            try:
                outcome[gate_key] = run_gate(mode, gate_inputs)
            except Exception as gate_exc:
                outcome[gate_key] = _gate_result(
                    "ESCALATE",
                    f"gate_eval_exception: {gate_exc}",
                    f"{gate_key}: evaluation threw exception — safe escalate",
                )
        return outcome

    except Exception as exc:
        # Something outside the per-gate handling failed: escalate every gate.
        note = f"evaluate_gates_exception: {exc}"
        return {
            gate_key: _gate_result("ESCALATE", note, "safe escalate")
            for gate_key in GATE_KEYS
        }


def compute_overall(gates_result: dict) -> tuple[str, str]:
    """Fold per-gate results into one overall grade plus a rationale.

    The most severe grade wins (ESCALATE > FAIL > WARN > PASS). Gates graded
    N/A, gates with a grade missing from ``SEVERITY``, and gate values that
    are not dicts do not take part.

    Args:
        gates_result: Dict of gate key -> gate result dict, as returned by
            evaluate_gates().

    Returns:
        ``(overall_result, rationale)``. When no gate takes part the result
        is PASS. For a non-PASS result the rationale names every gate that
        produced the winning grade.

    Safe-fail: an unexpected error yields ("ESCALATE", <error note>) instead
    of an exception.
    """
    try:
        # (severity, grade, gate_key) for each gate that takes part.
        graded = []
        for gate_key, gate_val in gates_result.items():
            if isinstance(gate_val, dict):
                grade = gate_val.get("result", "N/A")
            else:
                grade = "N/A"
            level = SEVERITY.get(grade, -1)
            if level >= 0:
                graded.append((level, grade, gate_key))

        if not graded:
            return "PASS", "all gates N/A — defaulting to PASS"

        top_level = max(level for level, _, _ in graded)
        worst = [
            (grade, gate_key)
            for level, grade, gate_key in graded
            if level == top_level
        ]
        worst_grade = worst[0][0]

        if worst_grade == "PASS":
            return "PASS", "all applicable gates PASS"

        gate_names = ", ".join(gate_key for _, gate_key in worst)
        return worst_grade, (
            f"worst_grade={worst_grade} from gates: {gate_names} "
            f"(ESCALATE>FAIL>WARN>PASS worst-case wins)"
        )

    except Exception as exc:
        return "ESCALATE", f"compute_overall_exception: {exc}"