"""v3.6 Runtime Harness — finish-task preflight helper (P1-B §12).

chair_authorization_id=CHAIR-AUTH-TASK-2706-V36-FINISH-TASK-PROFILE-LAYER-P1B-260529

Implements finish-task entry preflight: task_mode classification,
lock_sha extraction, dirty audit, and full profile assembly.

Called before finish-task.sh executes. If this layer is inactive or errors,
existing finish-task.sh behavior is preserved without modification.

Public API
----------
- ``run_preflight(task_md_path, task_id, lock_sha_hint)``
  Returns a full profile dict (§11 schema) or a minimal safe-fail dict.

- ``classify_dirty_workspace(git_status_output)``
  Returns (dirty_count, dirty_classification).

- ``extract_lock_sha(task_md_text, task_timers_path)``
  Returns (lock_sha, fallback_reason).

Safe-fail: never raises. All exceptions return UNKNOWN/ESCALATE profile.
"""
from __future__ import annotations

import os
import re
import subprocess
from datetime import datetime, timezone
from typing import Optional

from .finish_task_profile_schema import (
    SCHEMA_VERSION,
)

# ---------------------------------------------------------------------------
# Dirty workspace classification patterns and thresholds
# (all consumed by classify_dirty_workspace below)
# ---------------------------------------------------------------------------
# A changed path is counted as a "daemon" file when any of these regexes is
# found in it (re.search): .log / .pid / .lock suffixes, anything under a
# __pycache__/ directory, and .pyc files.
_DAEMON_PATTERNS = [
    re.compile(r"\.log$"),
    re.compile(r"\.pid$"),
    re.compile(r"\.lock$"),
    re.compile(r"__pycache__/"),
    re.compile(r"\.pyc$"),
]

# A changed path is counted as "inherited" when any of these regexes is found
# in it: memory/events/task-<digit>..., memory/reports/task-<digit>...,
# or anything containing scripts/harness/ or tests/harness/.
_INHERITED_PATTERNS = [
    re.compile(r"memory/(events|reports)/task-\d"),
    re.compile(r"scripts/harness/"),
    re.compile(r"tests/harness/"),
]

# Total entry count at or above which the workspace is always EXTERNAL_DIRTY.
_EXTERNAL_THRESHOLD_LARGE = 500
# Total entry count at or above which a single inherited path is enough to
# classify the workspace as INHERITED_DIRTY.
_EXTERNAL_THRESHOLD_MEDIUM = 50
# Minimum number of inherited paths for the "mostly inherited" rule; the rule
# actually requires max(this, 40% of the total count).
_INHERITED_THRESHOLD = 5
# Minimum number of daemon paths for the "mostly daemon" rule; the rule
# actually requires max(this, 50% of the total count).
_DAEMON_THRESHOLD = 5


def classify_dirty_workspace(git_status_output: str) -> tuple[int, str]:
    """Turn ``git status --porcelain`` output into a (count, label) pair.

    Args:
        git_status_output: Raw text printed by ``git status --porcelain``.

    Returns:
        ``(dirty_count, dirty_classification)`` where ``dirty_count`` is the
        number of non-blank status lines and the label is chosen by the first
        rule that applies, in this order:

        1. no entries                                   -> ``CLEAN``
        2. 500+ entries                                 -> ``EXTERNAL_DIRTY``
        3. fewer than 200 entries and at least
           max(5, 40%) match the inherited patterns     -> ``INHERITED_DIRTY``
        4. at least max(5, 50%) match daemon patterns   -> ``DAEMON_RECURRENCE``
        5. 50+ entries with any inherited match         -> ``INHERITED_DIRTY``
        6. 20 entries or fewer                          -> ``OWN_DIRTY``
        7. anything else                                -> ``UNKNOWN_DIRTY``

    Safe-fail: never raises; any internal error yields ``(0, "UNKNOWN_DIRTY")``.
    """
    try:
        text = git_status_output.strip() if git_status_output else ""
        entries = [row for row in text.splitlines() if row.strip()]
        if not entries:
            return 0, "CLEAN"
        total = len(entries)

        # Porcelain rows look like "XY path" or "XY old -> new"; keep only the
        # destination path and drop the quotes git adds around unusual names.
        changed = []
        for entry in entries:
            fields = entry.strip().split(None, 1)
            if len(fields) != 2:
                continue
            destination = fields[1].split(" -> ")[-1]
            changed.append(destination.strip().strip('"'))

        def tally(patterns):
            """Count the changed paths matched by at least one pattern."""
            return sum(
                1
                for candidate in changed
                if any(rx.search(candidate) for rx in patterns)
            )

        daemon_hits = tally(_DAEMON_PATTERNS)
        lineage_hits = tally(_INHERITED_PATTERNS)

        if total >= _EXTERNAL_THRESHOLD_LARGE:
            verdict = "EXTERNAL_DIRTY"
        elif total < 200 and lineage_hits >= max(_INHERITED_THRESHOLD, total * 0.4):
            verdict = "INHERITED_DIRTY"
        elif daemon_hits >= max(_DAEMON_THRESHOLD, total * 0.5):
            verdict = "DAEMON_RECURRENCE"
        elif total >= _EXTERNAL_THRESHOLD_MEDIUM and lineage_hits > 0:
            verdict = "INHERITED_DIRTY"
        elif total <= 20:
            # A handful of changes is assumed to be the task's own work.
            verdict = "OWN_DIRTY"
        else:
            verdict = "UNKNOWN_DIRTY"
        return total, verdict

    except Exception:
        return 0, "UNKNOWN_DIRTY"


def _run_git_status(workspace_path: Optional[str] = None) -> Optional[str]:
    """Return the stdout of ``git status --porcelain``, or None on failure.

    The command runs in ``workspace_path`` when given, otherwise in the
    directory named by the ``WORKSPACE`` environment variable, otherwise in
    ``/home/jay/workspace``. A non-zero exit status, a timeout (15 seconds),
    or any other error yields ``None``; this function never raises.
    """
    try:
        repo_dir = workspace_path or os.environ.get(
            "WORKSPACE", "/home/jay/workspace"
        )
        completed = subprocess.run(
            ["git", "status", "--porcelain"],
            cwd=repo_dir,
            capture_output=True,
            text=True,
            timeout=15,
        )
        return completed.stdout if completed.returncode == 0 else None
    except Exception:
        return None


def extract_lock_sha(
    task_md_text: str,
    task_timers_path: Optional[str] = None,
) -> tuple[Optional[str], str]:
    """Extract lock_sha from task md text or task-timers.json.

    Lookup order:

    1. The task md text, searching (case-insensitively) for a 40-hex-digit
       value after ``lock_sha``, ``task_md_sha_before`` or ``snapshot_sha``,
       or on a line containing ``dispatched ... sha``. The key and the value
       may be separated by colons, whitespace, ``=``, or markdown markup
       (``*`` and backticks), so ``**lock_sha**: `<sha>``` is recognised.
    2. ``task_timers_path``, if it exists and holds a JSON object: the first
       entry whose ``lock_sha`` or ``task_md_sha_before`` is a lowercase
       40-hex-digit string.

    Args:
        task_md_text: Full text content of the task md (None is treated as "").
        task_timers_path: Optional path to task-timers.json.

    Returns:
        ``(lock_sha, fallback_reason)``. ``lock_sha`` is None when nothing was
        found; ``fallback_reason`` then tells the caller to fall back to
        main..HEAD, or carries the exception text.

    Safe-fail: never raises; returns (None, reason) on any error.
    """
    try:
        import json

        # Exactly 40 hex digits: the lookarounds keep a longer hex run (e.g. a
        # 64-digit sha256) from being silently truncated into a bogus SHA.
        sha_token = r"(?<![0-9a-f])([0-9a-f]{40})(?![0-9a-f])"
        # Separators allowed between a key and its value, including markdown
        # emphasis / code markup.
        separator = r"[:\s*`=]+"
        md_patterns = [
            re.compile(r"lock_sha" + separator + sha_token, re.IGNORECASE),
            re.compile(r"task_md_sha_before" + separator + sha_token, re.IGNORECASE),
            re.compile(r"snapshot_sha" + separator + sha_token, re.IGNORECASE),
            re.compile(r"dispatched.*sha.*?" + sha_token, re.IGNORECASE),
        ]

        # 1. Try to find lock_sha in task md text (patterns are in priority order).
        text = task_md_text or ""
        for pattern in md_patterns:
            found = pattern.search(text)
            if found:
                return found.group(1), "extracted_from_task_md"

        # 2. Try task-timers.json.
        if task_timers_path and os.path.exists(task_timers_path):
            try:
                with open(task_timers_path, "r", encoding="utf-8") as f:
                    timers = json.load(f)
            except (OSError, ValueError):
                # Unreadable or malformed timers file: best-effort source,
                # so fall through to the main..HEAD fallback below.
                timers = None

            if isinstance(timers, dict):
                for timer_data in timers.values():
                    if not isinstance(timer_data, dict):
                        continue
                    # Validate each key on its own so an invalid lock_sha does
                    # not hide a usable task_md_sha_before, and a non-string
                    # value does not abort the scan of the remaining entries.
                    for key in ("lock_sha", "task_md_sha_before"):
                        candidate = timer_data.get(key)
                        if isinstance(candidate, str) and re.fullmatch(
                            r"[0-9a-f]{40}", candidate
                        ):
                            return candidate, "extracted_from_task_timers"

        # 3. No lock_sha found → fallback to main..HEAD
        return None, "no_lock_sha_found_main_HEAD_fallback"

    except Exception as exc:
        return None, f"lock_sha_extraction_exception: {exc}"


def _read_task_md(task_md_path: str) -> tuple[Optional[str], Optional[str]]:
    """Load the task md file as text.

    Returns:
        ``(content, None)`` on success, or ``(None, error_reason)`` when the
        path is empty, does not exist, or cannot be read. Undecodable bytes
        are replaced rather than treated as an error. Never raises.
    """
    try:
        if task_md_path and os.path.exists(task_md_path):
            with open(
                task_md_path, "r", encoding="utf-8", errors="replace"
            ) as handle:
                text = handle.read()
            return text, None
        return None, f"task_md_not_found: {task_md_path}"
    except Exception as exc:
        return None, f"task_md_read_error: {exc}"


def _build_safe_escalate_profile(
    task_id: str,
    reason: str,
    ts: str,
) -> dict:
    """Assemble the fallback profile returned when preflight cannot complete.

    Every gate listed in ``GATE_KEYS`` is marked ``N/A`` with ``reason`` as
    its rationale, the task mode is ``UNKNOWN``, and the overall result is
    ``ESCALATE`` with a chair decision required.

    Args:
        task_id: Task identifier; an empty value is reported as ``UNKNOWN``.
        reason: Short description of why preflight fell back.
        ts: Timestamp string stored verbatim in the profile.
    """
    from .finish_task_profile_schema import GATE_KEYS

    gate_entries = {}
    for gate_key in GATE_KEYS:
        gate_entries[gate_key] = {
            "result": "N/A",
            "evidence": "preflight_safe_fail",
            "rationale": reason,
        }

    classification_evidence = {
        "signal": reason,
        "priority_applied": "UNKNOWN",
        "confidence": "UNKNOWN",
    }

    profile = {
        "schema_version": SCHEMA_VERSION,
        "task_id": task_id or "UNKNOWN",
        "task_mode": "UNKNOWN",
        "task_mode_classification_evidence": classification_evidence,
        "gates": gate_entries,
        "overall_result": "ESCALATE",
        "overall_rationale": f"preflight_safe_fail: {reason}",
        "lock_sha_used": None,
        "lock_sha_fallback_reason": reason,
        "dirty_workspace_classification": "UNKNOWN_DIRTY",
        "chair_decision_required": True,
        "anu_action_recommended": "ESCALATE_CHAIR",
        "ts": ts,
        "chair_authorization_id_linkage": "CHAIR-AUTH-TASK-2706-V36-FINISH-TASK-PROFILE-LAYER-P1B-260529",
    }
    return profile


def run_preflight(
    task_md_path: Optional[str],
    task_id: Optional[str] = None,
    lock_sha_hint: Optional[str] = None,
    workspace_path: Optional[str] = None,
    task_timers_path: Optional[str] = None,
    gate_inputs_override: Optional[dict] = None,
) -> dict:
    """Run the finish-task preflight for a given task.

    Steps: read the task md, classify the task mode, resolve the lock_sha,
    audit the dirty workspace via ``git status --porcelain``, evaluate the
    gates, compute the overall result, and assemble the profile.

    Args:
        task_md_path: Path to the task md file. A missing or unreadable file
            is treated as empty text.
        task_id: Task ID string (e.g. "task-2706"). When omitted, it is read
            from a ``**task_id**: task-...`` line in the task md, else
            "UNKNOWN".
        lock_sha_hint: Optional pre-known lock_sha; used as-is when given.
        workspace_path: Workspace root for git status.
        task_timers_path: Optional path to task-timers.json.
        gate_inputs_override: Optional dict that replaces the computed gate
            inputs entirely (used in tests/replay scenarios).

    Returns:
        Full §11 profile dict. Never raises — always returns something.
        If ``git status`` cannot be run, the workspace is reported as
        ``UNKNOWN_DIRTY`` (count 0) rather than ``CLEAN``.

    Safe-fail: any exception → UNKNOWN/ESCALATE profile.
    """
    ts = datetime.now(timezone.utc).isoformat()

    try:
        # Lazy imports to allow safe-fail if modules not available
        try:
            from .finish_task_profile_classifier import classify_task_mode  # type: ignore[import-not-found]
            from .finish_task_profile_judge import evaluate_gates, compute_overall  # type: ignore[import-not-found]
        except ImportError as imp_err:
            return _build_safe_escalate_profile(
                task_id or "UNKNOWN",
                f"import_error: {imp_err}",
                ts,
            )

        # --- Step 1: Read task md ---
        # A read failure yields content None, so the text simply stays empty
        # and classification proceeds on whatever is available.
        task_md_text = ""
        if task_md_path:
            content, _read_err = _read_task_md(task_md_path)
            task_md_text = content or ""

        # Extract task_id from md if not provided
        if not task_id and task_md_text:
            m = re.search(r"\*\*task_id\*\*:\s*(task-\S+)", task_md_text)
            if m:
                task_id = m.group(1).strip()

        task_id = task_id or "UNKNOWN"

        # --- Step 2: Classify task_mode ---
        classification = classify_task_mode(task_md_text, task_md_path)
        task_mode = classification.get("task_mode", "UNKNOWN")

        # --- Step 3: Extract lock_sha ---
        if lock_sha_hint:
            lock_sha = lock_sha_hint
            lock_sha_fallback = "provided_as_hint"
        else:
            lock_sha, lock_sha_fallback = extract_lock_sha(
                task_md_text, task_timers_path
            )

        # --- Step 4: Dirty workspace audit ---
        git_status_out = _run_git_status(workspace_path)
        if git_status_out is None:
            # git failed or timed out: the workspace state is unknown. Do not
            # feed "" to the classifier, which would report a false CLEAN.
            dirty_count, dirty_classification = 0, "UNKNOWN_DIRTY"
        else:
            dirty_count, dirty_classification = classify_dirty_workspace(
                git_status_out
            )

        # --- Step 5: Build gate inputs ---
        if gate_inputs_override is not None:
            gate_inputs = gate_inputs_override
        else:
            gate_inputs = {
                "lock_sha": lock_sha,
                "dirty_count": dirty_count,
                "dirty_classification": dirty_classification,
            }

        # --- Step 6: Evaluate gates ---
        gates_result = evaluate_gates(task_mode, gate_inputs)

        # --- Step 7: Compute overall ---
        overall_result, overall_rationale = compute_overall(gates_result)

        # --- Step 8: Determine anu_action and chair_decision_required ---
        if overall_result == "PASS":
            anu_action = "PROCEED"
            chair_required = False
        elif overall_result == "WARN":
            anu_action = "CAVEAT_PROCEED"
            chair_required = False
        elif overall_result == "FAIL":
            anu_action = "BLOCK"
            chair_required = True
        else:  # ESCALATE, or any unrecognised result: defer to the chair
            anu_action = "ESCALATE_CHAIR"
            chair_required = True

        # --- Build profile ---
        profile = {
            "schema_version": SCHEMA_VERSION,
            "task_id": task_id,
            "task_mode": task_mode,
            "task_mode_classification_evidence": {
                "signal": classification.get("signal", ""),
                "priority_applied": classification.get("priority_applied", "UNKNOWN"),
                "confidence": classification.get("confidence", "UNKNOWN"),
            },
            "gates": gates_result,
            "overall_result": overall_result,
            "overall_rationale": overall_rationale,
            "lock_sha_used": lock_sha,
            "lock_sha_fallback_reason": lock_sha_fallback,
            "dirty_workspace_classification": dirty_classification,
            "chair_decision_required": chair_required,
            "anu_action_recommended": anu_action,
            "ts": ts,
            "chair_authorization_id_linkage": "CHAIR-AUTH-TASK-2706-V36-FINISH-TASK-PROFILE-LAYER-P1B-260529",
        }

        return profile

    except Exception as exc:
        return _build_safe_escalate_profile(
            task_id or "UNKNOWN",
            f"preflight_exception: {exc}",
            ts,
        )