"""v3.6 Runtime Harness — finish-task profile task_mode classifier (P1-B §9).

chair_authorization_id=CHAIR-AUTH-TASK-2706-V36-FINISH-TASK-PROFILE-LAYER-P1B-260529

Implements the §9 automatic classification of a task into one of six task_modes,
based on signals found in the task md.

Auto-classification priority (§9):
    code > system_hook > local_runtime > closeout_marker_only > callback_only > read_only > UNKNOWN

Public API
----------
- ``classify_task_mode(task_md_text, task_md_path)``
  Returns a dict:
  {
    "task_mode": str,
    "signal": str,
    "priority_applied": str,
    "confidence": "HIGH" | "MEDIUM" | "LOW" | "UNKNOWN",
  }

Safe-fail: never raises; any exception returns task_mode UNKNOWN with confidence UNKNOWN.
"""
from __future__ import annotations

import re
from typing import Optional

from .finish_task_profile_schema import VALID_TASK_MODES

# ---------------------------------------------------------------------------
# Internal priority ordering: task modes listed from highest to lowest
# classification priority (lower index = higher priority, higher index = lower).
# "UNKNOWN" is the trailing sentinel: _pick_mode_by_priority stops walking
# when it reaches it, and classify_task_mode skips it (via [:-1]) when it
# builds the signal summary, so it must stay the last element.
# ---------------------------------------------------------------------------
_PRIORITY_ORDER = [
    "code",
    "system_hook",
    "local_runtime",
    "closeout_marker_only",
    "callback_only",
    "read_only",
    "UNKNOWN",
]

# ---------------------------------------------------------------------------
# Path signal patterns for each mode
# ---------------------------------------------------------------------------

# code: production source locations and file types. Harness tests are
# deliberately excluded through negative lookaheads ("tests/harness",
# "test_v36..."); those are listed under the local_runtime patterns instead.
# The .ts/.js/.go entries only match when the extension is followed by a
# quote character or a space.
_CODE_PATTERNS = [
    re.compile(expr)
    for expr in (
        r"\bsrc/",
        r"\bapp/",
        r"\blib/",
        r"\bapi/",
        r"\bmodels/",
        r"\bcontrollers/",
        r"\bservices/",
        r"\bviews/",
        r"\btests/(?!harness)",
        r"\btest_(?!v36)",
        r"\.ts[\"' ]",
        r"\.js[\"' ]",
        r"\.go[\"' ]",
    )
]

# system_hook: the .claude tree, hook directories, settings.json, the
# finish-task script, and the session-watchdog / task-scope-guard names.
# Several entries overlap on purpose-built paths: ".claude/hooks/x" matches
# three of them, and each matching pattern counts as one hit.
_SYSTEM_HOOK_PATTERNS = [
    re.compile(expr)
    for expr in (
        r"\.claude/hooks/",
        r"settings\.json",
        r"finish[-_]task\.sh",
        r"scripts/finish",
        r"hooks/",
        r"\.claude/",
        r"session[-_]watchdog",
        r"task[-_]scope[-_]guard",
    )
]

# local_runtime: harness scripts and harness tests, dispatch tooling,
# watchdog, taskctl, task timer, and cron. These are plain unanchored
# substrings, so "watchdog" also matches inside "session-watchdog", and a
# versioned harness path (scripts/harness/v36/...) matches two entries.
_LOCAL_RUNTIME_PATTERNS = [
    re.compile(expr)
    for expr in (
        r"scripts/harness/",
        r"dispatch/",
        r"watchdog",
        r"taskctl",
        r"task[-_]timer",
        r"cron",
        r"scripts/harness/v\d+/",
        r"dispatch_marker",
        r"spawn_detector",
        r"closeout_marker_watcher",
        r"tests/harness/",
        r"test_v36_",
    )
]

# closeout_marker_only: the new_marker_artifacts key, any mention of
# "closeout", and memory/events marker files ending in
# .done / .acked / .notified / .escalated.
_CLOSEOUT_PATTERNS = [
    re.compile(expr)
    for expr in (
        r"new_marker_artifacts",
        r"closeout",
        r"memory/events/.*\.done",
        r"memory/events/.*\.acked",
        r"memory/events/.*\.notified",
        r"memory/events/.*\.escalated",
    )
]

# callback_only: the literal "callback_only" token (case-sensitive) plus
# case-insensitive phrases describing an ANU callback envelope.
_CALLBACK_PATTERNS = [
    re.compile(expr, flags)
    for expr, flags in (
        (r"callback_only", 0),
        (r"ANU.*callback.*envelope", re.IGNORECASE),
        (r"envelope.*only", re.IGNORECASE),
        (r"collector_key.*ANU", re.IGNORECASE),
        (r"anu_action.*PROCEED", re.IGNORECASE),
    )
]

# read_only: audit / diagnosis / report vocabulary, the memory/reports and
# memory/specs directories, and explicitly empty edits_existing / new_modules
# lists ("[]" later on the same line as the key).
_READ_ONLY_PATTERNS = [
    re.compile(expr)
    for expr in (
        r"read[-_]only",
        r"audit",
        r"diagnosis",
        r"memory/reports/",
        r"memory/specs/",
        r"diagnostic",
        r"report[-_]only",
        r"edits_existing.*\[\]",
        r"new_modules.*\[\]",
    )
]

# Structural probes for the expected_files keys (new_modules, edits_existing,
# new_marker_artifacts). They inspect the raw task md text, not parsed YAML.

# "edits_existing:" followed (after optional whitespace and at least one
# newline) by a "- " list item, i.e. a non-empty block-style list.
_HAS_EDITS_EXISTING = re.compile(r"edits_existing:\s*\n\s*-\s+")
# "edits_existing:" immediately followed by an inline empty list "[]".
_EMPTY_EDITS = re.compile(r"edits_existing:\s*\[\]")
# "new_modules:" whose first list item begins with a "path:" key.
_HAS_NEW_MODULES = re.compile(r"new_modules:\s*\n\s*-\s+path:")
# Matches the bare "new_marker_artifacts:" key. Despite the name, this regex
# alone does not establish that markers are the ONLY artifacts; the caller
# combines it with the other probes to decide that.
_HAS_MARKER_ONLY = re.compile(r"new_marker_artifacts:")
# Matches the bare "new_modules:" key regardless of its content.
# NOTE(review): not referenced by any code visible in this module — confirm
# whether it is still needed.
_HAS_NEW_MODULES_ONLY = re.compile(r"new_modules:")


def _count_pattern_hits(text: str, patterns: list) -> int:
    """Return how many of ``patterns`` occur at least once in ``text``.

    Each compiled pattern contributes at most one hit, no matter how many
    times it matches, so the result ranges from 0 to ``len(patterns)``.
    """
    hits = 0
    for pattern in patterns:
        if pattern.search(text):
            hits += 1
    return hits


def _score_mode(text: str) -> dict[str, int]:
    """Score ``text`` against every task_mode's pattern list.

    Returns a dict keyed by task_mode (in priority order, without
    ``UNKNOWN``) whose values are the number of that mode's patterns found
    in ``text``.
    """
    pattern_sets = (
        ("code", _CODE_PATTERNS),
        ("system_hook", _SYSTEM_HOOK_PATTERNS),
        ("local_runtime", _LOCAL_RUNTIME_PATTERNS),
        ("closeout_marker_only", _CLOSEOUT_PATTERNS),
        ("callback_only", _CALLBACK_PATTERNS),
        ("read_only", _READ_ONLY_PATTERNS),
    )
    return {
        mode: _count_pattern_hits(text, mode_patterns)
        for mode, mode_patterns in pattern_sets
    }


def _pick_mode_by_priority(scores: dict[str, int]) -> tuple[str, str, str]:
    """Choose the winning task_mode from per-mode scores.

    Modes are examined in ``_PRIORITY_ORDER`` (code > system_hook >
    local_runtime > closeout_marker_only > callback_only > read_only) and
    the first mode with a positive score wins, with one exception: ``code``
    is skipped when its signal is weak. Code counts as strong enough when
    any of the following holds:

    * its score is at least 2,
    * its score is strictly greater than every lower-priority mode's score,
    * it accounts for at least half of the total score.

    Confidence for the winner:

    * ``HIGH``   - score >= 3 and share of total >= 0.5
    * ``MEDIUM`` - score >= 2 or share of total >= 0.35
    * ``LOW``    - otherwise

    Args:
        scores: Mapping of task_mode to its score; missing modes count as 0.

    Returns:
        ``(task_mode, priority_applied, confidence)``. ``priority_applied``
        is always the same value as ``task_mode``. All three are
        ``"UNKNOWN"`` when the scores sum to zero or no mode qualifies.
    """
    unknown = ("UNKNOWN", "UNKNOWN", "UNKNOWN")

    grand_total = sum(scores.values())
    if grand_total == 0:
        return unknown

    for position, candidate in enumerate(_PRIORITY_ORDER):
        if candidate == "UNKNOWN":
            # Sentinel reached: nothing above it qualified.
            break

        hits = scores.get(candidate, 0)
        if hits <= 0:
            continue

        share = hits / grand_total if grand_total > 0 else 0

        if candidate == "code":
            # Strongest score among the modes ranked below code.
            rival_scores = [
                scores.get(other, 0)
                for other in _PRIORITY_ORDER[position + 1:]
                if other != "UNKNOWN"
            ]
            strongest_rival = max(rival_scores, default=0)
            code_is_strong = hits >= 2 or hits > strongest_rival or share >= 0.5
            if not code_is_strong:
                # A single, outnumbered code hint: defer to the next mode.
                continue

        if hits >= 3 and share >= 0.5:
            level = "HIGH"
        elif hits >= 2 or share >= 0.35:
            level = "MEDIUM"
        else:
            level = "LOW"

        return candidate, candidate, level

    return unknown


def _extract_allowed_paths(text: str) -> list[str]:
    """Collect path-like strings from the ``allowed_resources:`` section.

    The section runs from the ``allowed_resources:`` key up to the next line
    that starts with a word character (i.e. the next unindented key) or the
    end of the text. Within it, three kinds of tokens are gathered, in this
    order: every double-quoted string, every single-quoted string, and every
    unquoted ``- `` list item beginning with ``scripts/``, ``tests/``,
    ``src/`` or ``app/``.

    Returns an empty list when the section is absent.
    """
    section = re.search(r"allowed_resources:.*?(?=\n\w|\Z)", text, re.DOTALL)
    if not section:
        return []

    body = section.group(0)
    token_patterns = (
        r'"([^"]+)"',
        r"'([^']+)'",
        r"- (scripts/\S+|tests/\S+|src/\S+|app/\S+)",
    )
    collected: list[str] = []
    for token_pattern in token_patterns:
        collected.extend(re.findall(token_pattern, body))
    return collected


def _extract_expected_paths(text: str) -> list[str]:
    """Collect quoted strings from the ``expected_files:`` section.

    The section runs from the ``expected_files:`` key up to the next
    markdown heading of level 1-3 (a line starting with ``#``, ``##`` or
    ``###`` followed by a space) or the end of the text. Double-quoted
    strings are returned first, then single-quoted ones.

    Returns an empty list when the section is absent.
    """
    section = re.search(r"expected_files:.*?(?=\n#{1,3} |\Z)", text, re.DOTALL)
    if not section:
        return []

    body = section.group(0)
    double_quoted = re.findall(r'"([^"]+)"', body)
    single_quoted = re.findall(r"'([^']+)'", body)
    return double_quoted + single_quoted


def classify_task_mode(
    task_md_text: str,
    task_md_path: Optional[str] = None,
) -> dict:
    """Classify a task's task_mode from its task md content.

    The text is scored against each mode's pattern list, the scores are
    adjusted by structural facts (which ``expected_files`` keys are present
    or empty, which kinds of paths are referenced), and the winner is chosen
    by ``_pick_mode_by_priority``.

    Args:
        task_md_text: Full text content of the task md file.
        task_md_path: Optional path for logging; not used in classification.

    Returns:
        dict with keys:
          * ``task_mode`` - the chosen mode, or ``"UNKNOWN"``.
          * ``signal`` - human-readable summary of the non-zero scores and
            the number of extracted paths (or the failure reason).
          * ``priority_applied`` - the priority rule that selected the mode.
          * ``confidence`` - ``"HIGH"``, ``"MEDIUM"``, ``"LOW"`` or
            ``"UNKNOWN"``.

    Safe-fail: never raises. Empty or non-string input, and any internal
    exception, yield ``task_mode`` / ``priority_applied`` / ``confidence``
    all set to ``"UNKNOWN"``.
    """
    try:
        if not task_md_text or not isinstance(task_md_text, str):
            return {
                "task_mode": "UNKNOWN",
                "signal": "empty_or_invalid_task_md",
                "priority_applied": "UNKNOWN",
                "confidence": "UNKNOWN",
            }

        text = task_md_text

        # --- Structural facts about the expected_files keys ---
        has_marker = bool(_HAS_MARKER_ONLY.search(text))
        has_edits = bool(_HAS_EDITS_EXISTING.search(text))
        has_new_modules = bool(_HAS_NEW_MODULES.search(text))
        empty_edits = bool(_EMPTY_EDITS.search(text))

        # --- Path extraction ---
        allowed_paths = _extract_allowed_paths(text)
        expected_paths = _extract_expected_paths(text)
        all_paths = allowed_paths + expected_paths

        # Append the extracted paths to the scored text. Join the merged
        # list so EVERY path is space-separated: joining the two lists
        # separately and concatenating the results would glue the last
        # allowed path onto the first expected path (e.g. "a.pysrc/b.py"),
        # which can defeat \b-anchored patterns or fabricate matches.
        combined = text + "\n" + " ".join(all_paths)

        scores = _score_mode(combined)

        # --- Structural boosts: guarantee a minimum score of 1 ---
        # local_runtime: any harness or dispatch path.
        if any("harness" in p or "dispatch" in p for p in all_paths):
            scores["local_runtime"] = max(scores["local_runtime"], 1)

        # system_hook: any .claude, hooks, or settings.json path.
        if any(
            ".claude" in p or "hooks" in p or "settings.json" in p
            for p in all_paths
        ):
            scores["system_hook"] = max(scores["system_hook"], 1)

        # code: a production directory prefix or a source-file extension.
        code_prefixes = ("src/", "app/", "lib/", "api/")
        code_suffixes = (".py", ".sh", ".ts", ".js", ".go")
        if any(
            p.startswith(code_prefixes) or p.endswith(code_suffixes)
            for p in all_paths
        ):
            scores["code"] = max(scores["code"], 1)

        # Strong code boost: a non-empty edits_existing list means existing
        # files are modified, which is treated as the hallmark of a code
        # task and lets code outrank local_runtime / system_hook signals.
        if has_edits and not empty_edits:
            scores["code"] = max(scores["code"] + 2, 2)

        # Marker-only override: markers declared, edits explicitly empty,
        # and no new modules -> force closeout_marker_only and silence
        # every other mode.
        if has_marker and not has_edits and not has_new_modules and empty_edits:
            scores["closeout_marker_only"] = max(
                scores["closeout_marker_only"] + 2, 3
            )
            for suppressed in (
                "code",
                "system_hook",
                "local_runtime",
                "callback_only",
                "read_only",
            ):
                scores[suppressed] = 0

        # Callback-only boost: the task md names callback_only and declares
        # neither new modules nor edits.
        if re.search(r"callback_only", text) and not has_new_modules and empty_edits:
            scores["callback_only"] = max(scores["callback_only"] + 2, 2)

        # Read-only boost: no mutations of any kind are declared.
        if empty_edits and not has_new_modules and not has_marker:
            scores["read_only"] = max(scores["read_only"] + 1, 1)

        task_mode, priority_applied, confidence = _pick_mode_by_priority(scores)

        # --- Signal summary: non-zero scores in priority order ---
        signal_parts = [
            f"{mode}:{scores.get(mode, 0)}"
            for mode in _PRIORITY_ORDER[:-1]  # skip the UNKNOWN sentinel
            if scores.get(mode, 0) > 0
        ]
        signal = "scores=[" + ",".join(signal_parts) + "]"
        if allowed_paths:
            signal += f" allowed_paths={len(allowed_paths)}"
        if expected_paths:
            signal += f" expected_paths={len(expected_paths)}"

        return {
            "task_mode": task_mode,
            "signal": signal,
            "priority_applied": priority_applied,
            "confidence": confidence,
        }

    except Exception as exc:
        # Deliberate catch-all: this is the documented safe-fail boundary,
        # so the failure is reported through the result instead of raised.
        return {
            "task_mode": "UNKNOWN",
            "signal": f"classifier_exception: {exc}",
            "priority_applied": "UNKNOWN",
            "confidence": "UNKNOWN",
        }
