"""v3.6 Runtime Harness — Layer 3: Watchdog Suppression Gate.

chair_authorization_id=CHAIR-AUTH-TASK-2704-V36-CONTROL-PLANE-P0-MVP-260528

Contract:
- evaluate_alert(task_id, hb_age, ev_age, retry_count, max_retry,
                 has_progress_marker, ...) -> dict
  Returns {"verdict": <6-state enum>, "chat_allowed": bool,
           "alive_signals": [...], "reason": str}

- 6 states: QUIET, WATCH, ALERT_INFO, ALERT_WARN, ESCALATE_CHAIR, SUPPRESSED

- Alive OR-7 signals (any 1 true → suppress chat):
  1. dispatch marker exists
  2. closeout marker exists
  3. escalate marker exists
  4. spawn state >= WORK_STARTED
  5. bot session process active
  6. worktree mtime < 900s
  7. recent artifact mtime < 900s

- hb_age == -1 AND ev_age == -1 ALONE must NOT escalate to chat.
- chat_allowed = True ONLY when state is ESCALATE_CHAIR.
- ESCALATE_CHAIR: silently_stalled for 2+ consecutive cycles AND (Critical 7 OR explicit chair escalate).
- .escalate / .escalate.acked marker → immediate SUPPRESSED (task-2405 fix#A preserved).

CLI usage:
    python -m scripts.harness.v36.watchdog_suppression_gate \\
        --task-id task-2703 --hb-age 900 --ev-age 900 [options]
"""
from __future__ import annotations

import glob
import os
import sys
import time
from typing import Optional

# Authorization identifier for this module; same value as the
# chair_authorization_id line in the module docstring. No other code visible
# in this file reads it.
CHAIR_AUTHORIZATION_ID = "CHAIR-AUTH-TASK-2704-V36-CONTROL-PLANE-P0-MVP-260528"

# Verdict enum: the six string states reported in the "verdict" field of the
# result dict. Only ESCALATE_CHAIR yields chat_allowed=True (see _verdict).
QUIET = "QUIET"  # alive signals present, or nothing stale
WATCH = "WATCH"  # inconclusive: elevated ev_age, -1/-1 ages, partial staleness, or safe-fail
ALERT_INFO = "ALERT_INFO"  # silently stalled, consecutive_stalled_cycles == 1
ALERT_WARN = "ALERT_WARN"  # silently stalled >= 2 cycles without critical-7 / explicit escalate
ESCALATE_CHAIR = "ESCALATE_CHAIR"  # silently stalled >= 2 cycles with critical-7 or explicit escalate
SUPPRESSED = "SUPPRESSED"  # an escalate marker exists for the task

# Hard-coded workspace root; only used here to derive the default events dir.
_WORKSPACE = "/home/jay/workspace"
# Default marker directory, used when evaluate_alert() gets no events_dir.
_EVENTS_DIR = os.path.join(_WORKSPACE, "memory/events")
# Threshold in seconds: hb_age / ev_age at or above it count as stale, and
# worktree / artifact values strictly below it count as fresh.
_ALIVE_THRESHOLD_SECONDS = 900


# ─── Public API ───────────────────────────────────────────────────────────────

def evaluate_alert(
    task_id: str,
    hb_age: int,
    ev_age: int,
    retry_count: int = 0,
    max_retry: int = 2,
    has_progress_marker: bool = False,
    consecutive_stalled_cycles: int = 0,
    is_critical_7: bool = False,
    explicit_chair_escalate: bool = False,
    events_dir: Optional[str] = None,
    # Override individual alive signals for testing
    _override_dispatch_marker: Optional[bool] = None,
    _override_closeout_marker: Optional[bool] = None,
    _override_escalate_marker: Optional[bool] = None,
    _override_spawn_ge_work_started: Optional[bool] = None,
    _override_bot_session_active: Optional[bool] = None,
    _override_worktree_mtime_lt_900: Optional[bool] = None,
    _override_artifact_mtime_lt_900: Optional[bool] = None,
) -> dict:
    """Decide which verdict a watchdog alert for *task_id* should receive.

    This is the exception-safe public entry point: it bundles the test
    overrides into one mapping, delegates to ``_evaluate_alert_impl`` and
    converts any exception into a non-chatting WATCH verdict.

    Args:
        task_id: Task identifier.
        hb_age: Seconds since last heartbeat (-1 if unmeasured).
        ev_age: Seconds since last events file modification (-1 if unmeasured).
        retry_count: Current retry count for this task. Forwarded to the
            implementation, which does not currently consult it.
        max_retry: Maximum allowed retry count. Forwarded likewise.
        has_progress_marker: True if a named progress marker exists; counts
            as an additional alive signal.
        consecutive_stalled_cycles: Number of consecutive cycles with a
            stalled verdict.
        is_critical_7: True if the task is classified as Critical-7.
        explicit_chair_escalate: True if an explicit chair escalate flag is set.
        events_dir: Marker directory; any falsy value (None or "") selects
            the module default ``_EVENTS_DIR``.
        _override_*: Testing hooks. ``None`` lets the matching signal be
            probed for real; a bool replaces the probe result.

    Returns:
        A dict with the keys:
            "verdict": one of the six verdict strings,
            "chat_allowed": True only when verdict == ESCALATE_CHAIR,
            "alive_signals": names of the signals that evaluated true,
            "reason": human-readable explanation.

    Never raises for ``Exception`` subclasses raised by the implementation.
    """
    forced_signals = {
        "dispatch_marker": _override_dispatch_marker,
        "closeout_marker": _override_closeout_marker,
        "escalate_marker": _override_escalate_marker,
        "spawn_ge_work_started": _override_spawn_ge_work_started,
        "bot_session_active": _override_bot_session_active,
        "worktree_mtime_lt_900": _override_worktree_mtime_lt_900,
        "artifact_mtime_lt_900": _override_artifact_mtime_lt_900,
    }
    try:
        outcome = _evaluate_alert_impl(
            task_id=task_id,
            hb_age=hb_age,
            ev_age=ev_age,
            retry_count=retry_count,
            max_retry=max_retry,
            has_progress_marker=has_progress_marker,
            consecutive_stalled_cycles=consecutive_stalled_cycles,
            is_critical_7=is_critical_7,
            explicit_chair_escalate=explicit_chair_escalate,
            events_dir=events_dir or _EVENTS_DIR,
            overrides=forced_signals,
        )
    except Exception as exc:
        # Fail safe: a broken gate must never open the chat channel.
        outcome = {
            "verdict": WATCH,
            "chat_allowed": False,
            "alive_signals": [],
            "reason": f"evaluate_alert safe-fail: {exc}",
        }
    return outcome


# ─── Internal implementation ──────────────────────────────────────────────────

def _evaluate_alert_impl(
    task_id: str,
    hb_age: int,
    ev_age: int,
    retry_count: int,
    max_retry: int,
    has_progress_marker: bool,
    consecutive_stalled_cycles: int,
    is_critical_7: bool,
    explicit_chair_escalate: bool,
    events_dir: str,
    overrides: dict,
) -> dict:
    """Compute the verdict dict for one watchdog cycle.

    Decision order (first match wins):
      1. Escalate marker present                  -> SUPPRESSED
      2. Silently stalled, cycles >= 2            -> ESCALATE_CHAIR if critical-7
                                                     or explicit chair escalate,
                                                     else ALERT_WARN
      3. Silently stalled, cycles == 1            -> ALERT_INFO
      4. Any alive signal                         -> WATCH if ev_age exceeds half
                                                     the alive threshold, else QUIET
      5. hb_age == -1 and ev_age == -1            -> WATCH
      6. hb or ev stale                           -> WATCH
      7. Otherwise                                -> QUIET

    "Silently stalled" means: no alive signal, both hb and ev stale (an age of
    -1 or >= the alive threshold), and not the case where both ages are -1.

    ``retry_count`` and ``max_retry`` are accepted but not consulted.
    ``overrides`` maps signal names to ``None`` (probe for real) or a bool.
    This function does not catch exceptions itself.
    """
    now = time.time()

    # Task-2405 fix#A: .escalate / .escalate.acked wins over everything and is
    # checked before any other (potentially more expensive) probe runs.
    if _check_escalate_marker(task_id, events_dir, overrides):
        return _verdict(
            SUPPRESSED,
            ["escalate_marker"],
            "escalate or escalate.acked marker present — immediate suppress (task-2405 fix#A)",
        )

    # Alive signals, probed in reporting order. Signal 3 (escalate marker) is
    # absent: reaching this point means it is false. The watchdog's own
    # progress marker is appended as an extra signal.
    probes = (
        ("dispatch_marker", lambda: _check_dispatch_marker(task_id, events_dir, overrides)),
        ("closeout_marker", lambda: _check_closeout_marker(task_id, events_dir, overrides)),
        ("spawn_ge_work_started", lambda: _check_spawn_ge_work_started(task_id, overrides)),
        ("bot_session_active", lambda: _check_bot_session_active(overrides)),
        ("worktree_mtime_lt_900", lambda: _check_worktree_fresh(task_id, overrides, now)),
        ("artifact_mtime_lt_900", lambda: _check_artifact_fresh(task_id, events_dir, overrides, now)),
        ("progress_marker", lambda: has_progress_marker),
    )
    alive_signals = [label for label, probe in probes if probe()]
    is_alive = bool(alive_signals)

    # An unmeasured age (-1) is treated as stale, but two unmeasured ages on
    # their own are not enough evidence to call the task stalled.
    both_unmeasured = hb_age == -1 and ev_age == -1
    hb_stale = hb_age == -1 or hb_age >= _ALIVE_THRESHOLD_SECONDS
    ev_stale = ev_age == -1 or ev_age >= _ALIVE_THRESHOLD_SECONDS
    silently_stalled = (
        not is_alive and not both_unmeasured and hb_stale and ev_stale
    )

    if silently_stalled:
        if consecutive_stalled_cycles >= 2:
            if is_critical_7 or explicit_chair_escalate:
                # The only path that permits a chat message.
                return _verdict(
                    ESCALATE_CHAIR,
                    alive_signals,
                    (
                        f"silently_stalled {consecutive_stalled_cycles} consecutive cycles + "
                        f"critical_7={is_critical_7} explicit_chair_escalate={explicit_chair_escalate} — "
                        "ESCALATE_CHAIR: chat sendMessage ALLOWED"
                    ),
                )
            # Repeated stall without critical criteria: warn, but stay silent.
            return _verdict(
                ALERT_WARN,
                alive_signals,
                f"silently_stalled {consecutive_stalled_cycles} cycles but not critical_7/explicit — ALERT_WARN, chat=0",
            )
        if consecutive_stalled_cycles == 1:
            return _verdict(
                ALERT_INFO,
                alive_signals,
                "silently_stalled cycle 1 — ALERT_INFO log only, chat=0",
            )
        # Any other cycle count falls through to the stale check below.

    if is_alive:
        # Alive but the events file is older than half the threshold: watch it.
        if ev_age > 0 and ev_age > _ALIVE_THRESHOLD_SECONDS // 2:
            return _verdict(WATCH, alive_signals, "alive signals present but ev_age elevated — WATCH")
        return _verdict(QUIET, alive_signals, "alive signals present — QUIET")

    if both_unmeasured:
        return _verdict(
            WATCH,
            alive_signals,
            "hb_age=-1 and ev_age=-1 alone — insufficient evidence, cannot assert stalled — WATCH",
        )

    if hb_stale or ev_stale:
        return _verdict(WATCH, alive_signals, "partial stale signals — WATCH, awaiting next cycle")

    return _verdict(QUIET, alive_signals, "no stall signals — QUIET")


def _verdict(state: str, alive_signals: list, reason: str) -> dict:
    """Assemble the result dict for *state*.

    ``chat_allowed`` is derived from the state: True only for ESCALATE_CHAIR.
    ``alive_signals`` is stored as passed in (not copied).
    """
    may_chat = state == ESCALATE_CHAIR
    return dict(
        verdict=state,
        chat_allowed=may_chat,
        alive_signals=alive_signals,
        reason=reason,
    )


# ─── Signal checkers ─────────────────────────────────────────────────────────

def _check_escalate_marker(task_id: str, events_dir: str, overrides: dict) -> bool:
    """Return True when an escalate marker file exists for *task_id*.

    A marker is any path in *events_dir* named ``<task_id>.escalate*``, which
    covers both ``.escalate`` and ``.escalate.acked``.

    Args:
        task_id: Task identifier; matched literally.
        events_dir: Directory holding marker files; matched literally.
        overrides: Test hooks. A non-None ``"escalate_marker"`` entry is
            returned as-is and the filesystem is not touched.

    Returns:
        True if at least one marker path matches; False otherwise, including
        on any error while probing (best-effort, never raises).
    """
    forced = overrides.get("escalate_marker")
    if forced is not None:
        return forced
    try:
        # Escape glob metacharacters so that only the trailing "*" acts as a
        # wildcard. Unescaped, a task id such as "*" would match every task's
        # marker and force SUPPRESSED, and a bracketed id or directory would
        # never match its own files.
        pattern = os.path.join(
            glob.escape(os.fspath(events_dir)),
            glob.escape(f"{task_id}") + ".escalate*",
        )
        return next(glob.iglob(pattern), None) is not None
    except Exception:
        return False


def _check_dispatch_marker(task_id: str, events_dir: str, overrides: dict) -> bool:
    """Return True when a dispatch marker file exists for *task_id*.

    A marker is any path in *events_dir* named ``<task_id>.dispatched*``.

    Args:
        task_id: Task identifier; matched literally.
        events_dir: Directory holding marker files; matched literally.
        overrides: Test hooks. A non-None ``"dispatch_marker"`` entry is
            returned as-is and the filesystem is not touched.

    Returns:
        True if at least one marker path matches; False otherwise, including
        on any error while probing (best-effort, never raises).
    """
    forced = overrides.get("dispatch_marker")
    if forced is not None:
        return forced
    try:
        # Escape glob metacharacters so that only the trailing "*" is a
        # wildcard; otherwise "*", "?" or "[" in the id or directory would
        # match other tasks' markers or miss the task's own.
        pattern = os.path.join(
            glob.escape(os.fspath(events_dir)),
            glob.escape(f"{task_id}") + ".dispatched*",
        )
        return next(glob.iglob(pattern), None) is not None
    except Exception:
        return False


def _check_closeout_marker(task_id: str, events_dir: str, overrides: dict) -> bool:
    """Return True when a closeout-style marker file exists for *task_id*.

    Checked names inside *events_dir*, in this order:
    ``<task_id>.harness-mvp-active*``, ``<task_id>.*active.json``,
    ``<task_id>.done*`` and ``<task_id>.completion.txt``.

    Args:
        task_id: Task identifier; matched literally.
        events_dir: Directory holding marker files; matched literally.
        overrides: Test hooks. A non-None ``"closeout_marker"`` entry is
            returned as-is and the filesystem is not touched.

    Returns:
        True if any of the patterns matches at least one path; False
        otherwise, including on any error while probing (best-effort, never
        raises).
    """
    forced = overrides.get("closeout_marker")
    if forced is not None:
        return forced
    try:
        # Escape glob metacharacters in the caller-supplied parts so that only
        # the wildcards written in the suffixes below are active.
        safe_dir = glob.escape(os.fspath(events_dir))
        safe_task = glob.escape(f"{task_id}")
        suffixes = (
            ".harness-mvp-active*",
            ".*active.json",
            ".done*",
            ".completion.txt",
        )
        return any(
            next(glob.iglob(os.path.join(safe_dir, safe_task + suffix)), None) is not None
            for suffix in suffixes
        )
    except Exception:
        return False


def _check_spawn_ge_work_started(task_id: str, overrides: dict) -> bool:
    """Report whether the task's spawn state is at least WORK_STARTED.

    A non-None ``overrides["spawn_ge_work_started"]`` is returned directly.
    Otherwise the answer comes from ``spawn_detector``: the ``"state"`` entry
    of ``detect_spawn_state(task_id)`` is compared with ``WORK_STARTED`` via
    ``state_ge``. Any failure (including the import) yields False.
    """
    forced = overrides.get("spawn_ge_work_started")
    if forced is not None:
        return forced
    try:
        # Imported lazily so a missing or broken detector counts as "not alive".
        from scripts.harness.v36.spawn_detector import (
            detect_spawn_state,
            state_ge,
            WORK_STARTED,
        )
        detected = detect_spawn_state(task_id)
        current_state = detected["state"]
        return state_ge(current_state, WORK_STARTED)
    except Exception:
        return False


def _check_bot_session_active(overrides: dict) -> bool:
    """Report whether a bot session process other than the excluded one exists.

    A non-None ``overrides["bot_session_active"]`` is returned directly.
    Otherwise returns True when ``_list_claude_processes()`` yields at least
    one truthy entry that differs from ``_get_anu_session_hex()``.
    Any failure (including the import) yields False.
    """
    forced = overrides.get("bot_session_active")
    if forced is not None:
        return forced
    try:
        from scripts.harness.v36.spawn_detector import (
            _list_claude_processes,
            _get_anu_session_hex,
        )
        # NOTE(review): presumably the "anu" session is the orchestrator's own
        # session and is excluded for that reason — confirm in spawn_detector.
        excluded_hex = _get_anu_session_hex()
        return any(
            candidate and candidate != excluded_hex
            for candidate in _list_claude_processes()
        )
    except Exception:
        return False


def _check_worktree_fresh(task_id: str, overrides: dict, now: float) -> bool:
    """Report whether the task's worktree counts as recently modified.

    A non-None ``overrides["worktree_mtime_lt_900"]`` is returned directly.
    Otherwise the value from ``_get_worktree_mtime_seconds(task_id, now)`` must
    be non-None and strictly below ``_ALIVE_THRESHOLD_SECONDS``.
    Any failure (including the import) yields False.
    """
    forced = overrides.get("worktree_mtime_lt_900")
    if forced is not None:
        return forced
    try:
        from scripts.harness.v36.spawn_detector import _get_worktree_mtime_seconds
        # NOTE(review): presumably an age in seconds relative to `now`, with
        # None meaning "no worktree found" — confirm in spawn_detector.
        age = _get_worktree_mtime_seconds(task_id, now)
        if age is None:
            return False
        return age < _ALIVE_THRESHOLD_SECONDS
    except Exception:
        return False


def _check_artifact_fresh(task_id: str, events_dir: str, overrides: dict, now: float) -> bool:
    """Report whether the task has a recently modified artifact.

    A non-None ``overrides["artifact_mtime_lt_900"]`` is returned directly.
    Otherwise the value from
    ``_get_artifact_mtime_seconds(task_id, events_dir, now)`` must be non-None
    and strictly below ``_ALIVE_THRESHOLD_SECONDS``.
    Any failure (including the import) yields False.
    """
    forced = overrides.get("artifact_mtime_lt_900")
    if forced is not None:
        return forced
    try:
        from scripts.harness.v36.spawn_detector import _get_artifact_mtime_seconds
        # NOTE(review): presumably an age in seconds relative to `now`, with
        # None meaning "no artifact found" — confirm in spawn_detector.
        age = _get_artifact_mtime_seconds(task_id, events_dir, now)
        if age is None:
            return False
        return age < _ALIVE_THRESHOLD_SECONDS
    except Exception:
        return False


# ─── CLI wrapper ─────────────────────────────────────────────────────────────

def _cli_main() -> None:
    """Command-line front end: parse flags, evaluate, print the verdict as JSON.

    Reads options from ``sys.argv`` via argparse, passes them to
    ``evaluate_alert`` and writes the resulting dict to stdout as indented,
    non-ASCII-escaped JSON. The testing overrides are not exposed on the CLI.
    """
    import argparse
    import json as _json

    cli = argparse.ArgumentParser(
        description="Evaluate watchdog suppression gate — prints JSON verdict"
    )
    add = cli.add_argument
    add("--task-id", required=True, help="Task ID")
    add("--hb-age", type=int, required=True, help="Heartbeat age in seconds (-1 if unknown)")
    add("--ev-age", type=int, required=True, help="Events file age in seconds (-1 if unknown)")
    add("--retry-count", type=int, default=0)
    add("--max-retry", type=int, default=2)
    add("--has-progress-marker", action="store_true", default=False)
    add("--consecutive-stalled-cycles", type=int, default=0)
    add("--is-critical-7", action="store_true", default=False)
    add("--explicit-chair-escalate", action="store_true", default=False)
    add("--events-dir", default=None)

    opts = cli.parse_args()
    # Each argparse destination (dashes become underscores) is exactly the
    # evaluate_alert keyword of the same name, so the namespace maps 1:1.
    verdict = evaluate_alert(**vars(opts))
    print(_json.dumps(verdict, ensure_ascii=False, indent=2))


# Script entry point: run the CLI only when this module is executed directly
# (e.g. via `python -m`), never as a side effect of importing it.
if __name__ == "__main__":
    _cli_main()
