"""task-2736 — harness-first PreToolUse gate repair regression tests.

Covers the chair item #9 regression matrix (instruction §6):
  1. guard.py / logger.py import PASS
  2. pre_tool_use_v36_harness.py standalone run PASS (mock event)
  3. mock high-risk payload is NOT unconditionally ALLOW (BLOCK or degraded)
  4. benign / mock-safe payload PASS (normal ALLOW)
  5. crash log: no repeated stage=import_guard
  6. existing finish-task / dispatch / callback modules unaffected (coexistence)

Plus the §3 fail-behavior design and §2 6-rule core / §1.4 mapping.

Isolation: every test runs with ANU_V36_HARNESS_TEST_MODE=1 so JSONL writes go
to /tmp. No real tool is executed — only mock payloads / dry-run.
"""
from __future__ import annotations

import json
import os
import subprocess
import sys
from pathlib import Path

import pytest

# Repo root: the third ancestor directory of this file (parents[2]), e.g. the
# .worktrees/task-2736-dev2 checkout.  _run_hook exports it to the hook
# subprocess as both ANU_WORKSPACE_ROOT and PYTHONPATH.
_REPO_ROOT = Path(__file__).resolve().parents[2]
# Hook script under the current user's home directory; _run_hook skips the
# calling test when this file does not exist.
_HOOK_PATH = Path(os.path.expanduser("~/.claude/hooks/pre_tool_use_v36_harness.py"))


@pytest.fixture(autouse=True)
def _test_mode_env(monkeypatch):
    """Pin the harness-related environment for every test in this module.

    Removes ``V36_HARNESS_LOG_ALLOW`` (if set) and sets
    ``ANU_V36_HARNESS_TEST_MODE=1`` through ``monkeypatch``.
    """
    # Drop any ambient value so the degraded ALLOW marker path doesn't depend
    # on prod logging.
    monkeypatch.delenv("V36_HARNESS_LOG_ALLOW", raising=False)
    monkeypatch.setenv("ANU_V36_HARNESS_TEST_MODE", "1")
    yield


# ---------------------------------------------------------------------------
# §6.1 — import PASS
# ---------------------------------------------------------------------------
def test_guard_logger_import_pass():
    """§6.1 — guard, logger, rules and schema modules import cleanly."""
    # For the names marked F401 the successful import is itself the check.
    from scripts.harness.v36.guard import evaluate, is_high_risk  # noqa: F401
    from scripts.harness.v36.logger import log_decision, log_error  # noqa: F401
    from scripts.harness.v36.rules import ALL_RULES
    from scripts.harness.v36.schema import validate_record  # noqa: F401

    for entry_point in (evaluate, log_decision, log_error):
        assert callable(entry_point)
    # The rule registry must expose exactly the six core rules.
    assert len(ALL_RULES) == 6


# ---------------------------------------------------------------------------
# §2 — interface contract matches what the hook calls
# ---------------------------------------------------------------------------
def test_evaluate_interface_contract():
    """§2 — ``evaluate`` returns a mapping with the keys the hook reads."""
    from scripts.harness.v36.guard import evaluate

    required_keys = {"decision", "matched_rule", "reason"}
    known_decisions = ("ALLOW", "DENY", "HOLD_FOR_CHAIR")

    verdict = evaluate("Bash", {"command": "ls"}, {"task_id": "t", "session_id": "s"})

    assert required_keys.issubset(verdict.keys())
    assert verdict["decision"] in known_decisions


# ---------------------------------------------------------------------------
# §6.4 — benign payload → clean ALLOW
# ---------------------------------------------------------------------------
@pytest.mark.parametrize(
    "cmd",
    [
        "ls -la",
        "git status",
        "cat README.md",
        "python3 -m pytest -q",
        "echo hello",
    ],
)
def test_benign_payload_allows(cmd):
    """§6.4 — a benign Bash command is allowed without matching any rule."""
    from scripts.harness.v36.guard import evaluate

    tool_input = {"command": cmd}
    verdict = evaluate("Bash", tool_input, {})

    assert verdict["decision"] == "ALLOW"
    # No matched rule means a clean ALLOW rather than a degraded one.
    assert verdict.get("matched_rule") is None


# ---------------------------------------------------------------------------
# §2 / §1.4 — 6-rule normal-path enforcement still works
# ---------------------------------------------------------------------------
def test_rule5_forbidden_git_push_denies():
    """Rule 5 — a direct ``git push`` is denied by the forbidden-shell rule."""
    from scripts.harness.v36.guard import evaluate

    tool_input = {"command": "git push origin main"}
    outcome = evaluate("Bash", tool_input, {})

    assert outcome["decision"] == "DENY"
    assert outcome["matched_rule"] == "pattern.forbidden_tool_or_shell"


def test_rule5_branch_delete_main_denies():
    """Rule 5 — force-deleting the ``main`` branch is denied."""
    from scripts.harness.v36.guard import evaluate

    tool_input = {"command": "git branch -D main"}
    outcome = evaluate("Bash", tool_input, {})

    assert outcome["decision"] == "DENY"


def test_rule2_direct_ci_watch_denies():
    """Rule 2 — watching a CI run directly (``gh run watch``) is denied."""
    from scripts.harness.v36.guard import evaluate

    tool_input = {"command": "gh run watch 123"}
    outcome = evaluate("Bash", tool_input, {})

    assert outcome["decision"] == "DENY"
    assert outcome["matched_rule"] == "pattern.anu_direct_ci_gemini_wait"


def test_rule6_doctrine_only_holds_for_chair():
    """Rule 6 — finishing a task with only new markdown is held for the chair."""
    from scripts.harness.v36.guard import evaluate

    # Context describing a change set with markdown files but no code files.
    doctrine_only_ctx = {"new_code_files": 0, "new_md_files": 3}
    tool_input = {"command": "bash finish-task.sh"}

    outcome = evaluate("Bash", tool_input, doctrine_only_ctx)

    assert outcome["decision"] == "HOLD_FOR_CHAIR"
    assert outcome["matched_rule"] == "pattern.doctrine_only_no_code"


# ---------------------------------------------------------------------------
# §3 / §6.3 — fail behavior: high-risk on the DEGRADED path is NOT a silent
# ALLOW.  We force the normal evaluation path to throw, then assert the
# degraded decision fails closed for high-risk and stays ALLOW (marked) for
# benign.
# ---------------------------------------------------------------------------
def _force_core_failure(monkeypatch):
    """Replace ``guard._evaluate_core`` with a stub that always raises.

    The stub raises ``RuntimeError("forced core failure")`` whatever it is
    called with; ``monkeypatch`` undoes the replacement after the test.
    """
    import scripts.harness.v36.guard as guard_module

    def _always_raise(*_args, **_kwargs):
        raise RuntimeError("forced core failure")

    monkeypatch.setattr(guard_module, "_evaluate_core", _always_raise)


@pytest.mark.parametrize(
    "cmd",
    [
        "rm -rf /home/jay/workspace",
        "rm -rf /",
        "git push --force origin main",
        "git push -f origin feature",
        "git reset --hard HEAD~3",
        "git branch -D main",
        "/usr/local/bin/cokacdir --cron-remove abc123 --chat 1 --key k",
        "gh pr merge 42 --merge",
        "systemctl enable myservice",
    ],
)
def test_degraded_high_risk_not_allowed(monkeypatch, cmd):
    """§3 / §6.3 — with the core evaluator broken, high-risk commands are denied."""
    from scripts.harness.v36.guard import evaluate

    _force_core_failure(monkeypatch)
    task_ctx = {"task_id": "task-2736"}

    outcome = evaluate("Bash", {"command": cmd}, task_ctx)

    assert outcome["decision"] == "DENY", f"high-risk must fail closed on degraded path: {cmd!r}"
    # The result must be flagged as degraded and attributed to a degraded.* rule.
    assert outcome.get("degraded") is True
    assert outcome["matched_rule"].startswith("degraded.")


def test_degraded_benign_still_allows(monkeypatch):
    """§3 — with the core evaluator broken, a benign command gets a marked ALLOW."""
    from scripts.harness.v36.guard import evaluate

    _force_core_failure(monkeypatch)
    task_ctx = {"task_id": "task-2736"}

    outcome = evaluate("Bash", {"command": "ls -la"}, task_ctx)

    assert outcome["decision"] == "ALLOW"
    # The ALLOW is still labelled as coming from the degraded path.
    assert outcome.get("degraded") is True
    assert outcome["matched_rule"] == "degraded.allow_benign"


def test_degraded_writes_structured_error_log(monkeypatch, tmp_path):
    """§3.1 — a degraded evaluation must leave a structured error record."""
    import scripts.harness.v36.logger as harness_logger
    from scripts.harness.v36.guard import evaluate

    # Point the logger's test error log at a per-test temporary file.
    error_log = tmp_path / "err.jsonl"
    monkeypatch.setattr(harness_logger, "_TEST_ERROR_JSONL", str(error_log))
    _force_core_failure(monkeypatch)

    evaluate("Bash", {"command": "rm -rf /home/jay/workspace"}, {"task_id": "task-2736"})

    assert error_log.exists(), "structured error log must be written on degraded path"
    # Only the most recent JSONL line is inspected.
    log_lines = error_log.read_text().strip().splitlines()
    last_record = json.loads(log_lines[-1])
    assert last_record["stage"] == "evaluate_core"
    assert last_record["degraded"] is True
    assert last_record["exc_type"] == "RuntimeError"


def test_is_high_risk_production_write_path():
    """``is_high_risk`` flags a systemd unit write but not a notes file write."""
    from scripts.harness.v36.guard import is_high_risk

    systemd_unit_write = {"file_path": "/etc/systemd/system/anu.service"}
    notes_write = {"file_path": "scripts/notes.md"}

    assert is_high_risk("Write", systemd_unit_write) is True
    assert is_high_risk("Write", notes_write) is False


# ---------------------------------------------------------------------------
# §6.2 / §6.5 — hook standalone run on mock events; no import_guard crash.
# Runs the real hook with ANU_WORKSPACE_ROOT pointed at this worktree so it
# imports the restored guard.  No real tool is ever executed.
# ---------------------------------------------------------------------------
def _run_hook(event: dict, extra_env: dict | None = None) -> subprocess.CompletedProcess:
    """Run the real hook script in a subprocess, feeding it a mock event.

    Skips the calling test when the hook file is not present.

    Args:
        event: Mock event; serialized to JSON and written to the hook's stdin.
        extra_env: Optional environment overrides, applied last so they win
            over both the inherited environment and the values set here.

    Returns:
        The ``subprocess.CompletedProcess`` with text-mode stdout/stderr
        captured.

    Raises:
        subprocess.TimeoutExpired: If the hook does not exit within 30 seconds.
    """
    if not _HOOK_PATH.exists():
        pytest.skip(f"hook not present at {_HOOK_PATH}")

    env = dict(os.environ)
    # An ambient dry-run flag from the developer/CI shell would turn every
    # DENY into exit 0 and break the block-path tests; only a test that
    # passes it explicitly via extra_env should run the hook in dry-run mode.
    env.pop("ANU_V36_HARNESS_DRY_RUN", None)
    env["ANU_WORKSPACE_ROOT"] = str(_REPO_ROOT)
    env["ANU_V36_HARNESS_TEST_MODE"] = "1"
    env["PYTHONPATH"] = str(_REPO_ROOT)
    if extra_env:
        env.update(extra_env)

    return subprocess.run(
        [sys.executable, str(_HOOK_PATH)],
        input=json.dumps(event),
        capture_output=True,
        text=True,
        env=env,
        timeout=30,
    )


def _crash_log_path() -> Path:
    """Return the harness crash-log location under the repo root."""
    return _REPO_ROOT.joinpath("memory/system/.v36_harness_crash.jsonl")


def test_hook_standalone_benign_allows():
    """§6.2 / §6.5 — the hook exits 0 on a benign event without an import_guard crash."""
    crash_log = _crash_log_path()

    def _snapshot() -> str:
        # A missing crash log is treated as empty.
        return crash_log.read_text() if crash_log.exists() else ""

    # NOTE(review): the module docstring says test mode redirects JSONL writes
    # to /tmp — confirm the crash log is still written at this path in test
    # mode, otherwise the import_guard check below can never fail.
    before = _snapshot()
    proc = _run_hook({"tool_name": "Bash", "tool_input": {"command": "ls -la"}})
    assert proc.returncode == 0, f"benign should ALLOW (exit 0); stderr={proc.stderr}"

    # §6.5 — no NEW import_guard crash appended by this run.
    after = _snapshot()
    appended = after[len(before):]
    assert "import_guard" not in appended, "guard import must succeed (no import_guard crash)"


def test_hook_standalone_forbidden_blocks():
    """§6.3 — the hook blocks a forbidden command: exit 2 plus a block JSON."""
    forbidden_event = {"tool_name": "Bash", "tool_input": {"command": "git push origin main"}}

    proc = _run_hook(forbidden_event)

    # DENY → hook exits 2 and emits a block JSON on stdout.
    assert proc.returncode == 2, f"forbidden should BLOCK (exit 2); stdout={proc.stdout}"
    block_payload = json.loads(proc.stdout)
    assert block_payload["decision"] == "block"
    assert "V36_HARNESS" in block_payload["reason"]


def test_hook_dry_run_converts_deny_to_allow():
    """With ``ANU_V36_HARNESS_DRY_RUN=1`` a forbidden command exits 0."""
    forbidden_event = {"tool_name": "Bash", "tool_input": {"command": "git push origin main"}}
    dry_run_env = {"ANU_V36_HARNESS_DRY_RUN": "1"}

    proc = _run_hook(forbidden_event, extra_env=dry_run_env)

    assert proc.returncode == 0, "dry-run converts DENY → ALLOW (would-have-denied)"


# ---------------------------------------------------------------------------
# §6.6 — coexistence: restoring these modules must not break the other
# scripts.harness.v36.* modules already on canonical main.
# ---------------------------------------------------------------------------
def test_existing_harness_modules_still_importable():
    """§6.6 — the other harness modules still import next to the restored ones."""
    import importlib

    coexisting_modules = (
        "scripts.harness.v36.terminal_state_callback",
        "scripts.harness.v36.terminal_state_classifier",
    )
    # A failing import raises and so fails the test; no further assertion needed.
    for dotted_name in coexisting_modules:
        importlib.import_module(dotted_name)


# ---------------------------------------------------------------------------
# schema contract sanity (DENY records require matched_rule + reason)
# ---------------------------------------------------------------------------
def test_schema_validates_deny_record():
    """A DENY record carrying ``matched_rule`` and ``reason`` passes validation."""
    from scripts.harness.v36.schema import validate_record

    deny_record = {
        "ts": "2026-06-12T00:00:00+00:00",
        "timestamp": 1.0,
        "decision": "DENY",
        "matched_rule": "pattern.forbidden_tool_or_shell",
        "reason": "x",
        "command_or_tool": "git push",
        "task_id": "task-2736",
    }

    is_valid, problems = validate_record(deny_record)

    # Surface the validator's error list as the failure message.
    assert is_valid, problems
