# -*- coding: utf-8 -*-
"""tests.test_prompt_byte_classifier_2645 — task-2645 regression (회장 verbatim).

Covers the chair's four verbatim byte bands, the 4096-byte absolute channel
limit, and path-only compact mode. Sizes are measured in bytes (``wc -c``).
"""
from __future__ import annotations

import hashlib
import json
import subprocess
from pathlib import Path

import pytest

from utils.prompt_byte_classifier import (
    CHANNEL_ABSOLUTE_OVERFLOW,
    COKACDIR_CHANNEL_ABSOLUTE_LIMIT,
    HARD_BLOCK,
    OK_ABOVE_TARGET,
    OK_TARGET,
    PROMPT_HARD_BLOCK_THRESHOLD,
    PROMPT_OK_ABOVE_TARGET_MAX,
    PROMPT_OK_TARGET_MAX,
    PROMPT_WARNING_MAX,
    WARNING_BUT_ALLOWED,
    build_path_only_compact_prompt,
    classify_byte_count,
    classify_prompt_bytes,
    is_allowed,
    measure_utf8_bytes,
    requires_path_only_compact,
)


# Directory, next to this test module, that holds the JSON fixtures loaded below.
FIXTURE_DIR = Path(__file__).parent.joinpath("fixtures", "dispatch_gate")


# ── 박제 상수 검증 (회장 verbatim 4 구간 paraphrase 금지) ──────────────────
def test_policy_constants_match_chair_verbatim() -> None:
    """Pin every byte-threshold policy constant to its exact expected value."""
    actual_by_name = {
        "PROMPT_OK_TARGET_MAX": PROMPT_OK_TARGET_MAX,
        "PROMPT_OK_ABOVE_TARGET_MAX": PROMPT_OK_ABOVE_TARGET_MAX,
        "PROMPT_WARNING_MAX": PROMPT_WARNING_MAX,
        "PROMPT_HARD_BLOCK_THRESHOLD": PROMPT_HARD_BLOCK_THRESHOLD,
        "COKACDIR_CHANNEL_ABSOLUTE_LIMIT": COKACDIR_CHANNEL_ABSOLUTE_LIMIT,
    }
    expected_by_name = {
        "PROMPT_OK_TARGET_MAX": 3200,
        "PROMPT_OK_ABOVE_TARGET_MAX": 3499,
        "PROMPT_WARNING_MAX": 3900,
        "PROMPT_HARD_BLOCK_THRESHOLD": 3900,
        "COKACDIR_CHANNEL_ABSOLUTE_LIMIT": 4096,
    }
    # A single mapping comparison lets a failure name the drifting constant.
    assert actual_by_name == expected_by_name


def test_verdict_labels_are_chair_verbatim() -> None:
    """Each verdict constant must hold exactly its own name as a string."""
    label_table = (
        (OK_TARGET, "OK_TARGET"),
        (OK_ABOVE_TARGET, "OK_ABOVE_TARGET"),
        (WARNING_BUT_ALLOWED, "WARNING_BUT_ALLOWED"),
        (HARD_BLOCK, "HARD_BLOCK"),
        (CHANNEL_ABSOLUTE_OVERFLOW, "CHANNEL_ABSOLUTE_OVERFLOW"),
    )
    for constant, literal in label_table:
        assert constant == literal


# ── 측정 단위: wc -c bytes vs wc -m chars ────────────────────────────────
def test_measure_utf8_bytes_matches_wc_c(tmp_path: Path) -> None:
    """Check that ``measure_utf8_bytes`` agrees with ``wc -c`` on raw UTF-8 bytes.

    The sample is written to a temporary file and streamed to ``wc -c`` on
    stdin. No shell is involved, so the check does not depend on ``bash``
    being installed, nor on the temporary path being free of spaces or shell
    metacharacters.
    """
    sample = "한글 prompt 측정 — UTF-8 multibyte 검증 / 2645"
    py_bytes = measure_utf8_bytes(sample)
    raw = tmp_path / "p.bin"
    raw.write_bytes(sample.encode("utf-8"))
    with raw.open("rb") as stream:
        # Reading from stdin makes ``wc`` print only the count (no file name).
        proc = subprocess.run(
            ["wc", "-c"],
            stdin=stream, capture_output=True, text=True, check=True,
        )
    # Some ``wc`` implementations left-pad the count; strip before parsing.
    wc_bytes = int(proc.stdout.strip())
    assert py_bytes == wc_bytes, f"py={py_bytes} wc={wc_bytes}"
    # Explicitly confirm the byte count differs from the character count
    # (the sample contains multi-byte characters such as Hangul).
    assert py_bytes != len(sample)


def test_measure_utf8_bytes_handles_none_and_invalid() -> None:
    """``None`` measures as zero bytes; a ``bytes`` argument raises ``TypeError``."""
    none_size = measure_utf8_bytes(None)
    assert none_size == 0

    not_a_str = b"bytes-not-str"
    with pytest.raises(TypeError):
        measure_utf8_bytes(not_a_str)


# ── 4 구간 경계 verbatim 검증 ────────────────────────────────────────────
@pytest.mark.parametrize(
    "n,verdict,allowed,warning,hard_block,channel,compact",
    [
        (0,    OK_TARGET,                  True,  False, False, False, False),
        (3200, OK_TARGET,                  True,  False, False, False, False),
        (3201, OK_ABOVE_TARGET,            True,  False, False, False, False),
        (3499, OK_ABOVE_TARGET,            True,  False, False, False, False),
        (3500, WARNING_BUT_ALLOWED,        True,  True,  False, False, False),
        (3900, WARNING_BUT_ALLOWED,        True,  True,  False, False, False),
        (3901, HARD_BLOCK,                 False, False, True,  False, True),
        (4000, HARD_BLOCK,                 False, False, True,  False, True),
        (4096, HARD_BLOCK,                 False, False, True,  False, True),
        (4097, CHANNEL_ABSOLUTE_OVERFLOW,  False, False, True,  True,  True),
        (6500, CHANNEL_ABSOLUTE_OVERFLOW,  False, False, True,  True,  True),
    ],
)
def test_classify_byte_count_boundaries(
    n: int, verdict: str, allowed: bool, warning: bool,
    hard_block: bool, channel: bool, compact: bool,
) -> None:
    """Verify the verdict and every flag at each band boundary.

    Each row gives a byte count, the verdict expected for it, and the five
    boolean flags the classification result must carry.
    """
    result = classify_byte_count(n)
    assert result.verdict == verdict

    # Flags are compared by identity so only real ``bool`` values pass.
    expected_flags = (
        ("allowed", allowed),
        ("warning", warning),
        ("hard_block", hard_block),
        ("channel_overflow", channel),
        ("requires_compact_mode", compact),
    )
    for attribute, wanted in expected_flags:
        assert getattr(result, attribute) is wanted

    # The result must echo back the byte count it was given.
    assert result.utf8_bytes == n


def test_classify_byte_count_rejects_negative() -> None:
    """A negative byte count must raise ``ValueError``."""
    negative_count = -1
    with pytest.raises(ValueError):
        classify_byte_count(negative_count)


# ── §17.14 3270 bytes prompt MUST be allowed regression ─────────────────
def test_3270_bytes_prompt_must_be_allowed_regression() -> None:
    """Regression (§17.14): a 3270-byte prompt must be allowed.

    The byte length and expected verdict come from the
    ``prompt_3270_allow.json`` fixture; the prompt is built from ASCII
    characters so that its byte count equals its character count.
    """
    fixture_path = FIXTURE_DIR / "prompt_3270_allow.json"
    # Decode explicitly as UTF-8 so the fixture parses identically regardless
    # of the platform's locale default encoding.
    fix = json.loads(fixture_path.read_text(encoding="utf-8"))
    n = fix["input"]["prompt_byte_length"]
    prompt = "x" * n  # ASCII -> bytes = chars
    assert measure_utf8_bytes(prompt) == 3270
    c = classify_prompt_bytes(prompt)
    # Checked separately so a failure shows whether the fixture or the
    # classifier disagrees with the expected verdict.
    assert fix["expected"]["verdict"] == OK_ABOVE_TARGET
    assert c.verdict == OK_ABOVE_TARGET
    assert c.allowed is True
    assert c.hard_block is False
    assert c.requires_compact_mode is False
    assert is_allowed(prompt) is True


# ── §17.15 3901 bytes prompt MUST be blocked regression ─────────────────
def test_3901_bytes_prompt_must_be_blocked_regression() -> None:
    """Regression (§17.15): a 3901-byte prompt must be hard-blocked.

    The byte length and expected verdict come from the
    ``prompt_3901_block.json`` fixture; the prompt is built from ASCII
    characters so that its byte count equals its character count.
    """
    fixture_path = FIXTURE_DIR / "prompt_3901_block.json"
    # Decode explicitly as UTF-8 so the fixture parses identically regardless
    # of the platform's locale default encoding.
    fix = json.loads(fixture_path.read_text(encoding="utf-8"))
    n = fix["input"]["prompt_byte_length"]
    prompt = "x" * n
    assert measure_utf8_bytes(prompt) == 3901
    c = classify_prompt_bytes(prompt)
    # Checked separately so a failure shows whether the fixture or the
    # classifier disagrees with the expected verdict.
    assert fix["expected"]["verdict"] == HARD_BLOCK
    assert c.verdict == HARD_BLOCK
    assert c.allowed is False
    assert c.hard_block is True
    assert c.requires_compact_mode is True
    assert is_allowed(prompt) is False
    assert requires_path_only_compact(prompt) is True


# ── §17.16 4096+ silent drop fixture (673AA5A6 박제) ────────────────────
def test_4096_plus_silent_drop_fixture_chair_overflow() -> None:
    """Regression (§17.16): over-limit prompts classify as channel overflow.

    Both byte lengths in the ``prompt_4096_plus_silent_drop.json`` fixture
    (the primary one and the alternative one) must produce a verdict with
    ``channel_overflow`` set.
    """
    fixture_path = FIXTURE_DIR / "prompt_4096_plus_silent_drop.json"
    # Decode explicitly as UTF-8 so the fixture parses identically regardless
    # of the platform's locale default encoding.
    fix = json.loads(fixture_path.read_text(encoding="utf-8"))
    n = fix["input"]["prompt_byte_length"]
    prompt = "x" * n
    c = classify_prompt_bytes(prompt)
    # Checked separately so a failure shows whether the fixture or the
    # classifier disagrees with the expected verdict.
    assert fix["expected"]["verdict"] == CHANNEL_ABSOLUTE_OVERFLOW
    assert c.verdict == CHANNEL_ABSOLUTE_OVERFLOW
    assert c.channel_overflow is True
    assert c.hard_block is True
    # The alternative (estimated ~6500-byte wrapped) payload must be blocked
    # in the same way.
    n2 = fix["input"]["alternative_byte_length"]
    c2 = classify_prompt_bytes("y" * n2)
    assert c2.verdict == fix["expected"]["alternative_verdict_for_6500"]
    assert c2.channel_overflow is True


# ── §17.7 path-only compact mode ─────────────────────────────────────────
def test_path_only_compact_mode_uses_task_md_path_and_sha(tmp_path: Path) -> None:
    """The compact prompt must carry the task file path, its SHA-256, and the marker."""
    spec_bytes = b"# task-2645 verbatim spec"
    spec_file = tmp_path / "task-2645.md"
    spec_file.write_bytes(spec_bytes)
    expected_digest = hashlib.sha256(spec_bytes).hexdigest()

    compact = build_path_only_compact_prompt(spec_file, extra_context="dispatch verify")

    required_fragments = (
        str(spec_file),
        expected_digest,
        "[DISPATCH_PATH_ONLY_COMPACT]",
    )
    for fragment in required_fragments:
        assert fragment in compact
    # The compact prompt itself must stay within PROMPT_WARNING_MAX bytes
    # (<= 3900) to remain envelope-friendly.
    compact_size = measure_utf8_bytes(compact)
    assert compact_size <= PROMPT_WARNING_MAX


def test_path_only_compact_handles_missing_file(tmp_path: Path) -> None:
    """A task file that does not exist yields a prompt containing ``MISSING``."""
    absent_file = tmp_path.joinpath("nonexistent.md")
    assert "MISSING" in build_path_only_compact_prompt(absent_file)


# ── JSON serialization 1:1 schema ────────────────────────────────────────
def test_classification_json_includes_policy_and_verdict() -> None:
    """``to_json()`` must expose the schema id, the verdict, and the policy limits."""
    payload = classify_byte_count(3270).to_json()
    assert payload["schema"] == "dispatch.prompt_byte_classification.v1"
    assert payload["verdict"] == OK_ABOVE_TARGET

    policy = payload["policy"]
    expected_limits = (
        ("ok_target_max", 3200),
        ("warning_max", 3900),
        ("channel_absolute_limit", 4096),
    )
    for key, limit in expected_limits:
        assert policy[key] == limit
