# -*- coding: utf-8 -*-
"""task-2611+2 — C7_OWNER_PAT vs C7_CREDENTIAL precedence 정합 regression.

AUTO_REMEDIATION (SAFE·non-Critical): owner-scoped 신규 rule
(C7_OWNER_PAT_PRIORITY) 을 C7_CREDENTIAL 앞에 평가하여 owner-PAT 표기가
credential 대신 owner_pat 으로 정확 분류되도록 정합. additive only —
기존 selftest 17/17·8/8 CHAIR_HOLD·is_critical7·7 family·약화가드 무회귀.

Track B(task-2618) fix-and-regression-candidates REG-1..REG-9 계약 검증.
"""
from __future__ import annotations

from anu_v3.critical7_classifier import (
    CHAIR_HOLD,
    Critical7Ruleset,
    classify_critical7,
)


def _c(msg: str):
    """Classify a synthetic HIGH-severity finding whose message is *msg*.

    Builds a minimal finding dict (fixed id ``"t"``) and returns whatever
    ``classify_critical7`` returns for it.
    """
    finding = {"id": "t", "severity": "HIGH", "message": msg}
    return classify_critical7(finding)


# ---- GOAL: owner-PAT wording → C7_OWNER_PAT family (precedence alignment) --------
def test_owner_personal_access_token_classifies_owner_pat():
    """'owner personal access token' wording must land in the owner_pat family.

    Also checks that the match comes from the C7_OWNER_PAT_PRIORITY rule and
    that the finding is escalated (CHAIR_HOLD, is_critical7).
    """
    outcome = _c(
        "executor used owner personal access token to authenticate GitHub API"
    )
    assert outcome.family == "owner_pat"
    assert outcome.matched_rule_id == "C7_OWNER_PAT_PRIORITY"
    assert outcome.verdict == CHAIR_HOLD
    assert outcome.is_critical7 is True


def test_owner_access_token_classifies_owner_pat():
    """'owner access token' wording must also be classified as owner_pat."""
    # REG-5: the owner-scoped rule also corrects the p2 mismatch that
    # FIX-A alone left behind.
    outcome = _c("used owner access token for push")
    assert outcome.family == "owner_pat"
    assert outcome.matched_rule_id == "C7_OWNER_PAT_PRIORITY"
    assert outcome.verdict == CHAIR_HOLD
    assert outcome.is_critical7 is True


# ---- REG-2 / REG-6: a bare 'personal access token' without the owner ------
#      prefix stays credential (preserves the selftest f-pat-words contract
#      and label precision) -------------------------------------------------
def test_bare_personal_access_token_stays_credential():
    """A 'personal access token' message without 'owner' stays credential.

    The finding must still be escalated (CHAIR_HOLD, is_critical7).
    """
    outcome = _c("leaked personal access token in build log")
    assert outcome.family == "credential"
    assert outcome.verdict == CHAIR_HOLD
    assert outcome.is_critical7 is True


def test_bare_access_token_stays_credential():
    """An 'access token' message without 'owner' stays in the credential family."""
    outcome = _c("rotated access token leaked in CI artifact")
    assert outcome.family == "credential"
    assert outcome.is_critical7 is True


# ---- REG-1: existing owner_pat contract, no regression --------------------
def test_owner_pat_phrase_still_owner_pat():
    """The literal 'OWNER PAT' phrase is still classified as owner_pat."""
    outcome = _c("used OWNER PAT to authenticate GitHub API")
    assert outcome.family == "owner_pat"
    assert outcome.is_critical7 is True


def test_fine_grained_pat_still_owner_pat():
    """A 'fine-grained PAT' message is still classified as owner_pat."""
    outcome = _c("used a fine-grained PAT for GitHub API call")
    assert outcome.family == "owner_pat"
    assert outcome.is_critical7 is True


# ---- REG-3: pure credential findings are unaffected -----------------------
def test_ghp_token_still_credential():
    """A hardcoded ``ghp_`` key message is still classified as credential."""
    outcome = _c("hardcoded api key ghp_ABCDEFGH12345678 committed")
    assert outcome.family == "credential"
    assert outcome.is_critical7 is True


# ---- REG-7: 7 families + weakening guard unchanged (additive, family reused) ----
def test_seven_families_present_and_guard_passes():
    """The loaded ruleset passes its weakening guard and has all seven families.

    Every missing family is reported in a single assertion message, so a
    failure shows the full gap instead of stopping at the first absent name.
    """
    ruleset = Critical7Ruleset.load()
    ruleset._assert_ruleset_not_weakened()
    required = {
        "security",
        "credential",
        "permission",
        "forbidden_path",
        "scope_expansion",
        "merge_write",
        "owner_pat",
    }
    present = {rule.get("family") for rule in ruleset.rules}
    missing = required - present
    assert not missing, f"ruleset is missing families: {sorted(missing)}"


# ---- REG-8: escalation route (CHAIR_HOLD) unchanged — coverage not weakened ----
def test_all_owner_credential_probes_remain_chair_hold():
    """Every owner/credential probe message is still escalated.

    Whichever family a probe lands in, it must yield CHAIR_HOLD and
    ``is_critical7 is True``; the probe text is the assertion message.
    """
    probe_messages = (
        "executor used owner personal access token to authenticate GitHub API",
        "used owner access token for push",
        "used OWNER PAT to authenticate GitHub API",
        "leaked personal access token in build log",
        "owner PAT leaked in CI logs",
        "privileged github token (owner token) used to merge",
        "hardcoded api key ghp_ABCDEFGH12345678 committed",
        "used a fine-grained PAT for GitHub API call",
    )
    for text in probe_messages:
        outcome = _c(text)
        assert outcome.verdict == CHAIR_HOLD, text
        assert outcome.is_critical7 is True, text


# ---- Existing entrypoint selftest, no regression (17/17 PASS) -------------
def test_builtin_selftest_no_regression():
    """The classifier module's built-in ``_selftest`` must return 0."""
    # Imported locally: ``_selftest`` is a private name used only by this test.
    from anu_v3.critical7_classifier import _selftest as run_selftest

    exit_code = run_selftest()
    assert exit_code == 0


# ── task-2620 §2.3 regression: closing negation/context-aware false positives ──
# Chairman 2026-05-20 verbatim §2.3: block false positives arising from the
# task-2615 C7R3 'credential write 0' REFUTATION, the task-2611+2 SAFE
# precedence, and keywords appearing in docs/tests (explanations / negated
# sentences / REFUTATION). Weakening guard unchanged; detection coverage
# reduced by 0; escalation route changes 0; genuine Critical7 8/8 unregressed.


# Case fixture (recorded from chairman §2.3) — canonical task-2615 C7R3 REFUTATION
def test_task2615_c7r3_credential_write_0_refutation_blocked():
    """C7R3 'credential write 0' refutations must not be flagged as Critical7.

    These messages are the canonical false-positive cases from the task-2615
    collector adjudication: negated or zero-count phrasing such as
    'credential write 0' or 'wrote no credential'. Each must classify as
    non-critical and must not produce a CHAIR_HOLD verdict.
    """
    refutations = (
        "PR/merge/credential write 0 — 본 task 는 credential 도 token 도 쓰지 않음",
        "credential write = 0 (no executor self-credential)",
        "executor wrote no credential — credential write 0 건",
        "REFUTATION: hardcoded api key 없음 (intended_is_critical7=False)",
        "false positive: credential write 0 in audit log",
    )
    for text in refutations:
        outcome = _c(text)
        assert not outcome.is_critical7, (
            f"REFUTATION false-positive 잔존: {text!r} -> {outcome.family}"
        )
        assert outcome.verdict != CHAIR_HOLD, (
            f"REFUTATION CHAIR_HOLD 잔존: {text!r}"
        )


def test_task2611p2_safe_precedence_negated_security_terms_blocked():
    """Explicitly negated security terms must not be flagged as Critical7.

    Covers the task-2611+2 SAFE precedence case: statements that a threat
    is absent ('없음', ': 0', '= 0') must classify as non-critical.
    """
    negated_statements = (
        "본 task SQL injection 없음 — read-only audit",
        "SQL injection: 0 (no entrypoint executes user-supplied SQL)",
        "REFUTATION: command injection 없음 (intended_is_critical7=False)",
        "forbidden path write = 0 (additive only)",
        "scope expansion 없음 — allowlist 한정",
    )
    for text in negated_statements:
        outcome = _c(text)
        assert not outcome.is_critical7, (
            f"SAFE precedence false-positive: {text!r} -> {outcome.family}"
        )


def test_doc_test_keyword_in_refutation_context_blocked():
    """Keywords quoted in doc/test context must not be flagged as Critical7.

    The messages mention sensitive keywords only as explanation, negation
    or REFUTATION text; each must classify as non-critical.
    """
    contextual_mentions = (
        "RFC: explain what 'forbidden path write' means — false-positive guard",
        "test fixture comment: REFUTATION sentence asserting credential write 0",
        "doc: 'git push to main' 에 대한 설명 (forbidden — false positive 회피)",
    )
    for text in contextual_mentions:
        outcome = _c(text)
        assert not outcome.is_critical7, (
            f"doc/test false-positive: {text!r} -> {outcome.family}"
        )


# ---- Genuine Critical7 8/8, no regression (escalation route preserved) ----
def test_genuine_critical7_eight_of_eight_still_chair_hold_no_weakening():
    """All eight genuine Critical7 messages are still flagged and escalated.

    None of these messages carries negation or REFUTATION wording, so each
    must classify with ``is_critical7 is True`` and a CHAIR_HOLD verdict.
    """
    genuine_messages = (
        "potential SQL injection in query builder (CWE-89)",  # security
        "hardcoded api key ghp_ABCDEFGH12345678 committed",  # credential
        "script runs chmod 777 on output dir, world-writable",  # permission
        "write outside allowlist — wrote to .git/config",  # forbidden_path
        "scope expansion: modified files not in task allowlist",  # scope_expansion
        "executor ran git push origin main",  # merge_write
        "used OWNER PAT to authenticate GitHub API",  # owner_pat
        "frozen anchor modified — byte-0 violation detected",  # invariant_break
    )
    for text in genuine_messages:
        outcome = _c(text)
        assert outcome.is_critical7 is True, (
            f"진성 Critical7 misclassification (약화): {text!r} -> {outcome.family}"
        )
        assert outcome.verdict == CHAIR_HOLD, (
            f"진성 Critical7 escalation 약화: {text!r}"
        )


def test_negation_preprocessor_does_not_drop_partial_match_when_real_signal_present():
    """A real signal survives when it shares a message with a negated mention.

    The message holds a negated 'credential write 0' occurrence and a genuine
    hardcoded ``ghp_`` key; the finding must still be classified as a
    credential Critical7 with a CHAIR_HOLD verdict.
    """
    negated_part = "credential write 0 in audit log (REFUTATION). "
    genuine_part = "However, hardcoded api key ghp_REALLEAK12345678 committed in PR."
    outcome = _c(negated_part + genuine_part)
    assert outcome.is_critical7 is True
    # The genuine ghp_ token match must survive the negated mention.
    assert outcome.family == "credential"
    assert outcome.verdict == CHAIR_HOLD


def test_negation_preprocessor_invariant_break_signals_byte_0():
    """A category/tag-based invariant_break finding survives a REFUTATION marker.

    After checking the ruleset's weakening guard, classifies a finding whose
    category and tags signal an invariant break while its message contains
    'REFUTATION'; it must still be invariant_break, Critical7 and CHAIR_HOLD.
    """
    ruleset = Critical7Ruleset.load()
    ruleset._assert_ruleset_not_weakened()
    # The invariant-break signal is carried by category/tags, not the message.
    finding = {
        "id": "f-cat-inv",
        "severity": "HIGH",
        "category": "shared_invariant_breach",
        "tags": ["frozen-anchor-modified"],
        # Even with a 'REFUTATION' marker in the message, the category
        # match is expected to survive.
        "message": "REFUTATION test — but category is shared_invariant_breach",
    }
    outcome = classify_critical7(finding)
    assert outcome.is_critical7 is True
    assert outcome.family == "invariant_break"
    assert outcome.verdict == CHAIR_HOLD


def test_seven_family_ruleset_byte0_after_negation_patch():
    """The weakening guard passes and rule precedence is preserved.

    C7_OWNER_PAT_PRIORITY must come before C7_CREDENTIAL in the loaded rule
    list. Both ids are first asserted to be present, so a missing rule fails
    with a clear message rather than a bare ``ValueError`` from ``list.index``.
    """
    ruleset = Critical7Ruleset.load()
    ruleset._assert_ruleset_not_weakened()
    rule_ids = [rule.get("id") for rule in ruleset.rules]
    for rule_id in ("C7_OWNER_PAT_PRIORITY", "C7_CREDENTIAL"):
        assert rule_id in rule_ids, f"rule {rule_id} missing from ruleset"
    # Rule order encodes precedence: the owner-scoped rule must come first.
    owner_pos = rule_ids.index("C7_OWNER_PAT_PRIORITY")
    credential_pos = rule_ids.index("C7_CREDENTIAL")
    assert owner_pos < credential_pos, (
        "C7_OWNER_PAT_PRIORITY must precede C7_CREDENTIAL "
        f"(positions {owner_pos} vs {credential_pos})"
    )
