# -*- coding: utf-8 -*-
"""utils.source_attribution_guard_v2 — Stop hook 사후 거짓표현 차단 가드 (v2).

task-2644+1 ANU_CALLBACK_COLLECTOR_CONTROL_PLANE_CLEAN_REPLACEMENT
task md: memory/tasks/task-2644+1.md
spec (read-only): memory/specs/system_anu_callback_collector_control_plane_spec_260524.md

회장 verbatim 8 enum (보강-1 + §15):
    1. RECEIVED_INBOUND_THIS_SESSION
    2. LOG_LOOKUP_AFTER_CHAIR_QUESTION
    3. LOG_LOOKUP_PROACTIVE
    4. MEMORY_RECALL
    5. INFERENCE_ONLY
    6. CALLBACK_COLLECTOR_PROCESSED  (★ task-2644 주 경로)
    7. CALLBACK_LEDGER_RECONCILED  (post-hoc reconciliation)
    8. LOG_LOOKUP_OR_SCHEDULE_HISTORY_VERIFICATION  (★ §15 1C0F6F52 박제)

ANCHOR-9: "거짓말 패턴 (수신 vs 사후 조회 흐림) Stop hook 으로 사전 차단".
ANCHOR-11: "source attribution enum 8".

v2 변경: schema name + 본 task replacement 표시. logic 동일 (회장 박제 1:1).
"""
from __future__ import annotations

import re
import unicodedata
from enum import Enum
from typing import Dict, List, Optional, Tuple


# Schema identifier placed under the "schema" key of every dict returned by validate().
SCHEMA = "utils.source_attribution_guard.v2"
# Schema identifier of the earlier (v1) guard; the name and the module docstring
# indicate this module is its replacement.
REPLACEMENT_OF = "utils.source_attribution_guard.v1"


class SourceAttributionV2(str, Enum):
    """Closed set of the eight source-attribution labels.

    Every member is also a ``str`` and its value is spelled exactly like its
    name, so a member compares equal to the corresponding plain string.
    """

    # Attributions treated as a genuine inbound receipt by this module.
    RECEIVED_INBOUND_THIS_SESSION = "RECEIVED_INBOUND_THIS_SESSION"
    CALLBACK_COLLECTOR_PROCESSED = "CALLBACK_COLLECTOR_PROCESSED"

    # Look-ups, recall and inference: not an inbound receipt.
    LOG_LOOKUP_AFTER_CHAIR_QUESTION = "LOG_LOOKUP_AFTER_CHAIR_QUESTION"
    LOG_LOOKUP_PROACTIVE = "LOG_LOOKUP_PROACTIVE"
    MEMORY_RECALL = "MEMORY_RECALL"
    INFERENCE_ONLY = "INFERENCE_ONLY"
    CALLBACK_LEDGER_RECONCILED = "CALLBACK_LEDGER_RECONCILED"
    LOG_LOOKUP_OR_SCHEDULE_HISTORY_VERIFICATION = "LOG_LOOKUP_OR_SCHEDULE_HISTORY_VERIFICATION"


# The string value of every SourceAttributionV2 member, as an immutable tuple
# in the enum's iteration order.
ENUM_VALUES: Tuple[str, ...] = tuple(e.value for e in SourceAttributionV2)


# Substrings (English and Korean) that assert a callback was received.
# find_received_phrases() looks for them in text that _normalised() has already
# lower-cased and whitespace-collapsed, so every entry must itself be
# lower-case and use single spaces or it can never match.
_RECEIVED_PHRASE_PATTERNS = (
    "callback received",
    "callback 받았",  # "received (the) callback" (verb stem)
    "callback 도착",  # "callback arrived"
    "callback 수신",  # "callback reception"
    "콜백 도착",  # "callback arrived" (Hangul spelling of "callback")
    "콜백 수신",  # "callback reception"
    "콜백 받았",  # "received (the) callback"
    "callback inbound",
    "inbound callback",
    "수신 완료",  # "reception complete"
    "받음 확인",  # "receipt confirmed"
)


# Attribution values that is_inbound() accepts; any other attribution combined
# with a received-phrase is reported as a violation by the detectors below.
_INBOUND_SOURCES = frozenset({
    SourceAttributionV2.RECEIVED_INBOUND_THIS_SESSION.value,
    SourceAttributionV2.CALLBACK_COLLECTOR_PROCESSED.value,
})


def _normalised(text: str) -> str:
    """Return *text* in the canonical form used for phrase matching.

    Steps, in order:

    1. ``None`` / empty input becomes ``""``.
    2. Unicode NFKC normalisation, so that decomposed Hangul (NFD jamo
       sequences) and full-width Latin letters compare equal to the
       precomposed / ASCII spellings used by the phrase patterns.
    3. Every run of whitespace is collapsed to a single space.
    4. Leading/trailing whitespace is stripped and the result lower-cased.
    """
    # NFKC must run before whitespace collapsing: it can itself turn
    # compatibility spaces (e.g. U+3000) into plain spaces.
    canonical = unicodedata.normalize("NFKC", text or "")
    return re.sub(r"\s+", " ", canonical).strip().lower()


def find_received_phrases(text: str) -> List[str]:
    """Return every received-phrase pattern that occurs in *text*.

    The text is normalised first; patterns are then tested as plain
    substrings. The result lists the matching patterns themselves, in the
    order they appear in ``_RECEIVED_PHRASE_PATTERNS``, and is empty when
    nothing matches.
    """
    haystack = _normalised(text)
    hits: List[str] = []
    for phrase in _RECEIVED_PHRASE_PATTERNS:
        if phrase in haystack:
            hits.append(phrase)
    return hits


def is_inbound(source: Optional[str]) -> bool:
    """Tell whether *source* is one of the inbound attribution values.

    ``None`` and the empty string are never inbound.
    """
    if not source:
        return False
    return source in _INBOUND_SOURCES


def detect_received_phrase_misuse(
    text: str,
    source_attribution: Optional[str],
) -> Tuple[bool, List[str]]:
    """Flag text that claims receipt while its source is not inbound.

    Returns:
        ``(violation, matched_phrases)``. ``violation`` is true only when at
        least one received-phrase was found and *source_attribution* is
        missing or not an inbound value. ``matched_phrases`` always holds the
        phrases that were found, even when there is no violation.
    """
    hits = find_received_phrases(text)
    # is_inbound() already rejects None, so a missing attribution needs no
    # separate check here.
    violated = bool(hits) and not is_inbound(source_attribution)
    return violated, hits


def detect_schedule_history_as_inbound(
    text: str,
    source_attribution: Optional[str],
) -> bool:
    """Detect a schedule-history look-up that is worded as a receipt.

    True only when *source_attribution* is exactly
    ``LOG_LOOKUP_OR_SCHEDULE_HISTORY_VERIFICATION`` and *text* contains at
    least one received-phrase (the "1C0F6F52 pattern" named in the module
    docstring).
    """
    schedule_lookup = SourceAttributionV2.LOG_LOOKUP_OR_SCHEDULE_HISTORY_VERIFICATION.value
    is_schedule_lookup = source_attribution == schedule_lookup
    return bool(find_received_phrases(text)) if is_schedule_lookup else False


def classify_source(
    *,
    inbound_envelope_present: bool,
    collector_mode: bool,
    ledger_lookup_only: bool,
    schedule_history_lookup_only: bool,
    chair_question_triggered: bool,
) -> str:
    """Map the observation flags to a single attribution value.

    The flags are evaluated in a fixed precedence order and the first one
    that holds decides the result:

    1. inbound envelope in collector mode -> ``CALLBACK_COLLECTOR_PROCESSED``
    2. inbound envelope                   -> ``RECEIVED_INBOUND_THIS_SESSION``
    3. ledger look-up only                -> ``CALLBACK_LEDGER_RECONCILED``
    4. schedule-history look-up only      -> ``LOG_LOOKUP_OR_SCHEDULE_HISTORY_VERIFICATION``
    5. triggered by a chair question      -> ``LOG_LOOKUP_AFTER_CHAIR_QUESTION``
    6. none of the above                  -> ``INFERENCE_ONLY``

    ``collector_mode`` has no effect unless an inbound envelope is present.
    ``LOG_LOOKUP_PROACTIVE`` and ``MEMORY_RECALL`` are never produced here.

    Returns:
        The chosen attribution as its plain string value.
    """
    precedence = (
        (
            collector_mode and inbound_envelope_present,
            SourceAttributionV2.CALLBACK_COLLECTOR_PROCESSED,
        ),
        (
            inbound_envelope_present,
            SourceAttributionV2.RECEIVED_INBOUND_THIS_SESSION,
        ),
        (
            ledger_lookup_only,
            SourceAttributionV2.CALLBACK_LEDGER_RECONCILED,
        ),
        (
            schedule_history_lookup_only,
            SourceAttributionV2.LOG_LOOKUP_OR_SCHEDULE_HISTORY_VERIFICATION,
        ),
        (
            chair_question_triggered,
            SourceAttributionV2.LOG_LOOKUP_AFTER_CHAIR_QUESTION,
        ),
    )
    for applies, attribution in precedence:
        if applies:
            return attribution.value
    return SourceAttributionV2.INFERENCE_ONLY.value


def validate(text: str, source_attribution: Optional[str]) -> Dict[str, object]:
    """Stop hook v2 entry point: check *text* against its source attribution.

    A violation is reported when *text* contains a received-phrase and
    *source_attribution* is not an inbound value. The ``reason`` is chosen
    from most to least specific:

    * ``RECEIVED_PHRASE_WITHOUT_SOURCE_ATTRIBUTION`` (condition 7) when the
      attribution is ``None``;
    * ``SCHEDULE_HISTORY_VERIFICATION_AS_INBOUND`` (condition 8) when the
      attribution is the schedule-history look-up value;
    * ``RECEIVED_PHRASE_NON_INBOUND_SOURCE`` for any other non-inbound
      attribution.

    Args:
        text: The message to inspect.
        source_attribution: The attribution value declared for *text*, or
            ``None`` when none was declared.

    Returns:
        A dict with the keys ``schema``, ``violation``, ``reason``,
        ``matched_phrases`` and ``source_attribution``. When there is no
        violation, ``reason`` is ``None`` and ``matched_phrases`` is empty
        (phrases found under an inbound attribution are not reported).
    """
    misuse, matches = detect_received_phrase_misuse(text, source_attribution)
    if not misuse:
        return {
            "schema": SCHEMA,
            "violation": False,
            "reason": None,
            "matched_phrases": [],
            "source_attribution": source_attribution,
        }

    if source_attribution is None:
        reason = (
            "RECEIVED_PHRASE_WITHOUT_SOURCE_ATTRIBUTION "
            "(v2 Stop hook 조건 7: '도착/수신' 표현 + source 미명시)"
        )
    elif detect_schedule_history_as_inbound(text, source_attribution):
        # The schedule-history attribution is itself non-inbound, so it must
        # be tested before the generic branch; otherwise the generic reason
        # always wins and condition 8 can never be reported.
        reason = (
            "SCHEDULE_HISTORY_VERIFICATION_AS_INBOUND "
            "(v2 Stop hook 조건 8: 1C0F6F52 패턴 — schedule_history "
            "사후 조회를 수신처럼 표현)"
        )
    else:
        reason = (
            "RECEIVED_PHRASE_NON_INBOUND_SOURCE "
            f"(source={source_attribution} 은 inbound 아님)"
        )
    return {
        "schema": SCHEMA,
        "violation": True,
        "reason": reason,
        "matched_phrases": matches,
        "source_attribution": source_attribution,
    }


# Public API of the module. The underscore-prefixed helpers and tables
# (_normalised, _RECEIVED_PHRASE_PATTERNS, _INBOUND_SOURCES) are deliberately
# left out.
__all__ = [
    "SCHEMA",
    "REPLACEMENT_OF",
    "SourceAttributionV2",
    "ENUM_VALUES",
    "find_received_phrases",
    "is_inbound",
    "detect_received_phrase_misuse",
    "detect_schedule_history_as_inbound",
    "classify_source",
    "validate",
]