"""hook_scorer.py — thread-hook-formula 자동 스코어링

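Automatically evaluates text against the five-emotion hook checklist.
Example keywords per emotion (the complete regex lists live in EMOTION_PATTERNS):
- Anger (anger): "절대 안 됨", "왜 다들", "이러면 안 되는", "문제는", "심각한"
- Surprise (surprise): "진짜", "실화", "충격", "몰랐", "알고 보니"
- Empathy (empathy): "나도", "겪어본", "공감", "맞아", "그렇지", "이거 아는 사람"
- Fear (fear): "안 하면", "모르면 손해", "놓치면", "위험", "주의", "잃"
- Curiosity (curiosity): "아무도 안 알려주는", "비밀", "알면 소름", "이거 알면", "왜 아무도"

Non-round numbers (e.g. 47,382) are checked separately and reported through
their own flag; they do not currently count toward the surprise emotion.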
"""

import json
import re
from dataclasses import dataclass, field
from pathlib import Path
from typing import Optional

# Directory that contains this module; blacklist.json is looked up here.
PIPELINE_DIR = Path(__file__).parent

# Keyword regex patterns per emotion.
# detect_emotions() marks an emotion as present when any one of its patterns is
# found anywhere in the text (re.search, so no anchoring is implied).
# `\s*` between word segments tolerates optional spacing in the source text.
# NOTE(review): a few entries are subsumed by shorter ones in the same list
# (e.g. "손해" also matches whatever "모르면\s*손해" matches); harmless, since
# only a boolean per emotion is produced.
EMOTION_PATTERNS: dict[str, list[str]] = {
    "anger": [
        r"절대\s*안\s*됨", r"왜\s*다들", r"이러면\s*안", r"문제는",
        r"심각한", r"화가\s*나", r"분통", r"어이없", r"말이\s*안\s*됨",
        r"이게\s*말이\s*됨\?", r"도대체", r"한심",
    ],
    "surprise": [
        r"진짜\??", r"실화", r"충격", r"몰랐", r"알고\s*보니",
        r"이게\s*되", r"놀라", r"대박", r"헐",
    ],
    "empathy": [
        r"나도", r"겪어\s*본", r"공감", r"맞아", r"그렇지",
        r"이거\s*아는\s*사람", r"다들\s*그런", r"저도", r"우리\s*모두",
    ],
    "fear": [
        r"안\s*하면", r"모르면\s*손해", r"놓치면", r"위험",
        r"주의", r"잃", r"후회", r"늦으면", r"빨리\s*안\s*하면",
        r"큰일", r"손해",
    ],
    "curiosity": [
        r"아무도\s*안\s*알려", r"비밀", r"알면\s*소름", r"이거\s*알면",
        r"왜\s*아무도", r"숨겨진", r"몰래", r"진짜\s*이유",
    ],
}

# Non-round ("specific") number pattern, e.g. 47,382.
# Alternative 1: a comma-grouped number; the negative lookahead rejects it when
#   only whitespace and optional unit characters separate it from the end of
#   the text.
# Alternative 2: digits (optionally containing a single ',' or '.') directly
#   followed by one of the unit suffixes 명 / 건 / 개 / 원.
# NOTE(review): nothing in the visible part of this file references this
# constant; check_non_round_numbers() uses its own inline regexes — confirm
# whether it is still needed by other modules.
NON_ROUND_NUMBER_PATTERN = re.compile(
    r"\d{1,3}(,\d{3})+(?!\s*[%원만억천]?\s*(명|건|개|원|만|억|천)?\s*$)"
    r"|\d+[,.]?\d*(?:명|건|개|원)"
)

# Round-number pattern, e.g. 100, 1000명, 10,000건, 약 5만.
# Alternative 1: a digit run ending in at least two zeros. Explicit lookarounds
#   are used instead of \b because Hangul syllables are word characters in
#   Python's Unicode regex mode, so \b never matched between "1000" and a
#   directly attached suffix such as "명". ASCII letters, digits and "_" still
#   block a match on either side, as they did with \b.
# Alternative 2: an approximate amount written as "약" + digits + one of the
#   units 만 / 억 / 천 (the "약" prefix is required).
ROUND_NUMBER_PATTERN = re.compile(
    r"(?<![\dA-Za-z_])\d+0{2,}(?![\dA-Za-z_])|약\s*\d+[만억천]"
)


@dataclass
class HookScore:
    """Result of scoring one text against the hook checklist."""
    # Detection flag per emotion name, as produced by detect_emotions().
    emotions_detected: dict[str, bool] = field(default_factory=dict)
    # Number of emotions whose flag is True.
    emotion_count: int = 0
    has_min_emotions: bool = False  # True when at least 2 emotions are detected
    has_non_round_numbers: bool = False
    has_round_numbers: bool = False  # warning only
    has_company_mention: bool = False  # a blacklisted company name appears
    # The blacklisted company names that were found in the text.
    mentioned_companies: list[str] = field(default_factory=list)
    # True when char_count <= max_length.
    is_within_length: bool = False
    char_count: int = 0
    max_length: int = 500
    # Overall verdict; score() sets it to True when fail_reasons is empty.
    passed: bool = False
    # Human-readable explanation for each failed check.
    fail_reasons: list[str] = field(default_factory=list)


def _load_blacklist() -> dict:
    """Read blacklist.json from the pipeline directory.

    Returns:
        The parsed JSON content of the file. When the file does not exist, a
        dict with empty ``company_names``, ``keywords`` and ``patterns`` lists.
    """
    blacklist_file = PIPELINE_DIR / "blacklist.json"
    if not blacklist_file.exists():
        # No blacklist configured: fall back to an empty structure.
        return {"company_names": [], "keywords": [], "patterns": []}
    return json.loads(blacklist_file.read_text(encoding="utf-8"))


def detect_emotions(text: str) -> dict[str, bool]:
    """Detect which of the five emotions appear in the text.

    Args:
        text: Text to inspect.

    Returns:
        A dict keyed by every emotion name in EMOTION_PATTERNS; the value is
        True when at least one of that emotion's patterns is found in ``text``.
    """
    return {
        emotion: any(re.search(pattern, text) for pattern in patterns)
        for emotion, patterns in EMOTION_PATTERNS.items()
    }


def check_non_round_numbers(text: str) -> bool:
    """Report whether the text contains a specific (non-round) number.

    A number counts as non-round when it is at least 100 and its last digit is
    not zero. Comma-grouped numbers such as ``47,382`` are evaluated as one
    value rather than as separate digit runs, so ``10,000`` is treated as round
    and ``1,005`` as non-round.

    Args:
        text: Text to inspect.

    Returns:
        True if at least one non-round number is present, otherwise False.
    """
    # Try a well-formed thousands-grouped number first; fall back to a plain
    # digit run. The (?!\d) guard avoids cutting a token out of a longer run.
    for token in re.findall(r"\d{1,3}(?:,\d{3})+(?!\d)|\d+", text):
        value = int(token.replace(",", ""))
        if value >= 100 and value % 10 != 0:
            return True
    return False


def check_round_numbers(text: str) -> bool:
    """Report whether the text contains a round number (warning signal).

    Args:
        text: Text to inspect.

    Returns:
        True when ROUND_NUMBER_PATTERN matches somewhere in ``text``.
    """
    match = ROUND_NUMBER_PATTERN.search(text)
    return match is not None


def check_company_mentions(text: str, blacklist: Optional[dict] = None) -> tuple[bool, list[str]]:
    """Check whether any blacklisted company name occurs in the text.

    Args:
        text: Text to inspect.
        blacklist: Blacklist data; loaded via _load_blacklist() when None.

    Returns:
        A ``(found, names)`` tuple: ``names`` lists the entries of
        ``blacklist["company_names"]`` that are substrings of ``text`` (in
        blacklist order) and ``found`` is True when that list is non-empty.
    """
    source = _load_blacklist() if blacklist is None else blacklist
    hits: list[str] = []
    for name in source.get("company_names", []):
        if name in text:
            hits.append(name)
    return len(hits) > 0, hits


def check_blacklist_keywords(text: str, blacklist: Optional[dict] = None) -> tuple[bool, list[str]]:
    """Check whether the text violates blacklisted keywords or regex patterns.

    Args:
        text: Text to inspect.
        blacklist: Blacklist data; loaded via _load_blacklist() when None.

    Returns:
        A ``(found, hits)`` tuple. ``hits`` holds the matching plain keywords
        first, followed by one ``"pattern:<regex>"`` entry per matching regex
        pattern; ``found`` is True when ``hits`` is non-empty.
    """
    source = _load_blacklist() if blacklist is None else blacklist
    # Plain keywords are matched as substrings.
    keyword_hits = [word for word in source.get("keywords", []) if word in text]
    # Regex patterns are matched with re.search and reported with a prefix.
    pattern_hits = [
        f"pattern:{regex}"
        for regex in source.get("patterns", [])
        if re.search(regex, text)
    ]
    hits = keyword_hits + pattern_hits
    return bool(hits), hits


def score(text: str, max_length: int = 500) -> HookScore:
    """Run every hook check on the text and return the combined result.

    The text passes when at least two emotions are detected, no blacklisted
    company name is mentioned and the length does not exceed ``max_length``.
    The round / non-round number flags are recorded but do not affect the
    verdict.

    Args:
        text: Text to score.
        max_length: Maximum allowed number of characters (default 500).

    Returns:
        A populated HookScore.
    """
    # Load the blacklist once, before any check runs.
    blacklist = _load_blacklist()

    # 1. Emotion detection
    emotions = detect_emotions(text)
    emotion_total = sum(emotions.values())

    # 2. Number style
    non_round = check_non_round_numbers(text)
    round_found = check_round_numbers(text)

    # 3. Company mentions
    company_hit, companies = check_company_mentions(text, blacklist)

    # 4. Length
    length = len(text)

    report = HookScore(
        emotions_detected=emotions,
        emotion_count=emotion_total,
        has_min_emotions=emotion_total >= 2,
        has_non_round_numbers=non_round,
        has_round_numbers=round_found,
        has_company_mention=company_hit,
        mentioned_companies=companies,
        is_within_length=length <= max_length,
        char_count=length,
        max_length=max_length,
    )

    # 5. Overall verdict: collect one reason per failed check.
    reasons: list[str] = []
    if not report.has_min_emotions:
        reasons.append(
            f"5감정 중 {report.emotion_count}개만 감지 (최소 2개 필요)"
        )
    if report.has_company_mention:
        reasons.append(
            f"특정 회사 언급: {', '.join(report.mentioned_companies)}"
        )
    if not report.is_within_length:
        reasons.append(
            f"글자 수 초과: {report.char_count}자 (최대 {max_length}자)"
        )

    report.fail_reasons = reasons
    report.passed = not reasons
    return report


if __name__ == "__main__":
    # Demo run: score a sample hook and print the report.
    sample = (
        "7월부터 수수료 구조가 완전히 바뀌는데, 아직도 모르는 설계사가 47,382명이나 된다니. "
        "이거 겪어본 사람? 안 하면 진짜 큰일남."
    )
    result = score(sample)
    report_lines = [
        f"통과 여부: {result.passed}",
        f"감지된 감정: {result.emotions_detected}",
        f"감정 수: {result.emotion_count}",
        f"비라운드 숫자: {result.has_non_round_numbers}",
        f"회사 언급: {result.has_company_mention}",
        f"글자 수: {result.char_count}/{result.max_length}",
    ]
    # The failure line is only shown when at least one check failed.
    if result.fail_reasons:
        report_lines.append(f"실패 사유: {result.fail_reasons}")
    print("\n".join(report_lines))