"""
test_transcript_learner.py

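Unit tests (TDD) for scripts/transcript-learner.py.

Test areas (numbered like the sections below):
1. Criticism pattern detection
2. Praise pattern detection
3. Preference pattern detection
4. Lines with no matching pattern
5. Multiple keywords on a single line
6. learnings.json missing (file is created)
7. learnings.json present (patterns accumulate)
8. Duplicate prevention (same text twice -> count is incremented)
9. Graceful handling of empty input
10. Output format (return value of process_transcript())
11. Reading input from stdin (monkeypatch) or from a file
12. Multi-line text processing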
"""

import importlib.util
import json
import sys
from pathlib import Path

import pytest

# Put the directory two levels above this test file (described by the original
# author as the scripts directory) at the front of the import path.
_SCRIPTS_DIR = Path(__file__).parent.parent
sys.path.insert(0, str(_SCRIPTS_DIR))

# transcript-learner.py has a hyphen in its file name, so it cannot be loaded
# with a plain `import` statement; load it from its path via importlib instead.
_MODULE_PATH = _SCRIPTS_DIR / "transcript-learner.py"
spec = importlib.util.spec_from_file_location("transcript_learner", _MODULE_PATH)
assert spec is not None
transcript_learner = importlib.util.module_from_spec(spec)
assert spec.loader is not None
spec.loader.exec_module(transcript_learner)


# ---------------------------------------------------------------------------
# Helper
# ---------------------------------------------------------------------------


def make_learnings(tmp_path: Path, data: dict) -> Path:
    """Seed ``<tmp_path>/whisper/learnings.json`` with *data* serialized as JSON.

    The ``whisper`` directory is created when it does not exist yet. The return
    value is *tmp_path* itself (the base directory), not the written file.
    """
    target = tmp_path / "whisper" / "learnings.json"
    target.parent.mkdir(parents=True, exist_ok=True)
    payload = json.dumps(data, ensure_ascii=False)
    target.write_text(payload, encoding="utf-8")
    return tmp_path


def get_learnings_path(tmp_path: Path) -> Path:
    """Return the path of ``learnings.json`` inside *tmp_path*'s ``whisper`` directory."""
    return tmp_path.joinpath("whisper", "learnings.json")


# ---------------------------------------------------------------------------
# 1. 지적 패턴 감지 테스트
# ---------------------------------------------------------------------------


class TestCriticismDetection:
    """Lines that must be reported as criticism patterns."""

    def test_detect_igeo_wae(self):
        found = transcript_learner.extract_patterns_from_line("이거 왜 이렇게 느려?")
        # This case checks the matched keyword as well as the category.
        assert "criticism" in (entry["type"] for entry in found)
        assert "이거 왜" in (entry["keyword"] for entry in found)

    def test_detect_haetjana(self):
        found = transcript_learner.extract_patterns_from_line("분명히 했잖아 그거를")
        assert "criticism" in (entry["type"] for entry in found)

    def test_detect_ani_geuge(self):
        found = transcript_learner.extract_patterns_from_line("아니 그게 맞냐고")
        assert "criticism" in (entry["type"] for entry in found)

    def test_detect_dasi(self):
        found = transcript_learner.extract_patterns_from_line("다시 해줘 이거")
        assert "criticism" in (entry["type"] for entry in found)

    def test_detect_akka_malhaetjana(self):
        found = transcript_learner.extract_patterns_from_line("아까 말했잖아 그 부분을")
        assert "criticism" in (entry["type"] for entry in found)

    def test_detect_imi_haetjana(self):
        found = transcript_learner.extract_patterns_from_line("이미 했잖아 이거")
        assert "criticism" in (entry["type"] for entry in found)

    def test_detect_tto(self):
        found = transcript_learner.extract_patterns_from_line("또? 왜 또 그래")
        assert "criticism" in (entry["type"] for entry in found)

    def test_detect_anirago(self):
        found = transcript_learner.extract_patterns_from_line("아니라고 했잖아")
        assert "criticism" in (entry["type"] for entry in found)


# ---------------------------------------------------------------------------
# 2. 칭찬 패턴 감지 테스트
# ---------------------------------------------------------------------------


class TestPraiseDetection:
    """Lines that must be reported as praise patterns."""

    def test_detect_joha(self):
        found = transcript_learner.extract_patterns_from_line("오 좋아 이렇게 해줘")
        assert "praise" in (entry["type"] for entry in found)

    def test_detect_o_gwaenchanheunde(self):
        found = transcript_learner.extract_patterns_from_line("오 괜찮은데 이거")
        assert "praise" in (entry["type"] for entry in found)

    def test_detect_igeo_majeo(self):
        found = transcript_learner.extract_patterns_from_line("이거 맞어 이렇게 하면 돼")
        assert "praise" in (entry["type"] for entry in found)

    def test_detect_jalhaesseo(self):
        found = transcript_learner.extract_patterns_from_line("잘했어 이번엔")
        assert "praise" in (entry["type"] for entry in found)

    def test_detect_johne(self):
        found = transcript_learner.extract_patterns_from_line("결과 좋네 마음에 들어")
        assert "praise" in (entry["type"] for entry in found)

    def test_detect_gwaenchanta(self):
        found = transcript_learner.extract_patterns_from_line("괜찮다 이 방식")
        assert "praise" in (entry["type"] for entry in found)

    def test_detect_geuraee_igeoya(self):
        found = transcript_learner.extract_patterns_from_line("그래 이거야 딱 원하던 거")
        assert "praise" in (entry["type"] for entry in found)


# ---------------------------------------------------------------------------
# 3. 선호 패턴 감지 테스트
# ---------------------------------------------------------------------------


class TestPreferenceDetection:
    """Lines that must be reported as preference patterns."""

    def test_detect_hangsang(self):
        found = transcript_learner.extract_patterns_from_line("항상 이렇게 해줘")
        assert "preference" in (entry["type"] for entry in found)

    def test_detect_haji_ma(self):
        found = transcript_learner.extract_patterns_from_line("그렇게 하지 마")
        assert "preference" in (entry["type"] for entry in found)

    def test_detect_euro_hae(self):
        found = transcript_learner.extract_patterns_from_line("영어로 해줘")
        assert "preference" in (entry["type"] for entry in found)

    def test_detect_nan_ga_joha(self):
        found = transcript_learner.extract_patterns_from_line("난 이 방식이 좋아")
        assert "preference" in (entry["type"] for entry in found)


# ---------------------------------------------------------------------------
# 4. 매칭 없는 줄 테스트
# ---------------------------------------------------------------------------


class TestNoMatch:
    """Lines for which extraction must return an empty list."""

    def test_no_match_plain_text(self):
        assert transcript_learner.extract_patterns_from_line("오늘 날씨가 맑습니다") == []

    def test_no_match_empty_line(self):
        assert transcript_learner.extract_patterns_from_line("") == []

    def test_no_match_whitespace_only(self):
        assert transcript_learner.extract_patterns_from_line("   ") == []


# ---------------------------------------------------------------------------
# 5. 여러 키워드가 한 줄에 있는 경우
# ---------------------------------------------------------------------------


class TestMultipleKeywordsOneLine:
    """A single line that triggers more than one pattern."""

    def test_criticism_and_praise_same_line(self):
        found = transcript_learner.extract_patterns_from_line("이거 왜 이러지 그래도 좋아")
        kinds = tuple(entry["type"] for entry in found)
        assert "criticism" in kinds
        assert "praise" in kinds

    def test_multiple_criticism_keywords(self):
        found = transcript_learner.extract_patterns_from_line("이거 왜 다시 했잖아")
        criticism_count = sum(1 for entry in found if entry["type"] == "criticism")
        assert criticism_count >= 2

    def test_context_is_full_line(self):
        source_line = "이거 왜 이렇게 했어?"
        # Every extracted pattern must carry the whole input line as context.
        for entry in transcript_learner.extract_patterns_from_line(source_line):
            assert entry["context"] == source_line


# ---------------------------------------------------------------------------
# 6. learnings.json 없는 경우 (신규 생성)
# ---------------------------------------------------------------------------


class TestNewLearningsFile:
    """Behavior when no learnings.json exists yet."""

    @staticmethod
    def _process_and_load(text: str, base_dir: Path) -> dict:
        """Run process_transcript() on *text*, then return the parsed learnings.json."""
        transcript_learner.process_transcript(text, base_dir=base_dir)
        raw = get_learnings_path(base_dir).read_text(encoding="utf-8")
        return json.loads(raw)

    def test_load_learnings_missing_returns_empty(self, tmp_path: Path):
        loaded = transcript_learner.load_learnings(base_dir=tmp_path)
        assert loaded["version"] == 1
        assert loaded["patterns"] == []

    def test_process_transcript_creates_file(self, tmp_path: Path):
        transcript_learner.process_transcript("이거 왜 이렇게 느려?", base_dir=tmp_path)
        assert get_learnings_path(tmp_path).exists()

    def test_process_transcript_new_pattern_saved(self, tmp_path: Path):
        stored = self._process_and_load("이거 왜 이렇게 느려?", tmp_path)
        assert len(stored["patterns"]) >= 1
        assert "이거 왜" in (entry["keyword"] for entry in stored["patterns"])

    def test_process_transcript_version_1(self, tmp_path: Path):
        stored = self._process_and_load("좋아 이 방식이 마음에 들어", tmp_path)
        assert stored["version"] == 1

    def test_process_transcript_has_last_updated(self, tmp_path: Path):
        stored = self._process_and_load("잘했어", tmp_path)
        assert "last_updated" in stored
        assert stored["last_updated"] != ""

    def test_pattern_has_required_fields(self, tmp_path: Path):
        stored = self._process_and_load("이거 왜 이렇게 됐어?", tmp_path)
        entry = next(p for p in stored["patterns"] if p["keyword"] == "이거 왜")
        for field in ("type", "keyword", "context", "count", "first_seen", "last_seen"):
            assert field in entry
        assert entry["count"] == 1


# ---------------------------------------------------------------------------
# 7. learnings.json 있는 경우 (누적)
# ---------------------------------------------------------------------------


class TestExistingLearningsFile:
    """Behavior when a learnings.json file is already present."""

    @staticmethod
    def _seed(base_dir: Path, pattern: dict) -> None:
        """Write a version-1 learnings.json that holds the single *pattern*."""
        make_learnings(
            base_dir,
            {
                "version": 1,
                "last_updated": "2026-03-10T09:00:00",
                "patterns": [pattern],
            },
        )

    def test_accumulates_new_pattern(self, tmp_path: Path):
        self._seed(
            tmp_path,
            {
                "type": "criticism",
                "keyword": "이거 왜",
                "context": "이거 왜 이렇게 느려?",
                "count": 2,
                "first_seen": "2026-03-10",
                "last_seen": "2026-03-10",
            },
        )
        transcript_learner.process_transcript("잘했어 이번엔", base_dir=tmp_path)
        stored = json.loads(get_learnings_path(tmp_path).read_text(encoding="utf-8"))
        # The seeded pattern plus at least one newly learned pattern.
        assert len(stored["patterns"]) >= 2

    def test_load_learnings_existing(self, tmp_path: Path):
        self._seed(
            tmp_path,
            {
                "type": "praise",
                "keyword": "좋아",
                "context": "좋아 이거",
                "count": 1,
                "first_seen": "2026-03-10",
                "last_seen": "2026-03-10",
            },
        )
        loaded = transcript_learner.load_learnings(base_dir=tmp_path)
        assert len(loaded["patterns"]) == 1
        assert loaded["patterns"][0]["keyword"] == "좋아"

    def test_load_learnings_corrupted_returns_empty(self, tmp_path: Path):
        """A corrupted learnings.json is loaded as data with no patterns."""
        broken_file = get_learnings_path(tmp_path)
        broken_file.parent.mkdir(parents=True, exist_ok=True)
        broken_file.write_text("{ not valid json!!!", encoding="utf-8")
        loaded = transcript_learner.load_learnings(base_dir=tmp_path)
        assert loaded["patterns"] == []


# ---------------------------------------------------------------------------
# 8. 중복 방지 테스트
# ---------------------------------------------------------------------------


class TestDuplicatePrevention:
    """Repeated input must update existing entries instead of duplicating them."""

    def test_same_context_increases_count(self, tmp_path: Path):
        """Processing the same line twice leaves its entry with count == 2."""
        text = "이거 왜 이렇게 느려?"
        transcript_learner.process_transcript(text, base_dir=tmp_path)
        transcript_learner.process_transcript(text, base_dir=tmp_path)
        data = json.loads(get_learnings_path(tmp_path).read_text(encoding="utf-8"))
        pattern = next(p for p in data["patterns"] if p["keyword"] == "이거 왜")
        assert pattern["count"] == 2

    def test_same_context_no_duplicate_entry(self, tmp_path: Path):
        """Processing the same line twice stores one keyword/context entry."""
        text = "이거 왜 이렇게 느려?"
        transcript_learner.process_transcript(text, base_dir=tmp_path)
        transcript_learner.process_transcript(text, base_dir=tmp_path)
        data = json.loads(get_learnings_path(tmp_path).read_text(encoding="utf-8"))
        matching = [p for p in data["patterns"] if p["keyword"] == "이거 왜" and p["context"] == text.strip()]
        assert len(matching) == 1

    def test_different_context_adds_new_entry(self, tmp_path: Path):
        """The same keyword in two different lines yields two entries."""
        text1 = "이거 왜 이렇게 느려?"
        text2 = "이거 왜 안 되는 거야?"
        transcript_learner.process_transcript(text1, base_dir=tmp_path)
        transcript_learner.process_transcript(text2, base_dir=tmp_path)
        data = json.loads(get_learnings_path(tmp_path).read_text(encoding="utf-8"))
        igeo_wae_patterns = [p for p in data["patterns"] if p["keyword"] == "이거 왜"]
        assert len(igeo_wae_patterns) == 2

    def test_last_seen_updated_on_duplicate(self, tmp_path: Path):
        """Re-seeing a stored pattern bumps its count and refreshes last_seen."""
        # Seed a date far in the past: comparing with `>=` against the seeded
        # value would also pass when last_seen is left untouched, so the check
        # below must be strictly greater than a value that cannot be "today".
        stale_date = "2000-01-01"
        existing = {
            "version": 1,
            "last_updated": "2000-01-01T09:00:00",
            "patterns": [
                {
                    "type": "criticism",
                    "keyword": "이거 왜",
                    "context": "이거 왜 이렇게 느려?",
                    "count": 1,
                    "first_seen": stale_date,
                    "last_seen": stale_date,
                }
            ],
        }
        make_learnings(tmp_path, existing)
        text = "이거 왜 이렇게 느려?"
        transcript_learner.process_transcript(text, base_dir=tmp_path)
        data = json.loads(get_learnings_path(tmp_path).read_text(encoding="utf-8"))
        pattern = next(p for p in data["patterns"] if p["keyword"] == "이거 왜")
        # String comparison; assumes ISO-style (YYYY-MM-DD...) dates like the
        # fixture uses, which is the same assumption the previous check made.
        assert pattern["last_seen"] > stale_date
        assert pattern["count"] == 2


# ---------------------------------------------------------------------------
# 9. 빈 입력 graceful 처리
# ---------------------------------------------------------------------------


class TestEmptyInput:
    """Empty or pattern-free input must be handled gracefully."""

    def test_empty_string_no_exception(self, tmp_path: Path):
        """An empty transcript is processed without raising."""
        # No try/except wrapper: any exception raised here already fails the
        # test, and pytest then reports the original traceback directly.
        transcript_learner.process_transcript("", base_dir=tmp_path)

    def test_empty_string_returns_zero_new(self, tmp_path: Path):
        """An empty transcript reports zero new and zero updated patterns."""
        result = transcript_learner.process_transcript("", base_dir=tmp_path)
        assert result["new_patterns"] == 0
        assert result["updated_patterns"] == 0

    def test_whitespace_only_no_exception(self, tmp_path: Path):
        """A transcript of only spaces and newlines is processed without raising."""
        transcript_learner.process_transcript("   \n\n   ", base_dir=tmp_path)

    def test_no_matching_text_returns_zero_new(self, tmp_path: Path):
        """Text without any known keyword reports zero new and zero updated patterns."""
        result = transcript_learner.process_transcript("오늘 날씨가 좋다", base_dir=tmp_path)
        assert result["new_patterns"] == 0
        assert result["updated_patterns"] == 0


# ---------------------------------------------------------------------------
# 10. stdout 출력 형식 검증
# ---------------------------------------------------------------------------


class TestOutputFormat:
    """Shape and contents of the dict returned by process_transcript()."""

    def test_returns_dict_with_required_keys(self, tmp_path: Path):
        summary = transcript_learner.process_transcript("이거 왜 느려?", base_dir=tmp_path)
        for key in ("new_patterns", "updated_patterns", "total_patterns"):
            assert key in summary

    def test_new_patterns_count(self, tmp_path: Path):
        summary = transcript_learner.process_transcript("이거 왜 느려?\n좋아 이거", base_dir=tmp_path)
        assert summary["new_patterns"] >= 2

    def test_updated_patterns_count(self, tmp_path: Path):
        repeated = "이거 왜 느려?"
        # First pass stores the pattern; the second pass should count as an update.
        transcript_learner.process_transcript(repeated, base_dir=tmp_path)
        summary = transcript_learner.process_transcript(repeated, base_dir=tmp_path)
        assert summary["updated_patterns"] >= 1

    def test_total_patterns_matches_file(self, tmp_path: Path):
        summary = transcript_learner.process_transcript("이거 왜 느려?\n잘했어 이번엔", base_dir=tmp_path)
        on_disk = json.loads(get_learnings_path(tmp_path).read_text(encoding="utf-8"))
        assert summary["total_patterns"] == len(on_disk["patterns"])

    def test_total_accumulates_across_calls(self, tmp_path: Path):
        transcript_learner.process_transcript("이거 왜 느려?", base_dir=tmp_path)
        summary = transcript_learner.process_transcript("잘했어 이번엔", base_dir=tmp_path)
        assert summary["total_patterns"] >= 2


# ---------------------------------------------------------------------------
# 11. stdin 읽기 테스트 (monkeypatch)
# ---------------------------------------------------------------------------


class TestStdinReading:
    """read_input() with stdin and with a file path."""

    def test_read_from_stdin(self, monkeypatch, tmp_path: Path):
        import io

        # Replace sys.stdin with an in-memory stream for the duration of the test.
        monkeypatch.setattr(sys, "stdin", io.StringIO("이거 왜 이렇게 느려?\n좋아 이거"))
        content = transcript_learner.read_input(input_file=None)
        assert "이거 왜" in content
        assert "좋아" in content

    def test_read_from_file(self, tmp_path: Path):
        session_file = tmp_path / "session.txt"
        session_file.write_text("잘했어 이번에\n항상 이렇게 해줘", encoding="utf-8")
        content = transcript_learner.read_input(input_file=str(session_file))
        assert "잘했어" in content
        assert "항상" in content

    def test_read_from_missing_file_raises(self, tmp_path: Path):
        missing = str(tmp_path / "nonexistent.txt")
        with pytest.raises((FileNotFoundError, OSError)):
            transcript_learner.read_input(input_file=missing)

    def test_stdin_text_processed_correctly(self, monkeypatch, tmp_path: Path):
        import io

        monkeypatch.setattr(sys, "stdin", io.StringIO("이거 왜 이렇게 느려?"))
        content = transcript_learner.read_input(input_file=None)
        summary = transcript_learner.process_transcript(content, base_dir=tmp_path)
        assert summary["new_patterns"] >= 1


# ---------------------------------------------------------------------------
# 12. 멀티라인 텍스트 처리
# ---------------------------------------------------------------------------


class TestMultilineProcessing:
    """Transcripts that span several lines."""

    def test_multiline_detects_all_patterns(self, tmp_path: Path):
        lines = (
            "오늘 회의 내용입니다",
            "이거 왜 이렇게 됐어?",
            "잘했어 이번엔",
            "항상 이렇게 해줘",
            "그냥 평범한 문장",
            "또? 왜 또 이래",
        )
        # Each line, including the last one, is terminated by a newline.
        transcript = "\n".join(lines) + "\n"
        summary = transcript_learner.process_transcript(transcript, base_dir=tmp_path)
        assert summary["new_patterns"] >= 4

    def test_multiline_saves_each_pattern(self, tmp_path: Path):
        transcript_learner.process_transcript("이거 왜 느려?\n좋아 이거야\n항상 이렇게 해", base_dir=tmp_path)
        stored = json.loads(get_learnings_path(tmp_path).read_text(encoding="utf-8"))
        kinds = {entry["type"] for entry in stored["patterns"]}
        for expected in ("criticism", "praise", "preference"):
            assert expected in kinds