"""
TDD RED 단계: eval runner 테스트 파일
run_evals.py 구현이 없으므로 모든 테스트는 ImportError 또는 FAIL 상태여야 합니다.
"""

import json
import os
import tempfile
from pathlib import Path
from typing import Any
from unittest.mock import MagicMock, mock_open, patch

import pytest

# 구현 모듈 임포트 (아직 없으므로 ImportError 발생)
from run_evals import (
    check_forbidden,
    check_routing,
    evaluate_response,
    generate_report,
    get_skill_list,
    keyword_match,
    load_evals,
)

# ---------------------------------------------------------------------------
# Fixtures
# ---------------------------------------------------------------------------


@pytest.fixture
def sample_eval_case() -> dict[str, Any]:
    """Return a single eval case shaped like one entry of ``evals.json``.

    The case carries the five fields the loader tests look for:
    ``id``, ``prompt``, ``expected_output``, ``assertions`` and ``files``.
    """
    prompt = "InsuWiki Google RSA 광고를 처음부터 만들어주세요. 헤드라인 10개와 설명 3개를 만들어주세요."
    expected_output = (
        "Google RSA 규격(헤드라인 30자 이하, 설명 90자 이하)을 준수하며 최소 10개의 헤드라인과 "
        "3개의 설명을 제공해야 합니다. 헤드라인은 독립적으로 의미가 통해야 하며 키워드 중심, "
        "혜택 중심, CTA 중심 헤드라인이 고루 포함되어야 합니다."
    )
    assertions = [
        "Checks all headlines are 30 characters or fewer",
        "Provides at least one keyword-focused headline",
    ]
    case: dict[str, Any] = {
        "id": 1,
        "prompt": prompt,
        "expected_output": expected_output,
        "assertions": assertions,
        "files": [],
    }
    return case


@pytest.fixture
def sample_evals_json_content(sample_eval_case: dict[str, Any]) -> dict[str, Any]:
    """Return a full ``evals.json`` payload: a skill name plus two eval cases.

    The first case is the ``sample_eval_case`` fixture; the second is a
    Meta feed-ad case defined inline.
    """
    meta_feed_case: dict[str, Any] = {
        "id": 2,
        "prompt": "Meta 피드 광고 소재를 만들어주세요.",
        "expected_output": "Meta 광고 규격에 맞게 기본 텍스트, 헤드라인, 설명을 각 3가지 변형으로 제공해야 합니다.",
        "assertions": ["Checks primary text hooks appear within the first 125 characters"],
        "files": [],
    }
    return {"skill_name": "ad-creative", "evals": [sample_eval_case, meta_feed_case]}


@pytest.fixture
def evals_json_path(sample_evals_json_content: dict[str, Any], tmp_path: Path) -> Path:
    """Write the sample payload to a temporary ``evals.json`` and return its path.

    The file is created at ``<tmp_path>/skills/ad-creative/evals/evals.json``,
    so ``path.parent.parent.parent`` is the temporary ``skills`` directory.
    """
    target = tmp_path.joinpath("skills", "ad-creative", "evals", "evals.json")
    target.parent.mkdir(parents=True)
    # ensure_ascii=False keeps the Korean text readable in the file on disk.
    serialized = json.dumps(sample_evals_json_content, ensure_ascii=False)
    target.write_text(serialized, encoding="utf-8")
    return target


@pytest.fixture
def ad_creative_boundary() -> str:
    """Return the boundary text of the ad-creative skill.

    The text names two hand-off targets using the ``→ <skill>`` notation:
    ``paid-ads`` and ``social-content``.
    """
    routing_rules = (
        "For campaign strategy, budget allocation, or platform selection, → paid-ads. ",
        "For social media content strategy, → social-content.",
    )
    return "".join(routing_rules)


@pytest.fixture
def llm_response_pass() -> str:
    """Return an LLM response that is expected to pass (it contains headlines)."""
    response_lines = [
        "Google RSA 헤드라인 목록:",
        "1. 보험 약관 3분에 이해하기 (15자)",
        "2. 내 보험 제대로 알고 있나요? (16자)",
        "3. 복잡한 약관 쉽게 풀어드립니다 (16자)",
        "설명: 보험 약관 핵심 내용만 쉽게 정리. 청구 누락 없이 내 보험을 100% 활용하세요.",
    ]
    # Lines are newline-separated with no trailing newline after the last one.
    return "\n".join(response_lines)


@pytest.fixture
def llm_response_fail() -> str:
    """Return an LLM response that is expected to fail (it contains no headlines)."""
    refusal = "죄송합니다. 광고 캠페인 예산 전략은 paid-ads 스킬을 이용해주세요."
    return refusal


# ---------------------------------------------------------------------------
# 1. evals.json 로딩 테스트
# ---------------------------------------------------------------------------


class TestLoadEvals:
    """Tests for loading and parsing a skill's ``evals.json`` via ``load_evals``.

    Every test that creates files lays them out as
    ``<skills-root>/<skill-name>/evals/evals.json`` and patches
    ``run_evals.SKILLS_BASE_PATH`` with that same ``<skills-root>`` directory,
    so all tests in this class agree on a single directory convention.
    """

    def test_load_evals_returns_valid_structure(
        self,
        evals_json_path: Path,
        sample_evals_json_content: dict[str, Any],
    ) -> None:
        """A well-formed file yields the skill name and a list of two evals."""
        # evals_json_path is <skills-root>/ad-creative/evals/evals.json.
        skills_root = str(evals_json_path.parent.parent.parent)
        with patch("run_evals.SKILLS_BASE_PATH", skills_root):
            result = load_evals("ad-creative")

        assert result["skill_name"] == "ad-creative"
        assert isinstance(result["evals"], list)
        assert len(result["evals"]) == 2

    def test_load_evals_parses_eval_fields(
        self,
        evals_json_path: Path,
    ) -> None:
        """The first eval exposes id, prompt, expected_output, assertions and files."""
        skills_root = str(evals_json_path.parent.parent.parent)
        with patch("run_evals.SKILLS_BASE_PATH", skills_root):
            result = load_evals("ad-creative")

        first_eval = result["evals"][0]
        for field in ("id", "prompt", "expected_output", "assertions", "files"):
            assert field in first_eval

    def test_load_evals_invalid_json_raises_error(self, tmp_path: Path) -> None:
        """A syntactically broken ``evals.json`` raises a JSON/value error."""
        skills_root = tmp_path / "skills"
        evals_dir = skills_root / "bad-skill" / "evals"
        evals_dir.mkdir(parents=True)
        (evals_dir / "evals.json").write_text("{invalid json: }", encoding="utf-8")

        # Patch with the skills directory itself (not its parent) so the broken
        # file is actually located and the parse error, rather than a missing
        # file error, is what gets exercised.
        with patch("run_evals.SKILLS_BASE_PATH", str(skills_root)):
            with pytest.raises((json.JSONDecodeError, ValueError)):
                load_evals("bad-skill")

    def test_load_evals_empty_evals_array(self, tmp_path: Path) -> None:
        """An ``evals.json`` whose ``evals`` array is empty loads as an empty list."""
        skills_root = tmp_path / "skills"
        evals_dir = skills_root / "empty-skill" / "evals"
        evals_dir.mkdir(parents=True)
        (evals_dir / "evals.json").write_text(
            json.dumps({"skill_name": "empty-skill", "evals": []}, ensure_ascii=False),
            encoding="utf-8",
        )

        # Same base-path convention as the evals_json_path-based tests above.
        with patch("run_evals.SKILLS_BASE_PATH", str(skills_root)):
            result = load_evals("empty-skill")

        assert result["evals"] == []

    def test_load_evals_nonexistent_skill_raises_error(self) -> None:
        """An unknown skill under a non-existent base path raises an error."""
        with patch("run_evals.SKILLS_BASE_PATH", "/nonexistent/path"):
            with pytest.raises((FileNotFoundError, OSError, ValueError)):
                load_evals("nonexistent-skill")


# ---------------------------------------------------------------------------
# 2. 평가 판정 로직 테스트
# ---------------------------------------------------------------------------


class TestKeywordMatch:
    """Tests for ``keyword_match``.

    Each test unpacks the return value as a 3-tuple of
    ``(passed, matched, missed)``.
    """

    def test_korean_keyword_found_in_response(self) -> None:
        """A Korean keyword present in the response gives ``passed=True``."""
        llm_text = "헤드라인 10개를 작성해 드리겠습니다: '보험 약관 3분에 이해하기'"
        expectation = "최소 10개의 헤드라인을 제공해야 합니다."

        ok, hits, _misses = keyword_match(llm_text, expectation)

        assert ok is True
        assert "헤드라인" in hits

    def test_korean_keyword_missing_from_response(self) -> None:
        """A response lacking the Korean keywords gives ``passed=False``."""
        llm_text = "죄송합니다. 이 요청은 처리할 수 없습니다."
        expectation = "헤드라인 10개와 설명 3개를 제공해야 합니다."

        ok, _hits, misses = keyword_match(llm_text, expectation)

        assert ok is False
        assert len(misses) > 0

    def test_partial_keyword_matching_supported(self) -> None:
        """Partial matching works: the keyword alone, without the count, is enough."""
        llm_text = "다음은 헤드라인 목록입니다."
        expectation = "헤드라인 10개를 제공해야 합니다."

        ok, _hits, _misses = keyword_match(llm_text, expectation)

        assert ok is True

    def test_english_case_insensitive_matching(self) -> None:
        """English keywords match regardless of letter case."""
        llm_text = "Here are the HEADLINES for google rsa ads."
        expectation = "Provide headlines for Google RSA ads."

        ok, hits, _misses = keyword_match(llm_text, expectation)

        assert ok is True
        lowered_hits = [hit.lower() for hit in hits]
        assert "headlines" in lowered_hits

    def test_keyword_match_returns_missed_keywords(self) -> None:
        """Unmatched keywords come back as a non-empty list."""
        llm_text = "여기 헤드라인만 있습니다."
        expectation = "헤드라인과 설명과 CTA를 모두 제공해야 합니다."

        _ok, _hits, misses = keyword_match(llm_text, expectation)

        assert isinstance(misses, list)
        assert len(misses) > 0


class TestCheckForbidden:
    """Tests for ``check_forbidden``.

    Each test unpacks the return value as ``(passed, found_forbidden)``.
    """

    def test_forbidden_routing_instruction_detected(self) -> None:
        """A response handling a ``→ paid-ads`` topic itself is flagged."""
        boundary_text = "→ paid-ads: 캠페인 전략, 타겟팅, 예산"
        llm_text = "이 요청은 paid-ads 스킬을 사용하여 처리해야 합니다. paid-ads로 라우팅합니다."

        ok, violations = check_forbidden(llm_text, boundary_text)

        # The ad-creative skill must not take on the paid-ads role directly.
        assert ok is False
        assert len(violations) > 0

    def test_no_forbidden_content_in_normal_response(self, ad_creative_boundary: str) -> None:
        """A normal in-scope response passes with an empty violation list."""
        llm_text = "헤드라인 10개를 작성해 드립니다. 1. 보험 약관 쉽게 이해하기"

        ok, violations = check_forbidden(llm_text, ad_creative_boundary)

        assert ok is True
        assert violations == []

    def test_absolute_guarantee_expression_detected(self) -> None:
        """Absolute-guarantee wording ('무조건', '100%') listed in the boundary is flagged."""
        boundary_text = "무조건, 반드시, 100% 등 단정적 표현 금지"
        llm_text = "이 보험은 무조건 보장되며 100% 지급됩니다."

        ok, violations = check_forbidden(llm_text, boundary_text)

        assert ok is False
        assert len(violations) > 0


class TestCheckRouting:
    """Tests for ``check_routing``.

    Each test unpacks the return value as ``(passed, reason)``.
    """

    def test_routing_to_correct_skill_detected(self, ad_creative_boundary: str) -> None:
        """A response that delegates to paid-ads returns a ``(bool, str)`` pair."""
        # The response hands the request over to another skill.
        llm_text = "캠페인 예산 전략은 paid-ads 스킬에서 담당합니다. 해당 스킬을 이용해주세요."

        ok, why = check_routing(llm_text, ad_creative_boundary)

        # NOTE(review): only the return types are checked here, not that the
        # delegation actually passes — confirm whether that is intentional.
        assert isinstance(ok, bool)
        assert isinstance(why, str)

    def test_routing_refusal_when_out_of_scope(self, ad_creative_boundary: str) -> None:
        """Answering an out-of-scope request directly fails with a non-empty reason."""
        # Budget strategy is paid-ads territory; ad-creative must not answer it.
        llm_text = "예산 전략: CPC 300원, 일예산 50만원으로 설정하세요. 최적 타겟은..."

        ok, why = check_routing(llm_text, ad_creative_boundary)

        assert ok is False
        assert why != ""


class TestEvaluateResponse:
    """Integration-style tests for ``evaluate_response``."""

    def test_evaluate_response_returns_dict_with_required_keys(
        self,
        sample_eval_case: dict[str, Any],
        llm_response_pass: str,
    ) -> None:
        """The outcome is a dict holding ``passed``, ``eval_id`` and ``details``."""
        outcome = evaluate_response(llm_response_pass, sample_eval_case)

        assert isinstance(outcome, dict)
        for required_key in ("passed", "eval_id", "details"):
            assert required_key in outcome

    def test_evaluate_response_pass_case(
        self,
        sample_eval_case: dict[str, Any],
        llm_response_pass: str,
    ) -> None:
        """A normal response containing headlines yields ``passed=True``."""
        outcome = evaluate_response(llm_response_pass, sample_eval_case)

        assert outcome["passed"] is True

    def test_evaluate_response_fail_case(
        self,
        sample_eval_case: dict[str, Any],
        llm_response_fail: str,
    ) -> None:
        """A response without the expected keywords yields ``passed=False``."""
        outcome = evaluate_response(llm_response_fail, sample_eval_case)

        assert outcome["passed"] is False

    def test_evaluate_response_includes_eval_id(
        self,
        sample_eval_case: dict[str, Any],
        llm_response_pass: str,
    ) -> None:
        """The outcome echoes the id of the evaluated case."""
        outcome = evaluate_response(llm_response_pass, sample_eval_case)

        expected_id = sample_eval_case["id"]
        assert outcome["eval_id"] == expected_id


# ---------------------------------------------------------------------------
# 3. CLI 인터페이스 테스트
# ---------------------------------------------------------------------------


class TestCLIParsing:
    """Tests for command-line argument parsing and skill-name validation.

    ``create_argument_parser`` and ``validate_skill_name`` are imported inside
    each test rather than at module level.
    """

    def test_parse_skill_argument(self) -> None:
        """``--skill ad-creative`` is parsed into ``args.skill``."""
        from run_evals import create_argument_parser

        parser = create_argument_parser()
        args = parser.parse_args(["--skill", "ad-creative"])

        assert args.skill == "ad-creative"

    def test_parse_all_flag(self) -> None:
        """``--all`` is parsed into ``args.all`` as ``True``."""
        from run_evals import create_argument_parser

        parser = create_argument_parser()
        args = parser.parse_args(["--all"])

        assert args.all is True

    def test_parse_verbose_flag(self) -> None:
        """``--verbose`` is parsed into ``args.verbose`` as ``True``."""
        from run_evals import create_argument_parser

        parser = create_argument_parser()
        args = parser.parse_args(["--skill", "ad-creative", "--verbose"])

        assert args.verbose is True

    def test_parse_dry_run_flag(self) -> None:
        """``--dry-run`` is parsed into ``args.dry_run`` as ``True``."""
        from run_evals import create_argument_parser

        parser = create_argument_parser()
        args = parser.parse_args(["--skill", "ad-creative", "--dry-run"])

        assert args.dry_run is True

    def test_invalid_skill_name_raises_error(self) -> None:
        """An unknown skill name raises ``ValueError`` or exits via ``SystemExit``."""
        from run_evals import validate_skill_name

        with pytest.raises((ValueError, SystemExit)):
            validate_skill_name("nonexistent-skill-xyz-123")


# ---------------------------------------------------------------------------
# 4. 결과 요약 테스트
# ---------------------------------------------------------------------------


class TestResultSummary:
    """Tests for result summarisation and report generation via ``generate_report``.

    Each test parses the returned value with ``json.loads``.
    """

    @pytest.fixture
    def mixed_results(self) -> list[dict[str, Any]]:
        """Return five results across two skills: three passes and two failures."""
        ad_creative_results: list[dict[str, Any]] = [
            {"eval_id": 1, "skill_name": "ad-creative", "passed": True, "details": {}},
            {"eval_id": 2, "skill_name": "ad-creative", "passed": True, "details": {}},
            {"eval_id": 3, "skill_name": "ad-creative", "passed": False, "details": {"missed": ["헤드라인"]}},
        ]
        copywriting_results: list[dict[str, Any]] = [
            {"eval_id": 4, "skill_name": "copywriting", "passed": True, "details": {}},
            {"eval_id": 5, "skill_name": "copywriting", "passed": False, "details": {"missed": ["CTA"]}},
        ]
        return ad_creative_results + copywriting_results

    def test_skill_pass_rate_calculation(self, mixed_results: list[dict[str, Any]], tmp_path: Path) -> None:
        """The per-skill pass rate for ad-creative is about 2/3."""
        destination = str(tmp_path.joinpath("report.json"))
        raw_report = generate_report(mixed_results, destination)

        assert isinstance(raw_report, str)
        parsed = json.loads(raw_report)
        assert "skill_results" in parsed
        observed_rate = parsed["skill_results"]["ad-creative"]["pass_rate"]
        expected_rate = 2 / 3  # two of the three ad-creative cases pass
        assert abs(observed_rate - expected_rate) < 0.01

    def test_total_pass_rate_calculation(self, mixed_results: list[dict[str, Any]], tmp_path: Path) -> None:
        """The overall pass rate is about 3/5."""
        destination = str(tmp_path.joinpath("report.json"))
        raw_report = generate_report(mixed_results, destination)

        parsed = json.loads(raw_report)
        assert "total_pass_rate" in parsed
        expected_rate = 3 / 5  # three of the five cases pass
        assert abs(parsed["total_pass_rate"] - expected_rate) < 0.01

    def test_fail_cases_include_details(self, mixed_results: list[dict[str, Any]], tmp_path: Path) -> None:
        """Both failed cases are listed, each with ``eval_id`` and ``details``."""
        destination = str(tmp_path.joinpath("report.json"))
        raw_report = generate_report(mixed_results, destination)

        parsed = json.loads(raw_report)
        assert "failed_cases" in parsed
        failures = parsed["failed_cases"]
        assert len(failures) == 2
        for failure in failures:
            assert "eval_id" in failure
            assert "details" in failure


# ---------------------------------------------------------------------------
# 5. dry-run 모드 테스트
# ---------------------------------------------------------------------------


class TestDryRun:
    """Tests for ``run_evals_for_skill`` when called with ``dry_run=True``."""

    def test_dry_run_does_not_call_llm_api(
        self,
        evals_json_path: Path,
    ) -> None:
        """A dry run never invokes ``run_evals.call_llm``."""
        from run_evals import run_evals_for_skill

        skills_root = str(evals_json_path.parent.parent.parent)
        llm_spy = MagicMock()

        # Replace the LLM entry point with a mock so any call would be recorded.
        with patch("run_evals.SKILLS_BASE_PATH", skills_root), patch("run_evals.call_llm", llm_spy):
            run_evals_for_skill("ad-creative", dry_run=True)

        llm_spy.assert_not_called()

    def test_dry_run_validates_evals_structure(
        self,
        evals_json_path: Path,
    ) -> None:
        """A dry run returns a result whose ``dry_run`` entry is ``True``."""
        from run_evals import run_evals_for_skill

        skills_root = str(evals_json_path.parent.parent.parent)

        with patch("run_evals.SKILLS_BASE_PATH", skills_root):
            summary = run_evals_for_skill("ad-creative", dry_run=True)

        # The dry-run result must exist and identify itself as a dry run.
        assert summary is not None
        assert "dry_run" in summary
        assert summary["dry_run"] is True

    def test_dry_run_returns_eval_count(
        self,
        evals_json_path: Path,
    ) -> None:
        """A dry run reports the number of eval cases (two in the sample file)."""
        from run_evals import run_evals_for_skill

        skills_root = str(evals_json_path.parent.parent.parent)

        with patch("run_evals.SKILLS_BASE_PATH", skills_root):
            summary = run_evals_for_skill("ad-creative", dry_run=True)

        assert "eval_count" in summary
        assert summary["eval_count"] == 2


# ---------------------------------------------------------------------------
# 6. get_skill_list 테스트
# ---------------------------------------------------------------------------


class TestGetSkillList:
    """Tests for ``get_skill_list``.

    NOTE(review): these tests call ``get_skill_list`` without patching
    ``SKILLS_BASE_PATH``, so they presumably depend on the real skills
    directory — confirm against the implementation.
    """

    def test_get_skill_list_returns_list(self) -> None:
        """The return value is a ``list``."""
        skills = get_skill_list()
        assert isinstance(skills, list)

    def test_get_skill_list_includes_ad_creative(self) -> None:
        """The ``ad-creative`` skill appears in the returned list."""
        skills = get_skill_list()
        assert "ad-creative" in skills

    def test_get_skill_list_returns_nonempty_list(self) -> None:
        """The returned list contains at least one skill."""
        skills = get_skill_list()
        assert len(skills) > 0