"""test_judge.py - judge 모듈 테스트"""

import io
import unittest
from unittest.mock import MagicMock, patch

# Raw YAML text of a three-item checklist; written to disk by the
# load_checklist tests.
SAMPLE_CHECKLIST_YAML = """\
name: skill-quality
version: 1
description: "스킬 품질 체크리스트"
items:
  - id: clarity
    question: "지시문이 명확한가?"
    weight: 2.0
  - id: examples
    question: "예시가 포함되어 있는가?"
    weight: 1.0
  - id: no_ambiguity
    question: "모호한 표현이 없는가?"
    weight: 1.5
scoring:
  method: "weighted_average"
"""

# (id, question, weight) rows mirroring the items of the YAML fixture above.
_SAMPLE_ITEM_ROWS = (
    ("clarity", "지시문이 명확한가?", 2.0),
    ("examples", "예시가 포함되어 있는가?", 1.0),
    ("no_ambiguity", "모호한 표현이 없는가?", 1.5),
)

# Hand-written dict carrying the same content as SAMPLE_CHECKLIST_YAML, for
# tests that take an in-memory checklist instead of a file path.
SAMPLE_CHECKLIST_DICT = {
    "name": "skill-quality",
    "version": 1,
    "description": "스킬 품질 체크리스트",
    "items": [
        {"id": item_id, "question": question, "weight": weight}
        for item_id, question, weight in _SAMPLE_ITEM_ROWS
    ],
    "scoring": {"method": "weighted_average"},
}


class TestLoadChecklist(unittest.TestCase):
    """Tests for ``load_checklist``."""

    def setUp(self):
        # Imported inside setUp (not at module top), so the import runs
        # once per test.
        from autoresearch.judge import load_checklist

        self.load_checklist = load_checklist

    def _write_yaml(self, content):
        """Write ``content`` to a temporary ``.yaml`` file and return its path.

        Deletion of the file is registered with ``addCleanup``, so it is
        removed after the test whether the test passes or fails.
        """
        import os
        import tempfile

        # Explicit UTF-8: the fixtures contain Korean text, which a
        # non-UTF-8 platform default encoding may be unable to encode.
        # NOTE(review): assumes load_checklist reads files as UTF-8 - confirm.
        with tempfile.NamedTemporaryFile(
            mode="w", suffix=".yaml", delete=False, encoding="utf-8"
        ) as handle:
            # Register cleanup before writing so a failed write cannot leak
            # the file.
            self.addCleanup(os.unlink, handle.name)
            handle.write(content)
        return handle.name

    @staticmethod
    def _checklist_yaml(item_count):
        """Return checklist YAML text containing ``item_count`` generic items.

        Items are named ``i1``..``iN`` with questions ``q1``..``qN`` and a
        weight of 1.0 each.
        """
        item_lines = "".join(
            f'  - {{id: i{k}, question: "q{k}", weight: 1.0}}\n'
            for k in range(1, item_count + 1)
        )
        return (
            "name: test\n"
            "version: 1\n"
            'description: "test"\n'
            "items:\n"
            f"{item_lines}"
            "scoring:\n"
            '  method: "weighted_average"\n'
        )

    def test_load_normal_yaml(self):
        """A well-formed YAML file loads with the expected values."""
        result = self.load_checklist(self._write_yaml(SAMPLE_CHECKLIST_YAML))

        self.assertEqual(result["name"], "skill-quality")
        self.assertEqual(result["version"], 1)
        self.assertEqual(len(result["items"]), 3)
        self.assertEqual(result["items"][0]["id"], "clarity")
        self.assertEqual(result["items"][0]["weight"], 2.0)

    def test_too_many_items_raises_value_error(self):
        """A checklist with seven items is rejected with ValueError."""
        path = self._write_yaml(self._checklist_yaml(7))

        with self.assertRaises(ValueError):
            self.load_checklist(path)

    def test_exactly_six_items_ok(self):
        """A checklist with exactly six items is accepted."""
        result = self.load_checklist(self._write_yaml(self._checklist_yaml(6)))

        self.assertEqual(len(result["items"]), 6)

    def test_returns_dict_with_required_fields(self):
        """The loaded checklist contains every required top-level field."""
        result = self.load_checklist(self._write_yaml(SAMPLE_CHECKLIST_YAML))

        for field in ("name", "version", "description", "items", "scoring"):
            self.assertIn(field, result)


class TestFormatChecklistForPrompt(unittest.TestCase):
    """Tests for ``format_checklist_for_prompt``."""

    def setUp(self):
        from autoresearch.judge import format_checklist_for_prompt

        self.format_checklist = format_checklist_for_prompt

    def _formatted(self):
        """Return the formatter's output for the sample checklist."""
        return self.format_checklist(SAMPLE_CHECKLIST_DICT)

    def test_format_contains_id(self):
        """Every item id appears in the output."""
        text = self._formatted()
        for item_id in ("clarity", "examples", "no_ambiguity"):
            self.assertIn(item_id, text)

    def test_format_contains_question(self):
        """Item questions appear in the output."""
        text = self._formatted()
        for question in ("지시문이 명확한가?", "예시가 포함되어 있는가?"):
            self.assertIn(question, text)

    def test_format_contains_weight(self):
        """Every item weight appears in the output."""
        text = self._formatted()
        for weight_text in ("2.0", "1.0", "1.5"):
            self.assertIn(weight_text, text)

    def test_format_each_item_on_separate_line(self):
        """The output has at least one non-blank line per item."""
        non_blank = [row for row in self._formatted().split("\n") if row.strip()]
        self.assertGreaterEqual(len(non_blank), 3)

    def test_format_dash_prefix(self):
        """Every non-blank line starts with '- ' once stripped."""
        for raw in self._formatted().split("\n"):
            line = raw.strip()
            if not line:
                continue
            self.assertTrue(line.startswith("- "), f"Line does not start with '- ': {line!r}")

    def test_format_weight_keyword(self):
        """The literal 'weight:' keyword appears in the output."""
        self.assertIn("weight:", self._formatted())


class TestBuildJudgePrompt(unittest.TestCase):
    """Tests for ``build_judge_prompt``."""

    def setUp(self):
        from autoresearch.judge import build_judge_prompt

        self.build_judge_prompt = build_judge_prompt

    def _prompt_for(self, skill_output):
        """Build a prompt from the sample checklist and ``skill_output``."""
        return self.build_judge_prompt(SAMPLE_CHECKLIST_DICT, skill_output)

    def test_contains_skill_output(self):
        """The skill output text is embedded in the prompt."""
        self.assertIn("결과물 내용", self._prompt_for("결과물 내용"))

    def test_contains_checklist_items(self):
        """Checklist items are embedded in the prompt."""
        self.assertIn("clarity", self._prompt_for("결과물"))

    def test_returns_string(self):
        """The prompt is returned as a ``str``."""
        self.assertIsInstance(self._prompt_for("결과물"), str)


class TestParseJudgeResponse(unittest.TestCase):
    """Tests for ``parse_judge_response``."""

    def setUp(self):
        from autoresearch.judge import parse_judge_response

        self.parse = parse_judge_response

    def _make_json_response(self, items_data, total_score=0.9, summary="좋음"):
        """Serialize a judge-style payload to a JSON string.

        Args:
            items_data: List of per-item verdict dicts (id/result/reason).
            total_score: Score the simulated LLM claims.
            summary: Summary text the simulated LLM returns.

        Returns:
            The payload as JSON text with non-ASCII characters kept as-is.
        """
        import json

        data = {
            "items": items_data,
            "total_score": total_score,
            "summary": summary,
        }
        return json.dumps(data, ensure_ascii=False)

    @staticmethod
    def _uniform_items(result, reason):
        """Return one verdict per sample item, all sharing ``result``/``reason``."""
        return [
            {"id": item_id, "result": result, "reason": reason}
            for item_id in ("clarity", "examples", "no_ambiguity")
        ]

    @staticmethod
    def _mixed_items():
        """Return verdicts where only ``examples`` fails."""
        return [
            {"id": "clarity", "result": "PASS", "reason": "명확함"},
            {"id": "examples", "result": "FAIL", "reason": "예시 없음"},
            {"id": "no_ambiguity", "result": "PASS", "reason": "모호함 없음"},
        ]

    def test_parse_normal_response(self):
        """A well-formed JSON response is parsed into items/score/summary."""
        response = self._make_json_response(self._mixed_items())
        result = self.parse(response, SAMPLE_CHECKLIST_DICT)

        self.assertEqual(len(result["items"]), 3)
        self.assertEqual(result["items"][0]["result"], "PASS")
        self.assertEqual(result["items"][1]["result"], "FAIL")
        self.assertIsInstance(result["total_score"], float)
        self.assertIsInstance(result["summary"], str)

    def test_weighted_average_recalculation(self):
        """total_score is recomputed as a weighted average.

        Checklist weights: clarity=2.0, examples=1.0, no_ambiguity=1.5
        PASS: clarity(2.0), no_ambiguity(1.5) -> 3.5 / 4.5 = 0.7777...
        FAIL: examples(1.0)
        """
        # The simulated LLM claims total_score=0.5; the parser is expected to
        # ignore that and recompute from the per-item verdicts.
        response = self._make_json_response(self._mixed_items(), total_score=0.5)
        result = self.parse(response, SAMPLE_CHECKLIST_DICT)

        expected = (2.0 + 0.0 + 1.5) / (2.0 + 1.0 + 1.5)
        self.assertAlmostEqual(result["total_score"], expected, places=5)

    def test_all_pass_score_is_one(self):
        """All PASS yields a score of 1.0, whatever the response claims."""
        response = self._make_json_response(
            self._uniform_items("PASS", "ok"), total_score=0.0
        )
        result = self.parse(response, SAMPLE_CHECKLIST_DICT)
        self.assertAlmostEqual(result["total_score"], 1.0, places=5)

    def test_all_fail_score_is_zero(self):
        """All FAIL yields a score of 0.0, whatever the response claims."""
        response = self._make_json_response(
            self._uniform_items("FAIL", "no"), total_score=1.0
        )
        result = self.parse(response, SAMPLE_CHECKLIST_DICT)
        self.assertAlmostEqual(result["total_score"], 0.0, places=5)

    def test_result_keys(self):
        """The parsed result exposes the items, total_score and summary keys."""
        response = self._make_json_response(self._uniform_items("PASS", "ok"))
        result = self.parse(response, SAMPLE_CHECKLIST_DICT)
        for key in ("items", "total_score", "summary"):
            self.assertIn(key, result)

    def test_invalid_json_raises(self):
        """Text that is not JSON raises ValueError."""
        # Only ValueError is accepted (json.JSONDecodeError is a subclass of
        # it). Accepting bare Exception would also let an unrelated
        # AttributeError/TypeError pass this test.
        with self.assertRaises(ValueError):
            self.parse("이건 JSON이 아닙니다", SAMPLE_CHECKLIST_DICT)

    def test_json_embedded_in_text(self):
        """JSON surrounded by free-form text is still parsed."""
        json_str = self._make_json_response(self._uniform_items("PASS", "ok"))
        response_with_text = f"다음은 채점 결과입니다:\n{json_str}\n\n이상입니다."
        result = self.parse(response_with_text, SAMPLE_CHECKLIST_DICT)
        self.assertIsNotNone(result)
        self.assertIn("items", result)


class TestJudgeOutput(unittest.TestCase):
    """Tests for ``judge_output`` with ``call_claude`` replaced by a mock."""

    def _make_llm_json(self, items_data, total_score=1.0, summary="LGTM"):
        """Return the JSON text a simulated LLM would answer with."""
        import json

        payload = {
            "items": items_data,
            "total_score": total_score,
            "summary": summary,
        }
        return json.dumps(payload, ensure_ascii=False)

    def _all_pass_json(self):
        """Return a simulated LLM answer in which every sample item passes."""
        verdicts = [
            {"id": item_id, "result": "PASS", "reason": "ok"}
            for item_id in ("clarity", "examples", "no_ambiguity")
        ]
        return self._make_llm_json(verdicts)

    @patch("autoresearch.judge.call_claude")
    def test_judge_output_returns_result_with_tokens(self, mock_call_claude):
        """judge_output returns the parsed verdict plus token counts."""
        mock_call_claude.return_value = self._all_pass_json()

        from autoresearch.judge import judge_output

        outcome = judge_output(SAMPLE_CHECKLIST_DICT, "결과물")

        for key in ("items", "total_score", "summary"):
            self.assertIn(key, outcome)
        # NOTE(review): the mock returns a plain string, so how judge_output
        # arrives at positive token counts is not visible here - confirm.
        self.assertGreater(outcome["input_tokens"], 0)
        self.assertGreater(outcome["output_tokens"], 0)

    @patch("autoresearch.judge.call_claude")
    def test_judge_output_default_model_haiku(self, mock_call_claude):
        """The default model name passed to call_claude contains 'haiku'."""
        mock_call_claude.return_value = self._all_pass_json()

        from autoresearch.judge import judge_output

        judge_output(SAMPLE_CHECKLIST_DICT, "결과물")

        # Reads the keyword arguments of the most recent call, so this
        # requires ``model`` to be passed by keyword.
        recorded_call = mock_call_claude.call_args
        self.assertIn("haiku", recorded_call.kwargs["model"])

    @patch("autoresearch.judge.call_claude")
    def test_judge_output_calls_call_claude(self, mock_call_claude):
        """call_claude is invoked exactly once."""
        mock_call_claude.return_value = self._all_pass_json()

        from autoresearch.judge import judge_output

        judge_output(SAMPLE_CHECKLIST_DICT, "결과물")

        mock_call_claude.assert_called_once()


# Run the tests with unittest's command-line runner when this file is
# executed directly as a script.
if __name__ == "__main__":
    unittest.main()
