"""test_runner.py - runner 모듈 TDD 테스트 (RED → GREEN)"""

import sys
import tempfile
import unittest
from pathlib import Path
from typing import Any
from unittest.mock import MagicMock, call, patch

# Import path bootstrap: put the third ancestor directory of this file at the
# front of sys.path unless it is already listed, so the `autoresearch` imports
# used by the tests below can resolve.
# NOTE(review): the original note names /home/jay/workspace/scripts as the
# directory this is meant to add — confirm against the repository layout.
_AUTORESEARCH_PARENT = str(Path(__file__).resolve().parent.parent.parent)
if _AUTORESEARCH_PARENT not in sys.path:
    sys.path.insert(0, _AUTORESEARCH_PARENT)


# ---------------------------------------------------------------------------
# Helper: build a temporary skill directory
# ---------------------------------------------------------------------------


def _make_skill_dir(tmp_path: Path, skill_name: str, content: str) -> None:
    """Write ``content`` to ``<tmp_path>/<skill_name>/SKILL.md`` as UTF-8.

    The skill directory (and any missing parent directories) is created
    first; a directory that already exists is reused.
    """
    skill_file = tmp_path / skill_name / "SKILL.md"
    skill_file.parent.mkdir(parents=True, exist_ok=True)
    skill_file.write_text(content, encoding="utf-8")


# SKILL.md fixture text: a section fenced by two `---` lines (name/version)
# followed by a short markdown body. Written to disk by _make_skill_dir in
# the tests below.
SAMPLE_SKILL_CONTENT = """\
---
name: test-skill
version: 1
---

# 테스트 스킬 본문

규칙:
- 규칙 1
"""

# Checklist fixture: passed to run_round() directly and used as the return
# value of the mocked load_checklist. It holds a single item plus a scoring
# section.
SAMPLE_CHECKLIST: dict[str, Any] = {
    "name": "quality",
    "version": 1,
    "description": "품질 체크",
    "items": [
        {"id": "clarity", "question": "명확한가?", "weight": 1.0},
    ],
    "scoring": {"method": "weighted_average"},
}


# ---------------------------------------------------------------------------
# TestBackupOriginal
# ---------------------------------------------------------------------------


class TestBackupOriginal(unittest.TestCase):
    """Unit tests for backup_original()."""

    _SKILL = "my-skill"

    def setUp(self) -> None:
        # Every test starts from a fresh skills root that holds one skill.
        holder = tempfile.TemporaryDirectory()
        self.addCleanup(holder.cleanup)
        self.skills_root = holder.name
        _make_skill_dir(Path(self.skills_root), self._SKILL, SAMPLE_SKILL_CONTENT)
        # Location the tests expect the backup to be written to.
        self.backup_file = Path(self.skills_root) / self._SKILL / "evals" / "backup-original.md"

    def test_backup_creates_file(self) -> None:
        """The backup is created at evals/backup-original.md and its path is returned."""
        from autoresearch.runner import backup_original

        returned = backup_original(self._SKILL, self.skills_root)

        self.assertTrue(self.backup_file.exists(), f"백업 파일이 없음: {self.backup_file}")
        self.assertEqual(returned, str(self.backup_file))

    def test_backup_content_matches_original(self) -> None:
        """The backup file holds exactly the original SKILL.md text."""
        from autoresearch.runner import backup_original

        backup_original(self._SKILL, self.skills_root)

        saved_text = self.backup_file.read_text(encoding="utf-8")
        self.assertEqual(saved_text, SAMPLE_SKILL_CONTENT)

    def test_backup_returns_path_string(self) -> None:
        """The return value is a ``str`` path."""
        from autoresearch.runner import backup_original

        self.assertIsInstance(backup_original(self._SKILL, self.skills_root), str)


# ---------------------------------------------------------------------------
# TestRestoreSkill
# ---------------------------------------------------------------------------


class TestRestoreSkill(unittest.TestCase):
    """Unit tests for restore_skill()."""

    def setUp(self) -> None:
        # Scratch skills root, removed again once the test has finished.
        scratch = tempfile.TemporaryDirectory()
        self.addCleanup(scratch.cleanup)
        self.root = scratch.name
        self.skill_md = Path(self.root) / "my-skill" / "SKILL.md"

    def test_restore_writes_original_content(self) -> None:
        """After a restore, SKILL.md contains the original body again."""
        _make_skill_dir(Path(self.root), "my-skill", "---\nname: x\n---\n원본 본문")

        from autoresearch.runner import restore_skill

        # Overwrite the file on disk to simulate an applied mutation.
        self.skill_md.write_text("---\nname: x\n---\n변경된 본문", encoding="utf-8")

        restore_skill("my-skill", "원본 본문", self.root, "name: x")

        restored = self.skill_md.read_text(encoding="utf-8")
        self.assertIn("원본 본문", restored)
        self.assertNotIn("변경된 본문", restored)

    def test_restore_preserves_frontmatter(self) -> None:
        """A restore keeps the front matter lines next to the restored body."""
        _make_skill_dir(Path(self.root), "my-skill", "---\nname: x\nversion: 2\n---\n본문")

        from autoresearch.runner import restore_skill

        restore_skill("my-skill", "복원된 본문", self.root, "name: x\nversion: 2")

        restored = self.skill_md.read_text(encoding="utf-8")
        for expected_fragment in ("name: x", "version: 2", "복원된 본문"):
            self.assertIn(expected_fragment, restored)


# ---------------------------------------------------------------------------
# TestApplyMutation
# ---------------------------------------------------------------------------


class TestApplyMutation(unittest.TestCase):
    """Unit tests for apply_mutation()."""

    def setUp(self) -> None:
        # Fresh skills root with the sample skill already written to disk.
        workspace = tempfile.TemporaryDirectory()
        self.addCleanup(workspace.cleanup)
        self.root = workspace.name
        _make_skill_dir(Path(self.root), "my-skill", SAMPLE_SKILL_CONTENT)

    def _apply_and_read(self, new_body: str, frontmatter: str) -> str:
        """Apply a mutation to ``my-skill`` and return the resulting SKILL.md text."""
        from autoresearch.runner import apply_mutation

        apply_mutation("my-skill", new_body, self.root, frontmatter)
        skill_file = Path(self.root) / "my-skill" / "SKILL.md"
        return skill_file.read_text(encoding="utf-8")

    def test_apply_mutation_writes_new_body(self) -> None:
        """The mutated body is stored in SKILL.md."""
        written = self._apply_and_read("# 새로운 본문\n- 새 규칙", "name: test-skill\nversion: 1")

        self.assertIn("# 새로운 본문", written)
        self.assertIn("- 새 규칙", written)

    def test_apply_mutation_preserves_frontmatter(self) -> None:
        """The file still starts with the front matter after a mutation."""
        written = self._apply_and_read("새 본문", "name: test-skill\nversion: 1")

        self.assertTrue(written.startswith("---\n"), f"프론트매터 시작 없음: {written[:50]!r}")
        self.assertIn("name: test-skill", written)

    def test_apply_mutation_frontmatter_separator_present(self) -> None:
        """The stored file contains the ``---`` separator at least twice."""
        written = self._apply_and_read("본문", "name: x")

        # One separator opens the front matter and a second one closes it.
        separator_count = written.count("---")
        self.assertGreaterEqual(separator_count, 2)


# ---------------------------------------------------------------------------
# TestRunRound
# ---------------------------------------------------------------------------


class TestRunRound(unittest.TestCase):
    """Unit tests for run_round() with its external collaborators mocked.

    ``@patch`` decorators are applied bottom-up, so the mock created by the
    lowest decorator is the first mock parameter of each test method.
    """

    def _make_mutation_result(
        self,
        mutation_type: str = "규칙 추가",
        description: str = "새 규칙",
        modified_body: str = "# 변경된 본문",
        input_tokens: int = 100,
        output_tokens: int = 50,
    ) -> dict[str, Any]:
        """Build the dict used as the mocked generate_mutation return value."""
        return {
            "mutation_type": mutation_type,
            "mutation_description": description,
            "modified_skill_md": modified_body,
            "input_tokens": input_tokens,
            "output_tokens": output_tokens,
        }

    def _make_execute_result(self, output: str = "결과물") -> dict[str, Any]:
        """Build the dict used as the mocked execute_skill return value."""
        return {
            "output": output,
            "input_tokens": 10,
            "output_tokens": 20,
            "model": "claude-sonnet-4-6",
        }

    def _make_judge_result(self, score: float = 0.8) -> dict[str, Any]:
        """Build the dict used as the mocked judge_output return value."""
        return {
            "items": [{"id": "clarity", "result": "PASS", "reason": "ok"}],
            "total_score": score,
            "summary": "좋음",
            "input_tokens": 5,
            "output_tokens": 10,
        }

    def _run_single_round(
        self,
        skills_dir: str,
        prev_score: float,
        test_input: str,
    ) -> tuple[str, float, dict[str, Any]]:
        """Call run_round() for round 1 of skill ``sk`` with a fresh log.

        Must be called while the test's patches are active. Returns the
        ``(body, score, log)`` tuple produced by run_round().
        """
        from autoresearch.changelog import create_log
        from autoresearch.runner import run_round

        return run_round(
            skill_name="sk",
            current_body="# 원래 본문",
            frontmatter="name: sk",
            checklist=SAMPLE_CHECKLIST,
            test_input=test_input,
            log=create_log("sk"),
            round_num=1,
            prev_score=prev_score,
            skills_dir=skills_dir,
            model_mutate="claude-sonnet-4-6",
            model_judge="claude-haiku-4-5-20251001",
        )

    @patch("autoresearch.runner.judge_output")
    @patch("autoresearch.runner.execute_skill")
    @patch("autoresearch.runner.apply_mutation")
    @patch("autoresearch.runner.generate_mutation")
    @patch("autoresearch.runner.get_recent_changelog")
    def test_run_round_keep_when_score_improves(
        self,
        mock_recent: MagicMock,
        mock_gen: MagicMock,
        mock_apply: MagicMock,
        mock_exec: MagicMock,
        mock_judge: MagicMock,
    ) -> None:
        """A judged score above prev_score is recorded as a kept round."""
        with tempfile.TemporaryDirectory() as tmpdir:
            _make_skill_dir(Path(tmpdir), "sk", SAMPLE_SKILL_CONTENT)

            mock_recent.return_value = "변경 이력 없음"
            mock_gen.return_value = self._make_mutation_result(modified_body="# 새 본문")
            mock_exec.return_value = self._make_execute_result()
            mock_judge.return_value = self._make_judge_result(score=0.9)

            _, score, updated_log = self._run_single_round(
                tmpdir, prev_score=0.6, test_input="테스트 입력"
            )

            self.assertGreaterEqual(score, 0.6)
            self.assertEqual(updated_log["kept"], 1)
            self.assertEqual(updated_log["reverted"], 0)

    @patch("autoresearch.runner.judge_output")
    @patch("autoresearch.runner.execute_skill")
    @patch("autoresearch.runner.apply_mutation")
    @patch("autoresearch.runner.restore_skill")
    @patch("autoresearch.runner.generate_mutation")
    @patch("autoresearch.runner.get_recent_changelog")
    def test_run_round_revert_when_score_drops(
        self,
        mock_recent: MagicMock,
        mock_gen: MagicMock,
        mock_restore: MagicMock,
        mock_apply: MagicMock,
        mock_exec: MagicMock,
        mock_judge: MagicMock,
    ) -> None:
        """A judged score below prev_score is reverted and restore_skill is called."""
        with tempfile.TemporaryDirectory() as tmpdir:
            _make_skill_dir(Path(tmpdir), "sk", SAMPLE_SKILL_CONTENT)

            mock_recent.return_value = "변경 이력 없음"
            mock_gen.return_value = self._make_mutation_result(modified_body="# 나쁜 본문")
            mock_exec.return_value = self._make_execute_result()
            mock_judge.return_value = self._make_judge_result(score=0.3)

            _, score, updated_log = self._run_single_round(
                tmpdir, prev_score=0.8, test_input="입력"
            )

            # The returned score is rolled back to prev_score.
            self.assertEqual(score, 0.8)
            self.assertEqual(updated_log["reverted"], 1)
            self.assertEqual(updated_log["kept"], 0)
            mock_restore.assert_called_once()

    @patch("autoresearch.runner.judge_output")
    @patch("autoresearch.runner.execute_skill")
    @patch("autoresearch.runner.apply_mutation")
    @patch("autoresearch.runner.restore_skill")
    @patch("autoresearch.runner.generate_mutation")
    @patch("autoresearch.runner.get_recent_changelog")
    def test_run_round_skips_on_mutation_error(
        self,
        mock_recent: MagicMock,
        mock_gen: MagicMock,
        mock_restore: MagicMock,
        mock_apply: MagicMock,
        mock_exec: MagicMock,
        mock_judge: MagicMock,
    ) -> None:
        """If generating the mutation raises, the round is skipped (score and body kept)."""
        with tempfile.TemporaryDirectory() as tmpdir:
            _make_skill_dir(Path(tmpdir), "sk", SAMPLE_SKILL_CONTENT)

            mock_recent.return_value = "이력 없음"
            mock_gen.side_effect = Exception("API 실패")

            body, score, _ = self._run_single_round(
                tmpdir, prev_score=0.7, test_input="입력"
            )

            # On error the previous score and body are returned unchanged.
            self.assertEqual(score, 0.7)
            self.assertEqual(body, "# 원래 본문")
            # Neither the mutation writer nor the judge may have been called.
            mock_apply.assert_not_called()
            mock_judge.assert_not_called()


# ---------------------------------------------------------------------------
# TestRun (main loop)
# ---------------------------------------------------------------------------


class TestRun(unittest.TestCase):
    """Tests for the run() main loop with its collaborators mocked.

    ``setUp`` patches the names listed in ``_PATCHED_NAMES`` inside
    ``autoresearch.runner`` and gives them the defaults shared by all tests;
    each test only overrides what it needs. ``restore_skill`` is patched only
    by the test that asserts on it.
    """

    # Attributes of autoresearch.runner replaced by mocks for every test.
    _PATCHED_NAMES = (
        "load_skill",
        "load_checklist",
        "execute_skill",
        "judge_output",
        "backup_original",
        "run_round",
        "finalize_log",
        "save_log",
    )

    def setUp(self) -> None:
        # Temporary skills root holding the sample skill "sk".
        tmp = tempfile.TemporaryDirectory()
        self.addCleanup(tmp.cleanup)
        self.skills_dir = tmp.name
        _make_skill_dir(Path(self.skills_dir), "sk", SAMPLE_SKILL_CONTENT)

        # Start one patcher per collaborator; each is stopped again through
        # addCleanup, which also runs when a later setUp step fails.
        self.mocks: dict[str, MagicMock] = {}
        for name in self._PATCHED_NAMES:
            patcher = patch(f"autoresearch.runner.{name}")
            self.mocks[name] = patcher.start()
            self.addCleanup(patcher.stop)

        # Defaults shared by every test.
        self.mocks["load_skill"].return_value = ("name: sk", "# 본문")
        self.mocks["load_checklist"].return_value = SAMPLE_CHECKLIST
        self.mocks["execute_skill"].return_value = self._make_execute_result()
        self.mocks["judge_output"].return_value = self._make_judge_result(score=0.5)
        self.mocks["backup_original"].return_value = "/tmp/backup.md"
        self.mocks["finalize_log"].side_effect = self._fake_finalize
        self.mocks["save_log"].return_value = "/tmp/log.json"

    def _make_execute_result(self, output: str = "결과물") -> dict[str, Any]:
        """Build the dict used as the mocked execute_skill return value."""
        return {"output": output, "input_tokens": 10, "output_tokens": 20, "model": "m"}

    def _make_judge_result(self, score: float = 0.5) -> dict[str, Any]:
        """Build the dict used as the mocked judge_output return value."""
        return {
            "items": [],
            "total_score": score,
            "summary": "ok",
            "input_tokens": 5,
            "output_tokens": 10,
        }

    def _make_mutation_result(self, body: str = "# 변경된 본문") -> dict[str, Any]:
        """Build a mutation-result dict (not used by the tests in this class)."""
        return {
            "mutation_type": "규칙 추가",
            "mutation_description": "새 규칙 추가",
            "modified_skill_md": body,
            "input_tokens": 100,
            "output_tokens": 50,
        }

    @staticmethod
    def _fake_finalize(log: dict[str, Any], score: float) -> dict[str, Any]:
        """Stand-in for finalize_log: copy ``log`` and fill in the final fields."""
        return {**log, "final_score": score, "total_rounds": len(log["rounds"])}

    @staticmethod
    def _single_round_log() -> dict[str, Any]:
        """Return a log dict that already contains one kept round."""
        return {
            "skill": "sk",
            "started_at": "2024-01-01T00:00:00+00:00",
            "rounds": [{"round": 1, "decision": "kept"}],
            "final_score": 0,
            "total_rounds": 0,
            "kept": 1,
            "reverted": 0,
        }

    def _script_run_round(self, scores: list[float], default: float) -> None:
        """Make the mocked run_round return ``scores`` in order, then ``default``.

        Every call appends one kept round to a copy of the log it was given,
        so the caller's log object is never mutated.
        """
        calls = 0

        def fake_round(*args: Any, **kwargs: Any) -> tuple[str, float, dict[str, Any]]:
            nonlocal calls
            idx = calls
            calls += 1
            score = scores[idx] if idx < len(scores) else default
            # The log is the sixth positional parameter when not passed by keyword.
            log_arg = kwargs["log"] if "log" in kwargs else args[5]
            updated_log: dict[str, Any] = dict(log_arg)
            rounds = list(updated_log.get("rounds", []))
            rounds.append({"round": idx + 1, "decision": "kept"})
            updated_log["rounds"] = rounds
            updated_log["kept"] = updated_log.get("kept", 0) + 1
            return ("# 본문", score, updated_log)

        self.mocks["run_round"].side_effect = fake_round

    def _run(self, **overrides: Any) -> Any:
        """Call run() for skill ``sk`` in the temporary skills dir.

        ``overrides`` are passed through as additional keyword arguments
        (for example ``rounds`` or ``dry_run``).
        """
        from autoresearch.runner import run

        params: dict[str, Any] = {
            "skill_name": "sk",
            "checklist_path": "/fake/checklist.yaml",
            "test_input": "테스트",
            "skills_dir": self.skills_dir,
        }
        params.update(overrides)
        return run(**params)

    def test_run_returns_log_dict(self) -> None:
        """run() returns the log as a dict."""
        self.mocks["backup_original"].return_value = str(
            Path(self.skills_dir) / "sk" / "evals" / "backup-original.md"
        )
        # run_round reports a score above target_score on every call, so the
        # consecutive-target stop condition can be reached.
        self.mocks["run_round"].return_value = ("# 본문", 0.96, self._single_round_log())

        result = self._run(rounds=10, target_score=0.95, consecutive=3)

        self.assertIsInstance(result, dict)
        self.assertIn("skill", result)

    def test_run_dry_run_executes_one_round(self) -> None:
        """With dry_run=True exactly one round is executed."""
        self.mocks["run_round"].return_value = ("# 본문", 0.5, self._single_round_log())

        self._run(rounds=50, target_score=0.95, consecutive=3, dry_run=True)

        self.assertEqual(self.mocks["run_round"].call_count, 1)

    def test_run_dry_run_restores_original(self) -> None:
        """After a dry run the original skill is restored."""
        self.mocks["load_skill"].return_value = ("name: sk", "# 원본 본문")
        self.mocks["run_round"].return_value = ("# 변경된 본문", 0.5, self._single_round_log())

        with patch("autoresearch.runner.restore_skill") as mock_restore:
            self._run(dry_run=True)

        mock_restore.assert_called_once()

    def test_run_stops_after_consecutive_target(self) -> None:
        """The loop ends early once target_score was hit ``consecutive`` times in a row."""
        # Every round reports a score above the target.
        self._script_run_round(scores=[], default=0.97)

        self._run(rounds=50, target_score=0.95, consecutive=3)

        # Stopping after three consecutive hits means far fewer calls than
        # the maximum of 50 rounds.
        calls = self.mocks["run_round"].call_count
        self.assertLessEqual(calls, 10)
        self.assertGreaterEqual(calls, 3)

    def test_run_consecutive_count_resets_on_low_score(self) -> None:
        """A round below target_score resets the consecutive-hit counter."""
        # Score pattern: 0.96, 0.96, 0.5 (reset), 0.96, 0.96, 0.96 (stop).
        self._script_run_round(scores=[0.96, 0.96, 0.5, 0.96, 0.96, 0.96], default=0.96)

        self._run(rounds=50, target_score=0.95, consecutive=3)

        # Because of the reset at least six rounds have to run.
        self.assertGreaterEqual(self.mocks["run_round"].call_count, 6)

    def test_run_calls_save_log(self) -> None:
        """save_log is called once when run() finishes."""
        self.mocks["run_round"].return_value = ("# 본문", 0.5, self._single_round_log())

        self._run(rounds=2)

        self.mocks["save_log"].assert_called_once()


# ---------------------------------------------------------------------------
# TestMain (CLI)
# ---------------------------------------------------------------------------


class TestMain(unittest.TestCase):
    """Tests for main() command-line argument parsing."""

    @staticmethod
    def _summary(
        skill: str,
        total_rounds: int,
        final_score: float,
        kept: int,
        reverted: int,
    ) -> dict[str, Any]:
        """Build the log dict handed back by the mocked run()."""
        return {
            "skill": skill,
            "total_rounds": total_rounds,
            "final_score": final_score,
            "kept": kept,
            "reverted": reverted,
            "rounds": [],
            "started_at": "2024-01-01T00:00:00+00:00",
        }

    @staticmethod
    def _required_argv(test_input: str = "입력") -> list[str]:
        """Return the minimal argv used by the tests: skill, checklist, test input."""
        return ["--skill", "sk", "--checklist", "/c.yaml", "--test-input", test_input]

    @patch("autoresearch.runner.run")
    def test_main_passes_correct_args(self, mock_run: MagicMock) -> None:
        """Every CLI option is forwarded to run() under the matching keyword."""
        mock_run.return_value = self._summary("my-skill", 5, 0.8, 3, 2)

        from autoresearch.runner import main

        options = {
            "--skill": "my-skill",
            "--checklist": "/path/to/checklist.yaml",
            "--test-input": "테스트 입력",
            "--rounds": "20",
            "--target-score": "0.9",
            "--consecutive": "2",
            "--model-mutate": "claude-opus-4-6",
            "--model-judge": "claude-haiku-4-5-20251001",
            "--skills-dir": "/some/skills",
        }
        # Flatten to ["--skill", "my-skill", "--checklist", ...] in insertion order.
        argv = [token for pair in options.items() for token in pair]

        exit_code = main(argv)

        self.assertEqual(exit_code, 0)
        mock_run.assert_called_once_with(
            skill_name="my-skill",
            checklist_path="/path/to/checklist.yaml",
            test_input="테스트 입력",
            rounds=20,
            target_score=0.9,
            consecutive=2,
            model_mutate="claude-opus-4-6",
            model_judge="claude-haiku-4-5-20251001",
            dry_run=False,
            skills_dir="/some/skills",
        )

    @patch("autoresearch.runner.run")
    def test_main_dry_run_flag(self, mock_run: MagicMock) -> None:
        """The --dry-run flag reaches run() as dry_run=True."""
        mock_run.return_value = self._summary("sk", 1, 0.5, 1, 0)

        from autoresearch.runner import main

        main(self._required_argv() + ["--dry-run"])

        passed = mock_run.call_args.kwargs
        self.assertTrue(passed["dry_run"])

    @patch("autoresearch.runner.run")
    def test_main_default_values(self, mock_run: MagicMock) -> None:
        """Options that are not given fall back to the expected defaults."""
        mock_run.return_value = self._summary("sk", 0, 0.0, 0, 0)

        from autoresearch.runner import main

        main(self._required_argv())

        passed = mock_run.call_args.kwargs
        self.assertEqual(passed["rounds"], 50)
        self.assertAlmostEqual(passed["target_score"], 0.95)
        self.assertEqual(passed["consecutive"], 3)
        self.assertEqual(passed["model_mutate"], "claude-sonnet-4-6")
        self.assertIn("haiku", passed["model_judge"])
        self.assertFalse(passed["dry_run"])
        self.assertEqual(passed["skills_dir"], "/home/jay/workspace/skills")

    @patch("autoresearch.runner.run")
    def test_main_returns_zero_on_success(self, mock_run: MagicMock) -> None:
        """main() returns exit code 0 on success."""
        mock_run.return_value = self._summary("sk", 1, 0.8, 1, 0)

        from autoresearch.runner import main

        exit_code = main(self._required_argv(test_input="in"))
        self.assertEqual(exit_code, 0)


# ---------------------------------------------------------------------------
# TestLoadTestInputs
# ---------------------------------------------------------------------------


class TestLoadTestInputs(unittest.TestCase):
    """Unit tests for load_test_inputs()."""

    def test_load_valid_yaml(self) -> None:
        """A well-formed YAML file must load as a list with one entry per input."""
        from autoresearch.runner import load_test_inputs

        document = """\
inputs:
  - id: insurance-fa-recruit
    text: "보험 FA 모집 광고, 타겟: 30대 보험설계사"
  - id: insurance-product-promo
    text: "삼성생명 종신보험 신상품 출시 광고"
"""
        with tempfile.TemporaryDirectory() as workdir:
            source = Path(workdir) / "test-inputs.yaml"
            source.write_text(document, encoding="utf-8")

            loaded = load_test_inputs(str(source))

            self.assertEqual(len(loaded), 2)
            first = loaded[0]
            self.assertEqual(first["id"], "insurance-fa-recruit")
            self.assertIn("보험 FA 모집", first["text"])
            self.assertEqual(loaded[1]["id"], "insurance-product-promo")

    def test_load_missing_file(self) -> None:
        """A path that does not exist must raise FileNotFoundError."""
        from autoresearch.runner import load_test_inputs

        missing_path = "/nonexistent/path/test-inputs.yaml"
        self.assertRaises(FileNotFoundError, load_test_inputs, missing_path)

    def test_load_empty_inputs(self) -> None:
        """An empty ``inputs`` list must raise ValueError."""
        from autoresearch.runner import load_test_inputs

        with tempfile.TemporaryDirectory() as workdir:
            source = Path(workdir) / "empty.yaml"
            source.write_text("inputs: []\n", encoding="utf-8")

            self.assertRaises(ValueError, load_test_inputs, str(source))


# ---------------------------------------------------------------------------
# TestMultiInput
# ---------------------------------------------------------------------------


class TestMultiInput(unittest.TestCase):
    """Tests for multi-input support in run_round() and run()."""

    def _make_mutation_result(self, modified_body: str = "# 변경된 본문") -> dict[str, Any]:
        """Build the canned result used for the patched generate_mutation()."""
        mutation: dict[str, Any] = {
            "mutation_type": "규칙 추가",
            "mutation_description": "새 규칙 추가",
            "modified_skill_md": modified_body,
            "input_tokens": 100,
            "output_tokens": 50,
        }
        return mutation

    def _make_execute_result(self, output: str = "결과물") -> dict[str, Any]:
        """Build the canned result used for the patched execute_skill()."""
        execution: dict[str, Any] = {
            "output": output,
            "input_tokens": 10,
            "output_tokens": 20,
            "model": "claude-sonnet-4-6",
        }
        return execution

    def _make_judge_result(self, score: float = 0.8) -> dict[str, Any]:
        """Build the canned result used for the patched judge_output()."""
        verdict: dict[str, Any] = {
            "items": [{"id": "clarity", "result": "PASS", "reason": "ok"}],
            "total_score": score,
            "summary": "좋음",
            "input_tokens": 5,
            "output_tokens": 10,
        }
        return verdict

    @patch("autoresearch.runner.judge_output")
    @patch("autoresearch.runner.execute_skill")
    @patch("autoresearch.runner.apply_mutation")
    @patch("autoresearch.runner.generate_mutation")
    @patch("autoresearch.runner.get_recent_changelog")
    def test_run_round_multi_input_averages_scores(
        self,
        mock_recent: MagicMock,
        mock_gen: MagicMock,
        mock_apply: MagicMock,
        mock_exec: MagicMock,
        mock_judge: MagicMock,
    ) -> None:
        """With several inputs, the KEEP/REVERT decision must use the mean score."""
        from autoresearch.changelog import create_log
        from autoresearch.runner import run_round

        mock_recent.return_value = "변경 이력 없음"
        mock_gen.return_value = self._make_mutation_result()
        mock_exec.return_value = self._make_execute_result()
        # One judge verdict per input: 0.6 then 0.8, so the mean is 0.7.
        mock_judge.side_effect = [self._make_judge_result(score=s) for s in (0.6, 0.8)]

        with tempfile.TemporaryDirectory() as tmpdir:
            _make_skill_dir(Path(tmpdir), "sk", SAMPLE_SKILL_CONTENT)

            initial_log = create_log("sk")
            _, mean_score, updated_log = run_round(
                skill_name="sk",
                current_body="# 원래 본문",
                frontmatter="name: sk",
                checklist=SAMPLE_CHECKLIST,
                test_inputs=["입력1", "입력2"],
                log=initial_log,
                round_num=1,
                prev_score=0.5,
                skills_dir=tmpdir,
                model_mutate="claude-sonnet-4-6",
                model_judge="claude-haiku-4-5-20251001",
            )

            # Mean 0.7 >= prev_score 0.5, so the mutation is kept.
            self.assertAlmostEqual(mean_score, 0.7, places=5)
            self.assertEqual(updated_log["kept"], 1)
            # execute_skill and judge_output must each run once per input.
            self.assertEqual(mock_exec.call_count, 2)
            self.assertEqual(mock_judge.call_count, 2)

    @patch("autoresearch.runner.save_log")
    @patch("autoresearch.runner.finalize_log")
    @patch("autoresearch.runner.run_round")
    @patch("autoresearch.runner.backup_original")
    @patch("autoresearch.runner.judge_output")
    @patch("autoresearch.runner.execute_skill")
    @patch("autoresearch.runner.load_checklist")
    @patch("autoresearch.runner.load_skill")
    def test_run_multi_input_via_run_function(
        self,
        mock_load_skill: MagicMock,
        mock_load_checklist: MagicMock,
        mock_exec: MagicMock,
        mock_judge: MagicMock,
        mock_backup: MagicMock,
        mock_run_round: MagicMock,
        mock_finalize: MagicMock,
        mock_save: MagicMock,
    ) -> None:
        """run() must work with a test_inputs list and forward it to run_round()."""
        from autoresearch.runner import run

        def fake_finalize(log: dict[str, Any], score: float) -> dict[str, Any]:
            # Copy the log, then overwrite the two summary fields.
            finished = dict(log)
            finished["final_score"] = score
            finished["total_rounds"] = len(log["rounds"])
            return finished

        mock_load_skill.return_value = ("name: sk", "# 본문")
        mock_load_checklist.return_value = SAMPLE_CHECKLIST
        mock_exec.return_value = {"output": "결과물", "input_tokens": 10, "output_tokens": 20}
        # Two judge verdicts are queued, one per input.
        mock_judge.side_effect = [
            {"items": [], "total_score": s, "summary": "ok", "input_tokens": 5, "output_tokens": 10}
            for s in (0.6, 0.8)
        ]
        mock_backup.return_value = "/tmp/backup.md"

        round_log: dict[str, Any] = {
            "skill": "sk",
            "started_at": "2024-01-01T00:00:00+00:00",
            "rounds": [{"round": 1, "decision": "kept"}],
            "final_score": 0,
            "total_rounds": 0,
            "kept": 1,
            "reverted": 0,
        }
        mock_run_round.return_value = ("# 본문", 0.7, round_log)
        mock_finalize.side_effect = fake_finalize
        mock_save.return_value = "/tmp/log.json"

        with tempfile.TemporaryDirectory() as tmpdir:
            _make_skill_dir(Path(tmpdir), "sk", SAMPLE_SKILL_CONTENT)

            result = run(
                skill_name="sk",
                checklist_path="/fake/checklist.yaml",
                test_inputs=["입력1", "입력2"],
                rounds=1,
                skills_dir=tmpdir,
            )

            self.assertIsInstance(result, dict)
            # run_round must receive the same test_inputs list as a keyword argument.
            forwarded = mock_run_round.call_args.kwargs
            self.assertIn("test_inputs", forwarded)
            self.assertEqual(forwarded["test_inputs"], ["입력1", "입력2"])


# ---------------------------------------------------------------------------
# TestMainMultiInput
# ---------------------------------------------------------------------------


class TestMainMultiInput(unittest.TestCase):
    """Tests for the multi-input options of the main() CLI."""

    @patch("autoresearch.runner.load_test_inputs")
    @patch("autoresearch.runner.run")
    def test_main_test_inputs_file(self, mock_run: MagicMock, mock_load: MagicMock) -> None:
        """--test-inputs-file must go through load_test_inputs() and reach run()."""
        from autoresearch.runner import main

        inputs_file = "/path/to/test-inputs.yaml"
        mock_load.return_value = [
            {"id": "input1", "text": "첫 번째 입력"},
            {"id": "input2", "text": "두 번째 입력"},
        ]
        # Summary dict handed back by the patched run().
        mock_run.return_value = {
            "skill": "sk",
            "total_rounds": 1,
            "final_score": 0.8,
            "kept": 1,
            "reverted": 0,
            "rounds": [],
            "started_at": "2024-01-01T00:00:00+00:00",
        }

        argv = ["--skill", "sk", "--checklist", "/c.yaml", "--test-inputs-file", inputs_file]
        exit_code = main(argv)

        self.assertEqual(exit_code, 0)
        mock_load.assert_called_once_with(inputs_file)
        forwarded = mock_run.call_args.kwargs
        self.assertIn("test_inputs", forwarded)
        # Only the "text" value of each loaded entry is expected in test_inputs.
        self.assertEqual(forwarded["test_inputs"], ["첫 번째 입력", "두 번째 입력"])

    def test_main_mutual_exclusive(self) -> None:
        """Using --test-input together with --test-inputs-file must be an error."""
        from autoresearch.runner import main

        conflicting_argv = [
            "--skill",
            "sk",
            "--checklist",
            "/c.yaml",
            "--test-input",
            "입력",
            "--test-inputs-file",
            "/path/to/file.yaml",
        ]

        with self.assertRaises(SystemExit) as raised:
            main(conflicting_argv)

        # The process must exit with a non-zero status.
        self.assertNotEqual(raised.exception.code, 0)


# ---------------------------------------------------------------------------
# TestBackground
# ---------------------------------------------------------------------------


class TestBackground(unittest.TestCase):
    """Tests for the --background flag."""

    @patch("autoresearch.runner.run")
    def test_main_background_flag(self, mock_run: MagicMock) -> None:
        """--background must reach run() as a truthy ``background`` keyword."""
        from autoresearch.runner import main

        # Summary dict handed back by the patched run().
        stub_summary = {
            "skill": "sk",
            "total_rounds": 1,
            "final_score": 0.8,
            "kept": 1,
            "reverted": 0,
            "rounds": [],
            "started_at": "2024-01-01T00:00:00+00:00",
        }
        mock_run.return_value = stub_summary

        # A throwaway directory is passed as --skills-dir for this invocation.
        with tempfile.TemporaryDirectory() as tmpdir:
            argv = [
                "--skill",
                "sk",
                "--checklist",
                "/c.yaml",
                "--test-input",
                "입력",
                "--background",
                "--skills-dir",
                tmpdir,
            ]
            exit_code = main(argv)

        self.assertEqual(exit_code, 0)
        forwarded = mock_run.call_args.kwargs
        self.assertTrue(forwarded.get("background", False))


# ---------------------------------------------------------------------------
# TestRunRoundTokenBreakdown
# ---------------------------------------------------------------------------


class TestRunRoundTokenBreakdown(unittest.TestCase):
    """Checks that run_round() records per-stage token counts in the round log."""

    @patch("autoresearch.runner.judge_output")
    @patch("autoresearch.runner.execute_skill")
    @patch("autoresearch.runner.apply_mutation")
    @patch("autoresearch.runner.generate_mutation")
    @patch("autoresearch.runner.get_recent_changelog")
    def test_token_breakdown_in_log(
        self,
        mock_recent: MagicMock,
        mock_gen: MagicMock,
        mock_apply: MagicMock,
        mock_exec: MagicMock,
        mock_judge: MagicMock,
    ) -> None:
        """The round entry must contain the per-stage token fields and their totals."""
        from autoresearch.changelog import create_log
        from autoresearch.runner import run_round

        # Each patched stage reports distinct token counts so they can be told apart.
        mock_recent.return_value = "이력 없음"
        mock_gen.return_value = {
            "mutation_type": "규칙 추가",
            "mutation_description": "새 규칙",
            "modified_skill_md": "# 변경됨",
            "input_tokens": 100,
            "output_tokens": 50,
        }
        mock_exec.return_value = {
            "output": "결과물",
            "input_tokens": 200,
            "output_tokens": 150,
            "model": "m",
        }
        mock_judge.return_value = {
            "items": [{"id": "clarity", "result": "PASS", "reason": "ok"}],
            "total_score": 0.9,
            "summary": "좋음",
            "input_tokens": 30,
            "output_tokens": 20,
        }

        with tempfile.TemporaryDirectory() as tmpdir:
            _make_skill_dir(Path(tmpdir), "sk", SAMPLE_SKILL_CONTENT)

            initial_log = create_log("sk")
            _, _, updated_log = run_round(
                skill_name="sk",
                current_body="# 본문",
                frontmatter="name: sk",
                checklist=SAMPLE_CHECKLIST,
                test_input="입력",
                log=initial_log,
                round_num=1,
                prev_score=0.5,
                skills_dir=tmpdir,
                model_mutate="claude-sonnet-4-6",
                model_judge="claude-haiku-4-5-20251001",
            )

            round_entry = updated_log["rounds"][0]
            expected_tokens = {
                "mutation_input_tokens": 100,
                "mutation_output_tokens": 50,
                "execution_input_tokens": 200,
                "execution_output_tokens": 150,
                "judge_input_tokens": 30,
                "judge_output_tokens": 20,
                # Aggregate fields: mutation + execution + judge.
                "input_tokens": 100 + 200 + 30,
                "output_tokens": 50 + 150 + 20,
            }
            for field, expected in expected_tokens.items():
                self.assertEqual(round_entry[field], expected)


# Run this module's tests through unittest when the file is executed directly.
if __name__ == "__main__":
    unittest.main()
