#!/usr/bin/env python3
"""Tests for skill-judge.py — TDD (RED → GREEN)"""

import importlib.util
import json
import sys
from pathlib import Path

import pytest

# "skill-judge.py" has a hyphen in its filename, so it cannot be imported with a
# normal `import` statement; load it through importlib instead.
_SCRIPTS_DIR = Path(__file__).parent.parent
_MODULE_PATH = _SCRIPTS_DIR / "skill-judge.py"
spec = importlib.util.spec_from_file_location("skill_judge", _MODULE_PATH)
# Fail fast (at collection time) if the script is missing or unloadable.
assert spec is not None and spec.loader is not None
sj = importlib.util.module_from_spec(spec)
# Register under a normal module name so later lookups by name resolve to the
# same module object, then execute the script body to populate `sj`.
sys.modules["skill_judge"] = sj
spec.loader.exec_module(sj)  # type: ignore[union-attr]

# ---------------------------------------------------------------------------
# Test fixtures / sample data
# ---------------------------------------------------------------------------

# A complete, well-formed SKILL.md covering all five canonical sections
# (Description / When to Use / Instructions / Output Format / Examples),
# including fenced code blocks and bullet lists to exercise specificity scoring.
FULL_SKILL_MD = """\
# MySkill

## Description
This skill does something very useful. It can help users accomplish tasks
efficiently by providing structured guidance. The skill is designed for
developers who need to perform complex operations quickly.

## When to Use
Use this skill when the user asks about automated tasks or when you need
to perform repetitive operations. Trigger on keywords like "automate" or
"batch process". This applies broadly to workflow automation scenarios.

## Instructions
Follow these steps:
1. First, analyze the input carefully
2. Then, prepare the execution environment
3. Execute the main logic with proper error handling
4. Validate the output against expected criteria
5. Report results clearly

```python
def example():
    return "done"
```

- Step A: Do the first thing
- Step B: Do the second thing
- Step C: Finalize and clean up

## Output Format
The output should be structured as follows:
- Summary section with key metrics
- Detailed breakdown by category
- Recommendations for next steps

```json
{
  "status": "success",
  "result": "value"
}
```

## Examples
Here is a usage example:

```bash
python3 my_script.py --input file.txt --output result.json
```

Example 1: Basic usage
  Input: simple text file
  Output: processed JSON

Example 2: Advanced usage
  Input: complex dataset
  Output: detailed analysis report
"""

# All five sections present, but each body is minimal — used to verify that
# sparse content scores lower than FULL_SKILL_MD.
MINIMAL_SKILL_MD = """\
## Description
Short description.

## When to Use
Trigger condition here.

## Instructions
Do something.

## Output Format
Some output.

## Examples
An example.
"""

# Alternate English headings (Overview / Trigger / Workflow / Output / Example)
# that the parser must map onto the five canonical section keys.
ENGLISH_ALTERNATE_TITLES = """\
## Overview
This is an overview section with sufficient content to pass the minimum
length requirement. It describes what the skill does in detail.

## Trigger
When this happens, use the skill. Multiple trigger conditions apply here
and should be considered carefully before invoking.

## Workflow
Step 1: Do this
Step 2: Do that
Step 3: Complete
- bullet item one
- bullet item two

## Output
The result will be formatted like this with proper structure and details
about the expected output format for this skill.

## Example
Here is one example of how to use this skill properly.
"""

# Korean headings (개요 / 사용 시점 / 지침 / 출력 형식 / 예시) that must map onto
# the same five canonical section keys as the English variants.
KOREAN_TITLES_MD = """\
## 개요
이 스킬은 매우 유용한 작업을 수행합니다. 사용자가 복잡한 작업을
효율적으로 완료할 수 있도록 구조화된 지침을 제공합니다.

## 사용 시점
자동화 작업이 필요할 때 이 스킬을 사용하세요. 반복 작업이나
배치 처리가 필요한 상황에서 활성화됩니다.

## 지침
다음 단계를 따르세요:
1. 입력을 신중하게 분석합니다
2. 실행 환경을 준비합니다
3. 적절한 오류 처리와 함께 주요 로직을 실행합니다

```python
def 예시():
    return "완료"
```

## 출력 형식
출력은 다음과 같이 구조화됩니다:
- 주요 지표가 있는 요약 섹션
- 카테고리별 세부 분류
- 다음 단계에 대한 권장 사항

## 예시
사용 예시는 다음과 같습니다:

```bash
python3 스크립트.py --입력 파일.txt
```
"""

# Only Description + Instructions present; the other three canonical sections
# must be reported as not found.
MISSING_SECTIONS_MD = """\
## Description
This skill has description and instructions but is missing other sections.
The description is detailed enough to pass the minimum length requirement.

## Instructions
Here are the steps:
1. Do the first thing
2. Do the second thing
3. Finish up
"""

# Degenerate fixture: no content at all — every section should be missing.
EMPTY_CONTENT = ""

# One section with a body far below any length threshold.
SHORT_CONTENT = """\
## Description
Short.
"""

# Content just under 500 chars: all five sections are present, but the file as
# a whole is short enough to trigger the short-file penalty in evaluate_skill.
PENALTY_CONTENT = """\
## Description
A description.

## When to Use
Some trigger.

## Instructions
Do something with steps.

## Output Format
Some format.

## Examples
An example.
"""

# Sanity-check the fixture at import time. An explicit raise (instead of a bare
# `assert`) still fires under `python -O`, which strips assert statements.
if len(PENALTY_CONTENT) >= 500:
    raise RuntimeError(f"PENALTY_CONTENT too long: {len(PENALTY_CONTENT)}")


# ---------------------------------------------------------------------------
# 1. parse_skill_sections tests
# ---------------------------------------------------------------------------
class TestParseSkillSections:
    """parse_skill_sections(): section detection across heading variants."""

    # The five canonical section keys every parse result must contain.
    _ALL_KEYS = ("description", "when_to_use", "instructions", "output_format", "examples")

    def test_parses_all_english_standard_sections(self):
        parsed = sj.parse_skill_sections(FULL_SKILL_MD)
        for key in self._ALL_KEYS:
            assert parsed[key]["found"] is True

    def test_parses_alternate_english_titles(self):
        # Overview / Trigger / Workflow / Output / Example map onto the canonical keys.
        parsed = sj.parse_skill_sections(ENGLISH_ALTERNATE_TITLES)
        for key in self._ALL_KEYS:
            assert parsed[key]["found"] is True

    def test_parses_korean_titles(self):
        # 개요 / 사용 시점 / 지침 / 출력 형식 / 예시 map onto the canonical keys.
        parsed = sj.parse_skill_sections(KOREAN_TITLES_MD)
        for key in self._ALL_KEYS:
            assert parsed[key]["found"] is True

    def test_missing_sections_marked_not_found(self):
        parsed = sj.parse_skill_sections(MISSING_SECTIONS_MD)
        assert parsed["description"]["found"] is True
        assert parsed["instructions"]["found"] is True
        for absent in ("when_to_use", "output_format", "examples"):
            assert parsed[absent]["found"] is False

    def test_empty_content_all_sections_missing(self):
        parsed = sj.parse_skill_sections(EMPTY_CONTENT)
        for key in self._ALL_KEYS:
            assert parsed[key]["found"] is False

    def test_section_content_length_captured(self):
        parsed = sj.parse_skill_sections(FULL_SKILL_MD)
        assert parsed["description"]["length"] > 50
        assert parsed["instructions"]["length"] > 200

    def test_returns_dict_with_all_five_keys(self):
        parsed = sj.parse_skill_sections(FULL_SKILL_MD)
        assert set(parsed.keys()) == set(self._ALL_KEYS)

    def test_each_section_has_found_and_length_and_content(self):
        parsed = sj.parse_skill_sections(FULL_SKILL_MD)
        for key, val in parsed.items():
            assert "found" in val, f"Missing 'found' in {key}"
            assert "length" in val, f"Missing 'length' in {key}"
            assert "content" in val, f"Missing 'content' in {key}"

    def test_not_found_section_has_zero_length(self):
        parsed = sj.parse_skill_sections(MISSING_SECTIONS_MD)
        for absent in ("when_to_use", "output_format", "examples"):
            assert parsed[absent]["length"] == 0

    def test_rules_title_maps_to_instructions(self):
        body = "## Rules\nDo this and that with careful consideration.\n" * 5
        assert sj.parse_skill_sections(body)["instructions"]["found"] is True

    def test_설명_title_maps_to_description(self):
        body = "## 설명\n이것은 설명입니다.\n" + "설명 내용이 충분히 길어야 합니다." * 5
        assert sj.parse_skill_sections(body)["description"]["found"] is True

    def test_트리거_title_maps_to_when_to_use(self):
        body = "## 트리거\n이 조건에서 사용하세요.\n" + "트리거 조건 설명." * 5
        assert sj.parse_skill_sections(body)["when_to_use"]["found"] is True

    def test_결과물_title_maps_to_output_format(self):
        body = "## 결과물\n결과는 이렇게 나옵니다.\n" + "결과 형식 설명." * 5
        assert sj.parse_skill_sections(body)["output_format"]["found"] is True


# ---------------------------------------------------------------------------
# 2. score_section tests
# ---------------------------------------------------------------------------
class TestScoreSection:
    """score_section(): per-section rubric.

    Observed scoring steps (pinned by the tests below): found content earns 5,
    passing the ~50-char minimum earns 10 total, code blocks / lists add
    specificity points, and the per-section score is capped at 20.
    """

    def test_empty_content_scores_zero(self):
        score = sj.score_section("description", "")
        assert score.score == 0
        assert score.found is False

    def test_short_content_scores_five(self):
        # found but under 50 chars => +5 only
        score = sj.score_section("description", "Short text.")
        assert score.score == 5
        assert score.found is True

    def test_content_over_50_chars_scores_ten(self):
        content = "This is a description that is definitely longer than fifty characters in total."
        score = sj.score_section("description", content)
        assert score.score == 10

    def test_content_with_code_block_adds_specificity(self):
        content = "Description.\n```python\ncode here\n```\n" + "x" * 30
        score = sj.score_section("description", content)
        assert score.score >= 15  # found + min_length + specificity

    def test_content_with_list_adds_specificity(self):
        content = "Some content with a list.\n- item one\n- item two\n- item three\n" + "x" * 50
        score = sj.score_section("description", content)
        assert score.score >= 15

    def test_full_marks_for_comprehensive_section(self):
        content = (
            "A very detailed description of the skill.\n"
            "- Point one\n- Point two\n- Point three\n"
            "```python\ncode example\n```\n"
        ) * 5  # Makes it over 200 chars
        score = sj.score_section("description", content)
        assert score.score == 20

    def test_score_is_section_score_dataclass(self):
        # Renamed from "test_score_is_sectioncore_dataclass" (typo in the
        # original name); verifies the SectionScore-style attribute surface.
        score = sj.score_section("description", "Some content here.")
        assert hasattr(score, "score")
        assert hasattr(score, "found")
        assert hasattr(score, "length")

    def test_max_score_per_section_is_twenty(self):
        content = "Extremely detailed content.\n- item\n```code```\n" + "x" * 500
        score = sj.score_section("instructions", content)
        assert score.score <= 20

    def test_length_reflects_actual_content_length(self):
        content = "Hello, world!"
        score = sj.score_section("description", content)
        assert score.length == len(content)


# ---------------------------------------------------------------------------
# 3. evaluate_skill tests
# ---------------------------------------------------------------------------
class TestEvaluateSkill:
    """evaluate_skill(): whole-file scoring, penalties, bonuses, and grading.

    The write-a-SKILL.md-then-evaluate boilerplate that was repeated in every
    test is factored into the `_evaluate` helper; behavior is unchanged.
    """

    @staticmethod
    def _evaluate(tmp_path, content):
        # Helper: write *content* to SKILL.md under tmp_path and evaluate it.
        skill_file = tmp_path / "SKILL.md"
        skill_file.write_text(content)
        return sj.evaluate_skill(str(skill_file))

    def test_full_skill_gets_high_score(self, tmp_path):
        assert self._evaluate(tmp_path, FULL_SKILL_MD).total_score >= 75

    def test_minimal_skill_gets_lower_score(self, tmp_path):
        assert self._evaluate(tmp_path, MINIMAL_SKILL_MD).total_score < 75

    def test_empty_file_scores_zero_or_near_zero(self, tmp_path):
        # Penalties may push the total below zero for an empty file.
        assert self._evaluate(tmp_path, EMPTY_CONTENT).total_score <= 0

    def test_penalty_applied_for_short_file(self, tmp_path):
        assert self._evaluate(tmp_path, PENALTY_CONTENT).penalties == -10

    def test_bonus_applied_for_long_file(self, tmp_path):
        long_content = FULL_SKILL_MD + "\n" + "Additional content.\n" * 600
        assert len(long_content) >= 10000, f"Content too short: {len(long_content)}"
        assert self._evaluate(tmp_path, long_content).bonuses == 5

    def test_no_bonus_for_medium_length(self, tmp_path):
        assert self._evaluate(tmp_path, FULL_SKILL_MD).bonuses == 0

    def test_result_has_skill_path(self, tmp_path):
        result = self._evaluate(tmp_path, FULL_SKILL_MD)
        assert result.skill_path == str(tmp_path / "SKILL.md")

    def test_result_has_grade(self, tmp_path):
        assert self._evaluate(tmp_path, FULL_SKILL_MD).grade in ["A", "B", "C", "D", "F"]

    def test_result_mode_is_rule_based(self, tmp_path):
        assert self._evaluate(tmp_path, FULL_SKILL_MD).mode == "rule-based"

    def test_result_has_sections_dict(self, tmp_path):
        result = self._evaluate(tmp_path, FULL_SKILL_MD)
        assert hasattr(result, "sections")
        assert isinstance(result.sections, dict)

    def test_missing_file_raises_file_not_found(self):
        with pytest.raises(FileNotFoundError):
            sj.evaluate_skill("/nonexistent/path/SKILL.md")

    def test_grade_a_for_score_90_plus(self, tmp_path):
        # Only assert the grade mapping when the content actually reaches 90+.
        high_quality = FULL_SKILL_MD + "\n" + "More content.\n" * 50
        result = self._evaluate(tmp_path, high_quality)
        if result.total_score >= 90:
            assert result.grade == "A"

    def test_grade_f_for_score_44_or_less(self, tmp_path):
        result = self._evaluate(tmp_path, EMPTY_CONTENT)
        if result.total_score <= 44:
            assert result.grade == "F"

    def test_total_score_is_int(self, tmp_path):
        assert isinstance(self._evaluate(tmp_path, FULL_SKILL_MD).total_score, int)


# ---------------------------------------------------------------------------
# 4. Grade boundary tests
# ---------------------------------------------------------------------------
class TestGradeBoundaries:
    """compute_grade(): letter cutoffs — A>=90, B>=75, C>=60, D>=45, else F."""

    def _make_skill_with_score(self, tmp_path, content):
        # Helper: evaluate arbitrary content written to a temp SKILL.md.
        path = tmp_path / "SKILL.md"
        path.write_text(content)
        return sj.evaluate_skill(str(path))

    def test_grade_d_boundary(self, tmp_path):
        # Minimal content — should land in one of the lower grade bands.
        outcome = self._make_skill_with_score(tmp_path, MINIMAL_SKILL_MD)
        assert outcome.grade in ["D", "F", "C"]

    def test_compute_grade_directly_90(self):
        # Lower edge of the A band.
        assert sj.compute_grade(90) == "A"

    def test_compute_grade_directly_100(self):
        assert sj.compute_grade(100) == "A"

    def test_compute_grade_directly_89(self):
        # One below the A cutoff falls to B.
        assert sj.compute_grade(89) == "B"

    def test_compute_grade_directly_75(self):
        # Lower edge of the B band.
        assert sj.compute_grade(75) == "B"

    def test_compute_grade_directly_74(self):
        assert sj.compute_grade(74) == "C"

    def test_compute_grade_directly_60(self):
        # Lower edge of the C band.
        assert sj.compute_grade(60) == "C"

    def test_compute_grade_directly_59(self):
        assert sj.compute_grade(59) == "D"

    def test_compute_grade_directly_45(self):
        # Lower edge of the D band.
        assert sj.compute_grade(45) == "D"

    def test_compute_grade_directly_44(self):
        # Just below the D cutoff.
        assert sj.compute_grade(44) == "F"

    def test_compute_grade_directly_0(self):
        assert sj.compute_grade(0) == "F"

    def test_compute_grade_negative(self):
        # Penalties can push totals below zero; those still map to F.
        assert sj.compute_grade(-10) == "F"


# ---------------------------------------------------------------------------
# 5. evaluate_directory tests
# ---------------------------------------------------------------------------
class TestEvaluateDirectory:
    """evaluate_directory(): discovery of SKILL.md files under a directory."""

    def test_empty_directory_returns_empty_list(self, tmp_path):
        assert sj.evaluate_directory(str(tmp_path)) == []

    def test_single_skill_in_directory(self, tmp_path):
        target = tmp_path / "myskill"
        target.mkdir()
        (target / "SKILL.md").write_text(FULL_SKILL_MD)
        assert len(sj.evaluate_directory(str(tmp_path))) == 1

    def test_multiple_skills_in_directory(self, tmp_path):
        for i in range(3):
            target = tmp_path / f"skill{i}"
            target.mkdir()
            (target / "SKILL.md").write_text(FULL_SKILL_MD)
        assert len(sj.evaluate_directory(str(tmp_path))) == 3

    def test_directory_with_no_skill_md(self, tmp_path):
        # Files exist, but none of them is named SKILL.md.
        (tmp_path / "other.md").write_text("not a skill")
        (tmp_path / "script.py").write_text("# code")
        assert sj.evaluate_directory(str(tmp_path)) == []

    def test_nonexistent_directory_raises_error(self):
        with pytest.raises((FileNotFoundError, NotADirectoryError)):
            sj.evaluate_directory("/nonexistent/directory/path")

    def test_results_are_skill_result_instances(self, tmp_path):
        target = tmp_path / "skill1"
        target.mkdir()
        (target / "SKILL.md").write_text(FULL_SKILL_MD)
        results = sj.evaluate_directory(str(tmp_path))
        assert len(results) == 1
        first = results[0]
        for attr in ("total_score", "grade", "skill_path"):
            assert hasattr(first, attr)

    def test_direct_skill_md_in_root(self, tmp_path):
        # A SKILL.md sitting directly at the directory root also counts.
        (tmp_path / "SKILL.md").write_text(FULL_SKILL_MD)
        assert len(sj.evaluate_directory(str(tmp_path))) == 1


# ---------------------------------------------------------------------------
# 6. LLM stub tests
# ---------------------------------------------------------------------------
class TestLLMStub:
    """llm_evaluate(): stub behavior (no real LLM call is made)."""

    @staticmethod
    def _stub_result(tmp_path):
        # Helper: evaluate FULL_SKILL_MD via the LLM stub; return (path, result).
        path = tmp_path / "SKILL.md"
        path.write_text(FULL_SKILL_MD)
        return path, sj.llm_evaluate(str(path))

    def test_llm_evaluate_returns_result_with_llm_stub_mode(self, tmp_path):
        _, result = self._stub_result(tmp_path)
        assert result.mode == "llm-stub"

    def test_llm_evaluate_has_message_about_api_key(self, tmp_path):
        _, result = self._stub_result(tmp_path)
        assert hasattr(result, "message")
        lowered = result.message.lower()
        assert "api-key" in lowered or "api_key" in lowered

    def test_llm_evaluate_does_not_raise(self, tmp_path):
        # The stub must degrade gracefully, not raise NotImplementedError.
        _, result = self._stub_result(tmp_path)
        assert result is not None

    def test_llm_evaluate_skill_path_set(self, tmp_path):
        path, result = self._stub_result(tmp_path)
        assert result.skill_path == str(path)


# ---------------------------------------------------------------------------
# 7. JSON serialization tests
# ---------------------------------------------------------------------------
class TestJSONOutput:
    """result_to_dict(): JSON-ready serialization of evaluation results."""

    # Top-level keys the serialized result must expose.
    _REQUIRED_KEYS = ("skill_path", "total_score", "grade", "sections", "penalties", "bonuses", "mode")
    # Canonical section keys expected under "sections".
    _SECTION_KEYS = ("description", "when_to_use", "instructions", "output_format", "examples")

    @staticmethod
    def _as_dict(tmp_path):
        # Helper: evaluate FULL_SKILL_MD and return its dict serialization.
        path = tmp_path / "SKILL.md"
        path.write_text(FULL_SKILL_MD)
        return sj.result_to_dict(sj.evaluate_skill(str(path)))

    def test_result_to_json_dict_has_required_keys(self, tmp_path):
        payload = self._as_dict(tmp_path)
        for key in self._REQUIRED_KEYS:
            assert key in payload

    def test_result_to_json_is_serializable(self, tmp_path):
        payload = self._as_dict(tmp_path)
        # json.dumps must not raise on any value in the payload.
        assert isinstance(json.dumps(payload), str)

    def test_sections_dict_contains_all_five(self, tmp_path):
        sections = self._as_dict(tmp_path)["sections"]
        for key in self._SECTION_KEYS:
            assert key in sections

    def test_each_section_in_dict_has_found_score_length(self, tmp_path):
        for key, val in self._as_dict(tmp_path)["sections"].items():
            assert "found" in val, f"Missing 'found' in section {key}"
            assert "score" in val, f"Missing 'score' in section {key}"
            assert "length" in val, f"Missing 'length' in section {key}"


# ---------------------------------------------------------------------------
# 8. Edge case tests
# ---------------------------------------------------------------------------
class TestEdgeCases:
    """Robustness checks: odd markdown, size extremes, and encodings."""

    def test_skill_with_only_h1_heading(self, tmp_path):
        path = tmp_path / "SKILL.md"
        path.write_text("# Just a Title\n")
        assert sj.evaluate_skill(str(path)).total_score <= 10

    def test_skill_with_malformed_markdown(self, tmp_path):
        path = tmp_path / "SKILL.md"
        path.write_text("##NoSpace\nContent\n#AnotherNoSpace\nMore content\n")
        # Malformed headings must not crash the evaluator.
        assert sj.evaluate_skill(str(path)) is not None

    def test_very_large_file(self, tmp_path):
        path = tmp_path / "SKILL.md"
        big = FULL_SKILL_MD + "\n" + "x" * 20000
        assert len(big) >= 10000
        path.write_text(big)
        outcome = sj.evaluate_skill(str(path))
        assert outcome.bonuses == 5
        assert outcome.total_score > 0

    def test_unicode_content(self, tmp_path):
        path = tmp_path / "SKILL.md"
        path.write_text(KOREAN_TITLES_MD)
        assert sj.evaluate_skill(str(path)) is not None

    def test_windows_line_endings(self, tmp_path):
        path = tmp_path / "SKILL.md"
        crlf = FULL_SKILL_MD.replace("\n", "\r\n")
        # Write bytes directly so the CRLF endings reach the file untouched.
        path.write_bytes(crlf.encode("utf-8"))
        outcome = sj.evaluate_skill(str(path))
        assert outcome is not None
        assert outcome.total_score > 0
