import importlib.util
import json
import sys
from datetime import date
from pathlib import Path

import pytest

# Directory two levels above this test file; the script under test is looked up here.
_SCRIPTS_DIR = Path(__file__).parent.parent
# Put that directory at the front of sys.path — presumably so that imports made by
# the script itself resolve against its sibling modules (TODO confirm).
sys.path.insert(0, str(_SCRIPTS_DIR))

# "learning-analyzer.py" contains a hyphen, so it cannot be loaded with a plain
# ``import`` statement. Load it from its file path under the name "learning_analyzer".
_MODULE_PATH = _SCRIPTS_DIR / "learning-analyzer.py"
spec = importlib.util.spec_from_file_location("learning_analyzer", _MODULE_PATH)
assert spec is not None
learning_analyzer = importlib.util.module_from_spec(spec)
assert spec.loader is not None
# Execute the module body so its functions are available to the tests below.
spec.loader.exec_module(learning_analyzer)


# ---------------------------------------------------------------------------
# 테스트 데이터 헬퍼
# ---------------------------------------------------------------------------


def make_audit_trail(tmp_path: Path, records: list[dict]) -> Path:
    """Write *records* as JSON Lines to ``audit-trail.jsonl`` inside *tmp_path*.

    Each record is serialized with ``json.dumps`` onto its own line. Lines are
    joined with a newline and no trailing newline is written, so an empty
    *records* list produces an empty file.

    Returns:
        Path of the file that was written.
    """
    trail_file = tmp_path / "audit-trail.jsonl"
    payload = "\n".join(map(json.dumps, records))
    trail_file.write_text(payload, encoding="utf-8")
    return trail_file


def make_whitelist(tmp_path: Path, files: list[str] | None = None, patterns: list[str] | None = None) -> Path:
    content = ""
    if files:
        content += "files:\n"
        for f in files:
            content += f'  - "{f}"\n'
    if patterns:
        content += "patterns:\n"
        for p in patterns:
            content += f'  - "{p}"\n'
    f = tmp_path / "whitelist.yaml"
    f.write_text(content, encoding="utf-8")
    return f


def make_thresholds(tmp_path: Path, min_task_count: int = 3, min_task_ratio: float = 0.3) -> Path:
    """Write a thresholds YAML file to ``thresholds.yaml`` inside *tmp_path*.

    The file holds exactly two keys, ``min_task_count`` and ``min_task_ratio``,
    one per line, followed by a trailing newline.

    Returns:
        Path of the file that was written.
    """
    yaml_lines = (
        f"min_task_count: {min_task_count}",
        f"min_task_ratio: {min_task_ratio}",
    )
    thresholds_file = tmp_path / "thresholds.yaml"
    thresholds_file.write_text("\n".join(yaml_lines) + "\n", encoding="utf-8")
    return thresholds_file


def make_sample_records(file_path: str, task_ids: list[str], ts_base: str = "2026-04-07") -> list[dict]:
    """Build one audit record per task id, all editing the same workspace file.

    Every record has ``bot="dev1"``, ``tool="Edit"``, ``schema_version=2`` and
    a ``file`` of ``/home/jay/workspace/<file_path>``. Timestamps start at
    10:00:00Z on *ts_base* and advance by one hour per record.

    When the hour would reach 24 or more (15th record onwards), the timestamp
    rolls over into the following day(s) instead of producing an invalid hour
    such as ``T24:00:00Z``.

    Args:
        file_path: Path relative to ``/home/jay/workspace/``.
        task_ids: Task ids, one record is produced for each, in order.
        ts_base: ISO date (``YYYY-MM-DD``) of the first record.

    Returns:
        The list of record dicts; empty when *task_ids* is empty.

    Raises:
        ValueError: If the hour overflows past midnight and *ts_base* is not a
            valid ISO date, so the next day cannot be computed.
    """
    from datetime import date, timedelta

    records = []
    for i, tid in enumerate(task_ids):
        day_offset, hour = divmod(10 + i, 24)
        # ts_base is used verbatim while the hour stays within the day; it is
        # only parsed when a rollover into a later date is actually needed.
        if day_offset:
            day = (date.fromisoformat(ts_base) + timedelta(days=day_offset)).isoformat()
        else:
            day = ts_base
        records.append(
            {
                "ts": f"{day}T{hour:02d}:00:00Z",
                "bot": "dev1",
                "tool": "Edit",
                "file": f"/home/jay/workspace/{file_path}",
                "task_id": tid,
                "schema_version": 2,
            }
        )
    return records


# ---------------------------------------------------------------------------
# 1. TestLoadWhitelist
# ---------------------------------------------------------------------------


class TestLoadWhitelist:
    """Tests for ``learning_analyzer.load_whitelist``."""

    def test_load_whitelist_normal(self, tmp_path: Path):
        """A file with both sections loads with at least one of the two keys present."""
        whitelist_file = make_whitelist(
            tmp_path,
            patterns=["config/*", "tests/*"],
            files=["/home/jay/workspace/config/settings.yaml"],
        )
        loaded = learning_analyzer.load_whitelist(whitelist_file)
        assert any(key in loaded for key in ("files", "patterns"))

    def test_load_whitelist_missing(self, tmp_path: Path):
        """A non-existent path must still yield a dict rather than raising."""
        absent_file = tmp_path / "nonexistent.yaml"
        loaded = learning_analyzer.load_whitelist(absent_file)
        assert loaded == {} or isinstance(loaded, dict)

    def test_load_whitelist_empty(self, tmp_path: Path):
        """A zero-byte YAML file must still yield a dict rather than raising."""
        blank_file = tmp_path / "empty.yaml"
        blank_file.write_text("", encoding="utf-8")
        loaded = learning_analyzer.load_whitelist(blank_file)
        assert loaded == {} or isinstance(loaded, dict)


# ---------------------------------------------------------------------------
# 2. TestLoadThresholds
# ---------------------------------------------------------------------------


class TestLoadThresholds:
    """Tests for ``learning_analyzer.load_thresholds``."""

    @staticmethod
    def _check(loaded, expected_count, expected_ratio):
        """Assert both threshold values; the ratio is compared approximately."""
        assert loaded["min_task_count"] == expected_count
        assert loaded["min_task_ratio"] == pytest.approx(expected_ratio)

    def test_load_thresholds_normal(self, tmp_path: Path):
        """Values written to the file are the values returned."""
        thresholds_file = make_thresholds(tmp_path, min_task_count=5, min_task_ratio=0.4)
        self._check(learning_analyzer.load_thresholds(thresholds_file), 5, 0.4)

    def test_load_thresholds_missing(self, tmp_path: Path):
        """A non-existent file is expected to give count 3 and ratio 0.3."""
        absent_file = tmp_path / "nonexistent.yaml"
        self._check(learning_analyzer.load_thresholds(absent_file), 3, 0.3)

    def test_load_thresholds_partial(self, tmp_path: Path):
        """A file that sets only the count is expected to keep ratio 0.3."""
        partial_file = tmp_path / "partial.yaml"
        partial_file.write_text("min_task_count: 5\n", encoding="utf-8")
        self._check(learning_analyzer.load_thresholds(partial_file), 5, 0.3)


# ---------------------------------------------------------------------------
# 3. TestParseAuditTrail
# ---------------------------------------------------------------------------


class TestParseAuditTrail:
    """Tests for ``learning_analyzer.parse_audit_trail``."""

    # Date window handed to every parse call in this class.
    _WEEK_START = date(2026, 4, 7)
    _WEEK_END = date(2026, 4, 13)

    @staticmethod
    def _record(**overrides) -> dict:
        """Return a record that the parser is expected to keep, with *overrides* applied."""
        entry = {
            "ts": "2026-04-08T10:00:00Z",
            "bot": "dev1",
            "tool": "Edit",
            "file": "/home/jay/workspace/src/app.py",
            "task_id": "task-1.1",
            "schema_version": 2,
        }
        entry.update(overrides)
        return entry

    def _parse(self, trail_path: Path):
        """Run ``parse_audit_trail`` on *trail_path* over the class-wide date window."""
        return learning_analyzer.parse_audit_trail(trail_path, self._WEEK_START, self._WEEK_END)

    def test_parse_normal(self, tmp_path: Path):
        """Two in-range records dated on the window's start day are both returned."""
        entries = make_sample_records("src/app.py", ["task-1.1", "task-2.1"])
        parsed = self._parse(make_audit_trail(tmp_path, entries))
        assert len(parsed) == 2

    def test_parse_excludes_outside_range(self, tmp_path: Path):
        """A record dated before the window start is dropped; the in-range one is kept."""
        in_window = make_sample_records("src/app.py", ["task-1.1"], ts_base="2026-04-08")
        before_window = make_sample_records("src/other.py", ["task-2.1"], ts_base="2026-04-01")
        parsed = self._parse(make_audit_trail(tmp_path, in_window + before_window))
        assert len(parsed) == 1
        assert parsed[0]["task_id"] == "task-1.1"

    def test_parse_excludes_tmp_paths(self, tmp_path: Path):
        """A record whose file lives under ``/tmp/`` is dropped."""
        entries = [self._record(file="/tmp/scratch.py")]
        parsed = self._parse(make_audit_trail(tmp_path, entries))
        assert len(parsed) == 0

    def test_parse_excludes_anu(self, tmp_path: Path):
        """A record written by the bot named ``anu`` is dropped."""
        entries = [self._record(bot="anu")]
        parsed = self._parse(make_audit_trail(tmp_path, entries))
        assert len(parsed) == 0

    def test_parse_excludes_unknown_task_id(self, tmp_path: Path):
        """Records whose task id is ``"unknown"`` or empty are dropped."""
        entries = [
            self._record(task_id="unknown"),
            self._record(ts="2026-04-08T11:00:00Z", task_id=""),
        ]
        parsed = self._parse(make_audit_trail(tmp_path, entries))
        assert len(parsed) == 0

    def test_parse_empty_file(self, tmp_path: Path):
        """An empty audit-trail file parses to an empty list."""
        parsed = self._parse(make_audit_trail(tmp_path, []))
        assert parsed == []

    def test_parse_missing_file(self, tmp_path: Path):
        """A non-existent audit-trail file parses to an empty list."""
        parsed = self._parse(tmp_path / "nonexistent.jsonl")
        assert parsed == []


# ---------------------------------------------------------------------------
# 4. TestComputeHotspots
# ---------------------------------------------------------------------------


class TestComputeHotspots:
    """Tests for ``learning_analyzer.compute_hotspots``.

    Every test calls ``compute_hotspots(records, whitelist, thresholds)`` and
    unpacks the result as a ``(hotspots, whitelisted)`` pair.
    """

    _THRESHOLDS_DEFAULT = {"min_task_count": 3, "min_task_ratio": 0.3}
    _WHITELIST_EMPTY = {"files": [], "patterns": []}

    @staticmethod
    def _filler_records(first: int, last: int) -> list[dict]:
        """Return one record per index in ``first..last`` (inclusive).

        Each record touches its own ``other_<i>.py`` file under its own
        ``task-<i>.1`` id, so it adds a distinct task without adding edits to
        the file under test.
        """
        return [
            {
                "ts": "2026-04-07T10:00:00Z",
                "bot": "dev1",
                "tool": "Edit",
                "file": f"/home/jay/workspace/src/other_{i}.py",
                "task_id": f"task-{i}.1",
                "schema_version": 2,
            }
            for i in range(first, last + 1)
        ]

    def test_hotspot_above_threshold(self):
        """A file edited by four tasks is reported when both thresholds are met."""
        records = make_sample_records("src/app.py", ["task-1.1", "task-2.1", "task-3.1", "task-4.1"])
        thresholds = {"min_task_count": 3, "min_task_ratio": 0.1}
        hotspots, _ = learning_analyzer.compute_hotspots(records, self._WHITELIST_EMPTY, thresholds)
        assert len(hotspots) == 1
        assert "app.py" in hotspots[0]["file"]

    def test_hotspot_below_threshold(self):
        """A file edited by only two tasks is not reported under the default thresholds."""
        records = make_sample_records("src/app.py", ["task-1.1", "task-2.1"])
        hotspots, _ = learning_analyzer.compute_hotspots(records, self._WHITELIST_EMPTY, self._THRESHOLDS_DEFAULT)
        assert len(hotspots) == 0

    def test_hotspot_ratio_check(self):
        """Three tasks out of thirty touch app.py; with ``min_task_ratio`` 0.5 nothing is reported."""
        records_app = make_sample_records("src/app.py", ["task-1.1", "task-2.1", "task-3.1"])
        total_task_count = 30
        extra_records = self._filler_records(4, total_task_count)
        thresholds = {"min_task_count": 3, "min_task_ratio": 0.5}
        hotspots, _ = learning_analyzer.compute_hotspots(records_app + extra_records, self._WHITELIST_EMPTY, thresholds)
        assert len(hotspots) == 0

    def test_hotspot_and_condition(self):
        """Meeting ``min_task_count`` alone is not enough: 3 of 19 tasks with ratio 0.5 reports nothing."""
        records = make_sample_records("src/app.py", ["task-1.1", "task-2.1", "task-3.1"])
        extra = self._filler_records(4, 19)
        thresholds = {"min_task_count": 3, "min_task_ratio": 0.5}
        hotspots, _ = learning_analyzer.compute_hotspots(records + extra, self._WHITELIST_EMPTY, thresholds)
        assert len(hotspots) == 0

    def test_whitelist_exclusion(self):
        """A file listed under the whitelist's ``files`` goes to ``whitelisted``, not ``hotspots``."""
        records = make_sample_records("src/app.py", ["task-1.1", "task-2.1", "task-3.1", "task-4.1"])
        # NOTE(review): "files" is a set here but a list in _WHITELIST_EMPTY — confirm which
        # container type load_whitelist actually produces.
        whitelist = {"files": {"src/app.py"}, "patterns": []}
        thresholds = {"min_task_count": 3, "min_task_ratio": 0.1}
        hotspots, whitelisted = learning_analyzer.compute_hotspots(records, whitelist, thresholds)
        assert len(hotspots) == 0
        assert len(whitelisted) == 1

    def test_whitelist_pattern_matching(self):
        """A file matching a whitelist glob pattern goes to ``whitelisted``, not ``hotspots``."""
        records = make_sample_records("config/settings.yaml", ["task-1.1", "task-2.1", "task-3.1", "task-4.1"])
        whitelist = {"files": [], "patterns": ["config/*"]}
        thresholds = {"min_task_count": 3, "min_task_ratio": 0.1}
        hotspots, whitelisted = learning_analyzer.compute_hotspots(records, whitelist, thresholds)
        assert len(hotspots) == 0
        assert len(whitelisted) == 1

    def test_hotspot_type_classification(self):
        """A ``.py`` hotspot is typed ``refactor_candidate`` and a ``.yaml`` one ``config_hotspot``."""
        records_py = make_sample_records("src/app.py", ["task-1.1", "task-2.1", "task-3.1", "task-4.1"])
        records_yaml = make_sample_records("config/settings.yaml", ["task-1.1", "task-2.1", "task-3.1", "task-4.1"])
        thresholds = {"min_task_count": 3, "min_task_ratio": 0.1}
        hotspots, _ = learning_analyzer.compute_hotspots(records_py + records_yaml, self._WHITELIST_EMPTY, thresholds)
        type_by_file = {h["file"]: h.get("type") for h in hotspots}
        py_file = next((h["file"] for h in hotspots if h["file"].endswith(".py")), None)
        yaml_file = next((h["file"] for h in hotspots if h["file"].endswith(".yaml")), None)
        assert py_file is not None
        assert yaml_file is not None
        assert type_by_file[py_file] == "refactor_candidate"
        assert type_by_file[yaml_file] == "config_hotspot"

    def test_hotspot_priority_classification(self):
        """With five tasks in total, the five-task file is ``high`` and the three-task file ``medium``."""
        records_high = make_sample_records("src/high.py", ["task-1.1", "task-2.1", "task-3.1", "task-4.1", "task-5.1"])
        records_medium = make_sample_records("src/medium.py", ["task-1.1", "task-2.1", "task-3.1"])
        thresholds = {"min_task_count": 3, "min_task_ratio": 0.1}
        hotspots, _ = learning_analyzer.compute_hotspots(
            records_high + records_medium, self._WHITELIST_EMPTY, thresholds
        )
        priority_map = {h["file"]: h.get("priority") for h in hotspots}
        high_file = next((h["file"] for h in hotspots if "high.py" in h["file"]), None)
        medium_file = next((h["file"] for h in hotspots if "medium.py" in h["file"]), None)
        assert high_file is not None
        assert medium_file is not None
        assert priority_map[high_file] == "high"
        assert priority_map[medium_file] == "medium"

    def test_hotspot_includes_task_list(self):
        """A hotspot lists every contributing task id under ``task_ids`` or ``tasks``."""
        task_ids = ["task-1.1", "task-2.1", "task-3.1", "task-4.1"]
        records = make_sample_records("src/app.py", task_ids)
        thresholds = {"min_task_count": 3, "min_task_ratio": 0.1}
        hotspots, _ = learning_analyzer.compute_hotspots(records, self._WHITELIST_EMPTY, thresholds)
        assert len(hotspots) == 1
        assert "task_ids" in hotspots[0] or "tasks" in hotspots[0]
        task_list = hotspots[0].get("task_ids") or hotspots[0].get("tasks")
        for tid in task_ids:
            assert tid in task_list

    def test_empty_records(self):
        """No records in means no hotspots and no whitelisted entries out."""
        hotspots, whitelisted = learning_analyzer.compute_hotspots([], self._WHITELIST_EMPTY, self._THRESHOLDS_DEFAULT)
        assert hotspots == []
        assert whitelisted == []


# ---------------------------------------------------------------------------
# 5. TestGenerateReport
# ---------------------------------------------------------------------------


class TestGenerateReport:
    """Tests for ``learning_analyzer.generate_report``."""

    _WEEK_START = date(2026, 4, 7)
    _WEEK_END = date(2026, 4, 14)

    def _make_hotspot(self, file_path: str, task_count: int = 4, priority: str = "high", total_tasks: int = 10) -> dict:
        """Build a hotspot dict typed ``refactor_candidate`` with ids ``task-1.1`` .. ``task-<task_count>.1``.

        ``task_ratio`` is ``task_count / total_tasks``, or 0 when *total_tasks* is 0.
        """
        ratio = task_count / total_tasks if total_tasks else 0
        ids = [f"task-{n}.1" for n in range(1, task_count + 1)]
        return {
            "file": file_path,
            "task_count": task_count,
            "task_ratio": ratio,
            "task_ids": ids,
            "type": "refactor_candidate",
            "priority": priority,
            "total_tasks": total_tasks,
        }

    def _write_report(self, tmp_path: Path, *, with_hotspot: bool) -> Path:
        """Generate a report into *tmp_path* and return its path.

        With *with_hotspot* the report holds one ``app.py`` hotspot and a total
        of 10 tasks; otherwise it holds no hotspots and a total of 0 tasks.
        """
        if with_hotspot:
            hotspots = [self._make_hotspot("/home/jay/workspace/src/app.py")]
            total = 10
        else:
            hotspots = []
            total = 0
        return learning_analyzer.generate_report(hotspots, [], self._WEEK_START, self._WEEK_END, total, tmp_path)

    def test_report_created(self, tmp_path: Path):
        """The returned report path exists on disk."""
        report = self._write_report(tmp_path, with_hotspot=True)
        assert report.exists()

    def test_report_contains_header(self, tmp_path: Path):
        """The report text mentions the start date and the last day of the week."""
        report = self._write_report(tmp_path, with_hotspot=True)
        text = report.read_text(encoding="utf-8")
        assert "2026-04-07" in text
        # NOTE(review): _WEEK_END is 2026-04-14 but the text is expected to show
        # 2026-04-13 — presumably generate_report treats the end date as exclusive; confirm.
        assert "2026-04-13" in text

    def test_report_contains_v1_limitation(self, tmp_path: Path):
        """The report carries the three v1-limitation phrases even with no hotspots."""
        report = self._write_report(tmp_path, with_hotspot=False)
        text = report.read_text(encoding="utf-8")
        for phrase in ("학습 기능 v1", "에러 유사도 미지원", "v2 예정"):
            assert phrase in text

    def test_report_contains_hotspot_status(self, tmp_path: Path):
        """A reported hotspot is rendered with a pending status line."""
        report = self._write_report(tmp_path, with_hotspot=True)
        text = report.read_text(encoding="utf-8")
        assert "**status**: pending" in text

    def test_report_contains_summary(self, tmp_path: Path):
        """The report has a summary section, labelled in English or Korean."""
        report = self._write_report(tmp_path, with_hotspot=True)
        text = report.read_text(encoding="utf-8")
        assert "summary" in text.lower() or "요약" in text

    def test_report_filename_format(self, tmp_path: Path):
        """The report file is named ``weekly-...2026....md``."""
        report = self._write_report(tmp_path, with_hotspot=False)
        filename = report.name
        assert filename.startswith("weekly-")
        assert filename.endswith(".md")
        assert "2026" in filename


# ---------------------------------------------------------------------------
# 6. TestCLI
# ---------------------------------------------------------------------------


class TestCLI:
    """End-to-end tests that chain the analyzer functions in pipeline order.

    These tests call parse -> load -> compute -> report directly on the module;
    they do not spawn the script as a subprocess.
    """

    _WEEK_START = date(2026, 4, 7)
    _WEEK_END = date(2026, 4, 14)

    def test_cli_with_sample_data(self, tmp_path: Path):
        """A file edited by four tasks ends up mentioned in the generated report."""
        records = make_sample_records(
            "src/hot.py", ["task-1.1", "task-2.1", "task-3.1", "task-4.1"]
        ) + make_sample_records("src/normal.py", ["task-1.1"])
        trail_path = make_audit_trail(tmp_path, records)
        wl_path = make_whitelist(tmp_path)
        th_path = make_thresholds(tmp_path)
        output_dir = tmp_path / "reports"
        output_dir.mkdir()

        parsed = learning_analyzer.parse_audit_trail(trail_path, self._WEEK_START, self._WEEK_END)
        whitelist = learning_analyzer.load_whitelist(wl_path)
        thresholds = learning_analyzer.load_thresholds(th_path)
        hotspots, whitelisted = learning_analyzer.compute_hotspots(parsed, whitelist, thresholds)
        # Total is the number of distinct task ids that survived parsing.
        total_tasks = len({r["task_id"] for r in parsed})
        report_path = learning_analyzer.generate_report(
            hotspots, whitelisted, self._WEEK_START, self._WEEK_END, total_tasks, output_dir
        )
        assert report_path.exists()
        content = report_path.read_text(encoding="utf-8")
        assert "hot.py" in content

    def test_cli_missing_audit_trail(self, tmp_path: Path):
        """With no audit-trail file the pipeline still writes a report, or exits cleanly.

        A ``SystemExit`` from any step is accepted. Any other exception fails
        the test, while assertion failures propagate unchanged.
        """
        missing = tmp_path / "nonexistent.jsonl"
        wl_path = make_whitelist(tmp_path)
        th_path = make_thresholds(tmp_path)
        output_dir = tmp_path / "reports"
        output_dir.mkdir()

        try:
            parsed = learning_analyzer.parse_audit_trail(missing, self._WEEK_START, self._WEEK_END)
            assert parsed == []
            whitelist = learning_analyzer.load_whitelist(wl_path)
            thresholds = learning_analyzer.load_thresholds(th_path)
            hotspots, whitelisted = learning_analyzer.compute_hotspots(parsed, whitelist, thresholds)
            report_path = learning_analyzer.generate_report(
                hotspots, whitelisted, self._WEEK_START, self._WEEK_END, 0, output_dir
            )
            assert report_path.exists()
        except SystemExit:
            # A deliberate exit is treated as acceptable handling of the missing file.
            pass
        except AssertionError:
            # Let real assertion failures surface as-is instead of being
            # re-reported below as an "unexpected exception".
            raise
        except Exception as e:
            pytest.fail(f"audit-trail 없을 때 예상치 못한 예외 발생: {e}")