# pyright: reportMissingImports=false
"""
knowledge_graph 단위 테스트
테스터: 루 (개발3팀)
"""
import json
import pathlib

import pytest

from kakao_knowledge.knowledge_graph import KnowledgeGraph


# ---------------------------------------------------------------------------
# Fixtures
# ---------------------------------------------------------------------------


@pytest.fixture
def sample_insights() -> list[dict]:
    """Return five InsightV2-style dicts covering a mix of types, categories, tags and experts.

    The records are deliberately overlapping: some share a tag, some a
    category, some an expert, so that the graph tests have edges to look for.
    """
    # type=qa; the only record with a non-empty question/answer pair.
    coverage_qa = {
        "id": "insight-001",
        "title": "광응고술 보상 기준",
        "type": "qa",
        "category": "보상/장기",
        "summary": "광응고술은 약관 개정으로 실비 보상이 가능합니다.",
        "key_points": ["약관 개정", "레이저 수술 포함", "실비 적용"],
        "expert": "이해철/프라임/부산",
        "confidence": "high",
        "related_topics": ["실비보상", "약관해석", "레이저수술"],
        "tags": ["광응고술", "보상", "실비", "약관해석"],
        "source_date": "2025-12-03",
        "source_chat": "보험설계사 커뮤니티",
        "raw_thread": ["[이해철] 광응고술은 보상됩니다"],
        "participants": ["이해철/프라임/부산"],
        "question": "광응고술 시 실비 보상이 가능한가요?",
        "answer": "네, 약관 개정 이후 보상 가능합니다.",
    }

    # type=case_analysis; shares the "보상" tag with insight-001.
    disclosure_case = {
        "id": "insight-002",
        "title": "고지의무 위반 시 보상 거절 사례",
        "type": "case_analysis",
        "category": "고지의무",
        "summary": "비만약 처방 미고지로 면책된 사례가 발생했습니다.",
        "key_points": ["비만약", "고지의무 위반", "면책"],
        "expert": "박유진/인카/서울",
        "confidence": "high",
        "related_topics": ["고지의무", "면책", "비만약"],
        "tags": ["고지의무", "면책", "보상"],
        "source_date": "2025-12-04",
        "source_chat": "보험설계사 커뮤니티",
        "raw_thread": ["[박유진] 비만약 면책 사례"],
        "participants": ["박유진/인카/서울"],
        "question": "",
        "answer": "",
    }

    # type=practical_tip; same expert as insight-001.
    copay_tip = {
        "id": "insight-003",
        "title": "실비보험 자기부담금 계산법",
        "type": "practical_tip",
        "category": "보상/일반",
        "summary": "실비보험 자기부담금은 의원/병원/종합병원에 따라 달라집니다.",
        "key_points": ["자기부담금", "의원 10%", "종합병원 20%"],
        "expert": "이해철/프라임/부산",
        "confidence": "medium",
        "related_topics": ["실비보상", "자기부담금", "병원급"],
        "tags": ["실비", "자기부담금", "약관해석"],
        "source_date": "2025-12-05",
        "source_chat": "보험설계사 커뮤니티",
        "raw_thread": ["[이해철] 자기부담금 계산 방법"],
        "participants": ["이해철/프라임/부산"],
        "question": "",
        "answer": "",
    }

    # type=regulation_interpretation; same category as insight-001.
    cancer_regulation = {
        "id": "insight-004",
        "title": "암보험 진단비 지급 기준",
        "type": "regulation_interpretation",
        "category": "보상/장기",
        "summary": "암 진단비는 조직검사 결과지 기준으로 지급됩니다.",
        "key_points": ["조직검사", "암 진단서", "지급 시기"],
        "expert": "김민수/교보/서울",
        "confidence": "high",
        "related_topics": ["암보험", "진단비", "약관해석"],
        "tags": ["암보험", "진단비", "보상"],
        "source_date": "2025-12-06",
        "source_chat": "장기보험 채널",
        "raw_thread": ["[김민수] 암 진단비 기준"],
        "participants": ["김민수/교보/서울"],
        "question": "",
        "answer": "",
    }

    # type=expert_opinion; same expert as insight-002, otherwise unrelated tags/category.
    sales_opinion = {
        "id": "insight-005",
        "title": "영업 노하우: 거절 고객 재접근 타이밍",
        "type": "expert_opinion",
        "category": "영업노하우",
        "summary": "거절 고객에게는 3개월 후 재접근이 효과적입니다.",
        "key_points": ["거절 후 3개월", "라이프 이벤트 활용", "재접근 스크립트"],
        "expert": "박유진/인카/서울",
        "confidence": "medium",
        "related_topics": ["영업전략", "고객관리", "재접근"],
        "tags": ["영업", "고객관리", "노하우"],
        "source_date": "2025-12-07",
        "source_chat": "영업 전략 채널",
        "raw_thread": ["[박유진] 거절 고객 재접근 방법"],
        "participants": ["박유진/인카/서울"],
        "question": "",
        "answer": "",
    }

    return [
        coverage_qa,
        disclosure_case,
        copay_tip,
        cancer_regulation,
        sales_opinion,
    ]


@pytest.fixture
def graph_builder(tmp_path, sample_insights):
    """Return a KnowledgeGraph that writes under ``tmp_path`` and has already been built.

    The instance is constructed with three string paths (insights directory,
    graph JSON file, index JSON file) and ``build_from_insights`` is invoked
    with the sample insights before it is handed to the test.
    """
    # Positional order expected by the constructor call: insights dir, graph file, index file.
    output_names = ("insights", "graph.json", "insights_index.json")
    output_paths = [str(tmp_path / name) for name in output_names]

    knowledge_graph = KnowledgeGraph(*output_paths)
    knowledge_graph.build_from_insights(sample_insights)
    return knowledge_graph


# ---------------------------------------------------------------------------
# TestKnowledgeGraph
# ---------------------------------------------------------------------------


class TestKnowledgeGraph:
    """Tests for the knowledge-graph builder: markdown output, graph JSON, index JSON, queries."""

    # ------------------------------------------------------------------
    # Private helpers (not collected by pytest: no ``test_`` prefix)
    # ------------------------------------------------------------------

    @staticmethod
    def _load_json(path):
        """Read the UTF-8 file at *path* and return its parsed JSON content."""
        return json.loads(pathlib.Path(path).read_text(encoding="utf-8"))

    @staticmethod
    def _read_markdown(graph_builder, filename):
        """Return the UTF-8 text of *filename* inside the builder's insights directory."""
        return (pathlib.Path(graph_builder.insights_dir) / filename).read_text(encoding="utf-8")

    @staticmethod
    def _connects(edge, first_id, second_id):
        """Return True if *edge* joins the two ids, in either direction."""
        return (edge["from"], edge["to"]) in ((first_id, second_id), (second_id, first_id))

    # ------------------------------------------------------------------
    # Markdown output
    # ------------------------------------------------------------------

    def test_build_creates_markdown_files(self, graph_builder, sample_insights):
        """After building, the insights directory holds one ``*.md`` file per insight."""
        expected_count = len(sample_insights)
        actual_count = len(list(pathlib.Path(graph_builder.insights_dir).glob("*.md")))
        assert actual_count == expected_count, (
            f"마크다운 파일 수 불일치: 기대={expected_count}, 실제={actual_count}"
        )

    def test_markdown_has_yaml_frontmatter(self, graph_builder):
        """The markdown file opens with ``---`` and has a later ``---`` line closing it."""
        target = pathlib.Path(graph_builder.insights_dir) / "insight-001.md"
        assert target.exists(), "insight-001.md 파일이 없음"
        text = target.read_text(encoding="utf-8")
        assert text.startswith("---"), "YAML frontmatter가 '---'로 시작하지 않음"
        # Skip the first line (the opening fence) and look for any closing fence.
        closing_fences = [row for row in text.split("\n")[1:] if row.strip() == "---"]
        assert closing_fences, "YAML frontmatter 닫힘 '---'가 없음"

    def test_markdown_frontmatter_contains_required_fields(self, graph_builder):
        """The markdown contains id, title, type, category, tags and confidence keys.

        Note: the check is a substring search over the whole file, not only
        over the frontmatter section.
        """
        text = self._read_markdown(graph_builder, "insight-001.md")
        required = ("id:", "title:", "type:", "category:", "tags:", "confidence:")
        for marker in required:
            assert marker in text, f"frontmatter에 '{marker}' 필드 없음"

    def test_markdown_has_backlinks(self, graph_builder):
        """At least one generated markdown file contains ``[[`` and ``]]`` link markers."""
        markdown_paths = pathlib.Path(graph_builder.insights_dir).glob("*.md")
        texts = (path.read_text(encoding="utf-8") for path in markdown_paths)
        # Lazy generator + any() stops reading files at the first hit.
        has_link = any("[[" in text and "]]" in text for text in texts)
        assert has_link, "[[backlink]] 형식의 관련 인사이트 링크가 없음"

    def test_markdown_qa_has_question_section(self, graph_builder):
        """Markdown for a ``type=qa`` insight has both question and answer headings."""
        text = self._read_markdown(graph_builder, "insight-001.md")
        assert "## 질문" in text, "qa 타입 마크다운에 '## 질문' 섹션 없음"
        assert "## 답변" in text, "qa 타입 마크다운에 '## 답변' 섹션 없음"

    def test_markdown_non_qa_has_summary_section(self, graph_builder):
        """Markdown for a non-qa insight has a summary heading."""
        text = self._read_markdown(graph_builder, "insight-002.md")
        assert "## 요약" in text, "non-qa 타입 마크다운에 '## 요약' 섹션 없음"

    def test_markdown_has_key_points_section(self, graph_builder):
        """Markdown has a key-points heading."""
        text = self._read_markdown(graph_builder, "insight-001.md")
        assert "## 핵심 포인트" in text, "마크다운에 '## 핵심 포인트' 섹션 없음"

    # ------------------------------------------------------------------
    # graph.json
    # ------------------------------------------------------------------

    def test_graph_json_created(self, graph_builder):
        """The graph JSON file exists after building."""
        graph_file = pathlib.Path(graph_builder.graph_path)
        assert graph_file.exists(), "graph.json 파일이 없음"

    def test_graph_has_nodes_and_edges(self, graph_builder, sample_insights):
        """The graph JSON has ``nodes`` and ``edges`` keys, with one node per insight."""
        graph = self._load_json(graph_builder.graph_path)
        assert "nodes" in graph, "graph.json에 'nodes' 키 없음"
        assert "edges" in graph, "graph.json에 'edges' 키 없음"
        expected_count = len(sample_insights)
        actual_count = len(graph["nodes"])
        assert actual_count == expected_count, (
            f"nodes 수 불일치: 기대={expected_count}, 실제={actual_count}"
        )

    def test_graph_has_stats(self, graph_builder):
        """The graph JSON has a ``stats`` section with node, edge and category totals."""
        graph = self._load_json(graph_builder.graph_path)
        assert "stats" in graph, "graph.json에 'stats' 키 없음"
        summary = graph["stats"]
        assert "total_nodes" in summary, "stats에 'total_nodes' 없음"
        assert "total_edges" in summary, "stats에 'total_edges' 없음"
        assert "categories" in summary, "stats에 'categories' 없음"

    def test_shared_tag_creates_edge(self, graph_builder):
        """Insights that share a tag are joined by a ``shared_tag`` edge."""
        graph = self._load_json(graph_builder.graph_path)
        tag_edges = [
            edge for edge in graph["edges"] if edge.get("relation") == "shared_tag"
        ]
        assert tag_edges, "shared_tag 관계 엣지가 없음"
        # insight-001 and insight-002 both carry the "보상" tag in the sample data.
        linked = any(
            self._connects(edge, "insight-001", "insight-002") for edge in tag_edges
        )
        assert linked, "insight-001과 insight-002 사이에 shared_tag 엣지 없음 (공유 태그: '보상')"

    def test_same_category_creates_edge(self, graph_builder):
        """Insights in the same category are joined by a ``same_category`` edge."""
        graph = self._load_json(graph_builder.graph_path)
        category_edges = [
            edge for edge in graph["edges"] if edge.get("relation") == "same_category"
        ]
        assert category_edges, "same_category 관계 엣지가 없음"
        # insight-001 and insight-004 are both in the "보상/장기" category.
        linked = any(
            self._connects(edge, "insight-001", "insight-004") for edge in category_edges
        )
        assert linked, "insight-001과 insight-004 사이에 same_category 엣지 없음"

    def test_related_topic_creates_edge(self, graph_builder):
        """No ``related_topic`` edge appears when no pair shares two or more related topics.

        Only the negative case is exercised: in the sample data every pair of
        insights has at most one ``related_topics`` entry in common.
        """
        graph = self._load_json(graph_builder.graph_path)
        topic_edge_count = sum(
            1 for edge in graph["edges"] if edge.get("relation") == "related_topic"
        )
        assert topic_edge_count == 0, (
            f"교집합 2개 미만인 데이터인데 related_topic 엣지가 생성됨: {topic_edge_count}개"
        )

    def test_same_expert_creates_edge(self, graph_builder):
        """Insights answered by the same expert are joined by a ``same_expert`` edge."""
        graph = self._load_json(graph_builder.graph_path)
        expert_edges = [
            edge for edge in graph["edges"] if edge.get("relation") == "same_expert"
        ]
        assert expert_edges, "same_expert 관계 엣지가 없음"
        # insight-001 and insight-003 both name "이해철/프라임/부산" as expert.
        linked = any(
            self._connects(edge, "insight-001", "insight-003") for edge in expert_edges
        )
        assert linked, "insight-001과 insight-003 사이에 same_expert 엣지 없음"

    def test_no_duplicate_edges(self, graph_builder):
        """No (unordered pair, relation) combination occurs twice; A→B and B→A count as one."""
        graph = self._load_json(graph_builder.graph_path)
        already_seen = set()
        for edge in graph["edges"]:
            # Sorting the endpoints makes the key direction-independent.
            low, high = sorted((edge["from"], edge["to"]))
            signature = (low, high, edge["relation"])
            assert signature not in already_seen, (
                f"중복 엣지 발견: {edge['from']} ↔ {edge['to']} ({edge['relation']})"
            )
            already_seen.add(signature)

    def test_edge_has_weight(self, graph_builder):
        """Every edge has a ``weight`` entry holding an int or float."""
        graph = self._load_json(graph_builder.graph_path)
        for link in graph["edges"]:
            assert "weight" in link, f"엣지에 weight 없음: {link}"
            assert isinstance(link["weight"], (int, float)), (
                f"weight가 숫자가 아님: {link['weight']}"
            )

    # ------------------------------------------------------------------
    # insights_index.json
    # ------------------------------------------------------------------

    def test_index_json_created(self, graph_builder):
        """The index JSON file exists after building."""
        index_file = pathlib.Path(graph_builder.index_path)
        assert index_file.exists(), "insights_index.json 파일이 없음"

    def test_index_has_all_keys(self, graph_builder):
        """The index has by_id, by_category, by_tag, by_expert and by_type sections."""
        index = self._load_json(graph_builder.index_path)
        sections = ("by_id", "by_category", "by_tag", "by_expert", "by_type")
        for section in sections:
            assert section in index, f"인덱스에 '{section}' 키 없음"

    def test_index_by_id(self, graph_builder, sample_insights):
        """``by_id`` lists every insight, each entry carrying ``title`` and ``file``."""
        id_section = self._load_json(graph_builder.index_path)["by_id"]
        for record in sample_insights:
            insight_id = record["id"]
            assert insight_id in id_section, f"by_id에 {insight_id} 없음"
            entry = id_section[insight_id]
            assert "title" in entry, "by_id 항목에 title 없음"
            assert "file" in entry, "by_id 항목에 file 없음"

    def test_index_by_category(self, graph_builder):
        """``by_category`` groups insight-001 and insight-004 under "보상/장기"."""
        category_section = self._load_json(graph_builder.index_path)["by_category"]
        assert "보상/장기" in category_section, "by_category에 '보상/장기' 없음"
        members = category_section["보상/장기"]
        assert "insight-001" in members, "보상/장기에 insight-001 없음"
        assert "insight-004" in members, "보상/장기에 insight-004 없음"

    def test_index_by_tag(self, graph_builder):
        """``by_tag`` has a "보상" entry that includes insight-001."""
        tag_section = self._load_json(graph_builder.index_path)["by_tag"]
        # In the sample data this tag is carried by insight-001, insight-002 and
        # insight-004; only insight-001 is asserted here.
        assert "보상" in tag_section, "by_tag에 '보상' 없음"
        assert "insight-001" in tag_section["보상"], "by_tag['보상']에 insight-001 없음"

    def test_index_by_expert(self, graph_builder):
        """``by_expert`` has an entry for the expert of insight-001 that includes it."""
        expert_section = self._load_json(graph_builder.index_path)["by_expert"]
        assert "이해철/프라임/부산" in expert_section, "by_expert에 '이해철/프라임/부산' 없음"
        assert "insight-001" in expert_section["이해철/프라임/부산"]

    def test_index_by_type(self, graph_builder):
        """``by_type`` has a "qa" entry that includes insight-001."""
        type_section = self._load_json(graph_builder.index_path)["by_type"]
        assert "qa" in type_section, "by_type에 'qa' 없음"
        assert "insight-001" in type_section["qa"]

    # ------------------------------------------------------------------
    # Query API
    # ------------------------------------------------------------------

    def test_get_related_returns_connected(self, graph_builder):
        """``get_related`` returns a non-empty list whose items each have an ``id`` key."""
        neighbours = graph_builder.get_related("insight-001", max_depth=1)
        assert isinstance(neighbours, list), "get_related 반환값이 리스트가 아님"
        assert len(neighbours) > 0, "insight-001의 연결된 인사이트가 없음"
        for neighbour in neighbours:
            assert "id" in neighbour, f"related 항목에 id 없음: {neighbour}"

    def test_get_related_depth2(self, graph_builder):
        """``max_depth=2`` returns at least as many results as ``max_depth=1``."""
        one_hop = graph_builder.get_related("insight-001", max_depth=1)
        two_hop = graph_builder.get_related("insight-001", max_depth=2)
        # Only result counts are compared, not the actual members.
        assert len(two_hop) >= len(one_hop), "depth=2가 depth=1보다 적은 결과 반환"

    def test_search_by_tag(self, graph_builder):
        """Searching the "보상" tag returns a list containing insight-001 and insight-002."""
        hits = graph_builder.search_by_tag("보상")
        assert isinstance(hits, list), "search_by_tag 반환값이 리스트가 아님"
        assert "insight-001" in hits, "search_by_tag('보상')에 insight-001 없음"
        assert "insight-002" in hits, "search_by_tag('보상')에 insight-002 없음"

    def test_search_by_tag_not_found(self, graph_builder):
        """Searching an unknown tag returns an empty list."""
        hits = graph_builder.search_by_tag("존재하지않는태그xyz")
        assert hits == [], f"없는 태그 검색 시 빈 리스트 아님: {hits}"

    def test_search_by_category(self, graph_builder):
        """Searching "보상/장기" returns a list containing insight-001 and insight-004."""
        hits = graph_builder.search_by_category("보상/장기")
        assert isinstance(hits, list), "search_by_category 반환값이 리스트가 아님"
        assert "insight-001" in hits, "search_by_category('보상/장기')에 insight-001 없음"
        assert "insight-004" in hits, "search_by_category('보상/장기')에 insight-004 없음"

    def test_search_by_category_not_found(self, graph_builder):
        """Searching an unknown category returns an empty list."""
        hits = graph_builder.search_by_category("없는카테고리xyz")
        assert hits == [], f"없는 카테고리 검색 시 빈 리스트 아님: {hits}"

    # ------------------------------------------------------------------
    # build_from_insights return value
    # ------------------------------------------------------------------

    def test_build_returns_stats(self, tmp_path, sample_insights):
        """``build_from_insights`` returns int ``nodes``/``edges`` and a dict ``categories``."""
        output_paths = [
            str(tmp_path / name)
            for name in ("insights", "graph.json", "insights_index.json")
        ]
        fresh_graph = KnowledgeGraph(*output_paths)
        outcome = fresh_graph.build_from_insights(sample_insights)

        expected_types = (("nodes", int), ("edges", int), ("categories", dict))
        # Presence of all keys is checked first, then their types, then the node count.
        for key, _ in expected_types:
            assert key in outcome, f"build 반환값에 '{key}' 없음"
        for key, kind in expected_types:
            assert isinstance(outcome[key], kind), f"{key}가 {kind.__name__}가 아님"
        assert outcome["nodes"] == len(sample_insights)

    def test_empty_insights_no_error(self, tmp_path):
        """Building from an empty list succeeds and writes a graph with no nodes or edges."""
        output_paths = [
            str(tmp_path / name)
            for name in ("insights", "graph.json", "insights_index.json")
        ]
        fresh_graph = KnowledgeGraph(*output_paths)
        outcome = fresh_graph.build_from_insights([])
        assert outcome["nodes"] == 0, "빈 입력 시 nodes가 0이 아님"
        assert outcome["edges"] == 0, "빈 입력 시 edges가 0이 아님"

        written = self._load_json(fresh_graph.graph_path)
        assert written["nodes"] == [], "빈 그래프의 nodes가 빈 리스트가 아님"
        assert written["edges"] == [], "빈 그래프의 edges가 빈 리스트가 아님"