# pyright: reportMissingImports=false
"""벡터 DB 래퍼 — chromadb + sentence-transformers 기반 의미 검색"""

from __future__ import annotations

import json
import logging
from typing import Optional

import chromadb
from sentence_transformers import SentenceTransformer

logger = logging.getLogger(__name__)

_DEFAULT_MODEL = "paraphrase-multilingual-MiniLM-L12-v2"


class VectorStore:
    """Store insights in a chromadb collection and search them semantically.

    Each insight is embedded with a sentence-transformers model and written
    to a persistent chromadb collection together with a flat metadata record.
    List-valued insight fields are kept in that record as JSON strings and
    are decoded again by :meth:`get_by_id`.
    """

    # Insight fields that hold lists; serialized to JSON strings in metadata.
    _LIST_FIELDS = ("key_points", "participants", "related_topics", "tags")

    def __init__(
        self,
        db_path: str,
        collection_name: str = "insights",
        model_name: str = _DEFAULT_MODEL,
        device: str = "cpu",
    ) -> None:
        """Open (or create) the collection and load the embedding model.

        Args:
            db_path: Directory of the persistent chromadb store
                (e.g. /home/jay/projects/insuwiki/data/chroma_db/).
            collection_name: Name of the collection to get or create.
            model_name: sentence-transformers model name.
            device: Device handed to SentenceTransformer; defaults to "cpu",
                which is what this class always used before the parameter
                existed.
        """
        self._client = chromadb.PersistentClient(path=db_path)
        self._model = SentenceTransformer(model_name, device=device)
        self._collection = self._client.get_or_create_collection(
            name=collection_name,
            metadata={"hnsw:space": "cosine"},
        )
        logger.info(
            "VectorStore 초기화 완료 — db_path=%s, collection=%s, model=%s",
            db_path,
            collection_name,
            model_name,
        )

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def add_insights(self, insights: list[dict]) -> int:
        """Embed the given insights and upsert them into the collection.

        The embedding text is built from title, summary and key_points (see
        :meth:`_build_embed_text`). Insights without an ID are skipped with a
        warning. When the same ID occurs more than once in ``insights`` only
        the last occurrence is written, so one repeated ID cannot make the
        whole batch fail.

        Args:
            insights: Insight dicts; each needs a truthy ``"id"``.

        Returns:
            Number of distinct insights written (added or updated).
        """
        if not insights:
            return 0

        # Keyed by ID: a later duplicate replaces the earlier entry while the
        # position of the first occurrence is kept.
        pending: dict[str, tuple[str, dict]] = {}
        for insight in insights:
            raw_id = insight.get("id", "")
            if not raw_id:
                logger.warning("ID 없는 인사이트 스킵: %s", insight.get("title", ""))
                continue
            # chromadb expects string IDs; a numeric ID is stringified.
            insight_id = str(raw_id)
            pending[insight_id] = (
                self._build_embed_text(insight),
                self._build_metadata(insight_id, insight),
            )

        if not pending:
            return 0

        ids = list(pending)
        texts = [text for text, _ in pending.values()]
        metadatas = [metadata for _, metadata in pending.values()]

        embeddings = self._model.encode(texts).tolist()

        self._collection.upsert(
            ids=ids,
            embeddings=embeddings,
            metadatas=metadatas,  # type: ignore[arg-type]
            documents=texts,
        )

        logger.info("VectorStore upsert 완료: %d건", len(ids))
        return len(ids)

    def search_similar(self, query: str, top_k: int = 5) -> list[dict]:
        """Return the stored insights most similar in meaning to ``query``.

        Args:
            query: Free-text query to embed and search with.
            top_k: Maximum number of hits; values <= 0 yield an empty list.

        Returns:
            [{"id": str, "title": str, "category": str, "distance": float,
            "summary": str}, ...] in the order returned by the collection.
        """
        if top_k <= 0:
            return []

        # Count once: it is needed both for the empty check and to cap
        # n_results at the number of stored items.
        total = self.count()
        if total == 0:
            return []

        query_embedding = self._model.encode([query]).tolist()[0]

        results = self._collection.query(
            query_embeddings=[query_embedding],
            n_results=min(top_k, total),
            include=["metadatas", "distances"],
        )

        raw_metadatas = results.get("metadatas") or [[]]
        raw_distances = results.get("distances") or [[]]

        hits: list[dict] = []
        for meta, dist in zip(raw_metadatas[0], raw_distances[0]):
            # A record stored without metadata comes back as None.
            meta = meta or {}
            hits.append(
                {
                    "id": meta.get("id", ""),
                    "title": meta.get("title", ""),
                    "category": meta.get("category", ""),
                    "distance": float(dist),
                    "summary": meta.get("summary", ""),
                }
            )
        return hits

    def get_by_id(self, insight_id: str) -> Optional[dict]:
        """Fetch the metadata record of one insight.

        List fields stored as JSON strings are decoded back; a value that
        cannot be decoded, or that decodes to null, becomes ``[]``.

        Args:
            insight_id: ID of the insight to look up.

        Returns:
            The metadata dict, or None when the ID is unknown, the record has
            no metadata, or the lookup itself failed (logged as a warning).
        """
        try:
            results = self._collection.get(
                ids=[insight_id],
                include=["metadatas"],
            )
        except Exception as exc:
            # Best-effort lookup: report the storage failure, do not raise.
            logger.warning("get_by_id 오류 (id=%s): %s", insight_id, exc)
            return None

        metadatas = results.get("metadatas") or []
        if not metadatas or not metadatas[0]:
            return None

        record = dict(metadatas[0])
        for list_field in self._LIST_FIELDS:
            value = record.get(list_field)
            if not isinstance(value, str):
                continue
            try:
                decoded = json.loads(value)
            except (json.JSONDecodeError, TypeError):
                decoded = []
            record[list_field] = [] if decoded is None else decoded
        return record

    def delete_by_id(self, insight_id: str) -> bool:
        """Delete one insight.

        Args:
            insight_id: ID of the insight to delete.

        Returns:
            True when the record existed and was deleted; False when it did
            not exist or the storage call failed (logged as a warning).
        """
        try:
            # Only the IDs are needed to decide whether the record exists, so
            # a record without metadata can still be deleted.
            found = self._collection.get(ids=[insight_id], include=[])
            if not found.get("ids"):
                return False
            self._collection.delete(ids=[insight_id])
        except Exception as exc:
            logger.warning("delete_by_id 오류 (id=%s): %s", insight_id, exc)
            return False

        logger.info("VectorStore 삭제 완료: %s", insight_id)
        return True

    def count(self) -> int:
        """Return the number of records in the collection."""
        return self._collection.count()

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _scalar(value, default=""):
        """Return ``value`` as a metadata-safe scalar.

        None becomes ``default``; str/int/float/bool pass through unchanged;
        anything else is stringified.
        """
        if value is None:
            return default
        if isinstance(value, (str, int, float, bool)):
            return value
        return str(value)

    @staticmethod
    def _as_list(value):
        """Normalize a list-like insight field.

        None becomes ``[]`` and a bare string becomes a one-element list (so
        it is not treated as a sequence of characters); any other value is
        returned unchanged.
        """
        if value is None:
            return []
        if isinstance(value, str):
            return [value]
        return value

    def _build_metadata(self, insight_id: str, insight: dict) -> dict:
        """Build the flat metadata record stored next to the embedding."""
        scalar = self._scalar

        def dump_list(field: str) -> str:
            return json.dumps(self._as_list(insight.get(field)), ensure_ascii=False)

        return {
            "id": insight_id,
            "title": scalar(insight.get("title")),
            "category": scalar(insight.get("category")),
            "type": str(scalar(insight.get("type"))),
            "summary": scalar(insight.get("summary")),
            "confidence": scalar(insight.get("confidence"), "medium"),
            "expert": scalar(insight.get("expert")),
            "source_date": scalar(insight.get("source_date")),
            "source_chat": scalar(insight.get("source_chat")),
            "key_points": dump_list("key_points"),
            "participants": dump_list("participants"),
            "related_topics": dump_list("related_topics"),
            "tags": dump_list("tags"),
            "question": scalar(insight.get("question")),
            "answer": scalar(insight.get("answer")),
        }

    def _build_embed_text(self, insight: dict) -> str:
        """Build the text that gets embedded for an insight.

        Format: "{title} {summary} {' '.join(key_points)}", stripped.
        Missing or None parts contribute an empty string, and a plain-string
        ``key_points`` is used as a single point.
        """
        title = self._scalar(insight.get("title"))
        summary = self._scalar(insight.get("summary"))
        key_points = self._as_list(insight.get("key_points"))
        key_points_text = " ".join(str(kp) for kp in key_points)
        return f"{title} {summary} {key_points_text}".strip()