"""
search.py

Supabase knowledge base에 대한 의미 검색, 키워드 검색, 혼합 검색 기능을 제공한다.

의존성:
- embedding_service.get_embedding: 텍스트 임베딩 벡터 생성
- supabase: 벡터 DB 및 전문 검색

구현 전략:
- semantic_search: hybrid_search(semantic_weight=1.0, keyword_weight=0.0)
- keyword_search: hybrid_search(semantic_weight=0.0, keyword_weight=1.0)
- hybrid_search: Supabase RPC("hybrid_search") 직접 호출
"""

import logging
import os
from typing import Any

from embedding_service import get_embedding  # type: ignore[import-not-found]
from supabase import Client, create_client

logger = logging.getLogger(__name__)


# ---------------------------------------------------------------------------
# Supabase client
# ---------------------------------------------------------------------------


def _get_supabase_client() -> Client:
    """
    Build a Supabase client from connection settings found in the environment.

    The URL is read from ``INSURO_SUPABASE_URL`` and, if that is unset or
    empty, from ``INSURO_NEW_SUPABASE_URL``. The service role key is read from
    ``INSURO_NEW_SERVICE_ROLE_KEY`` and, if that is unset or empty, from
    ``INSURO_SUPABASE_SERVICE_ROLE_KEY``.

    Returns:
        A client created by ``create_client(url, key)``.

    Raises:
        ValueError: If either the URL or the key could not be resolved.
    """
    env = os.environ

    # NOTE(review): the URL prefers the non-"NEW" variable while the key
    # prefers the "NEW" one; if both sets are defined they may point at
    # different projects — confirm the intended precedence.
    supabase_url = env.get("INSURO_SUPABASE_URL") or env.get("INSURO_NEW_SUPABASE_URL")
    service_key = env.get("INSURO_NEW_SERVICE_ROLE_KEY") or env.get("INSURO_SUPABASE_SERVICE_ROLE_KEY")

    if supabase_url and service_key:
        return create_client(supabase_url, service_key)
    raise ValueError("Supabase URL and service role key must be set")


# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------


def semantic_search(
    query: str,
    limit: int = 10,
    source_filter: str | None = None,
) -> list[dict[str, Any]]:
    """
    Run a semantic-only search for ``query``.

    Delegates to ``hybrid_search`` with ``semantic_weight=1.0`` and
    ``keyword_weight=0.0``, then reduces every hit to the semantic-search
    result shape (dropping ``combined_score``).

    Args:
        query: Text to search for.
        limit: Maximum number of results to return. Defaults to 10.
        source_filter: Restrict results to this ``knowledge_documents.source``
            value; ``None`` searches everything.

    Returns:
        A list of result dicts ordered by ``similarity``, highest first. Each
        item has the keys ``content``, ``similarity``, ``source``, ``title``
        and ``document_id``.
    """
    hits = hybrid_search(
        query=query,
        limit=limit,
        semantic_weight=1.0,
        keyword_weight=0.0,
        source_filter=source_filter,
    )

    # Keep only the fields that belong to the semantic-search result shape.
    kept_fields = ("content", "similarity", "source", "title", "document_id")
    trimmed: list[dict[str, Any]] = [
        {field: hit[field] for field in kept_fields} for hit in hits
    ]

    # Stable in-place sort: hits with equal similarity keep their incoming order.
    trimmed.sort(key=lambda item: item["similarity"], reverse=True)
    return trimmed


def _escape_like_pattern(text: str) -> str:
    """
    Escape LIKE/ILIKE metacharacters so that ``text`` is matched literally.

    Backslash is doubled first (it is PostgreSQL's default LIKE escape
    character), then ``%`` and ``_`` are each prefixed with a backslash.
    """
    return text.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")


def keyword_search(
    query: str,
    limit: int = 10,
    source_filter: str | None = None,
) -> list[dict[str, Any]]:
    """
    Run a keyword-only (full-text) search against the knowledge base.

    Calls the Supabase RPC ``hybrid_search`` with ``semantic_weight=0.0`` and
    ``keyword_weight=1.0``. No embedding is generated; a zero vector is sent
    as ``query_embedding`` because the RPC call still passes that argument.

    When the RPC returns no rows and no ``source_filter`` was given, a
    best-effort case-insensitive substring match (ILIKE) on
    ``knowledge_chunks.content`` is attempted instead. Fallback hits carry a
    fixed ``similarity`` of 0.5 and empty ``source``/``title``. A failure of
    the fallback query is logged and yields an empty list rather than raising.

    Args:
        query: Keyword text to search for. A blank query returns ``[]``.
        limit: Maximum number of results to return. Defaults to 10.
        source_filter: Restrict results to this ``knowledge_documents.source``
            value; ``None`` searches everything.

    Returns:
        A list of result dicts, each with the keys ``content``,
        ``similarity``, ``source``, ``title`` and ``document_id``.

    Raises:
        ValueError: Propagated from ``_get_supabase_client`` when the
            connection settings are missing.
    """
    logger.debug(
        "keyword_search 호출: query=%r, limit=%d",
        query[:50],
        limit,
    )

    # A blank query has nothing to match; without this guard the ILIKE
    # fallback pattern would be "%%", which matches every chunk.
    if not query.strip():
        return []

    def _text(value: Any) -> str:
        # A JSON null becomes "" instead of the literal string "None".
        return "" if value is None else str(value)

    supabase = _get_supabase_client()

    # The embedding carries no weight here (semantic_weight=0.0) but is still
    # passed to the RPC.
    # NOTE(review): 1536 presumably matches the stored embedding dimension — confirm.
    dummy_embedding: list[float] = [0.0] * 1536

    params: dict[str, Any] = {
        "query_text": query,
        "query_embedding": dummy_embedding,
        "match_count": limit,
        "semantic_weight": 0.0,
        "keyword_weight": 1.0,
        "source_filter": source_filter,
    }

    response = supabase.rpc("hybrid_search", params).execute()
    rows: list[dict[str, Any]] = response.data or []  # type: ignore[assignment]

    results: list[dict[str, Any]] = [
        {
            "content": _text(row.get("content")),
            # `or 0.0` guards against a null score, which float() would reject.
            "similarity": float(row.get("similarity") or 0.0),
            "source": _text(row.get("source")),
            "title": _text(row.get("title")),
            "document_id": _text(row.get("document_id")),
        }
        for row in rows
    ]
    if results:
        return results

    # The fallback reads knowledge_chunks only, which (as queried here) exposes
    # no source column, so it cannot honour source_filter. Returning unfiltered
    # chunks would violate the caller's filter, so skip the fallback instead.
    if source_filter is not None:
        logger.debug(
            "keyword_search: no RPC results; ILIKE fallback skipped because source_filter is set: %r",
            query[:50],
        )
        return results

    # Trigram/ILIKE fallback: the RPC returned nothing, so try a direct
    # substring match.
    logger.debug("keyword_search: RPC 결과 없음, ILIKE fallback 시도: %r", query[:50])
    try:
        # Escape LIKE wildcards so user text such as "100%" or "a_b" is
        # matched literally.
        # NOTE(review): PostgREST documents `*` as an alias for `%` in
        # like/ilike patterns; a literal `*` in the query may therefore still
        # act as a wildcard — confirm whether that matters here.
        pattern = f"%{_escape_like_pattern(query)}%"
        fallback_response = (
            supabase.from_("knowledge_chunks")
            .select("id, document_id, content")
            .ilike("content", pattern)
            .limit(limit)
            .execute()
        )
        fallback_data = fallback_response.data
        fallback_rows: list[dict[str, Any]] = fallback_data if isinstance(fallback_data, list) else []  # type: ignore[assignment]
        return [
            {
                "content": _text(row.get("content")),
                "similarity": 0.5,  # fixed default score for an ILIKE match
                "source": "",
                "title": "",
                "document_id": _text(row.get("document_id")),
            }
            for row in fallback_rows
        ]
    except Exception as exc:
        # Best effort only: a failing fallback must not break the search call.
        logger.warning("ILIKE fallback 실패: %s", exc)

    return results


def hybrid_search(
    query: str,
    limit: int = 10,
    semantic_weight: float = 0.7,
    keyword_weight: float = 0.3,
    source_filter: str | None = None,
) -> list[dict[str, Any]]:
    """
    Run a hybrid search that combines semantic and keyword matching.

    Generates an embedding for ``query`` with ``get_embedding``, calls the
    Supabase RPC ``hybrid_search`` with it, and returns the normalized rows
    ordered by ``combined_score``, highest first.

    Args:
        query: Text to search for. A blank query returns ``[]`` without
            calling the embedding service or the database.
        limit: Maximum number of results to return. Defaults to 10.
        semantic_weight: Weight of the semantic score. Defaults to 0.7.
        keyword_weight: Weight of the keyword score. Defaults to 0.3.
        source_filter: Restrict results to this ``knowledge_documents.source``
            value; ``None`` searches everything.

    Returns:
        A list of result dicts ordered by ``combined_score`` descending. Each
        item has the keys ``content``, ``similarity``, ``combined_score``,
        ``source``, ``title`` and ``document_id``. Null values in the RPC
        response are mapped to ``""`` (text fields) or ``0.0`` (scores).

    Raises:
        ValueError: Propagated from ``_get_supabase_client`` when the
            connection settings are missing.
    """
    logger.debug(
        "hybrid_search 호출: query=%r, limit=%d, semantic_weight=%.2f, keyword_weight=%.2f",
        query[:50],
        limit,
        semantic_weight,
        keyword_weight,
    )

    # Nothing meaningful can match a blank query, so skip the embedding call
    # and the RPC round trip entirely.
    if not query.strip():
        return []

    def _text(value: Any) -> str:
        # A JSON null becomes "" instead of the literal string "None".
        return "" if value is None else str(value)

    # The embedding is always generated here, regardless of the weights.
    query_embedding = get_embedding(query)

    supabase = _get_supabase_client()

    params: dict[str, Any] = {
        "query_text": query,
        "query_embedding": query_embedding,
        "match_count": limit,
        "semantic_weight": semantic_weight,
        "keyword_weight": keyword_weight,
        "source_filter": source_filter,
    }

    response = supabase.rpc("hybrid_search", params).execute()
    rows: list[dict[str, Any]] = response.data or []  # type: ignore[assignment]

    results: list[dict[str, Any]] = [
        {
            "content": _text(row.get("content")),
            # `or 0.0` guards against null scores, which float() would reject.
            "similarity": float(row.get("similarity") or 0.0),
            "combined_score": float(row.get("combined_score") or 0.0),
            "source": _text(row.get("source")),
            "title": _text(row.get("title")),
            "document_id": _text(row.get("document_id")),
        }
        for row in rows
    ]

    # Stable sort: rows with equal combined_score keep the order the RPC returned.
    results.sort(key=lambda item: item["combined_score"], reverse=True)
    return results