"""SQLite FTS5 기반 메모리 인덱서.

claude 메모리 디렉토리(diary, anu_memory)의 마크다운 파일을 SQLite FTS5 테이블에
인덱싱하여 한국어 포함 전문 검색을 지원한다.

주요 기능:
- YAML frontmatter 파싱 (표준 라이브러리만 사용, yaml 모듈 미사용)
- SHA256 해시 기반 증분 인덱싱 (skipped / updated / indexed)
- FTS5 외부 콘텐츠 테이블 + 트리거 자동 동기화
- 한국어 검색: FTS5 MATCH 시도 후 LIKE fallback
- CLI 진입점 없음 (memory_search.py 에서 import 하여 사용)
"""

import glob
import hashlib
import os
import re
import sqlite3
from datetime import datetime
from pathlib import Path
from typing import Optional


class MemoryIndexer:
    """SQLite FTS5-backed indexer for memory markdown files.

    Parses markdown files (with optional YAML frontmatter) from the diary and
    anu-memory directories into a ``memories`` table mirrored by an FTS5
    external-content table. Supports SHA256-based incremental indexing
    (indexed / updated / skipped) and full-text search with a LIKE fallback
    for text (e.g. Korean) that the default FTS5 tokenizer segments poorly.
    No CLI entry point; imported and used by ``memory_search.py``.
    """

    DIARY_DIR = "/home/jay/.claude/memory/diary"
    ANU_MEMORY_DIR = "/home/jay/.claude/projects/-home-jay--cokacdir-workspace-autoset/memory"
    DB_PATH_DEFAULT = "/home/jay/.claude/memory/memory_index.db"

    # ------------------------------------------------------------------ init

    def __init__(self, db_path: Optional[str] = None) -> None:
        """Open the SQLite database and initialize the schema.

        Parameters
        ----------
        db_path : str, optional
            Path of the SQLite database file; ``DB_PATH_DEFAULT`` when omitted.
            The parent directory is created if missing.
        """
        self._db_path = db_path if db_path is not None else self.DB_PATH_DEFAULT
        # Ensure the directory holding the DB file exists.
        Path(self._db_path).parent.mkdir(parents=True, exist_ok=True)
        self._conn: sqlite3.Connection = sqlite3.connect(self._db_path)
        self._conn.row_factory = sqlite3.Row  # rows are dict-convertible
        self._closed = False
        self._init_db()

    # Context-manager protocol so callers can write
    # ``with MemoryIndexer(...) as mi: ...`` and get the connection closed
    # even when an exception escapes the block.
    def __enter__(self) -> "MemoryIndexer":
        return self

    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
        self.close()

    # --------------------------------------------------------------- _init_db

    def _init_db(self) -> None:
        """Create the base table, the FTS5 virtual table, and sync triggers.

        ``memories_fts`` is an *external content* FTS5 table; the three
        triggers keep it synchronized with ``memories``. Note that for
        external-content tables a 'delete' record must carry the OLD column
        values, which is why the update/delete triggers re-supply them.
        """
        cur = self._conn.cursor()

        cur.executescript("""
            CREATE TABLE IF NOT EXISTS memories (
                id          INTEGER PRIMARY KEY,
                file_path   TEXT UNIQUE,
                title       TEXT,
                type        TEXT,
                team_id     TEXT,
                task_id     TEXT,
                tags        TEXT,
                content     TEXT,
                created_at  TEXT,
                updated_at  TEXT,
                file_hash   TEXT
            );

            CREATE VIRTUAL TABLE IF NOT EXISTS memories_fts USING fts5(
                title, type, tags, content,
                content=memories, content_rowid=id
            );

            CREATE TRIGGER IF NOT EXISTS memories_ai
            AFTER INSERT ON memories BEGIN
                INSERT INTO memories_fts(rowid, title, type, tags, content)
                VALUES (new.id, new.title, new.type, new.tags, new.content);
            END;

            CREATE TRIGGER IF NOT EXISTS memories_au
            AFTER UPDATE ON memories BEGIN
                INSERT INTO memories_fts(memories_fts, rowid, title, type, tags, content)
                VALUES ('delete', old.id, old.title, old.type, old.tags, old.content);
                INSERT INTO memories_fts(rowid, title, type, tags, content)
                VALUES (new.id, new.title, new.type, new.tags, new.content);
            END;

            CREATE TRIGGER IF NOT EXISTS memories_ad
            AFTER DELETE ON memories BEGIN
                INSERT INTO memories_fts(memories_fts, rowid, title, type, tags, content)
                VALUES ('delete', old.id, old.title, old.type, old.tags, old.content);
            END;
            """)
        self._conn.commit()

    # ----------------------------------------------------------- _parse_frontmatter

    @staticmethod
    def _parse_frontmatter(text: str) -> tuple[dict, str]:
        """Parse a minimal YAML frontmatter block (stdlib only, no ``yaml``).

        Supports flat ``key: value`` pairs and inline lists ``[a, b]``;
        surrounding single/double quotes on scalars and list items are
        stripped. Nested structures are not supported. Lines that are blank,
        comments (``#``), or lack a colon are ignored.

        Returns
        -------
        (front, body) : tuple[dict, str]
            ``front`` is the parsed mapping (empty when frontmatter is absent
            or unterminated); ``body`` is the text after the closing ``---``
            (or the whole input when there is no frontmatter).
        """
        front: dict = {}
        body = text

        # Only treat the document as having frontmatter if it opens with ---.
        if not text.startswith("---"):
            return front, body

        rest = text[3:]
        # Closing delimiter: a line consisting of --- (trailing blanks OK)
        # at a line start, or --- at end of text.
        end_match = re.search(r"\n---[ \t]*(\n|$)", rest)
        if not end_match:
            # Unterminated frontmatter: keep everything as body.
            return front, body

        fm_block = rest[: end_match.start()]
        body = rest[end_match.end() :]

        for line in fm_block.splitlines():
            line = line.strip()
            if not line or line.startswith("#"):
                continue  # blank line or YAML comment
            if ":" not in line:
                continue  # not a key: value pair
            key, _, raw_val = line.partition(":")
            key = key.strip()
            raw_val = raw_val.strip()

            if raw_val.startswith("[") and raw_val.endswith("]"):
                # Inline list: [item1, item2] -> list of stripped strings.
                inner = raw_val[1:-1]
                front[key] = [v.strip().strip("'\"") for v in inner.split(",") if v.strip()]
            else:
                # Scalar value: strip surrounding quotes.
                front[key] = raw_val.strip("'\"")

        return front, body

    # ------------------------------------------------------------ _tags_to_str

    @staticmethod
    def _tags_to_str(raw_tags) -> str:
        """Normalize a frontmatter ``tags`` value (list or scalar) to CSV text."""
        if isinstance(raw_tags, list):
            return ",".join(raw_tags)
        return str(raw_tags) if raw_tags else ""

    # -------------------------------------------------------------- _parse_file

    def _parse_file(self, file_path: str) -> Optional[dict]:
        """Parse a file into an indexable record.

        A file whose frontmatter contains both 'date' and 'session' keys is
        classified as a diary entry; everything else is a generic memory file.

        Returns
        -------
        dict or None
            Keys: file_path, title, type, team_id, task_id, tags, content,
            created_at, updated_at. ``None`` when the file cannot be read,
            parsed, or stat'ed.
        """
        try:
            text = Path(file_path).read_text(encoding="utf-8")
        except Exception:
            return None

        try:
            front, body = self._parse_frontmatter(text)
        except Exception:
            return None

        try:
            # File mtime serves as updated_at (and created_at fallback).
            mtime = datetime.fromtimestamp(os.path.getmtime(file_path)).isoformat(timespec="seconds")
        except OSError:
            # File disappeared between read and stat.
            return None

        tags_str = self._tags_to_str(front.get("tags", []))

        # Diary detection: frontmatter has both 'date' and 'session'.
        if "date" in front and "session" in front:
            date_val = str(front.get("date", "")).strip("'\"")
            session_val = str(front.get("session", "")).strip("'\"")
            return {
                "file_path": file_path,
                "title": f"{date_val} session {session_val}",
                "type": "diary",
                "team_id": front.get("team_id"),
                "task_id": front.get("task_id"),
                "tags": tags_str,
                "content": body.strip(),
                "created_at": front.get("date", mtime),
                "updated_at": mtime,
            }

        # Generic memory file.
        return {
            "file_path": file_path,
            "title": front.get("name") or front.get("title") or Path(file_path).stem,
            "type": front.get("type", "unknown"),
            "team_id": front.get("team_id") or None,
            "task_id": front.get("task_id") or None,
            "tags": tags_str,
            "content": body.strip(),
            "created_at": mtime,
            "updated_at": mtime,
        }

    # ---------------------------------------------------------- _compute_hash

    def _compute_hash(self, file_path: str) -> str:
        """Return the SHA256 hex digest of the file's bytes (streamed in 64 KiB chunks)."""
        sha256 = hashlib.sha256()
        with open(file_path, "rb") as fh:
            for chunk in iter(lambda: fh.read(65536), b""):
                sha256.update(chunk)
        return sha256.hexdigest()

    # --------------------------------------------------------------- index_file

    def index_file(self, file_path: str) -> str:
        """Index a single file incrementally.

        The stored SHA256 hash decides whether the row is untouched
        ("skipped"), rewritten ("updated"), or newly inserted ("indexed").

        Returns
        -------
        str
            "indexed" | "updated" | "skipped" | "error"
        """
        try:
            file_hash = self._compute_hash(file_path)
        except Exception:
            return "error"

        parsed = self._parse_file(file_path)
        if parsed is None:
            return "error"

        cur = self._conn.cursor()
        cur.execute("SELECT id, file_hash FROM memories WHERE file_path = ?", (file_path,))
        row = cur.fetchone()

        now = datetime.now().isoformat(timespec="seconds")

        if row is not None:
            # Unchanged content: nothing to do.
            if row["file_hash"] == file_hash:
                return "skipped"
            # Content changed: UPDATE (created_at is deliberately preserved).
            try:
                cur.execute(
                    """
                    UPDATE memories
                    SET title=?, type=?, team_id=?, task_id=?, tags=?,
                        content=?, updated_at=?, file_hash=?
                    WHERE file_path=?
                    """,
                    (
                        parsed["title"],
                        parsed["type"],
                        parsed.get("team_id"),
                        parsed.get("task_id"),
                        parsed.get("tags", ""),
                        parsed.get("content", ""),
                        now,
                        file_hash,
                        file_path,
                    ),
                )
                self._conn.commit()
                return "updated"
            except Exception:
                self._conn.rollback()
                return "error"

        # New file: INSERT.
        try:
            cur.execute(
                """
                INSERT INTO memories
                    (file_path, title, type, team_id, task_id, tags,
                     content, created_at, updated_at, file_hash)
                VALUES (?,?,?,?,?,?,?,?,?,?)
                """,
                (
                    file_path,
                    parsed["title"],
                    parsed["type"],
                    parsed.get("team_id"),
                    parsed.get("task_id"),
                    parsed.get("tags", ""),
                    parsed.get("content", ""),
                    parsed.get("created_at", now),
                    now,
                    file_hash,
                ),
            )
            self._conn.commit()
            return "indexed"
        except Exception:
            self._conn.rollback()
            return "error"

    # --------------------------------------------------------- index_directory

    def index_directory(self, dir_path: str, file_pattern: str = "*.md") -> dict:
        """Index every file in ``dir_path`` matching ``file_pattern`` (non-recursive).

        Returns
        -------
        dict
            {"indexed": N, "updated": N, "skipped": N, "errors": N}
        """
        stats = {"indexed": 0, "updated": 0, "skipped": 0, "errors": 0}
        pattern = os.path.join(dir_path, file_pattern)
        # sorted() for a deterministic processing order (glob order is
        # filesystem-dependent).
        for fp in sorted(glob.glob(pattern)):
            result = self.index_file(fp)
            if result in ("indexed", "updated", "skipped"):
                stats[result] += 1
            else:
                stats["errors"] += 1

        return stats

    # --------------------------------------------------------------- reindex_all

    def reindex_all(self) -> dict:
        """Full reindex: clear ``memories`` then index DIARY_DIR and ANU_MEMORY_DIR.

        Returns the combined per-status counts from both directories.
        """
        cur = self._conn.cursor()
        # Delete everything; the AFTER DELETE trigger cleans memories_fts too.
        cur.execute("DELETE FROM memories")
        self._conn.commit()

        # Rebuild the FTS index from the (now empty) content table; best-effort.
        try:
            cur.execute("INSERT INTO memories_fts(memories_fts) VALUES('rebuild')")
            self._conn.commit()
        except Exception:
            pass

        combined: dict = {"indexed": 0, "updated": 0, "skipped": 0, "errors": 0}

        for directory in (self.DIARY_DIR, self.ANU_MEMORY_DIR):
            if not os.path.isdir(directory):
                continue  # directory may not exist on this machine
            s = self.index_directory(directory)
            for k in combined:
                combined[k] += s.get(k, 0)

        return combined

    # ------------------------------------------------------------------ search

    def search(
        self,
        query: str,
        type_filter: Optional[str] = None,
        team_filter: Optional[str] = None,
        limit: int = 5,
        layer: str = "full",
    ) -> list:
        """FTS5 MATCH search with a LIKE fallback (for Korean substrings etc.).

        Parameters
        ----------
        query : str
            Whitespace-separated tokens; OR-combined as quoted FTS5 phrases.
        type_filter, team_filter : str, optional
            Exact-match filters on ``type`` / ``team_id``.
        limit : int
            Maximum number of rows returned.
        layer : str
            "index"   — id, title, type, score only (~50-100 tokens)
            "summary" — adds a 50-char snippet (~200-300 tokens)
            "full"    — legacy full result (default, backward compatible)

        Returns
        -------
        list of dict
            - "index":   id, title, type, score
            - "summary": id, title, type, score, snippet
            - "full":    file_path, title, type, team_id, tags, snippet, score

        Raises
        ------
        ValueError
            If ``layer`` is not one of the three accepted values.
        """
        if layer not in ("index", "summary", "full"):
            raise ValueError(f"layer must be 'index', 'summary', or 'full', got '{layer}'")

        cur = self._conn.cursor()

        # Optional filters shared by both query strategies.
        clauses: list[str] = []
        extra_params: list = []
        if type_filter:
            clauses.append("m.type = ?")
            extra_params.append(type_filter)
        if team_filter:
            clauses.append("m.team_id = ?")
            extra_params.append(team_filter)
        extra_sql = (" AND " + " AND ".join(clauses)) if clauses else ""

        # Per-layer SELECT columns.
        if layer == "index":
            fts_select = "m.id, m.title, m.type, memories_fts.rank AS score"
            like_select = "m.id, m.title, m.type, 0.0 AS score"
        elif layer == "summary":
            fts_select = "m.id, m.title, m.type, SUBSTR(m.content, 1, 50) AS snippet, memories_fts.rank AS score"
            like_select = "m.id, m.title, m.type, SUBSTR(m.content, 1, 50) AS snippet, 0.0 AS score"
        else:  # "full"
            fts_select = (
                "m.file_path, m.title, m.type, m.team_id, m.tags, "
                "snippet(memories_fts, 3, '[', ']', '...', 20) AS snippet, "
                "memories_fts.rank AS score"
            )
            like_select = (
                "m.file_path, m.title, m.type, m.team_id, m.tags, " "SUBSTR(m.content, 1, 200) AS snippet, 0.0 AS score"
            )

        # 1) FTS5 MATCH: each token becomes a quoted phrase (minimizes Korean
        #    over-segmentation), OR-combined. Interior double quotes must be
        #    doubled per FTS5 string syntax — otherwise a quote in the user
        #    query produces a malformed MATCH expression.
        fts_query = " OR ".join('"' + tok.replace('"', '""') + '"' for tok in query.split())

        fts_sql = f"""
            SELECT {fts_select}
            FROM memories_fts
            JOIN memories m ON memories_fts.rowid = m.id
            WHERE memories_fts MATCH ?
            {extra_sql}
            ORDER BY memories_fts.rank
            LIMIT ?
        """

        try:
            cur.execute(fts_sql, [fts_query] + extra_params + [limit])
            rows = cur.fetchall()
        except Exception:
            rows = []

        # 2) LIKE fallback when FTS finds nothing. Escape LIKE wildcards so a
        #    literal % or _ in the query does not match everything.
        if not rows:
            escaped = re.sub(r"([\\%_])", r"\\\1", query)
            like_pattern = f"%{escaped}%"
            like_sql = f"""
                SELECT {like_select}
                FROM memories m
                WHERE (m.title LIKE ? ESCAPE '\\' OR m.content LIKE ? ESCAPE '\\' OR m.tags LIKE ? ESCAPE '\\')
                {extra_sql}
                LIMIT ?
            """
            try:
                cur.execute(
                    like_sql,
                    [like_pattern, like_pattern, like_pattern] + extra_params + [limit],
                )
                rows = cur.fetchall()
            except Exception:
                rows = []

        return [dict(row) for row in rows]

    # --------------------------------------------------------------- get_by_ids

    def get_by_ids(self, ids: list[int]) -> list[dict]:
        """Fetch full memory rows by ID (Layer 3 retrieval).

        Parameters
        ----------
        ids : list[int]
            Memory IDs to fetch; an empty list returns an empty list.

        Returns
        -------
        list of dict
            All columns: id, file_path, title, type, team_id, task_id, tags,
            content, created_at, updated_at, file_hash.
        """
        if not ids:
            return []

        # Placeholders are generated per-ID; values are bound, never
        # interpolated, so this stays injection-safe.
        placeholders = ", ".join("?" for _ in ids)
        cur = self._conn.cursor()
        cur.execute(
            f"SELECT * FROM memories WHERE id IN ({placeholders})",
            ids,
        )
        return [dict(row) for row in cur.fetchall()]

    # ------------------------------------------------------------------- stats

    def stats(self) -> dict:
        """Return index statistics.

        Returns
        -------
        dict
            {"total": N, "by_type": {...}, "by_team": {...},
             "last_indexed": ISO timestamp or None}
        """
        cur = self._conn.cursor()

        cur.execute("SELECT COUNT(*) FROM memories")
        total = cur.fetchone()[0]

        cur.execute("SELECT type, COUNT(*) FROM memories GROUP BY type")
        by_type = {row[0]: row[1] for row in cur.fetchall()}

        cur.execute("SELECT team_id, COUNT(*) FROM memories WHERE team_id IS NOT NULL GROUP BY team_id")
        by_team = {row[0]: row[1] for row in cur.fetchall()}

        cur.execute("SELECT MAX(updated_at) FROM memories")
        last_row = cur.fetchone()
        last_indexed = last_row[0] if last_row and last_row[0] else None

        return {
            "total": total,
            "by_type": by_type,
            "by_team": by_team,
            "last_indexed": last_indexed,
        }

    # ------------------------------------------------------------------- close

    def close(self) -> None:
        """Close the DB connection (idempotent)."""
        if not self._closed:
            self._conn.close()
            self._closed = True
