
    SiF                     h    d Z ddlZddlZddlZddlZddlZddlmZ ddlmZ ddl	m
Z
  G d d      Zy)u4  SQLite FTS5 기반 메모리 인덱서.

claude 메모리 디렉토리(diary, anu_memory)의 마크다운 파일을 SQLite FTS5 테이블에
인덱싱하여 한국어 포함 전문 검색을 지원한다.

주요 기능:
- YAML frontmatter 파싱 (표준 라이브러리만 사용, yaml 모듈 미사용)
- SHA256 해시 기반 증분 인덱싱 (skipped / updated / indexed)
- FTS5 외부 콘텐츠 테이블 + 트리거 자동 동기화
- 한국어 검색: FTS5 MATCH 시도 후 LIKE fallback
- CLI 진입점 없음 (memory_search.py 에서 import 하여 사용)
    N)datetime)Path)Optionalc                      e Zd ZdZdZdZdZddee   ddfdZ	dd	Z
ed
edeeef   fd       Zdedee   fdZdedefdZdedefdZd dededefdZdefdZ	 	 	 	 d!dedee   dee   dededefdZdee   dee   fdZdefdZddZy)"MemoryIndexeru.   SQLite FTS5 기반 메모리 파일 인덱서.z/home/jay/.claude/memory/diaryzG/home/jay/.claude/projects/-home-jay--cokacdir-workspace-autoset/memoryz(/home/jay/.claude/memory/memory_index.dbNdb_pathreturnc                 B   ||n| j                   | _        t        | j                        j                  j	                  dd       t        j                  | j                        | _        t
        j                  | j                  _	        d| _
        | j                          y )NT)parentsexist_okF)DB_PATH_DEFAULT_db_pathr   parentmkdirsqlite3connect_connRowrow_factory_closed_init_db)selfr   s     E/home/jay/workspace/.worktrees/task-2117-dev1/utils/memory_indexer.py__init__zMemoryIndexer.__init__!   sl    #*#6D<P<PT]]""(((E)0)G
!(

    c                     | j                   j                         }|j                  d       | j                   j                          y)uD   DB 스키마(테이블, FTS5 가상 테이블, 트리거) 초기화.az  
            CREATE TABLE IF NOT EXISTS memories (
                id          INTEGER PRIMARY KEY,
                file_path   TEXT UNIQUE,
                title       TEXT,
                type        TEXT,
                team_id     TEXT,
                task_id     TEXT,
                tags        TEXT,
                content     TEXT,
                created_at  TEXT,
                updated_at  TEXT,
                file_hash   TEXT
            );

            CREATE VIRTUAL TABLE IF NOT EXISTS memories_fts USING fts5(
                title, type, tags, content,
                content=memories, content_rowid=id
            );

            CREATE TRIGGER IF NOT EXISTS memories_ai
            AFTER INSERT ON memories BEGIN
                INSERT INTO memories_fts(rowid, title, type, tags, content)
                VALUES (new.id, new.title, new.type, new.tags, new.content);
            END;

            CREATE TRIGGER IF NOT EXISTS memories_au
            AFTER UPDATE ON memories BEGIN
                INSERT INTO memories_fts(memories_fts, rowid, title, type, tags, content)
                VALUES ('delete', old.id, old.title, old.type, old.tags, old.content);
                INSERT INTO memories_fts(rowid, title, type, tags, content)
                VALUES (new.id, new.title, new.type, new.tags, new.content);
            END;

            CREATE TRIGGER IF NOT EXISTS memories_ad
            AFTER DELETE ON memories BEGIN
                INSERT INTO memories_fts(memories_fts, rowid, title, type, tags, content)
                VALUES ('delete', old.id, old.title, old.type, old.tags, old.content);
            END;
            N)r   cursorexecutescriptcommit)r   curs     r   r   zMemoryIndexer._init_db,   s:    jj! ' '	P 	

r   textc                    i }| }| j                  d      s||fS | dd }t        j                  d|      }|s||fS |d|j                          }||j	                         d }|j                         D ]  }|j                         }|r|j                  d      r'd|vr,|j                  d      \  }}}	|j                         }|	j                         }	|	j                  d      rh|	j                  d      rW|	d	d
 }
|
j                  d      D cg c]2  }|j                         s|j                         j                  d      4 }}|||<   |	j                  d      }|||<    ||fS c c}w )u   YAML frontmatter 파싱 (표준 라이브러리만 사용).

        Returns
        -------
        (front: dict, body: str)
        z---   Nz\n---[ \t]*(\n|$)#:[]   ,'")

startswithresearchstartend
splitlinesstrip	partitionendswithsplit)r!   frontbodyrest	end_matchfm_blocklinekey_raw_valinnervitemsvals                 r   _parse_frontmatterz MemoryIndexer._parse_frontmatter\   sq     u%$; ABxII2D9	$;+)//+,IMMO%& '') 	!D::<D4??3/$"nnS1OCG))+CmmoG !!#&7+;+;C+@"9>S9IWAQWWY/WW"c
 mmE* c
%	!( d{ Xs   E'"!E'	file_pathc                    	 t        |      j                  d      }	 | j                  |      \  }}t	        j
                  t        j                  j                  |            j                  d      }d|v rd|v rt        |j                  dd            j                  d	      }t        |j                  dd            j                  d	      }| d
| }|j                  dg       }	t        |	t              rdj                  |	      }
nt        |	      }
||d|j                  d      |j                  d      |
|j                         |j                  d|      |d	S |j                  dg       }	t        |	t              rdj                  |	      }
n|	rt        |	      nd}
||j                  d      xs( |j                  d      xs t        |      j                   |j                  dd      |j                  d      xs d|j                  d      xs d|
|j                         ||d	S # t        $ r Y yw xY w# t        $ r Y yw xY w)uL   파일 경로로부터 메타데이터 + 본문을 파싱하여 dict 반환.zutf-8)encodingNsecondstimespecdatesession r+   z	 session tagsr*   diaryteam_idtask_id)	rD   titletyperO   rP   rM   content
created_at
updated_atnamerQ   rR   unknown)r   	read_text	ExceptionrC   r   fromtimestampospathgetmtime	isoformatstrgetr2   
isinstancelistjoinstem)r   rD   r!   r6   r7   mtimedate_valsession_valrQ   raw_tagstags_strs              r   _parse_filezMemoryIndexer._parse_file   s   		?,,g,>D	11$7KE4 &&rww'7'7	'BCMMW`Ma U?yE1599VR0177>Heii	267==eDKj	+7Eyy,H(D)88H-x= ' 99Y/ 99Y/ ::<#ii6#
 
 99VR(h%xx)H(0s8}bH #YYv&T%))G*<TY@T@TIIfi0yy+3tyy+3tzz|

 
	
S  		
  		s"   H* H9 *	H65H69	IIc                     t        j                         }t        |d      5 t        fdd      D ]  }|j	                  |        	 ddd       |j                         S # 1 sw Y   |j                         S xY w)u&   파일 내용의 SHA256 해시 반환.rbc                  &     j                  d      S )Ni   )read)fhs   r   <lambda>z-MemoryIndexer._compute_hash.<locals>.<lambda>   s    bggen r   r   N)hashlibsha256openiterupdate	hexdigest)r   rD   rr   chunkro   s       @r   _compute_hashzMemoryIndexer._compute_hash   sp    !)T" 	%b4c: %e$%	% !!	% !!s   &A!!A9c                    	 | j                  |      }| j                  |      }|y| j                  j	                         }|j                  d|f       |j                         }t        j                         j                  d      }||d   |k(  ry	 |j                  d|d   |d	   |j                  d
      |j                  d      |j                  dd      |j                  dd      |||f	       | j                  j                          y	 |j                  d||d   |d	   |j                  d
      |j                  d      |j                  dd      |j                  dd      |j                  d|      ||f
       | j                  j                          y# t        $ r Y yw xY w# t        $ r | j                  j                          Y yw xY w# t        $ r | j                  j                          Y yw xY w)uv   단일 파일 인덱싱.

        Returns
        -------
        "indexed" | "updated" | "skipped" | "error"
        errorz6SELECT id, file_hash FROM memories WHERE file_path = ?rG   rH   	file_hashskippedz
                    UPDATE memories
                    SET title=?, type=?, team_id=?, task_id=?, tags=?,
                        content=?, updated_at=?, file_hash=?
                    WHERE file_path=?
                    rQ   rR   rO   rP   rM   rL   rS   updatedz
                INSERT INTO memories
                    (file_path, title, type, team_id, task_id, tags,
                     content, created_at, updated_at, file_hash)
                VALUES (?,?,?,?,?,?,?,?,?,?)
                rT   indexed)rx   rY   rj   r   r   executefetchoner   nowr^   r`   r   rollback)r   rD   r{   parsedr    rowr   s          r   
index_filezMemoryIndexer.index_file   s   	**95I !!),>jj!Lyl[llnlln&&	&:?;9,  wv

9-

9-

62.

9b1!!
& 

!!# 	KK 7O6NJJy)JJy)JJvr*JJy"-JJ|S1( JJE  		N  

##%8  	JJ!	s7   F A9F) B
G 	F&%F&)#GG#G87G8dir_pathfile_patternc                 :   ddddd}t         j                  j                  ||      }t        j                  |      }|D ]Y  }| j	                  |      }|dk(  r|dxx   dz  cc<   '|dk(  r|dxx   dz  cc<   :|dk(  r|dxx   dz  cc<   M|dxx   dz  cc<   [ |S )u   디렉토리 내 파일 패턴 매칭 후 인덱싱.

        Returns
        -------
        {"indexed": N, "updated": N, "skipped": N, "errors": N}
        r   r~   r}   r|   errorsr~   r(   r}   r|   r   )r[   r\   rc   globr   )r   r   r   statspatternfilesfpresults           r   index_directoryzMemoryIndexer.index_directory%  s     !QG'',,x6		'" 		%B__R(F"i A% 9$i A% 9$i A% h1$		% r   c           	         | j                   j                         }|j                  d       | j                   j                          	 |j                  d       | j                   j                          ddddd}| j
                  | j                  fD ]W  }t        j                  j                  |      s#| j                  |      }|D ]  }||xx   |j                  |d      z  cc<   ! Y |S # t        $ r Y w xY w)u[   전체 재인덱싱: memories 테이블 초기화 후 DIARY_DIR + ANU_MEMORY_DIR 인덱싱.zDELETE FROM memoriesz8INSERT INTO memories_fts(memories_fts) VALUES('rebuild')r   r   )r   r   r   r   rY   	DIARY_DIRANU_MEMORY_DIRr[   r\   isdirr   r`   )r   r    combined	directorysks         r   reindex_allzMemoryIndexer.reindex_all?  s    jj!*+

	KKRSJJ &'1aP..$*=*=> 	+I77==+$$Y/A +quuQ{*+		+   		s   +C- -	C98C9querytype_filterteam_filterlimitlayerc                    |dvrt        d| d      | j                  j                         }dt        t        t
        f   ffd} |       \  }}	|dk(  rd}
d}n|d	k(  rd
}
d}nd}
d}|j                         }t        |      dk(  r
d|d    d}ndj                  d |D              }d|
 d| d}d}	 |j                  ||g|	z   |gz          |j                         }|s<d| d}d| d| d}	 |j                  ||||g|	z   |gz          |j                         }|r|D cg c]  }t        |       c}S g S # t        $ r g }Y hw xY w# t        $ r g }Y ;w xY wc c}w )uX  FTS5 MATCH 검색 (한국어 LIKE fallback 포함).

        Parameters
        ----------
        layer : str
            "index"   — id, title, type, score만 반환 (~50-100 토큰)
            "summary" — id, title, type, score, snippet(50자) 반환 (~200-300 토큰)
            "full"    — 기존 동작과 동일 (기본값, 하위 호환)

        Returns
        -------
        list of dict
            - "index":   id, title, type, score
            - "summary": id, title, type, score, snippet
            - "full":    file_path, title, type, team_id, tags, snippet, score
        )indexsummaryfullz2layer must be 'index', 'summary', or 'full', got ''r	   c                      g } g }r"| j                  d       |j                         r"| j                  d       |j                         | rddj                  |       z   |fS d|fS )Nz
m.type = ?zm.team_id = ?z AND rL   )appendrc   )clausesparamsr   r   s     r   _extra_conditionsz/MemoryIndexer.search.<locals>._extra_conditionsx  si    GF|,k*/k*g!66>>v:r   r   z1m.id, m.title, m.type, memories_fts.rank AS scorez#m.id, m.title, m.type, 0.0 AS scorer   zVm.id, m.title, m.type, SUBSTR(m.content, 1, 50) AS snippet, memories_fts.rank AS scorezHm.id, m.title, m.type, SUBSTR(m.content, 1, 50) AS snippet, 0.0 AS scorezm.file_path, m.title, m.type, m.team_id, m.tags, snippet(memories_fts, 3, '[', ']', '...', 20) AS snippet, memories_fts.rank AS scorezcm.file_path, m.title, m.type, m.team_id, m.tags, SUBSTR(m.content, 1, 200) AS snippet, 0.0 AS scorer(   "r   z OR c              3   (   K   | ]
  }d | d   yw)r   N ).0ts     r   	<genexpr>z'MemoryIndexer.search.<locals>.<genexpr>  s     #=as!H#=s   z
            SELECT z
            FROM memories_fts
            JOIN memories m ON memories_fts.rowid = m.id
            WHERE memories_fts MATCH ?
            zD
            ORDER BY memories_fts.rank
            LIMIT ?
        N%z
                SELECT z}
                FROM memories m
                WHERE (m.title LIKE ? OR m.content LIKE ? OR m.tags LIKE ?)
                z%
                LIMIT ?
            )
ValueErrorr   r   tupler_   rb   r5   lenrc   r   fetchallrY   dict)r   r   r   r   r   r   r    r   	extra_sqlextra_params
fts_selectlike_selecttokens	fts_queryfts_sqlrowslike_patternlike_sqlr   s     ``               r   r.   zMemoryIndexer.searchZ  s   0 44QRWQXXYZ[[jj!	5d#3 	 #4"5	< GLJ?KiqJdK-  y  v;!F1I;a(I#=f#==I<   K 	 	KK)|!;ug!EF<<>D
 ugQ<L#} %  	H!<>MQVPWW ||~ /3T*cS	*::-  	D	&   +s*   8*D= 4,E &E=E
EEEidsc                     |sg S dj                  d |D              }| j                  j                         }|j                  d| d|       |j	                         }|r|D cg c]  }t        |       c}S g S c c}w )uX  ID 목록으로 메모리 전체 내용 조회 (Layer 3 전용).

        Parameters
        ----------
        ids : list[int]
            조회할 메모리 ID 목록. 빈 리스트 입력 시 빈 리스트 반환.

        Returns
        -------
        list of dict: id, file_path, title, type, team_id, tags, content, created_at
        z, c              3       K   | ]  }d   yw)?Nr   )r   r=   s     r   r   z+MemoryIndexer.get_by_ids.<locals>.<genexpr>  s      2 2s   z$SELECT * FROM memories WHERE id IN ())rc   r   r   r   r   r   )r   r   placeholdersr    r   r   s         r   
get_by_idszMemoryIndexer.get_by_ids  sy     Iyy 2c 22jj!2<.B	
 ||~.2T*cS	*::*s   #A:c                    | j                   j                         }|j                  d       |j                         d   }|j                  d       |j	                         D ci c]  }|d   |d    }}|j                  d       |j	                         D ci c]  }|d   |d    }}|j                  d       |j                         }|r
|d   r|d   nd}||||dS c c}w c c}w )	u   인덱스 통계 반환.

        Returns
        -------
        {"total": N, "by_type": {...}, "by_team": {...}, "last_indexed": "ISO"}
        zSELECT COUNT(*) FROM memoriesr   z1SELECT type, COUNT(*) FROM memories GROUP BY typer(   zQSELECT team_id, COUNT(*) FROM memories WHERE team_id IS NOT NULL GROUP BY team_idz$SELECT MAX(updated_at) FROM memoriesN)totalby_typeby_teamlast_indexed)r   r   r   r   r   )r   r    r   r   r   r   last_rowr   s           r   r   zMemoryIndexer.stats  s     jj!34q!GH-0\\^<c3q63q6><<gh-0\\^<c3q63q6><<:;<<>&.8A;x{D (	
 	
 = =s   "CC$c                 `    | j                   s"| j                  j                          d| _         yy)u   DB 연결 종료.TN)r   r   close)r   s    r   r   zMemoryIndexer.close  s%    ||JJDL r   )N)r	   N)z*.md)NN   r   )__name__
__module____qualname____doc__r   r   r   r   r_   r   r   staticmethodr   r   rC   rj   rx   r   r   r   intrb   r.   r   r   r   r   r   r   r   r      sN   80I^N@O  ,` - -tSy)9 - -b7
S 7
Xd^ 7
v"s "s "NC NC Nd 3 D 4T < &*%)j;j; c]j; c]	j;
 j; j; 
j;\;d3i ;DJ ;4
t 
> r   r   )r   r   rq   r[   r-   r   r   pathlibr   typingr   r   r   r   r   <module>r      s0      	 	    m  m r   