
    %<iSE                        d Z ddlZddlZddlZddlZddlmZ ddlmZ ddl	m
Z
mZmZ ej                  j                  dej                  j                  ej                  j!                  e      d             ddlZ e ej(                               Zdddd	d
ddd	gZdgdz  dgdz  gZddeeef   dz  de
fdZddZddZddZddZddZddZddZ ddZ!ddZ"y)u   
Tests for ingest.py - TDD implementation for task-510

모든 외부 의존성 (chunker, embedding_service, supabase)을 mock으로 처리.
    N)Any)mock)	MagicMockcallpatchz..zchunk 0 text
   )contentchunk_indextoken_countzchunk 1 text      g?i   g?existing_docreturnc                    t               }t               | r| gng _        |j                  j                  j                  j                  j
                  j                  j                  _        t               dt        ig_        t               i g_        t               dt        ig_        dt        dt         ffd}||j                  _	        |S )u   Supabase 클라이언트 mock 생성 헬퍼.

    Args:
        existing_doc: 중복 검사 시 반환할 기존 문서 dict.
                      None이면 문서 없음 (빈 리스트 반환).
    id
table_namer   c                    t               }t               }|j                  j                  j                  _        ||j                  _        t               }| dk(  r|j                  _        n|j                  _        ||j
                  _        t               }|j                  j                  j                  _        ||j                  _        |S )Nknowledge_documents)r   eqreturn_valueexecuteselectinsertdelete)	r   tblselect_chaininsert_chaindelete_chaindelete_responseinsert_chunk_responseinsert_doc_responseselect_responses	        G/home/jay/workspace/.worktrees/task-2057-dev2/libs/tests/test_ingest.py_table_side_effectz/_make_supabase_mock.<locals>._table_side_effect>   s    k !{<K$$,,9".

 !{..0CL  -0EL  -".

 !{<K$$,,9".


    )
r   datatabler   r   r   r   FAKE_DOCUMENT_IDstrside_effect)r   sbr$   r   r    r!   r"   s      @@@@r#   _make_supabase_mockr,   "   s     
B  kO-9L>rOUdBHH  --00==EER $+!%'7 89 &K"$  kO!#345Os y  . .BHHIr%   c                     t        d      } t        d|       5  t        dt              5 }t        dt              5  ddlm}  |d	d
d      }ddd       ddd       ddd       t        t              sJ t        j                  |      }t        |      |k(  sJ y# 1 sw Y   QxY w# 1 sw Y   UxY w# 1 sw Y   YxY w)uQ   ingest_document()가 UUID 형식의 document_id 문자열을 반환해야 한다.Nr   ingest._get_supabase_clientr   ingest.chunk_textingest.get_embeddings_batchr   ingest_documentzTest Docz$Some meaningful content for testing.testtitler	   source)
r,   r   FAKE_CHUNKSFAKE_EMBEDDINGSingestr4   
isinstancer)   uuidUUID)sb_mock
mock_chunkr4   resultparseds        r#   (test_ingest_document_returns_document_idrC   ^   s    !t4G 	+'B
!<
@J+/J

 	+ :

 
 
 fc"""YYvFv;&   !
 
 
 
 
 
s9   B8B,B B,B8 B)%B,,B5	1B88Cc                  V   t        d      } d}t        d|       5  t        dt              5 }t        dt              5 }dd	lm}  |d
|d       ddd       ddd       ddd       j                          |j                  }|d   d   |k(  s|d   j                  d      |k(  sJ j                          |j                  }|d   r|d   d   n|d   d   }t        D cg c]  }t        |d          }	}||	k(  sJ y# 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   xY wc c}w )ui   chunker.chunk_text()가 먼저 호출되고, 그 결과로 get_embeddings_batch()가 호출돼야 한다.Nr.   z#Content to be chunked and embedded.r/   r0   r1   r2   r   r3   Docsrcr6   r   texttextsr	   )
r,   r   r9   r:   r;   r4   assert_called_once	call_argsgetr)   )
r?   r	   r@   
mock_embedr4   rJ   embed_call_args	texts_argcexpected_textss
             r#   2test_ingest_document_calls_chunker_then_embeddingsrQ   z   sH   !t4G3G 	+'BD!<D@J+/JD OY*eWUCD D D !!#$$IQ<?g%1)9)9&)AW)LLL !!# **O4CA4F?1-a0O\]L^_fLgI1<=Ac!I,'=N=&&&'D D D D D D$ >s@   DD DDD#D&DDD	DD#c                     g dt         dt        ffd} t               }| |j                  _        t	        d|      5  t	        dt
              5  t	        dt              5  dd	lm}  |d
dd       ddd       ddd       ddd       dv s
J d        D cg c]  }d|j                         v s| }}t        |      dkD  s
J d        y# 1 sw Y   bxY w# 1 sw Y   fxY w# 1 sw Y   jxY wc c}w )uS   documents 테이블과 knowledge_chunks 테이블에 INSERT가 호출돼야 한다.r   r   c                     t               }t               }g |_        t               }||j                  j                  j                  _        ||j
                  _        dt        dt         f fd}||j                  _        |S )Nr&   r   c                     j                         t               }dk(  r$t        dt        ig      |j                  _        |S t        i g      |j                  _        |S Nr   r   r&   appendr   r(   r   r   r&   insinserted_tablesr   s     r#   _insert_side_effectzstest_ingest_document_inserts_documents_and_chunks.<locals>._tracking_table_side_effect.<locals>._insert_side_effect   s\    "":.+C22+4DBR;S:T+U( J ,52$+?(Jr%   	r   r&   r   r   r   r   r   r   r*   )r   r   r"   r   r\   r[   s   `    r#   _tracking_table_side_effectzVtest_ingest_document_inserts_documents_and_chunks.<locals>._tracking_table_side_effect   sl    k $+! {<K$$,,9".

	c 	i 	 "5


r%   r/   r0   r1   r2   r   r3   rE   zContent here.rF   r6   Nr   u2   knowledge_documents INSERT 호출 없음. 실제: chunku/   chunks 테이블 INSERT 호출 없음. 실제: )r)   r   r'   r*   r   r9   r:   r;   r4   lowerlen)r^   r?   r4   tchunk_tablesr[   s        @r#   1test_ingest_document_inserts_documents_and_chunksrd      s   !#O 	 . kG ;GMM 	+'BL!<L 	+/JL
 	+e_UKL L L !O3{7ijyiz5{{3.G!'QWWY2FAGLG|q e$STcSd"ee L L L L L L HsG   C-C!"C4C!<C-C96C9CC!!C*	&C--C6c                  "   t        t        j                               } | ddddt         dt        ffd}t               }||j                  _        t        d|      5  t        d	t              5 }t        d
t              5 }ddl	m
}  |ddd      }ddd       ddd       ddd       | k(  sJ d|  d| d       rJ d       j                          j                          y# 1 sw Y   VxY w# 1 sw Y   ZxY w# 1 sw Y   ^xY w)uq   같은 content_hash의 문서가 이미 존재하면 기존 id를 반환하고 INSERT를 하지 않아야 한다.	some_hash)r   content_hashFr   r   c                    t               }t               }g|_        t               }||j                  j                  j                  _        ||j
                  _        dt        dt         ffd}||j                  _        |S )Nr&   r   c                     dt               S )NT)r   )r&   insert_calleds    r#   r\   zktest_ingest_document_duplicate_returns_existing_id.<locals>._table_side_effect.<locals>._insert_side_effect   s     M;r%   r]   )r   r   r"   r   r\   r   rj   s        r#   r$   zNtest_ingest_document_duplicate_returns_existing_id.<locals>._table_side_effect   sl    k#+ ,~ {<K$$,,9".

	c 	i 	
 "5


r%   r/   r0   r1   r2   r   r3   zDup DoczDuplicate content.rF   r6   Nu
   기존 id u    를 반환해야 하지만 u    반환u6   중복 문서에 대해 INSERT가 호출되면 안 됨)r)   r=   uuid4r   r'   r*   r   r9   r:   r;   r4   assert_not_called)	existing_idr$   r?   r@   rL   r4   rA   r   rj   s	          @@r#   2test_ingest_document_duplicate_returns_existing_idrn      s   djjl#K%{CLMs y " kG 2GMM 	+'B
!<
@J+/J
 OY* (

 
 
 [ gJ{m;WX^W__f"gg VVV  "  "!
 
 
 
 
 
s<   "D4C9C-C9 D-C62C99D	>DDc                  ,   t        t        j                               } g dt         dt        ffd}t               }||j                  _        t        d|      5  ddlm}  ||       }ddd       d	u sJ | v sJ d
|  d        y# 1 sw Y   !xY w)u]   delete_document()가 Supabase knowledge_documents 테이블에 DELETE를 호출해야 한다.r   r   c                      t               }t               }dt        dt        dt         f fd}||j                  _        ||j
                  _        |S )Ncolvalr   c                     dk(  rj                  t        |             t               }t        d|ig      |j                  _        |S rU   rX   r)   r   r   r   )rq   rr   eq_mockdeleted_idsr   s      r#   _eq_side_effectz_test_delete_document_calls_supabase_delete.<locals>._table_side_effect.<locals>._eq_side_effect  sA    22""3s8,kG+4D#;-+HGOO(Nr%   )r   r)   r   r   r*   r   r   )r   r   r   rw   rv   s   `   r#   r$   zFtest_delete_document_calls_supabase_delete.<locals>._table_side_effect  sJ    k {	 	3 	9 	 '6#".


r%   r/   r0   r   )delete_documentNTzdocument_id u#   로 DELETE 호출 안 됨. 실제: )	r)   r=   rk   r   r'   r*   r   r;   rx   )doc_idr$   r?   rx   rA   rv   s        @r#   *test_delete_document_calls_supabase_deleterz     s    FKs y   kG 2GMM	,7	C )* ()
 T>>[ iL8[\g[h"ii ) )s   B

Bc                    	
 t        t        j                               } | ddddi dd	g g 
dt         dt        f	
fd	}t               }||j                  _        t        d
|      5  t        dt              5 }t        dt              5 }ddl	m
}  ||        ddd       ddd       ddd       t              dkD  sJ d       j                          j                          
D cg c]  }d|j                         v s| }}t        |      dkD  sJ d       y# 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   xY wc c}w )ua   reindex_document()가 기존 청크를 삭제하고 새로 청킹/임베딩/INSERT 해야 한다.zOriginal Titlez Original content for reindexing.rF   Noldhash)r   r7   r	   r8   
source_urlmetadatarg   r   r   c                     t               }t               } dk(  rgng |_        t               }||j                  j                  j                  _        ||j
                  _        t               }dt        dt        dt         f fd}||j                  _        ||j                  _        dt        dt         f	 fd}||j                  _        |S )Nr   rq   rr   r   c                     dj                         v rj                  t        |             t               }t        g       |j                  _        |S )Nr_   rV   )r`   rX   r)   r   r   r   )rq   rr   ru   deleted_chunk_doc_idsr   s      r#   _delete_eq_side_effectzntest_reindex_document_deletes_chunks_and_reinserts.<locals>._table_side_effect.<locals>._delete_eq_side_effectD  sC    ***,,%,,SX6kG+4"+=GOO(Nr%   r&   c                     j                         t               }t        dt        ig      |j                  _        |S )Nr   rV   rW   rY   s     r#   r\   zktest_reindex_document_deletes_chunks_and_reinserts.<locals>._table_side_effect.<locals>._insert_side_effectO  s7    "":.+C'0>N7O6P'QCKK$Jr%   )r   r&   r   r   r   r   r)   r   r*   r   r   )
r   r   r"   r   r   r   r\   r   r   r[   s
   `      r#   r$   zNtest_reindex_document_deletes_chunks_and_reinserts.<locals>._table_side_effect7  s    k $+1;?T1T~Z\ {<K$$,,9".

 !{	 	# 	) 	 '=#".

	c 	i 	 "5


r%   r/   r0   r1   r2   r   )reindex_documentu.   기존 청크 DELETE가 호출되지 않았음r_   u;   재인제스션 시 chunks INSERT가 호출되지 않았음)r)   r=   rk   r   r'   r*   r   r9   r:   r;   r   ra   rI   r`   )ry   r$   r?   r@   rL   r   rb   chunk_insertsr   r   r[   s           @@@r#   2test_reindex_document_deletes_chunks_and_reinsertsr   '  sR   F!5!L (*!#Os y B kG 2GMM 	+'B!!<!@J+/J! OY+ ! ! ! $%)[+[[) !!#!!# !0H17aggi3GQHMH}!`#``!%! ! ! ! ! !" IsH   +D;=D/D#D/&D;/EE#D,(D//D8	4D;;Ec                  h   d} t        j                  | j                  d            j                         }g dt        dt
        ffd}t               }||j                  _        g |j                  j                  dt        dt
        ffd}||j                  _        t        d|      5  t        d	t              5  t        d
t              5  ddlm}  |d| d       ddd       ddd       ddd       |v sJ d| d        y# 1 sw Y   +xY w# 1 sw Y   /xY w# 1 sw Y   3xY w)uV   ingest_document()가 content의 SHA-256 해시를 content_hash로 사용해야 한다.z'Specific content for hash verification.zutf-8r   r   c                     t               }t               }g |_        t               }||j                  j                  j                  _        ||j
                  _        dt        dt         f fd}||j                  _        |S )Nr&   r   c                     dk(  r#j                  t        | t              r| n|        t               }t        dt        ig      |j
                  _        |S rU   )rX   r<   dictr   r(   r   r   )r&   rZ   captured_insert_datar   s     r#   r\   z_test_content_hash_is_sha256_of_content.<locals>._table_side_effect.<locals>._insert_side_effect  sL    22$++JtT4JDPTU+C'0>N7O6P'QCKK$Jr%   r]   )r   r   r"   r   r\   r   s   `    r#   r$   zBtest_content_hash_is_sha256_of_content.<locals>._table_side_effect|  sl    k $+! {<K$$,,9".

	c 	i 	 "5


r%   c                      |       }| dk(  r5|j                   }dt        dt        dt        ffd}||j                   _        |S )Nr   argskwargsr   c                  n    t               }dt        dt        dt         ffd}||j                  _        |S )Nrq   rr   r   c                     | dk(  rj                  t        |             t               }t        g       |j                  _        |S )Nrg   rV   rt   )rq   rr   ru   queried_hashess      r#   rw   ztest_content_hash_is_sha256_of_content.<locals>._capturing_table_side_effect.<locals>._select_side_effect.<locals>._eq_side_effect  s:    n,&--c#h7'kG3<"3EGOO0"Nr%   )r   r)   r   r   r*   )r   r   selrw   r   s       r#   _select_side_effectzitest_content_hash_is_sha256_of_content.<locals>._capturing_table_side_effect.<locals>._select_side_effect  s7    k# #3 #9 # &5"
r%   )r   r   r   r*   )r   r   original_selectr   original_tabler   s       r#   _capturing_table_side_effectzLtest_content_hash_is_sha256_of_content.<locals>._capturing_table_side_effect  sL    Z(..!jjO3 # )  &9CJJ"
r%   r/   r0   r1   r2   r   r3   z	Hash TestrF   r6   Nu   SHA-256 해시 u.   로 조회하지 않았음. 실제 조회값: )hashlibsha256encode	hexdigestr)   r   r'   r*   r   r9   r:   r;   r4   )	r	   expected_hashr$   r?   r   r4   r   r   r   s	         @@@r#   &test_content_hash_is_sha256_of_contentr   u  s:   7GNN7>>'#:;EEGM13s y * kG 2GMM !#N]]..N  . !=GMM 	+'BJ!<J 	+/JJ
 	+k75IJ J J 	'g	'UVdUefg'J J J J J Js<   0D(DD&D.D(DDD%	!D((D1c            	      b   t        dt                     5  t        dg       5  t        dg       5  ddlm}  t	        j
                  t        d      5   | d	d
d       ddd       ddd       ddd       ddd       y# 1 sw Y   "xY w# 1 sw Y   &xY w# 1 sw Y   *xY w# 1 sw Y   yxY w)u>   빈 content를 전달하면 ValueError가 발생해야 한다.r/   r0   r1   r2   r   r3   r	   matchEmpty rF   r6   Nr   r   r;   r4   pytestraises
ValueErrorr3   s    r#   5test_ingest_document_empty_content_raises_value_errorr     s     	+)+FE!3E 	+"=E
 	+]]:Y7 	E'2eD	EE E E E	E 	EE E E E E EQ   B%B"BB	 B(B0B%B
BBBB"	B%%B.c            	      b   t        dt                     5  t        dg       5  t        dg       5  ddlm}  t	        j
                  t        d      5   | d	d
d       ddd       ddd       ddd       ddd       y# 1 sw Y   "xY w# 1 sw Y   &xY w# 1 sw Y   *xY w# 1 sw Y   yxY w)uK   공백만 있는 content를 전달하면 ValueError가 발생해야 한다.r/   r0   r1   r2   r   r3   r	   r   
Whitespacez   
	  rF   r6   Nr   r3   s    r#   ?test_ingest_document_whitespace_only_content_raises_value_errorr     s     	+)+FS!3S 	+"=S
 	+]]:Y7 	S,ER	SS S S S	S 	SS S S S S Sr   )N)r   N)#__doc__r   ossysr=   typingr   unittestr   unittest.mockr   r   r   pathr   joindirname__file__r   r)   rk   r(   r9   r:   r   r,   rC   rQ   rd   rn   rz   r   r   r   r    r%   r#   <module>r      s     	 
    0 0 277<< 94@ A  ztzz|$ qDqC
 	EDLEDL4d38nt&; 4y 4x!8'@,fh+#fjLFa\Igb
E
Sr%   