
    (<i5                     *   d Z ddlZddlZddlZddlZddlmZ ddlmZ ej                  j                  d e
 ee      j                  j                  j                  j                               ddlmZmZmZmZmZmZ  G d dej(                        Z G d d	ej(                        Z G d
 dej(                        Z G d dej(                        Z G d dej(                        Z G d dej(                        Zedk(  r ej8                          yy)u.   test_changelog.py - TDD 테스트 (RED 단계)    N)datetime)Path)	add_round
create_logfinalize_logget_recent_changelogload_logsave_logc                   (    e Zd ZdZddZddZddZy)TestCreateLogu   create_log() 테스트Nc                 :   t        d      }| j                  |d   d       | j                  d|       | j                  |d   g        | j                  |d   d       | j                  |d   d       | j                  |d   d       | j                  |d	   d       y
)u   기본 구조 확인zmy-skillskill
started_atroundsfinal_scorer   total_roundskeptrevertedN)r   assertEqualassertInselflogs     Z/home/jay/workspace/.worktrees/task-2057-dev2/scripts/autoresearch/tests/test_changelog.pytest_create_log_structurez'TestCreateLog.test_create_log_structure   s    $Wz2lC(X+]+Q/^,a0Va(Z!,    c                     t        d      }	 t        j                  |d          y# t        $ r | j	                  d|d           Y yw xY w)u/   타임스탬프가 ISO 8601 형식인지 확인
test-skillr   z#started_at is not ISO 8601 format: N)r   r   fromisoformat
ValueErrorfailr   s     r    test_create_log_timestamp_is_isoz.TestCreateLog.test_create_log_timestamp_is_iso%   sP    &	Q""3|#45 	QII;C<M;NOP	Qs   &  A	A	c                     t        d      }t        d      }| j                  |d   d       | j                  |d   d       y)u!   다른 스킬 이름으로 생성zskill-azskill-br   N)r   r   )r   log1log2s      r    test_create_log_different_skillsz.TestCreateLog.test_create_log_different_skills.   s>    )$)$g	2g	2r   returnN)__name__
__module____qualname____doc__r   r"   r&    r   r   r   r      s     	-Q3r   r   c                   H    e Zd ZdZd
dZd
dZd
dZd
dZd
dZd
dZ	d
d	Z
y)TestAddRoundu   add_round() 테스트Nc                 $    t        d      | _        y Nr   r   r   r   s    r   setUpzTestAddRound.setUp9       l+r   c           
      z    t        | j                  dddddg d      }| j                  t        |d         d       y	)
u*   라운드가 rounds 리스트에 추가됨   rewriteu   프롬프트 재작성      ?ffffff?r   r   	round_nummutation_typemutation_descriptionscore_beforescore_afteritems_detaildecisionr   Nr   r   r   lenr   results     r    test_add_round_appends_to_roundsz-TestAddRound.test_add_round_appends_to_rounds<   sD    #!9	
 	VH-.2r   c                    t        | j                  ddddddddgdd	d

      }|d   d   }| j                  |d   d       | j                  |d   d       | j                  |d   d       | j                  |d   d       | j                  |d   d       | j                  |d   dddg       | j                  |d   d       | j                  |d   d	       | j                  |d   d
       y)u.   추가된 라운드의 데이터 구조 확인r7   append   내용 추가333333?333333?a)itemscorer   d      
r   r<   r=   r>   r?   r@   rA   rB   input_tokensoutput_tokensr   r   roundr=   r>   r?   r@   rA   rB   rS   rT   N)r   r   r   assertAlmostEqual)r   rF   
round_datas      r   test_add_round_data_structurez*TestAddRound.test_add_round_data_structureJ   s   "!0#&56
 H%a(
G,a0O4h?$:;_Mz.93?z-8#>N3sS6Q5RSJ/8N3S9O4c:r   c           
          t        | j                  dddddg d      }| j                  |d   d       | j                  |d   d	       y
)u+   decision='kept'이면 kept 카운트 증가r7   r8   	   재작성r9   皙?r   r;   r   r   Nr   r   r   rE   s     r   #test_add_round_kept_increments_keptz0TestAddRound.test_add_round_kept_increments_keptc   sS    #!,	
 	+
+Q/r   c           
          t        | j                  dddddg d      }| j                  |d   d       | j                  |d   d	       y
)u3   decision='reverted'이면 reverted 카운트 증가r7   r8   rZ   r9   rK   r   r;   r   r   Nr\   rE   s     r   +test_add_round_reverted_increments_revertedz8TestAddRound.test_add_round_reverted_increments_revertedr   sS    #!,	
 	
+Q/+r   c                    | j                   }t        d      D ]9  }t        ||dz   dd|dz    t        |      dz  t        |dz         dz  g d      }; | j	                  t        |d         d       | j	                  |d   d       y	)
u   여러 라운드 추가   r7   r8   u
   라운드 皙?r   r;   r   N)r   ranger   floatr   rD   )r   r   is      r   test_add_multiple_roundsz%TestAddRound.test_add_multiple_rounds   s    hhq 
	Aa%''1!A#%7"1X^!!a%L3.	C
	 	S]+Q/Va(r   c           
          t        | j                  dddddg d      }| j                  |t               | j	                  d|       y	)
u$   add_round는 수정된 log를 반환r7   r8   	   테스트r9   rL   r   r;   r   N)r   r   assertIsInstancedictr   rE   s     r   test_add_round_returns_logz'TestAddRound.test_add_round_returns_log   sH    #!,	
 	fd+h'r   r'   )r)   r*   r+   r,   r4   rG   rX   r]   r_   rf   rk   r-   r   r   r/   r/   6   s(    ,3;20,)"(r   r/   c                   (    e Zd ZdZddZddZddZy)TestFinalizeLogu   finalize_log() 테스트Nc           
          t        d      }t        |dddddg d      }t        |dd	d
ddg d      }t        |d      }| j                  |d   d       y)u   final_score 업데이트r   r7   r8   rh   r9   r[   r      rI   u   추가g?r   r   N)r   r   r   rV   r   r   rF   s      r   !test_finalize_updates_final_scorez1TestFinalizeLog.test_finalize_updates_final_score   s]    &Q	;S"fMQ(Cb&Ics3vm4c:r   c           
          t        d      }t        |dddddg d      }t        |ddd	dd
g d      }t        |d      }| j                  |d   d       y)u1   total_rounds가 rounds 리스트 길이와 일치r   r7   r8   r1r9   r:   r   ro   r2r[   r   rp   r   N)r   r   r   r   rq   s      r   "test_finalize_updates_total_roundsz2TestFinalizeLog.test_finalize_updates_total_rounds   s]    &Q	4c2vFQ	4c2zJcs3/3r   c                 ^    t        d      }t        |d      }| j                  |t               y)u"   finalize_log는 log dict를 반환r   g        N)r   r   ri   rj   rq   s      r   test_finalize_returns_logz)TestFinalizeLog.test_finalize_returns_log   s(    &c3'fd+r   r'   )r)   r*   r+   r,   rr   rv   rx   r-   r   r   rm   rm      s    ";4,r   rm   c                   H    e Zd ZdZd
dZd
dZd
dZd
dZd
dZd
dZ	d
d	Z
y)TestSaveAndLoadLogu!   save_log() & load_log() 테스트Nc                 6    t        j                         | _        y )N)tempfilemkdtemptmpdirr3   s    r   r4   zTestSaveAndLoadLog.setUp   s    &&(r   c                     t        d      }t        |d| j                        }| j                  t	        |      j                                y)u   save_log가 파일을 생성r   	evals_dirN)r   r
   r~   
assertTruer   exists)r   r   
saved_paths      r   test_save_log_creates_filez-TestSaveAndLoadLog.test_save_log_creates_file   s8    &c<4;;G
Z(//12r   c                 v    t        d      }t        |d| j                        }| j                  |t               y)u#   save_log가 경로 문자열 반환r   r   N)r   r
   r~   ri   strrq   s      r   !test_save_log_returns_path_stringz4TestSaveAndLoadLog.test_save_log_returns_path_string   s.    &#|t{{Cfc*r   c           
         t        d      }t        |dddddg d      }t        |d| j                        }t	        |d	d
      5 }t        j                  |      }ddd       | j                  d   d       y# 1 sw Y   xY w)u"   저장된 파일이 유효한 JSONr   r7   r8   rh   r9   r:   r   r   rzutf-8)encodingNr   )r   r   r
   r~   openjsonloadr   )r   r   r   floadeds        r   test_save_log_valid_jsonz+TestSaveAndLoadLog.test_save_log_valid_json   sz    &Q	;S"fMc<4;;G
*cG4 	"YYq\F	",7	" 	"s   A88Bc                    t        d      }t        |d      }t        |d| j                         t	        d| j                        }| j                  |       |J | j                  |d   d       | j                  |d   d       y)u&   load_log가 저장된 로그를 반환r   g      ?r   Nr   r   )r   r   r
   r~   r	   assertIsNotNoner   rV   r   r   r   s      r   test_load_log_returns_dictz-TestSaveAndLoadLog.test_load_log_returns_dict   s{    &3%ldkk:,$++>V$!!!,7vm4d;r   c                 T    t        d| j                        }| j                  |       y)u   없는 스킬이면 None 반환znonexistent-skillr   N)r	   r~   assertIsNonerE   s     r    test_load_log_none_if_not_existsz3TestSaveAndLoadLog.test_load_log_none_if_not_exists   s!    -E&!r   c                    t        d      }t        |dddddddigd	d
d
      }t        |d      }t        |d| j                         t        d| j                        }|J | j                  |d   d       | j                  t        |d         d       | j                  |d   d   d   d       | j                  |d   d       y)u(   저장 후 로드하면 원본과 동일zroundtrip-skillr7   r8   u   첫 번째 라운드g?rL   rN   xr   2   rP   r   Nr   r   r   r=   r   )	r   r   r   r
   r~   r	   r   rD   rV   r   s      r   test_save_load_roundtripz+TestSaveAndLoadLog.test_save_load_roundtrip   s    *+Q	+A3vWZm_^dfhjmn3$'4;;?+t{{C!!!*;<VH-.2)!,_=yIvm4c:r   r'   )r)   r*   r+   r,   r4   r   r   r   r   r   r   r-   r   r   rz   rz      s(    +)3+8
<"
;r   rz   c                   H    e Zd ZdZdedefdZddZddZddZ	dd	Z
dd
Zy)TestGetRecentChangelogu    get_recent_changelog() 테스트nr(   c                     t        d      }t        |      D ]D  }t        ||dz   dd|dz    dt        |      dz  t        |dz         dz  g |dz  dk(  rd	nd
      }F |S )Nr   r7   r8   zRound z	 mutationrb   ro   r   r   r   r;   )r   rc   r   rd   )r   r   r   re   s       r   _make_log_with_roundsz,TestGetRecentChangelog._make_log_with_rounds   sz    &q 
	Aa%''-acU)%<"1X^!!a%L3.#$q5A::	C
	 
r   Nc                 l    | j                  d      }t        |d      }| j                  |t               y)u   문자열 반환ra   countN)r   r   ri   r   rq   s      r   test_returns_stringz*TestGetRecentChangelog.test_returns_string  s.    ((+%c3fc*r   c                 `    t        d      }t        |d      }| j                  |t               y)u<   라운드 없으면 적절한 메시지 또는 빈 문자열zempty-skill   r   N)r   r   ri   r   rq   s      r    test_returns_empty_for_no_roundsz7TestGetRecentChangelog.test_returns_empty_for_no_rounds  s(    '%c3fc*r   c                     | j                  d      }t        |d      }| j                  d|       | j                  d|       | j                  d|       | j                  d|       | j                  d|       y	)
u   최근 N개 라운드만 반환
   ra   r   zRound 10zRound 9zRound 8zRound 7zRound 2 N)r   r   r   assertNotInrq   s      r   test_returns_recent_n_roundsz3TestGetRecentChangelog.test_returns_recent_n_rounds  sh    ((,%c3j&)i(i(F+V,r   c           
          t        d      }t        |dddddg d      }t        |d	      }| j                  d|       | j                  d|       y
)u'   변경 정보가 텍스트에 포함됨z
info-skillr7   rI   u   중요한 변경r9   r:   r   r   r   N)r   r   r   r   rq   s      r   #test_content_includes_mutation_infoz:TestGetRecentChangelog.test_content_includes_mutation_info$  sN    &Q*<c3FS%c3h'(&1r   c                     | j                  d      }t        |d      }| j                  d|       | j                  d|       y)u1   count가 라운드 수보다 크면 전체 반환ro   r   r   zRound 1zRound 2N)r   r   r   rq   s      r   test_count_larger_than_roundsz4TestGetRecentChangelog.test_count_larger_than_rounds-  s:    ((+%c4i(i(r   r'   )r)   r*   r+   r,   intrj   r   r   r   r   r   r   r-   r   r   r   r      s1    *s t ++
-2)r   r   c                   0    e Zd ZdZddZddZddZddZy)TestAddRoundTokenBreakdownu&   add_round() 토큰 세분화 테스트Nc                 $    t        d      | _        y r1   r2   r3   s    r   r4   z TestAddRoundTokenBreakdown.setUp8  r5   r   c                    t        d!i d| j                  dddddddd	d
ddg dddddddddddddddddd}|d   d   }| j                  |d   d       | j                  |d   d       | j                  |d   d       | j                  |d   d       | j                  |d   d       | j                  |d   d       y )"u+   새 토큰 필드가 라운드에 저장됨r   r<   r7   r=   r8   r>   rh   r?   r9   r@   r:   rA   rB   r   rS   i,  rT   rQ   mutation_input_tokensrP   mutation_output_tokensP   execution_input_tokens   execution_output_tokensF   judge_input_tokensr   judge_output_tokensr   r   Nr-   r\   r   rF   rds      r   test_new_token_fields_storedz7TestAddRoundTokenBreakdown.test_new_token_fields_stored;  sA    


 $
 "-	

 
 
 
 
 
 
 #&
 $&
 $'
 %'
  "
  !#!
$ Ha 34c:45r:45s;56;012612B7r   c           
          t        | j                  dddddg d      }|d   d	   }| j                  |d
   d	       | j                  |d   d	       y)u$   새 필드 미지정 시 기본값 0r7   r8   rh   r9   r:   r   r;   r   r   r   r   Nr\   r   s      r   test_new_fields_default_zeroz7TestAddRoundTokenBreakdown.test_new_fields_default_zeroW  se    #!,	
 Ha 34a80115r   c                     t        | j                  dddddg ddd	
      }| j                  t        |d
         d       | j                  |d
   d   d   d       y)u-   기존 파라미터만 사용해도 동작함r7   rI   rJ   rK   rL   r   rP   rQ   rR   r   r   rS   NrC   rE   s     r   #test_backward_compat_existing_testsz>TestAddRoundTokenBreakdown.test_backward_compat_existing_testsg  si    "!0
 	VH-.2)!,^<cBr   r'   )r)   r*   r+   r,   r4   r   r   r   r-   r   r   r   r   5  s    0,886 Cr   r   __main__)r,   r   sysr|   unittestr   pathlibr   pathinsertr   __file__parentscripts.autoresearch.changelogr   r   r   r   r	   r
   TestCaser   r/   rm   rz   r   r   r)   mainr-   r   r   <module>r      s    4  
     3tH~,,33::AAB C 3H%% 3>i(8$$ i(X,h'' ,89;** 9;x8)X.. 8)vAC!2!2 ACH zHMMO r   