
    i                     z   U d Z ddlZddlmc mZ ddlZddlZddl	Z	ddl
mZ ddlmZ ddlmZmZmZ  e ee      j)                         j*                  j*                  j*                        Zeej.                  vrej.                  j1                  de       dededed	dfd
ZdZdddddddgddidZeeef   ed<    G d de	j<                        Z G d de	j<                        Z  G d de	j<                        Z! G d de	j<                        Z" G d d e	j<                        Z# G d! d"e	j<                        Z$ G d# d$e	j<                        Z% G d% d&e	j<                        Z& G d' d(e	j<                        Z' G d) d*e	j<                        Z( G d+ d,e	j<                        Z)e*d-k(  r e	jV                          yy).u<   test_runner.py - runner 모듈 TDD 테스트 (RED → GREEN)    N)Path)Any)	MagicMockcallpatchtmp_path
skill_namecontentreturnc                 `    | |z  }|j                  dd       |dz  j                  |d       y)u&   임시 디렉토리에 SKILL.md 생성T)parentsexist_okSKILL.mdutf-8encodingN)mkdir
write_text)r   r	   r
   	skill_dirs       =/home/jay/workspace/scripts/autoresearch/tests/test_runner.py_make_skill_dirr      s4    :%IOOD4O0''''B    uS   ---
name: test-skill
version: 1
---

# 테스트 스킬 본문

규칙:
- 규칙 1
quality   u   품질 체크clarityu   명확한가?g      ?)idquestionweightmethodweighted_average)nameversiondescriptionitemsscoringSAMPLE_CHECKLISTc                   (    e Zd ZdZddZddZddZy)TestBackupOriginalu"   backup_original() 단위 테스트Nc                 L   t        j                         5 }t        t        |      dt               ddlm}  |d|      }t        |      dz  dz  dz  }| j                  |j                         d|        | j                  |t        |             ddd       y# 1 sw Y   yxY w)u@   백업 파일이 evals/backup-original.md에 생성되어야 함my-skillr   backup_originalevalsbackup-original.mdu   백업 파일이 없음: N)tempfileTemporaryDirectoryr   r   SAMPLE_SKILL_CONTENTautoresearch.runnerr,   
assertTrueexistsassertEqualstr)selftmpdirr,   backup_pathexpecteds        r   test_backup_creates_filez+TestBackupOriginal.test_backup_creates_file;   s    ((* 		9fDL*6JK;)*f=KF|j07:=QQHOOHOO-1J8*/UV[#h-8		9 		9 		9s   A<BB#c                    t        j                         5 }t        t        |      dt               ddlm}  |d|       t        |      dz  dz  dz  }| j                  |j                  d      t               ddd       y# 1 sw Y   yxY w)	u2   백업 파일 내용이 원본과 동일해야 함r*   r   r+   r-   r.   r   r   N)	r/   r0   r   r   r1   r2   r,   r5   	read_text)r7   r8   r,   backup_files       r   $test_backup_content_matches_originalz7TestBackupOriginal.test_backup_content_matches_originalH   s    ((* 	\fDL*6JK;J/v,3g=@TTK[22G2DFZ[	\ 	\ 	\s   A$BBc                     t        j                         5 }t        t        |      dt               ddlm}  |d|      }| j                  |t               ddd       y# 1 sw Y   yxY w)u'   반환값이 문자열 경로여야 함r*   r   r+   N)	r/   r0   r   r   r1   r2   r,   assertIsInstancer6   )r7   r8   r,   results       r   test_backup_returns_path_stringz2TestBackupOriginal.test_backup_returns_path_stringT   sU    ((* 	/fDL*6JK;$Z8F!!&#.	/ 	/ 	/s   A AA'r   N)__name__
__module____qualname____doc__r;   r?   rC    r   r   r(   r(   8   s    ,9
\/r   r(   c                        e Zd ZdZddZddZy)TestRestoreSkillu    restore_skill() 단위 테스트Nc                 X   t        j                         5 }t        t        |      dd       ddlm} t        |      dz  dz  }|j                  dd        |dd	|d
       |j                  d      }| j                  d	|       | j                  d|       ddd       y# 1 sw Y   yxY w)u;   복원 후 SKILL.md가 원본 내용으로 돌아와야 함r*   u   ---
name: x
---
원본 본문r   restore_skillr   u    ---
name: x
---
변경된 본문r   r   u   원본 본문name: xu   변경된 본문N)
r/   r0   r   r   r2   rN   r   r=   assertInassertNotInr7   r8   rN   
skill_pathr
   s        r   $test_restore_writes_original_contentz5TestRestoreSkill.test_restore_writes_original_contentg   s    ((* 	:fDL*6XY9 f
2Z?J!!"GRY!Z *ovyI **G*<GMM/73/9	: 	: 	:s   BB  B)c                 V   t        j                         5 }t        t        |      dd       ddlm}  |dd|d       t        |      dz  dz  }|j                  d	      }| j                  d
|       | j                  d|       | j                  d|       ddd       y# 1 sw Y   yxY w)u1   복원 시 프론트매터가 보존되어야 함r*   u!   ---
name: x
version: 2
---
본문r   rM   u   복원된 본문zname: x
version: 2r   r   r   rO   z
version: 2N)r/   r0   r   r   r2   rN   r=   rP   rR   s        r   "test_restore_preserves_frontmatterz3TestRestoreSkill.test_restore_preserves_frontmattery   s    ((* 	7fDL*6]^9*&8&BWXf
2Z?J **G*<GMM)W-MM,0MM,g6	7 	7 	7s   BBB(rD   )rE   rF   rG   rH   rT   rV   rI   r   r   rK   rK   d   s    *:$7r   rK   c                   (    e Zd ZdZddZddZddZy)TestApplyMutationu!   apply_mutation() 단위 테스트Nc                 :   t        j                         5 }t        t        |      dt               ddlm}  |dd|d       t        |      dz  dz  }|j                  d      }| j                  d	|       | j                  d
|       ddd       y# 1 sw Y   yxY w)u3   변경된 본문이 SKILL.md에 저장되어야 함r*   r   apply_mutationu   # 새로운 본문
- 새 규칙name: test-skill
version: 1r   r   r   u   # 새로운 본문u   - 새 규칙N)	r/   r0   r   r   r1   r2   r[   r=   rP   r7   r8   r[   rS   r
   s        r   #test_apply_mutation_writes_new_bodyz5TestApplyMutation.test_apply_mutation_writes_new_body   s    ((* 
	3fDL*6JK::'I6Sqrf
2Z?J **G*<GMM.8MM.'2
	3 
	3 
	3s   A3BBc                 d   t        j                         5 }t        t        |      dt               ddlm}  |dd|d       t        |      dz  dz  }|j                  d      }| j                  |j                  d	      d
|dd        | j                  d|       ddd       y# 1 sw Y   yxY w)u7   변경 후에도 프론트매터가 보존되어야 함r*   r   rZ   u
   새 본문r\   r   r   r   z---
u   프론트매터 시작 없음: N2   zname: test-skill)r/   r0   r   r   r1   r2   r[   r=   r3   
startswithrP   r]   s        r   )test_apply_mutation_preserves_frontmatterz;TestApplyMutation.test_apply_mutation_preserves_frontmatter   s    ((* 
	7fDL*6JK::|V=[\f
2Z?J **G*<GOOG..w7;Z[bcfdf[gZj9klMM,g6
	7 
	7 
	7s   BB&&B/c                 4   t        j                         5 }t        t        |      dt               ddlm}  |dd|d       t        |      dz  dz  }|j                  d      }| j                  |j                  d	      d
       ddd       y# 1 sw Y   yxY w)u7   저장 파일에 --- 구분자가 두 번 있어야 함r*   r   rZ   u   본문rO   r   r   r   z---   N)
r/   r0   r   r   r1   r2   r[   r=   assertGreaterEqualcountr]   s        r   1test_apply_mutation_frontmatter_separator_presentzCTestApplyMutation.test_apply_mutation_frontmatter_separator_present   s    ((* 
	=fDL*6JK::xCf
2Z?J **G*<G##GMM%$8!<
	= 
	= 
	=s   A0BBrD   )rE   rF   rG   rH   r^   rb   rg   rI   r   r   rX   rX      s    +37=r   rX   c                   \   e Zd ZdZ	 	 	 	 	 ddedededededeeef   fdZdd	edeeef   fd
Z	dde
deeef   fdZ ed       ed       ed       ed       ed      dedededededdfd                                   Z ed       ed       ed       ed       ed       ed      dededededededdfd                                          Z ed       ed       ed       ed       ed       ed      dededededededdfd                                          Zy) TestRunRoundu4   run_round() 단위 테스트 - 외부 의존성 mockmutation_typer#   modified_bodyinput_tokensoutput_tokensr   c                     |||||dS )Nrj   mutation_descriptionmodified_skill_mdrl   rm   rI   )r7   rj   r#   rk   rl   rm   s         r   _make_mutation_resultz"TestRunRound._make_mutation_result   s     +$/!.(*
 	
r   outputc                     |ddddS N
      claude-sonnet-4-6rs   rl   rm   modelrI   r7   rs   s     r   _make_execute_resultz!TestRunRound._make_execute_result       (	
 	
r   scorec                     ddddg|ddddS 	Nr   PASSokr   rB   reason   좋음   rv   r$   total_scoresummaryrl   rm   rI   r7   r~   s     r   _make_judge_resultzTestRunRound._make_judge_result   %    &&DIJ 
 	
r    autoresearch.runner.judge_output!autoresearch.runner.execute_skill"autoresearch.runner.apply_mutation%autoresearch.runner.generate_mutation(autoresearch.runner.get_recent_changelogmock_recentmock_gen
mock_apply	mock_exec
mock_judgeNc                     t        j                         5 }t        t        |      dt               d|_        | j                  d      |_        | j                         |_        | j                  d      |_        ddl	}dd	l
m} dd
lm}	  |d      }
t        } |	ddd|d|
dd|dd      \  }}}| j                  |d       | j!                  |d   d       | j!                  |d   d       ddd       y# 1 sw Y   yxY w)u"   점수가 올라가면 KEEP 결정sk   변경 이력 없음u   # 새 본문rk   ?r~   r   N
create_log	run_round   # 원래 본문name: sk   테스트 입력r   333333?rx   claude-haiku-4-5-20251001r	   current_bodyfrontmatter	checklist
test_inputlog	round_num
prev_score
skills_dirmodel_mutatemodel_judgekeptreverted)r/   r0   r   r   r1   return_valuerr   r|   r   yamlautoresearch.changelogr   r2   r   r&   re   r5   )r7   r   r   r   r   r   r8   r   r   r   r   r   bodyr~   updated_logs                  r   'test_run_round_keep_when_score_improvesz4TestRunRound.test_run_round_keep_when_score_improves   s    ((* 	9fDL$0DE'=K$$($>$>^$>$\H!%)%>%>%@I"&*&=&=C&=&HJ#95T"C(I'0.&#-!07($D% ##E3/[0!4[4a8=	9 	9 	9s   CC44C=!autoresearch.runner.restore_skillmock_restorec                    t        j                         5 }t        t        |      dt               d|_        | j                  d      |_        | j                         |_        | j                  d      |_        ddl	m
} dd	lm}	  |d      }
 |	dd
dt        d|
dd|dd      \  }}}| j                  |d       | j                  |d   d       | j                  |d   d       |j                          ddd       y# 1 sw Y   yxY w)u;   점수가 내려가면 REVERT 결정 + restore_skill 호출r   r   u   # 나쁜 본문r   g333333?r   r   r   r   r   r      입력r   皙?rx   r   r   r   r   N)r/   r0   r   r   r1   r   rr   r|   r   r   r   r2   r   r&   r5   assert_called_oncer7   r   r   r   r   r   r   r8   r   r   r   r   r~   r   s                 r   &test_run_round_revert_when_score_dropsz3TestRunRound.test_run_round_revert_when_score_drops  s     ((* 	.fDL$0DE'=K$$($>$>M^$>$_H!%)%>%>%@I"&*&=&=C&=&HJ#95T"C'0.&*#!07($D% UC([4a8[0!4++-;	. 	. 	.s   C C>>Dc                    t        j                         5 }t        t        |      dt               d|_        t        d      |_        ddlm	} ddl
m}	  |d      }
 |	dddt        d	|
d
d|dd      \  }}}| j                  |d       | j                  |d       |j                          |j                          ddd       y# 1 sw Y   yxY w)uB   mutation 생성 실패 시 라운드 스킵 (점수/본문 유지)r      이력 없음u
   API 실패r   r   r   r   r   r   r   ffffff?rx   r   r   N)r/   r0   r   r   r1   r   	Exceptionside_effectr   r   r2   r   r&   r5   assert_not_calledr   s                 r   &test_run_round_skips_on_mutation_errorz3TestRunRound.test_run_round_skips_on_mutation_errorB  s      ((* 	+fDL$0DE'6K$#,\#:H 95T"C'0.&*#!07($D% UC(T#45((*((*;	+ 	+ 	+s   B%CC)   규칙 추가
   새 규칙   # 변경된 본문d   r`   	   결과물r   )rE   rF   rG   rH   r6   intdictr   rr   r|   floatr   r   r   r   r   r   rI   r   r   ri   ri      si   > -'1

 
 	

 
 
 
c3h
 
3 
c3h 

 
S#X 
 -.
./
/0
23
56'9'9 '9 	'9
 '9 '9 
'9 7 4 1 0 /
'9R -.
./
/0
./
23
56'.'. '.  	'.
 '. '. '. 
'. 7 4 0 1 0 /'.R -.
./
/0
./
23
56'+'+ '+  	'+
 '+ '+ '+ 
'+ 7 4 0 1 0 /'+r   ri   c                      e Zd ZdZd"dedeeef   fdZd#dedeeef   fdZ	d$dedeeef   fdZ
 ed	       ed
       ed       ed       ed       ed       ed       ed      dededededededededdfd                                                        Z ed	       ed
       ed       ed       ed       ed       ed       ed      dededededededededdfd                                                        Z ed	       ed
       ed       ed       ed       ed       ed       ed       ed      dedededededededededdfd                                                               Z ed	       ed
       ed       ed       ed       ed       ed       ed      dededededededededdfd                                                        Z ed	       ed
       ed       ed       ed       ed       ed       ed      dededededededededdfd                                                         Z ed	       ed
       ed       ed       ed       ed       ed       ed      dededededededededdfd!                                                        Zy)%TestRunu   run() 메인 루프 테스트rs   r   c                     |ddddS )Nrv   rw   mry   rI   r{   s     r   r|   zTestRun._make_execute_resultz  s     "rTWXXr   r~   c                     g |ddddS )Nr   r   rv   r   rI   r   s     r   r   zTestRun._make_judge_result}  s     
 	
r   r   c                     dd|dddS Nr   u   새 규칙 추가r   r`   ro   rI   )r7   r   s     r   rr   zTestRun._make_mutation_result  s    ,$7!%
 	
r   autoresearch.runner.save_log autoresearch.runner.finalize_logautoresearch.runner.run_round#autoresearch.runner.backup_originalr   r   "autoresearch.runner.load_checklistautoresearch.runner.load_skillmock_load_skillmock_load_checklistr   r   mock_backupmock_run_roundmock_finalize	mock_saveNc	           
         t        j                         5 }	t        t        |	      dt               d|_        t        |_        | j                         |_        | j                  d      |_        t        t        |	      dz  dz  dz        |_        dddd	d
gddddd}
dd|
f|_        d |_
        d|_        ddlm}  |dddddd|	      }| j                  |t               | j                  d|       ddd       y# 1 sw Y   yxY w)u/   run() 함수가 로그 dict를 반환해야 함r   r      # 본문      ?r   r-   r.   2024-01-01T00:00:00+00:00r   r   rounddecisionr   skill
started_atroundsfinal_scoretotal_roundsr   r   r   Q?c                 ,    i | |t        | d         dS Nr   )r   r   lenr   r~   s     r   <lambda>z3TestRun.test_run_returns_log_dict.<locals>.<lambda>  %     <<$ #CM 2< r   /tmp/log.jsonrun/fake/checklist.yaml	   테스트rv   ffffff?   r	   checklist_pathr   r   target_scoreconsecutiver   r   N)r/   r0   r   r   r1   r   r&   r|   r   r6   r   r2   r   rA   r   rP   )r7   r   r   r   r   r   r   r   r   r8   high_score_logr   rB   s                r   test_run_returns_log_dictz!TestRun.test_run_returns_log_dict  s   ( ((* (	+fDL$0DE+CO(/?,%)%>%>%@I"&*&=&=C&=&HJ#'*4<$+>+HK_+_'`K$ 9%&F;<  !.N ,6t^*LN')M%
 &5I"/5&!!F !!&$/MM'6*Q(	+ (	+ (	+s   CC==Dc	                    t        j                         5 }	t        t        |	      dt               d|_        t        |_        | j                         |_        | j                  d      |_        d|_        ddddd	gd
d
dd
d}
dd|
f|_        d |_	        d|_        d
dl
m}  |ddddddd|	       | j                  |j                  d       ddd       y# 1 sw Y   yxY w)u2   dry_run=True이면 1라운드만 실행 후 종료r   r   r   r   /tmp/backup.mdr   r   r   r   r   r   r   c                 ,    i | |t        | d         dS r   r   r   s     r   r   z=TestRun.test_run_dry_run_executes_one_round.<locals>.<lambda>  r   r   r   r   r   r   r`   r   r  T)r	   r  r   r   r  r  dry_runr   N)r/   r0   r   r   r1   r   r&   r|   r   r   r2   r   r5   
call_countr7   r   r   r   r   r   r   r   r   r8   	round_logr   s               r   #test_run_dry_run_executes_one_roundz+TestRun.test_run_dry_run_executes_one_round  s    ( ((* (	;fDL$0DE+CO(/?,%)%>%>%@I"&*&=&=C&=&HJ#'7K$ 9%&F;<  !)I ,6sI*FN')M%
 &5I"/5&!!	 ^66:Q(	; (	; (	;s   B9CC r   r   c
           	         t        j                         5 }
t        t        |
      dt               d|_        t        |_        | j                         |_        | j                  d      |_        d|_        ddddd	gd
d
dd
d}dd|f|_        d |_	        d|	_        d
dl
m}  |dddd|
       |j                          ddd       y# 1 sw Y   yxY w)u0   dry_run 종료 후 원본이 복원되어야 함r   )r   u   # 원본 본문r   r   r	  r   r   r   r   r   r   r   c                 ,    i | |t        | d         dS r   r   r   s     r   r   z<TestRun.test_run_dry_run_restores_original.<locals>.<lambda>4  r   r   r   r   r   r   T)r	   r  r   r  r   Nr/   r0   r   r   r1   r   r&   r|   r   r   r2   r   r   )r7   r   r   r   r   r   r   r   r   r   r8   r  r   s                r   "test_run_dry_run_restores_originalz*TestRun.test_run_dry_run_restores_original  s    , ((* %	.fDL$0DE+JO(/?,%)%>%>%@I"&*&=&=C&=&HJ#'7K$ 9%&F;<  !)I ,@i*PN')M%
 &5I"/5&! ++-K%	. %	. %	.   B*CCc	                 4   t        j                         5 }	t        t        |	      dt               d|_        t        |_        | j                         |_        | j                  d      |_        d|_        dgdt        dt        d	t        t        t        t        t        t        f   f   ffd
}
|
|_        d |_        d|_        ddlm}  |dddddd|	       | j#                  d   d       | j%                  d   d       ddd       y# 1 sw Y   yxY w)u;   연속 consecutive회 target_score 달성 시 조기 종료r   r   r   r   r	  r   argskwargsr   c                     dxx   dz  cc<   d|v r|j                  d      n| d   }t        |      }t        |j                  dg             }|j                  d   dd       ||d<   |j                  dd      dz   |d<   dd	|fS )
Nr   r   r   r   r   r   r   r   g
ףp=
?)getr   listappend)r  r  log_argr   r   r  s        r   side_effect_run_roundzNTestRun.test_run_stops_after_consecutive_target.<locals>.side_effect_run_roundg  s    1"/4&**U+DG.27mkooh;<
16JK(.H%&1oofa&@1&DF#"D+66r   c                 ,    i | |t        | d         dS r   r   r   s     r   r   zATestRun.test_run_stops_after_consecutive_target.<locals>.<lambda>s  r   r   r   r   r   r   r`   r   r  r  rv   N)r/   r0   r   r   r1   r   r&   r|   r   r   tupler6   r   r   r   r2   r   assertLessEqualre   )r7   r   r   r   r   r   r   r   r   r8   r  r   r  s               @r   'test_run_stops_after_consecutive_targetz/TestRun.test_run_stops_after_consecutive_targetH  s   ( ((* ,	6fDL$0DE+CO(/?,%)%>%>%@I"&*&=&=C&=&HJ#'7K$J	7S 	7C 	7E#uVZ[^`c[cVdJdDe 	7 *?N&)M%
 &5I"/5&!!   A3##JqM15Y,	6 ,	6 ,	6s   C/DDc	                    t        j                         5 }	t        t        |	      dt               d|_        t        |_        | j                         |_        | j                  d      |_        d|_        g ddgdt        d	t        d
t        t        t        t        t        t        f   f   ffd}
|
|_        d |_        d|_        ddlm}  |dddddd|	       | j#                  d   d       ddd       y# 1 sw Y   yxY w)u5   consecutive_count는 target 미달 시 0으로 리셋r   r   r   r   r	  )r   r   r   r   r   r   r   r  r  r   c                  D   d   }dxx   dz  cc<   |t              k  r|   nd}d|v r|j                  d      n| d   }t        |      }t        |j                  dg             }|j	                  |dz   dd       ||d<   |j                  dd      dz   |d<   d	||fS )
Nr   r   r   r   r   r   r   r   r   )r   r  r   r  r  )	r  r  idxr~   r  r   r   r  	score_seqs	          r   r   zKTestRun.test_run_consecutive_count_resets_on_low_score.<locals>.side_effect  s     m1"*-I*>	#D/4&**U+DG.27mkooh;<aVDE(.H%&1oofa&@1&DF#"E;77r   c                 ,    i | |t        | d         dS r   r   r   s     r   r   zHTestRun.test_run_consecutive_count_resets_on_low_score.<locals>.<lambda>  r   r   r   r   r   r   r`   r   r  r     N)r/   r0   r   r   r1   r   r&   r|   r   r   r  r6   r   r   r   r2   r   re   )r7   r   r   r   r   r   r   r   r   r8   r   r   r  r%  s               @@r   .test_run_consecutive_count_resets_on_low_scorez6TestRun.test_run_consecutive_count_resets_on_low_score  s   ( ((* .	6fDL$0DE+CO(/?,%)%>%>%@I"&*&=&=C&=&HJ#'7K$ <IJ
83 
8# 
8%UDQTVYQYN@Z:[ 
8 *5N&)M%
 &5I"/5&!! ##JqM15].	6 .	6 .	6s   CC??Dc	           	         t        j                         5 }	t        t        |	      dt               d|_        t        |_        | j                         |_        | j                  d      |_        d|_        ddddd	gd
d
dd
d}
dd|
f|_        d |_	        d|_        d
dl
m}  |dddd|	       |j                          ddd       y# 1 sw Y   yxY w)u0   run() 종료 시 save_log가 호출되어야 함r   r   r   r   r	  r   r   r   r   r   r   r   c                 ,    i | |t        | d         dS r   r   r   s     r   r   z1TestRun.test_run_calls_save_log.<locals>.<lambda>  r   r   r   r   r   r   rd   )r	   r  r   r   r   Nr  r  s               r   test_run_calls_save_logzTestRun.test_run_calls_save_log  s    ( ((* $	+fDL$0DE+CO(/?,%)%>%>%@I"&*&=&=C&=&HJ#'7K$ 9%&F;<  !)I ,6sI*FN')M%
 &5I"/5&! ((*I$	+ $	+ $	+r  r   )r   r   )rE   rF   rG   rH   r6   r   r   r|   r   r   rr   r   r   r  r  r  r!  r(  r+  rI   r   r   r   r   w  so   'Y3 Yc3h Y
 
S#X 

# 
cSVh 
 )*
-.
*+
01
-.
./
/0
+,4+"4+ '4+ 	4+
 4+ 4+ "4+ !4+ 4+ 
4+ - 1 0 / 2 , / +4+l )*
-.
*+
01
-.
./
/0
+,4;"4; '4; 	4;
 4; 4; "4; !4; 4; 
4; - 1 0 / 2 , / +4;l )*
-.
*+
01
./
-.
./
/0
+,2."2. '2. 	2.
 2.  2. 2. "2. !2. 2. 
2. - 1 0 / 0 2 , / +2.h )*
-.
*+
01
-.
./
/0
+,86"86 '86 	86
 86 86 "86 !86 86 
86 - 1 0 / 2 , / +86t )*
-.
*+
01
-.
./
/0
+,:6":6 ':6 	:6
 :6 :6 ":6 !:6 :6 
:6 - 1 0 / 2 , / +:6x )*
-.
*+
01
-.
./
/0
+,0+"0+ '0+ 	0+
 0+ 0+ "0+ !0+ 0+ 
0+ - 1 0 / 2 , / +0+r   r   c                       e Zd ZdZ ed      deddfd       Z ed      deddfd       Z ed      deddfd       Z ed      deddfd	       Z	y)
TestMainu"   main() CLI 인자 파싱 테스트autoresearch.runner.runmock_runr   Nc                     dddddg dd|_         dd	lm}  |g d
      }| j                  |d       |j	                  dddddddddd
       y)u7   CLI 인자가 run()에 올바르게 전달되어야 함r*   r   r   r  rd   r   r   r   r   r   r   r   r   r   main)--skillr*   --checklist/path/to/checklist.yaml--test-inputr   z--rounds20z--target-scorez0.9z--consecutive2z--model-mutateclaude-opus-4-6z--model-judger   --skills-dir/some/skillsr7  r   rw   r   r;  r   Fr=  )
r	   r  r   r   r  r  r   r   r  r   N)r   r2   r4  r5   assert_called_once_with)r7   r0  r4  	exit_codes       r   test_main_passes_correct_argsz&TestMain.test_main_passes_correct_args  sz      5!
 	-
	. 	A&((!4)*3% 	) 	
r   c                     dddddg dd|_         ddlm}  |g d       |j                  j                  }| j                  |d	          y
)u:   --dry-run 플래그가 dry_run=True로 전달되어야 함r   r   r   r   r   r2  r3  )r5  r   r6  /c.yamlr8  r   z	--dry-runr  N)r   r2   r4  	call_argsr  r3   r7   r0  r4  call_kwargss       r   test_main_dry_run_flagzTestMain.test_main_dry_run_flagE  sY     5!
 	-
	
 ((//I./r   c                    dddddg dd|_         ddlm}  |g d       |j                  j                  }| j                  |d   d	       | j                  |d
   d       | j                  |d   d       | j                  |d   d       | j                  d|d          | j                  |d          | j                  |d   d       y)u-   기본값이 올바르게 설정되어야 함r   r   g        r   r2  r3  )r5  r   r6  rB  r8  r   r   r`   r  r   r  r  r   rx   haikur   r  r   z/home/jay/workspace/skillsN)	r   r2   r4  rC  r  r5   assertAlmostEqualrP   assertFalserD  s       r   test_main_default_valuesz!TestMain.test_main_default_valuesc  s     5!
 	-		
 ((//X.3{>:DA]3Q7^46IJg{=9:Y/0\24PQr   c                 f    dddddg dd|_         ddlm}  |g d      }| j                  |d       y	)
u   성공 시 exit code 0 반환r   r   r   r   r   r2  r3  )r5  r   r6  rB  r8  inN)r   r2   r4  r5   )r7   r0  r4  codes       r   !test_main_returns_zero_on_successz*TestMain.test_main_returns_zero_on_success  sC     5!
 	-UVq!r   )
rE   rF   rG   rH   r   r   r@  rF  rK  rO  rI   r   r   r.  r.    s    ,
$%1
i 1
D 1
 &1
f $%0y 0T 0 &0: $% R  Rt  R & RD $%") " " &"r   r.  c                   (    e Zd ZdZddZddZddZy)TestLoadTestInputsu#   load_test_inputs() 단위 테스트Nc                    d}t        j                         5 }t        |      dz  }|j                  |d       ddlm}  |t        |            }| j                  t        |      d       | j                  |d   d   d	       | j                  d
|d   d          | j                  |d   d   d       ddd       y# 1 sw Y   yxY w)u5   정상 YAML 파일 로드 시 inputs 리스트 반환u   inputs:
  - id: insurance-fa-recruit
    text: "보험 FA 모집 광고, 타겟: 30대 보험설계사"
  - id: insurance-product-promo
    text: "삼성생명 종신보험 신상품 출시 광고"
ztest-inputs.yamlr   r   r   load_test_inputsrd   r   zinsurance-fa-recruitu   보험 FA 모집textr   zinsurance-product-promoN)
r/   r0   r   r   r2   rT  r6   r5   r   rP   )r7   yaml_contentr8   	yaml_pathrT  inputss         r   test_load_valid_yamlz'TestLoadTestInputs.test_load_valid_yaml  s     ((* 
	IfV'99I   @<%c)n5FS[!,VAYt_.DEMM,fQi.?@VAYt_.GH
	I 
	I 
	Is   BB<<Cc                 t    ddl m} | j                  t              5   |d       ddd       y# 1 sw Y   yxY w)u>   존재하지 않는 파일 경로 시 FileNotFoundError 발생r   rS  z"/nonexistent/path/test-inputs.yamlN)r2   rT  assertRaisesFileNotFoundError)r7   rT  s     r   test_load_missing_filez)TestLoadTestInputs.test_load_missing_file  s6    801 	CAB	C 	C 	Cs   	.7c                    d}t        j                         5 }t        |      dz  }|j                  |d       ddlm} | j                  t              5   |t        |             ddd       ddd       y# 1 sw Y   xY w# 1 sw Y   yxY w)u6   inputs 리스트가 비어 있으면 ValueError 발생zinputs: []
z
empty.yamlr   r   r   rS  N)	r/   r0   r   r   r2   rT  r[  
ValueErrorr6   )r7   rV  r8   rW  rT  s        r   test_load_empty_inputsz)TestLoadTestInputs.test_load_empty_inputs  s    %((* 	1fV|3I   @<"":. 1 Y01	1 	11 1	1 	1s#   =BA7&B7B 	<BBrD   )rE   rF   rG   rH   rY  r]  r`  rI   r   r   rQ  rQ    s    -I*C
1r   rQ  c                      e Zd ZdZd"dedeeef   fdZd#dedeeef   fdZd$de	deeef   fdZ
 ed	       ed
       ed       ed       ed      dedededededdfd                                   Z ed       ed       ed       ed       ed	       ed
       ed       ed      dededededededed eddfd!                                                        Zy)%TestMultiInputu   멀티 입력 지원 테스트rk   r   c                     dd|dddS r   rI   )r7   rk   s     r   rr   z$TestMultiInput._make_mutation_result  s    ,$7!.
 	
r   rs   c                     |ddddS ru   rI   r{   s     r   r|   z#TestMultiInput._make_execute_result  r}   r   r~   c                     ddddg|ddddS r   rI   r   s     r   r   z!TestMultiInput._make_judge_result  r   r   r   r   r   r   r   r   r   r   r   r   Nc                 b   t        j                         5 }t        t        |      dt               d|_        | j                         |_        | j                         |_        | j                  d      | j                  d      g|_	        ddl
m} ddlm}  |d      }	 |dd	d
t        ddg|	dd|dd      \  }
}}| j                  |dd       | j!                  |d   d       | j!                  |j"                  d       | j!                  |j"                  d       ddd       y# 1 sw Y   yxY w)u5   멀티 입력 시 평균 점수로 KEEP/REVERT 판정r   r   r   r   r   r   r   r   r   r      입력1   입력2r   r   rx   r   )r	   r   r   r   test_inputsr   r   r   r   r   r   r   r   )placesr   rd   N)r/   r0   r   r   r1   r   rr   r|   r   r   r   r   r2   r   r&   rI  r5   r  )r7   r   r   r   r   r   r8   r   r   r   r   r~   r   s                r   *test_run_round_multi_input_averages_scoresz9TestMultiInput.test_run_round_multi_input_averages_scores  s2    ((* $	7fDL$0DE'=K$$($>$>$@H!%)%>%>%@I" ''c'2''c'2&J"
 :5T"C'0.&*&	2!07($D% ""5#a"8[0!4Y1115Z22A6I$	7 $	7 $	7s   DD%%D.r   r   r   r   r   r   r   r   r   r   r   r   c	           	         t        j                         5 }	t        t        |	      dt               d|_        t        |_        dddd|_        g ddd	dd
g ddd	dd
g|_        d|_        dddddgddddd}
dd|
f|_        d |_        d|_        ddlm	}  |ddddgd|	      }| j                  |t               |j                  j                  }| j                  d|       | j                  |d   ddg       ddd       y# 1 sw Y   yxY w)u>   run() 함수에 test_inputs 리스트 전달 시 정상 동작r   r   r   rv   rw   )rs   rl   rm   r   r   r   r   r   r	  r   r   r   r   r   r   r   r   c                 ,    i | |t        | d         dS r   r   r   s     r   r   zFTestMultiInput.test_run_multi_input_via_run_function.<locals>.<lambda>J  r   r   r   r   r   rg  rh  )r	   r  ri  r   r   ri  N)r/   r0   r   r   r1   r   r&   r   r2   r   rA   r   rC  r  rP   r5   )r7   r   r   r   r   r   r   r   r   r8   r  r   rB   rE  s                 r   %test_run_multi_input_via_run_functionz4TestMultiInput.test_run_multi_input_via_run_function   sI   ( ((* +	QfDL$0DE+CO(/?,0;Rbd%eI"STSTgijSTSTgij&J" (8K$ 9%&F;<  !)I ,6sI*FN')M%
 &5I"/5&	2!F !!&$/(2299KMM-5[7)Y9OPW+	Q +	Q +	Qs   C%DDr,  r   r   )rE   rF   rG   rH   r6   r   r   rr   r|   r   r   r   r   rk  rn  rI   r   r   rb  rb    s   (
3 
RVWZ\_W_R` 

3 
c3h 

 
S#X 
 -.
./
/0
23
56-7-7 -7 	-7
 -7 -7 
-7 7 4 1 0 /
-7^ )*
-.
*+
01
-.
./
/0
+,7Q"7Q '7Q 	7Q
 7Q 7Q "7Q !7Q 7Q 
7Q - 1 0 / 2 , / +7Qr   rb  c                   X    e Zd ZdZ ed       ed      dededdfd              Zd
d	Zy)TestMainMultiInputu)   main() CLI 멀티 입력 관련 테스트z$autoresearch.runner.load_test_inputsr/  r0  	mock_loadr   Nc                 $   ddddddg|_         ddddd	g d
d|_         d	dlm}  |g d      }| j                  |d	       |j	                  d       |j
                  j                  }| j                  d|       | j                  |d   ddg       y)uT   --test-inputs-file 인자가 load_test_inputs를 통해 run()에 전달되어야 함input1u   첫 번째 입력)r   rU  input2u   두 번째 입력r   r   r   r   r   r2  r3  )r5  r   r6  rB  --test-inputs-file/path/to/test-inputs.yamlrv  ri  N)r   r2   r4  r5   r>  rC  r  rP   )r7   r0  rq  r4  r?  rE  s         r   test_main_test_inputs_filez-TestMainMultiInput.test_main_test_inputs_filej  s    
 %89%89"
	
 5!
 	-	
	 	A&))*EF((//m[1]36IK^5_`r   c                     ddl m} | j                  t              5 } |g d       ddd       | j	                  j
                  j                  d       y# 1 sw Y   0xY w)u;   --test-input과 --test-inputs-file 동시 사용 시 에러r   r3  )r5  r   r6  rB  r8  r   ru  z/path/to/file.yamlN)r2   r4  r[  
SystemExitassertNotEqual	exceptionrN  )r7   r4  ctxs      r   test_main_mutual_exclusivez-TestMainMultiInput.test_main_mutual_exclusive  sS    ,z* 	c		 	CMM..2	 	s   AArD   )rE   rF   rG   rH   r   r   rw  r}  rI   r   r   rp  rp  g  sL    3
12
$%!a9 !a !aW[ !a & 3!aF3r   rp  c                   6    e Zd ZdZ ed      deddfd       Zy)TestBackgroundu    --background 플래그 테스트r/  r0  r   Nc                 4   dddddg dd|_         ddlm} t        j                         5 } |ddd	d
dddd|g	      }ddd       | j                  d       |j                  j                  }| j                  |j                  dd             y# 1 sw Y   SxY w)u@   --background 플래그가 background=True로 전달되어야 함r   r   r   r   r   r2  r3  r5  r6  rB  r8  r   z--backgroundr<  N
backgroundF)
r   r2   r4  r/   r0   r5   rC  r  r3   r  )r7   r0  r4  r8   r?  rE  s         r   test_main_background_flagz(TestBackground.test_main_background_flag  s     5!
 	-((* 	f!"""
I	 	A&((//e<=#	 	s   BB)rE   rF   rG   rH   r   r   r  rI   r   r   r  r    s+    *
$%>) > > &>r   r  c                       e Zd ZdZ ed       ed       ed       ed       ed      deded	ed
ededdfd                                   Zy)TestRunRoundTokenBreakdownuS   run_round()에서 토큰 세분화 필드가 add_round에 전달되는지 테스트r   r   r   r   r   r   r   r   r   r   r   Nc                    t        j                         5 }t        t        |      dt               d|_        dddddd|_        d	d
ddd|_        ddddgddddd|_        ddlm} ddlm	}  |d      }	 |dddt        d|	dd|dd !      \  }
}
}|d"   d   }| j                  |d#   d       | j                  |d$   d       | j                  |d%   d
       | j                  |d&   d       | j                  |d'   d       | j                  |d(   d       | j                  |d)   d*       | j                  |d+   d,       d-d-d-       y-# 1 sw Y   y-xY w).u8   라운드 로그에 세분화 토큰 필드가 포함됨r   r   r   r   u   # 변경됨r   r`   ro   r         r   ry   r   r   r   r   r   r      rw   r   r   r   r   r   r   r   r   r   rx   r   r   r   mutation_input_tokensmutation_output_tokensexecution_input_tokensexecution_output_tokensjudge_input_tokensjudge_output_tokensrl   iJ  rm      N)r/   r0   r   r   r1   r   r   r   r2   r   r&   r5   )r7   r   r   r   r   r   r8   r   r   r   _r   rds                r   test_token_breakdown_in_logz6TestRunRoundTokenBreakdown.test_token_breakdown_in_log  s    ((* 4	AfDL$0DE'6K$!0(4%2 #!#%H! & #!$	&I" "+fMN"# "!#'J# :5T"C )'&*#!07!Aq+ X&q)BR 78#>R 892>R 893?R 9:C@R 45r:R 56;R/@R0-@i4	A 4	A 4	As   D,E

E)rE   rF   rG   rH   r   r   r  rI   r   r   r  r    s    ]
-.
./
/0
23
56=A=A =A 	=A
 =A =A 
=A 7 4 1 0 /
=Ar   r  __main__),rH   builtins@py_builtins_pytest.assertion.rewrite	assertionrewrite
@pytest_arsysr/   unittestpathlibr   typingr   unittest.mockr   r   r   r6   __file__resolveparent_AUTORESEARCH_PARENTpathinsertr   r1   r&   r   __annotations__TestCaser(   rK   rX   ri   r   r.  rQ  rb  rp  r  r  rE   r4  rI   r   r   <module>r     s   B   
     0 0 4>113::AAHHI sxx'HHOOA+,Cd C Cc Cd C
  "oE ,-$ $sCx.  $/** $/X"7x(( "7T+=)) +=fn+8$$ n+lO+h O+nH"x   H"`)1** )1bPQX&& PQp:3** :3D#>X&& #>VEA!2!2 EAP zHMMO r   