
    ͳi                     ~    d Z ddlZddlmc mZ ddlZddlm	Z	m
Z
 ddlZ G d d      Z G d d      Z G d d	      Zy)
u   task-1068.1: call_llm() Claude CLI 전환 테스트

run_evals.py의 call_llm() 함수가 API Key → Claude CLI로 전환되었는지 검증합니다.
    N)	MagicMockpatchc                   @    e Zd ZdZd	dZd	dZd	dZd	dZd	dZd	dZ	y)
TestCallLlmCliIntegrationu'   call_llm() Claude CLI 통합 테스트.Nc                    ddl m} t        d      5 }t        ddd      |_         |d      }|j                          |j                  }|d   d   d   }d}||k(  }|slt        j                  d	|fd
||f      t        j                  |      t        j                  |      dz  }dd|iz  }	t        t        j                  |	            dx}x}}|d   d   d   }d}||k(  }|slt        j                  d	|fd
||f      t        j                  |      t        j                  |      dz  }dd|iz  }	t        t        j                  |	            dx}x}}|d   d   d   }d}||k(  }|slt        j                  d	|fd
||f      t        j                  |      t        j                  |      dz  }dd|iz  }	t        t        j                  |	            dx}x}}d}|d   d   }||v }|slt        j                  d|fd||f      t        j                  |      t        j                  |      dz  }dd|iz  }	t        t        j                  |	            dx}x}}ddd       y# 1 sw Y   yxY w)u3   call_llm()이 subprocess를 사용하는지 확인.r   call_llmrun_evals.subprocess.runu
   LLM 응답 
returncodestdoutstderru   테스트 프롬프트claude==z%(py1)s == %(py4)spy1py4assert %(py6)spy6N   z-p   --modelin)z%(py1)s in %(py4)s)	run_evalsr	   r   r   return_valueassert_called_once	call_args
@pytest_ar_call_reprcompare	_safereprAssertionError_format_explanation)
selfr	   mock_runresultr!   @py_assert0@py_assert3@py_assert2@py_format5@py_format7s
             I/home/jay/workspace/tools/eval-runner/test_task_1068_1_cli_integration.pytest_call_llm_uses_subprocessz7TestCallLlmCliIntegration.test_call_llm_uses_subprocess   s   &-. 	0($-#%H! 67F '') !**IQ<?1%11%1111%111%1111111111Q<?1%--%----%---%----------Q<?1%A)AA%)AAAAA%)AAAA%AAA)AAAAAAAA/	!Q/9////9///9//////////#	0 	0 	0s   II''I0c                    ddl m} t        d      5 }t        ddd      |_         |d       |j
                  d   d   }|j                  d      d	z   }||   }d
}||k(  }|slt        j                  d|fd||f      t        j                  |      t        j                  |      dz  }dd|iz  }	t        t        j                  |	            dx}x}}ddd       y# 1 sw Y   yxY w)uI   call_llm()이 claude-haiku-4-5-20251001 모델을 사용하는지 확인.r   r   r
      응답r   r      프롬프트r   r   zclaude-haiku-4-5-20251001r   r   r   r   r   N)r   r	   r   r   r   r!   indexr"   r#   r$   r%   r&   )
r'   r	   r(   r!   model_indexr*   r+   r,   r-   r.   s
             r/    test_call_llm_uses_correct_modelz:TestCallLlmCliIntegration.test_call_llm_uses_correct_model&   s    &-. 	I($-%H! ^$ **1-a0I#//)4q8K[)H-HH)-HHHHH)-HHHH)HHH-HHHHHHHH	I 	I 	I   CCC&c           	      H   ddl m} t        d      5 }t        ddd      |_        t        j                  t              5 } |d       d	d	d	       d
}j                  }t        |      }||v }|s
t        j                  d|fd||f      t        j                  |      dt        j                         v st        j                  t              rt        j                  t              nddt        j                         v st        j                  |      rt        j                  |      ndt        j                  |      t        j                  |      dz  }dd|iz  }	t!        t        j"                  |	            d	x}x}x}}d	d	d	       y	# 1 sw Y   ExY w# 1 sw Y   y	xY w)u9   call_llm()이 에러를 적절히 처리하는지 확인.r   r   r
   r   r   zError: Claude CLI failedr   r3   Nu   claude CLI 호출 실패r   )zK%(py1)s in %(py8)s
{%(py8)s = %(py3)s(%(py6)s
{%(py6)s = %(py4)s.value
})
}strexc_info)r   py3r   r   py8zassert %(py10)spy10)r   r	   r   r   r   pytestraisesRuntimeErrorvaluer9   r"   r#   r$   @py_builtinslocals_should_repr_global_namer%   r&   )
r'   r	   r(   r:   r*   @py_assert5@py_assert7r,   @py_format9@py_format11s
             r/   test_call_llm_handles_errorz5TestCallLlmCliIntegration.test_call_llm_handles_error7   s   &-. 
	E($-1%H! |, )() .DX^^D^1DD-1DDDDD-1DDDD-DDDDDDDDDDDDDDDXDDDXDDD^DDD1DDDDDDDD
	E 
	E) )
	E 
	Es#   -F	FD:FF	FF!c                    ddl m} t        d      5 }t        ddd      |_         |d      }d}||k(  }|st        j                  d	|fd
||f      dt        j                         v st        j                  |      rt        j                  |      ndt        j                  |      dz  }dd|iz  }t        t        j                  |            dx}}ddd       y# 1 sw Y   yxY w)u-   call_llm()이 출력을 trim하는지 확인.r   r   r
   u     응답 내용  
r   r   r3   u   응답 내용r   z%(py0)s == %(py3)sr)   py0r;   assert %(py5)spy5N)r   r	   r   r   r   r"   r#   rB   rC   rD   r$   r%   r&   )r'   r	   r(   r)   r,   @py_assert1@py_format4@py_format6s           r/   test_call_llm_strips_outputz5TestCallLlmCliIntegration.test_call_llm_strips_outputG   s    &-. 		-($-,%H! n-F,,6_,,,,6_,,,,,,6,,,6,,,_,,,,,,,		- 		- 		-r7   c                    ddl m} t        d      5 }t        ddd      |_         |d       |j
                  d   }d	}||v }|st        j                  d
|fd||f      t        j                  |      dt        j                         v st        j                  |      rt        j                  |      nddz  }dd|iz  }t        t        j                  |            dx}}|d	   }d}||kD  }|slt        j                  d|fd||f      t        j                  |      t        j                  |      dz  }	dd|	iz  }
t        t        j                  |
            dx}x}}ddd       y# 1 sw Y   yxY w)u0   call_llm()이 timeout을 설정하는지 확인.r   r   r
   r2   r   r   r3   r   timeoutr   )z%(py1)s in %(py3)scall_kwargsr   r;   rN   rO   N)>)z%(py1)s > %(py4)sr   r   r   )r   r	   r   r   r   r!   r"   r#   r$   rB   rC   rD   r%   r&   )r'   r	   r(   rV   r*   r,   rQ   rR   r+   r-   r.   s              r/   test_call_llm_has_timeoutz3TestCallLlmCliIntegration.test_call_llm_has_timeoutV   s    &-. 	.($-%H! ^$",,Q/K+9++++9+++9++++++++++++++++y)-A-)A----)A---)---A-------	. 	. 	.s   EE++E4c                     ddl }ddlm} t        d      5 }t        d      5 }t	        ddd      |_         |d	       |j                  d
       ddd       ddd       y# 1 sw Y   xY w# 1 sw Y   yxY w)u6   call_llm()이 rate limit delay를 가지는지 확인.r   Nr   r
   zrun_evals.time.sleepr2   r   r   r3   r   )timer   r	   r   r   r   assert_called_once_with)r'   r[   r	   r(   
mock_sleeps        r/   test_call_llm_rate_limit_delayz8TestCallLlmCliIntegration.test_call_llm_rate_limit_delayg   s}    &-. 	6(-. 
6*(1 #)% ( 2215
6	6 	6
6 
6	6 	6s"   A,-A A, A)	%A,,A5returnN)
__name__
__module____qualname____doc__r0   r6   rI   rS   rY   r^        r/   r   r      s%    10.I"E -."6rf   r   c                        e Zd ZdZddZddZy)TestNoAnthropicImportu"   anthropic SDK 미사용 테스트.Nc                    ddl }d}t        |      }||v}|st        j                  d|fd||f      t        j                  |      dt        j                         v st        j                  t              rt        j                  t              nddt        j                         v st        j                  |      rt        j                  |      ndt        j                  |      dz  }d	d
|iz  }t        t        j                  |            dx}x}}ddl
}|j                  |      }d}||v}|st        j                  d|fd||f      t        j                  |      dt        j                         v st        j                  |      rt        j                  |      nddz  }	dd|	iz  }
t        t        j                  |
            dx}}y)u5   run_evals.py에 anthropic import가 없는지 확인.r   N	anthropicnot in)z4%(py1)s not in %(py6)s
{%(py6)s = %(py3)s(%(py4)s)
}dirr   )r   r;   r   r   zassert %(py8)sr<   zimport anthropicz%(py1)s not in %(py3)ssourcerW   rN   rO   )r   rm   r"   r#   r$   rB   rC   rD   r%   r&   inspect	getsource)r'   r   r*   rE   r,   r.   rG   rp   ro   rQ   rR   s              r/   "test_no_anthropic_import_in_modulez8TestNoAnthropicImport.test_no_anthropic_import_in_module~   s    0#i.0{.0000{.000{000000#000#000000i000i000.0000000 	""9-!/!////!///!////////////////rf   c                 v   ddl }ddlm} |j                  |      }d}||v}|st	        j
                  d|fd||f      t	        j                  |      dt        j                         v st	        j                  |      rt	        j                  |      nddz  }d	d
|iz  }t        t	        j                  |            dx}}g }d}|j                  }	 |	       }
||
v}|}|sd}|j                  } |       }||v }|}|st	        j
                  d|fd||
f      t	        j                  |      dt        j                         v st	        j                  |      rt	        j                  |      ndt	        j                  |	      t	        j                  |
      dz  }dd|iz  }|j                  |       |st	        j
                  dfdf      t	        j                  |      dt        j                         v st	        j                  |      rt	        j                  |      ndt	        j                        t	        j                  |      dz  }dd|iz  }|j                  |       t	        j                  |d      i z  }dd|iz  }t        t	        j                  |            dx}x}x}x}x}	x}
x}x}x}}y)u:   call_llm()에서 API Key를 사용하지 않는지 확인.r   Nr   ANTHROPIC_API_KEYrk   rn   ro   rW   rN   rO   api_keyz
no api key)zH%(py3)s not in %(py9)s
{%(py9)s = %(py7)s
{%(py7)s = %(py5)s.lower
}()
})r;   rO   py7py9z%(py11)spy11r   )zJ%(py14)s in %(py20)s
{%(py20)s = %(py18)s
{%(py18)s = %(py16)s.lower
}()
})py14py16py18py20z%(py22)spy22r   zassert %(py25)spy25)rp   r   r	   rq   r"   r#   r$   rB   rC   rD   r%   r&   lowerappend_format_boolop)r'   rp   r	   ro   r*   r,   rQ   rR   rP   @py_assert6@py_assert8@py_assert4@py_assert13@py_assert17@py_assert19@py_assert15@py_format10@py_format12@py_format21@py_format23@py_format24@py_format26s                         r/   test_no_api_key_usagez+TestNoAnthropicImport.test_no_api_key_usage   s   &""8,"0"&0000"&000"000000&000&0000000PyPPPy.P,P&,,P,.P,.2PPPPPyPPPyPPPPPPPPPPPPPPPPPPPPPP,.PPP,PPPPPP&PPP&PPP,PPP.PPPPPPPPPPPPPPPrf   r_   )ra   rb   rc   rd   rr   r   re   rf   r/   rh   rh   {   s    ,0Qrf   rh   c                   (    e Zd ZdZddZddZddZy)TestBackwardCompatibilityu   후방 호환성 테스트.Nc                    ddl m} t        d      5 }t        ddd      |_         |d      }t        |t              }|sdd	t        j                         v st        j                  t
              rt        j                  t
              nd	d
t        j                         v st        j                  |      rt        j                  |      nd
dt        j                         v st        j                  t              rt        j                  t              ndt        j                  |      dz  }t        t        j                  |            d}ddd       y# 1 sw Y   yxY w)uK   call_llm()이 문자열을 반환하는지 확인 (인터페이스 유지).r   r   r
   u   테스트 응답r   r   r3   z5assert %(py4)s
{%(py4)s = %(py0)s(%(py1)s, %(py2)s)
}
isinstancer)   r9   )rM   r   py2r   N)r   r	   r   r   r   r   r9   rB   rC   r"   rD   r$   r%   r&   )r'   r	   r(   r)   r+   r-   s         r/   test_call_llm_returns_stringz6TestBackwardCompatibility.test_call_llm_returns_string   s    &-. 		+($-)%H! n-Ffc********:***:******f***f******c***c**********		+ 		+ 		+s   D<EE c                    ddl }ddlm} |j                  |      }t	        |j
                  j                               }dg}||k(  }|st        j                  d|fd||f      dt        j                         v st        j                  |      rt        j                  |      ndt        j                  |      dz  }d	d
|iz  }t        t        j                  |            dx}}y)uC   call_llm() 함수 시그니처가 변경되지 않았는지 확인.r   Nr   promptr   rK   paramsrL   rN   rO   )rp   r   r	   	signaturelist
parameterskeysr"   r#   rB   rC   rD   r$   r%   r&   )	r'   rp   r	   sigr   r,   rP   rQ   rR   s	            r/   !test_call_llm_signature_unchangedz;TestBackwardCompatibility.test_call_llm_signature_unchanged   s    &)cnn))+, ##v####v######v###v##########rf   c                      y)u5   기존 테스트가 여전히 통과하는지 확인.Nre   )r'   s    r/   test_existing_tests_still_passz8TestBackwardCompatibility.test_existing_tests_still_pass   s     	rf   r_   )ra   rb   rc   rd   r   r   r   re   rf   r/   r   r      s    %+
$rf   r   )rd   builtinsrB   _pytest.assertion.rewrite	assertionrewriter"   
subprocessunittest.mockr   r   r>   r   rh   r   re   rf   r/   <module>r      s=   
    * l6 l6^Q Q6" "rf   