
    Iij                        U d Z ddlZddlmc mZ ddlZddlm	Z	 ddl
mZ ddlmZ ddlmZ ddlZej"                  j%                  d e e	e      j*                  j*                               dZee   ed<   	 ddlZedu Zej6                  j9                  ed	      Z G d
 d      Z G d d      Z G d d      Z  G d d      Z! G d d      Z" G d d      Z# G d d      Z$ G d d      Z% G d d      Z&y# e$ r Y w xY w)u   
TDD RED 단계: test_crawl_utils.py
crawl_utils.py 모듈에 대한 테스트 스위트
(crawl_utils.py는 아직 구현되지 않음 - TDD RED 단계)
    N)Path)
ModuleType)Optional)	MagicMock_crawl_utilsuC   crawl_utils.py 미구현 (TDD RED 단계 - 토르가 구현 예정))reasonc                   F    e Zd ZdZedd       Zedd       Zedd       Zy)TestProxyRotatorRoundRobinu-   ProxyRotator의 round_robin 전략 테스트.Nc                    d}t         |u}|st        j                  d|fdt         |f      dt        j                         v st        j
                  t               rt        j                  t               ndt        j                  |      dz  }dd|iz  }t        t        j                  |            dx}}g d}t        j                  |d	
      }|j                  } |       }d}||k(  }	|	st        j                  d|	fd||f      dt        j                         v st        j
                  |      rt        j                  |      ndt        j                  |      t        j                  |      t        j                  |      dz  }
dd|
iz  }t        t        j                  |            dx}x}x}	}|j                  } |       }d}||k(  }	|	st        j                  d|	fd||f      dt        j                         v st        j
                  |      rt        j                  |      ndt        j                  |      t        j                  |      t        j                  |      dz  }
dd|
iz  }t        t        j                  |            dx}x}x}	}|j                  } |       }d}||k(  }	|	st        j                  d|	fd||f      dt        j                         v st        j
                  |      rt        j                  |      ndt        j                  |      t        j                  |      t        j                  |      dz  }
dd|
iz  }t        t        j                  |            dx}x}x}	}y)uD   round_robin 전략은 프록시를 순서대로 반환해야 한다.Nis notz%(py0)s is not %(py3)sr   py0py3assert %(py5)spy5http://proxy1:8080http://proxy2:8080http://proxy3:8080round_robinstrategyr   ==zG%(py4)s
{%(py4)s = %(py2)s
{%(py2)s = %(py0)s.get_next
}()
} == %(py7)srotatorr   py2py4py7assert %(py9)spy9r   r   r   
@pytest_ar_call_reprcompare@py_builtinslocals_should_repr_global_name	_safereprAssertionError_format_explanationProxyRotatorget_nextself@py_assert2@py_assert1@py_format4@py_format6proxiesr   @py_assert3@py_assert6@py_assert5@py_format8@py_format10s               5/home/jay/workspace/scripts/tests/test_crawl_utils.py)test_round_robin_returns_proxies_in_orderzDTestProxyRotatorRoundRobin.test_round_robin_returns_proxies_in_order(   s    $('|4''''|4''''''|'''|'''4'''''''T++GmL9!9%99!%99999!%9999999w999w999999!999%999999999!9%99!%99999!%9999999w999w999999!999%999999999!9%99!%99999!%9999999w999w999999!999%99999999    c                    d}t         |u}|st        j                  d|fdt         |f      dt        j                         v st        j
                  t               rt        j                  t               ndt        j                  |      dz  }dd|iz  }t        t        j                  |            dx}}dd	g}t        j                  |d
      }|j                          |j                          |j                  } |       }d}||k(  }	|	st        j                  d|	fd||f      dt        j                         v st        j
                  |      rt        j                  |      ndt        j                  |      t        j                  |      t        j                  |      dz  }
dd|
iz  }t        t        j                  |            dx}x}x}	}y)uK   round_robin은 마지막 프록시 이후 처음으로 순환해야 한다.Nr   r   r   r   r   r   r   r   r   r   r   r   r   r   r#   r$   r%   r0   s               r<   test_round_robin_wraps_aroundz8TestProxyRotatorRoundRobin.test_round_robin_wraps_around3   s    $('|4''''|4''''''|'''|'''4'''''''')=>++GmL9!9%99!%99999!%9999999w999w999999!999%99999999r>   c                 2   d}t         |u}|st        j                  d|fdt         |f      dt        j                         v st        j
                  t               rt        j                  t               ndt        j                  |      dz  }dd|iz  }t        t        j                  |            dx}}t        j                  dgd	
      }|j                  } |       }d}||k(  }|st        j                  d|fd||f      dt        j                         v st        j
                  |      rt        j                  |      ndt        j                  |      t        j                  |      t        j                  |      dz  }	dd|	iz  }
t        t        j                  |
            dx}x}x}}|j                  } |       }d}||k(  }|st        j                  d|fd||f      dt        j                         v st        j
                  |      rt        j                  |      ndt        j                  |      t        j                  |      t        j                  |      dz  }	dd|	iz  }
t        t        j                  |
            dx}x}x}}y)uQ   프록시가 1개일 때 round_robin은 계속 같은 값을 반환해야 한다.Nr   r   r   r   r   r   zhttp://only-proxy:8080r   r   r   r   r   r   r#   r$   r%   r1   r2   r3   r4   r5   r   r7   r8   r9   r:   r;   s              r<   #test_round_robin_single_proxy_loopsz>TestProxyRotatorRoundRobin.test_round_robin_single_proxy_loops>   s    $('|4''''|4''''''|'''|'''4'''''''++-E,FQ^_=!=%==!%=====!%=======w===w======!===%=========!=%==!%=====!%=======w===w======!===%========r>   returnN)__name__
__module____qualname____doc___skip_if_missingr=   r@   rC    r>   r<   r
   r
   %   s?    7: : : : > >r>   r
   c                   4    e Zd ZdZedd       Zedd       Zy)TestProxyRotatorRandomu(   ProxyRotator의 random 전략 테스트.Nc                    d}t         |u}|st        j                  d|fdt         |f      dt        j                         v st        j
                  t               rt        j                  t               ndt        j                  |      dz  }dd|iz  }t        t        j                  |            dx}}g d}t        j                  |d	
      }t        d      D ]  }|j                         }||v }|st        j                  d|fd||f      dt        j                         v st        j
                  |      rt        j                  |      nddt        j                         v st        j
                  |      rt        j                  |      nddz  }	dd|	iz  }
t        t        j                  |
            d} y)uK   random 전략은 항상 프록시 목록 중 하나를 반환해야 한다.Nr   r   r   r   r   r   r   randomr      in)z%(py0)s in %(py2)sresultr6   r   r    assert %(py4)sr!   )r   r&   r'   r(   r)   r*   r+   r,   r-   r.   ranger/   )r1   r2   r3   r4   r5   r6   r   _rS   @py_format3@py_format5s              r<   *test_random_strategy_returns_value_in_listzATestProxyRotatorRandom.test_random_strategy_returns_value_in_listP   s    $('|4''''|4''''''|'''|'''4'''''''

 ++GhGr 	%A%%'FW$$$$6W$$$$$$6$$$6$$$$$$W$$$W$$$$$$$	%r>   c           	         d}t         |u}|st        j                  d|fdt         |f      dt        j                         v st        j
                  t               rt        j                  t               ndt        j                  |      dz  }dd|iz  }t        t        j                  |            dx}}dd	g}t        j                  |d
      }t        d      D ch c]  }|j                          }}|j                  }t        |      }	 ||	      }
|
s+ddt        j                         v st        j
                  |      rt        j                  |      ndt        j                  |      dt        j                         v st        j
                  t              rt        j                  t              nddt        j                         v st        j
                  |      rt        j                  |      ndt        j                  |	      t        j                  |
      dz  }t        t        j                  |            dx}x}	}
yc c}w )uH   random 전략 호출 결과가 항상 프록시 풀에 속해야 한다.Nr   r   r   r   r   r   z
http://a:1z
http://b:2rO   r   2   zhassert %(py8)s
{%(py8)s = %(py2)s
{%(py2)s = %(py0)s.issubset
}(%(py6)s
{%(py6)s = %(py3)s(%(py4)s)
})
}resultssetr6   )r   r    r   r!   py6py8)r   r&   r'   r(   r)   r*   r+   r,   r-   r.   rV   r/   issubsetr^   )r1   r2   r3   r4   r5   r6   r   rW   r]   r9   @py_assert7@py_format9s               r<   1test_random_strategy_multiple_calls_are_from_poolzHTestProxyRotatorRandom.test_random_strategy_multiple_calls_are_from_pool_   sP    $('|4''''|4''''''|'''|'''4'''''''.++GhG/4Ry9!7##%99-G---------w---w------------------G---G------------- :s   IrD   )rF   rG   rH   rI   rJ   rZ   rd   rK   r>   r<   rM   rM   M   s+    2% % . .r>   rM   c                   F    e Zd ZdZedd       Zedd       Zedd       Zy)TestProxyRotatorEmptyListu6   ProxyRotator 빈 리스트 엣지 케이스 테스트.Nc                 `   d}t         |u}|st        j                  d|fdt         |f      dt        j                         v st        j
                  t               rt        j                  t               ndt        j                  |      dz  }dd|iz  }t        t        j                  |            dx}}t        j                  g       }|j                  } |       }d}||u }|st        j                  d|fd	||f      d
t        j                         v st        j
                  |      rt        j                  |      nd
t        j                  |      t        j                  |      t        j                  |      dz  }	dd|	iz  }
t        t        j                  |
            dx}x}x}}y)uH   빈 프록시 리스트에서 get_next()는 None을 반환해야 한다.Nr   r   r   r   r   r   iszG%(py4)s
{%(py4)s = %(py2)s
{%(py2)s = %(py0)s.get_next
}()
} is %(py7)sr   r   r#   r$   r%   rB   s              r<   %test_empty_list_get_next_returns_nonez?TestProxyRotatorEmptyList.test_empty_list_get_next_returns_noner   s     $('|4''''|4''''''|'''|'''4'''''''++B/)!)T)!T))))!T))))))w)))w))))))!)))T)))))))r>   c                    d}t         |u}|st        j                  d|fdt         |f      dt        j                         v st        j
                  t               rt        j                  t               ndt        j                  |      dz  }dd|iz  }t        t        j                  |            dx}}t        j                  g       }t        |      }d}||k(  }|st        j                  d	|fd
||f      dt        j                         v st        j
                  t              rt        j                  t              nddt        j                         v st        j
                  |      rt        j                  |      ndt        j                  |      t        j                  |      dz  }dd|iz  }	t        t        j                  |	            dx}x}}y)u6   빈 프록시 리스트의 len()은 0이어야 한다.Nr   r   r   r   r   r   r   r   z0%(py3)s
{%(py3)s = %(py0)s(%(py1)s)
} == %(py6)slenr   r   py1r   r_   assert %(py8)sr`   r   r&   r'   r(   r)   r*   r+   r,   r-   r.   rn   
r1   r2   r3   r4   r5   r   r9   @py_assert4@py_format7rc   s
             r<   test_empty_list_len_is_zeroz5TestProxyRotatorEmptyList.test_empty_list_len_is_zeroz   s     $('|4''''|4''''''|'''|'''4'''''''++B/7| q |q    |q      s   s      7   7   |   q       r>   c                 d   d}t         |u}|st        j                  d|fdt         |f      dt        j                         v st        j
                  t               rt        j                  t               ndt        j                  |      dz  }dd|iz  }t        t        j                  |            dx}}t        j                  g d	      }|j                  } |       }d}||u }|st        j                  d
|fd||f      dt        j                         v st        j
                  |      rt        j                  |      ndt        j                  |      t        j                  |      t        j                  |      dz  }	dd|	iz  }
t        t        j                  |
            dx}x}x}}y)uG   random 전략에서도 빈 리스트이면 None을 반환해야 한다.Nr   r   r   r   r   r   rO   r   rh   rj   r   r   r#   r$   r%   rB   s              r<   ,test_empty_list_random_strategy_returns_nonezFTestProxyRotatorEmptyList.test_empty_list_random_strategy_returns_none   s     $('|4''''|4''''''|'''|'''4'''''''++BB)!)T)!T))))!T))))))w)))w))))))!)))T)))))))r>   rD   )rF   rG   rH   rI   rJ   rk   rv   rx   rK   r>   r<   rf   rf   o   s?    @* * ! ! * *r>   rf   c                   j    e Zd ZdZedd       Zedd       Zedd       Zedd       Zedd       Z	y)	TestProxyRotatorRemoveAndLenu-   ProxyRotator remove()와 __len__() 테스트.Nc                    d}t         |u}|st        j                  d|fdt         |f      dt        j                         v st        j
                  t               rt        j                  t               ndt        j                  |      dz  }dd|iz  }t        t        j                  |            dx}}g d}t        j                  |      }t        |      }d	}||k(  }|st        j                  d
|fd||f      dt        j                         v st        j
                  t              rt        j                  t              nddt        j                         v st        j
                  |      rt        j                  |      ndt        j                  |      t        j                  |      dz  }	dd|	iz  }
t        t        j                  |
            dx}x}}|j                  d       t        |      }d}||k(  }|st        j                  d
|fd||f      dt        j                         v st        j
                  t              rt        j                  t              nddt        j                         v st        j
                  |      rt        j                  |      ndt        j                  |      t        j                  |      dz  }	dd|	iz  }
t        t        j                  |
            dx}x}}y)u,   remove() 후 len()이 1 감소해야 한다.Nr   r   r   r   r   r   r      r   rm   rn   r   ro   rq   r`   r      )r   r&   r'   r(   r)   r*   r+   r,   r-   r.   rn   remover1   r2   r3   r4   r5   r6   r   r9   rt   ru   rc   s              r<   test_remove_decreases_lenz6TestProxyRotatorRemoveAndLen.test_remove_decreases_len   s    $('|4''''|4''''''|'''|'''4'''''''T++G47| q |q    |q      s   s      7   7   |   q       +,7| q |q    |q      s   s      7   7   |   q       r>   c                    d}t         |u}|st        j                  d|fdt         |f      dt        j                         v st        j
                  t               rt        j                  t               ndt        j                  |      dz  }dd|iz  }t        t        j                  |            dx}}dd	g}t        j                  |d
      }|j                  d	       t        d      D ]  }|j                  } |       }d	}	||	k7  }
|
st        j                  d|
fd||	f      dt        j                         v st        j
                  |      rt        j                  |      ndt        j                  |      t        j                  |      t        j                  |	      dz  }dd|iz  }t        t        j                  |            dx}x}x}
}	 y)uO   remove()된 프록시는 이후 get_next()에서 반환되지 않아야 한다.Nr   r   r   r   r   r   r   r   r   r   
   )!=)zG%(py4)s
{%(py4)s = %(py2)s
{%(py2)s = %(py0)s.get_next
}()
} != %(py7)sr   r   r#   r$   )r   r&   r'   r(   r)   r*   r+   r,   r-   r.   r~   rV   r/   )r1   r2   r3   r4   r5   r6   r   rW   r7   r8   r9   r:   r;   s                r<   "test_remove_excluded_from_rotationz?TestProxyRotatorRemoveAndLen.test_remove_excluded_from_rotation   s%    $('|4''''|4''''''|'''|'''4'''''''')=>++GmL+,r 	>A##=#%=)==%)=====%)=======7===7===#===%===)========	>r>   c                    d}t         |u}|st        j                  d|fdt         |f      dt        j                         v st        j
                  t               rt        j                  t               ndt        j                  |      dz  }dd|iz  }t        t        j                  |            dx}}dg}t        j                  |      }|j                  d       |j                  } |       }d}||u }	|	st        j                  d	|	fd
||f      dt        j                         v st        j
                  |      rt        j                  |      ndt        j                  |      t        j                  |      t        j                  |      dz  }
dd|
iz  }t        t        j                  |            dx}x}x}	}t        |      }d}	||	k(  }|st        j                  d|fd||	f      dt        j                         v st        j
                  t              rt        j                  t              nddt        j                         v st        j
                  |      rt        j                  |      ndt        j                  |      t        j                  |	      dz  }dd|iz  }t        t        j                  |            dx}x}}	y)uK   모든 프록시를 제거하면 get_next()는 None을 반환해야 한다.Nr   r   r   r   r   r   r   rh   rj   r   r   r#   r$   r   r   rm   rn   ro   rq   r`   )r   r&   r'   r(   r)   r*   r+   r,   r-   r.   r~   r/   rn   )r1   r2   r3   r4   r5   r6   r   r7   r8   r9   r:   r;   rt   ru   rc   s                  r<   !test_remove_all_proxies_then_nonez>TestProxyRotatorRemoveAndLen.test_remove_all_proxies_then_none   s    $('|4''''|4''''''|'''|'''4''''''''(++G4+,)!)T)!T))))!T))))))w)))w))))))!)))T)))))))7| q |q    |q      s   s      7   7   |   q       r>   c                    d}t         |u}|st        j                  d|fdt         |f      dt        j                         v st        j
                  t               rt        j                  t               ndt        j                  |      dz  }dd|iz  }t        t        j                  |            dx}}t        j                  dg      }|j                  d	       t        |      }d
}||k(  }|st        j                  d|fd||f      dt        j                         v st        j
                  t              rt        j                  t              nddt        j                         v st        j
                  |      rt        j                  |      ndt        j                  |      t        j                  |      dz  }dd|iz  }	t        t        j                  |	            dx}x}}y)uO   존재하지 않는 프록시 remove()는 예외 없이 처리되어야 한다.Nr   r   r   r   r   r   r   zhttp://nonexistent:9999   r   rm   rn   r   ro   rq   r`   )r   r&   r'   r(   r)   r*   r+   r,   r-   r.   r~   rn   rs   s
             r<   %test_remove_nonexistent_proxy_is_safezBTestProxyRotatorRemoveAndLen.test_remove_nonexistent_proxy_is_safe   s    $('|4''''|4''''''|'''|'''4'''''''++-A,BC 	017| q |q    |q      s   s      7   7   |   q       r>   c                    d}t         |u}|st        j                  d|fdt         |f      dt        j                         v st        j
                  t               rt        j                  t               ndt        j                  |      dz  }dd|iz  }t        t        j                  |            dx}}g d}t        j                  |      }t        |      }d	}||k(  }|st        j                  d
|fd||f      dt        j                         v st        j
                  t              rt        j                  t              nddt        j                         v st        j
                  |      rt        j                  |      ndt        j                  |      t        j                  |      dz  }	dd|	iz  }
t        t        j                  |
            dx}x}}y)uB   len()이 초기 프록시 리스트 개수와 일치해야 한다.Nr   r   r   r   r   r   )zhttp://p1:1zhttp://p2:2zhttp://p3:3zhttp://p4:4   r   rm   rn   r   ro   rq   r`   rr   r   s              r<   test_len_reflects_initial_countz<TestProxyRotatorRemoveAndLen.test_len_reflects_initial_count   s     $('|4''''|4''''''|'''|'''4'''''''N++G47| q |q    |q      s   s      7   7   |   q       r>   rD   )
rF   rG   rH   rI   rJ   r   r   r   r   r   rK   r>   r<   rz   rz      sg    7! ! 	> 	> 	! 	! ! ! ! !r>   rz   c                       e Zd ZdZed
d       Zed
d       Zed
d       Zed
d       Zed
d       Z	ed
d       Z
ed
d	       Zy)TestIsProxyErroru"   is_proxy_error() 함수 테스트.Nc                    d}t         |u}|st        j                  d|fdt         |f      dt        j                         v st        j
                  t               rt        j                  t               ndt        j                  |      dz  }dd|iz  }t        t        j                  |            dx}}t        d      }t         j                  } ||      }d	}||u }|s
t        j                  d
|fd||f      dt        j                         v st        j
                  t               rt        j                  t               ndt        j                  |      dt        j                         v st        j
                  |      rt        j                  |      ndt        j                  |      t        j                  |      dz  }	dd|	iz  }
t        t        j                  |
            dx}x}x}}y)u/   ConnectionError는 True를 반환해야 한다.Nr   r   r   r   r   r   zConnection refusedTrh   zT%(py5)s
{%(py5)s = %(py2)s
{%(py2)s = %(py0)s.is_proxy_error
}(%(py3)s)
} is %(py8)serrr   r    r   r   r`   assert %(py10)spy10)r   r&   r'   r(   r)   r*   r+   r,   r-   ConnectionErroris_proxy_errorr1   r2   r3   r4   r5   r   rt   rb   r8   rc   @py_format11s              r<   "test_connection_error_returns_truez3TestIsProxyError.test_connection_error_returns_true   s    $('|4''''|4''''''|'''|'''4'''''''23**7*3/747/47777/4777777|777|777*77777737773777/77747777777r>   c                    d}t         |u}|st        j                  d|fdt         |f      dt        j                         v st        j
                  t               rt        j                  t               ndt        j                  |      dz  }dd|iz  }t        t        j                  |            dx}}t        d      }t         j                  } ||      }d	}||u }|s
t        j                  d
|fd||f      dt        j                         v st        j
                  t               rt        j                  t               ndt        j                  |      dt        j                         v st        j
                  |      rt        j                  |      ndt        j                  |      t        j                  |      dz  }	dd|	iz  }
t        t        j                  |
            dx}x}x}}y)u,   TimeoutError는 True를 반환해야 한다.Nr   r   r   r   r   r   zRequest timed outTrh   r   r   r   r   r   )r   r&   r'   r(   r)   r*   r+   r,   r-   TimeoutErrorr   r   s              r<   test_timeout_error_returns_truez0TestIsProxyError.test_timeout_error_returns_true   s    $('|4''''|4''''''|'''|'''4'''''''./**7*3/747/47777/4777777|777|777*77777737773777/77747777777r>   c                    d}t         |u}|st        j                  d|fdt         |f      dt        j                         v st        j
                  t               rt        j                  t               ndt        j                  |      dz  }dd|iz  }t        t        j                  |            dx}}t        d      }t         j                  } ||      }d	}||u }|s
t        j                  d
|fd||f      dt        j                         v st        j
                  t               rt        j                  t               ndt        j                  |      dt        j                         v st        j
                  |      rt        j                  |      ndt        j                  |      t        j                  |      dz  }	dd|	iz  }
t        t        j                  |
            dx}x}x}}y)u2   일반 ValueError는 False를 반환해야 한다.Nr   r   r   r   r   r   zInvalid valueFrh   r   r   r   r   r   )r   r&   r'   r(   r)   r*   r+   r,   r-   
ValueErrorr   r   s              r<   test_value_error_returns_falsez/TestIsProxyError.test_value_error_returns_false   s    $('|4''''|4''''''|'''|'''4''''''')**8*3/858/58888/5888888|888|888*88888838883888/88858888888r>   c                    d}t         |u}|st        j                  d|fdt         |f      dt        j                         v st        j
                  t               rt        j                  t               ndt        j                  |      dz  }dd|iz  }t        t        j                  |            dx}}t        d      }t         j                  } ||      }d	}||u }|s
t        j                  d
|fd||f      dt        j                         v st        j
                  t               rt        j                  t               ndt        j                  |      dt        j                         v st        j
                  |      rt        j                  |      ndt        j                  |      t        j                  |      dz  }	dd|	iz  }
t        t        j                  |
            dx}x}x}}y)u1   일반 Exception은 False를 반환해야 한다.Nr   r   r   r   r   r   zSome generic errorFrh   r   r   r   r   r   )r   r&   r'   r(   r)   r*   r+   r,   r-   	Exceptionr   r   s              r<   $test_generic_exception_returns_falsez5TestIsProxyError.test_generic_exception_returns_false   s    $('|4''''|4''''''|'''|'''4''''''',-**8*3/858/58888/5888888|888|888*88888838883888/88858888888r>   c                    d}t         |u}|st        j                  d|fdt         |f      dt        j                         v st        j
                  t               rt        j                  t               ndt        j                  |      dz  }dd|iz  }t        t        j                  |            dx}}t        d      }t         j                  } ||      }d	}||u }|s
t        j                  d
|fd||f      dt        j                         v st        j
                  t               rt        j                  t               ndt        j                  |      dt        j                         v st        j
                  |      rt        j                  |      ndt        j                  |      t        j                  |      dz  }	dd|	iz  }
t        t        j                  |
            dx}x}x}}y)uN   OSError(ConnectionError의 부모)는 프록시 오류로 판별해야 한다.Nr   r   r   r   r   r   zNetwork unreachableTrh   r   r   r   r   r   )r   r&   r'   r(   r)   r*   r+   r,   r-   OSErrorr   r   s              r<   test_os_error_returns_truez+TestIsProxyError.test_os_error_returns_true   s    $('|4''''|4''''''|'''|'''4'''''''+,**7*3/747/47777/4777777|777|777*77777737773777/77747777777r>   c                 &   d}t         |u}|st        j                  d|fdt         |f      dt        j                         v st        j
                  t               rt        j                  t               ndt        j                  |      dz  }dd|iz  }t        t        j                  |            dx}}t        d      }t               }d	|_        t        |d
|       t         j                  } ||      }d}||u }	|	s
t        j                  d|	fd||f      dt        j                         v st        j
                  t               rt        j                  t               ndt        j                  |      dt        j                         v st        j
                  |      rt        j                  |      ndt        j                  |      t        j                  |      dz  }
dd|
iz  }t        t        j                  |            dx}x}x}	}y)uA   404 상태를 가진 HTTP 오류는 False를 반환해야 한다.Nr   r   r   r   r   r   z404 Not Foundi  responseFrh   r   http_errr   r   r   r   r&   r'   r(   r)   r*   r+   r,   r-   r   r   status_codesetattrr   r1   r2   r3   r4   r5   r   	mock_resprt   rb   r8   rc   r   s               r<   !test_http_error_404_returns_falsez2TestIsProxyError.test_http_error_404_returns_false   s*    $('|4''''|4''''''|'''|'''4'''''''_-K	 #	*i0**=*84==4====4======|===|===*======8===8===4==========r>   c                 &   d}t         |u}|st        j                  d|fdt         |f      dt        j                         v st        j
                  t               rt        j                  t               ndt        j                  |      dz  }dd|iz  }t        t        j                  |            dx}}t        d      }t               }d	|_        t        |d
|       t         j                  } ||      }d}||u }	|	s
t        j                  d|	fd||f      dt        j                         v st        j
                  t               rt        j                  t               ndt        j                  |      dt        j                         v st        j
                  |      rt        j                  |      ndt        j                  |      t        j                  |      dz  }
dd|
iz  }t        t        j                  |            dx}x}x}	}y)uS   500 서버 오류는 프록시 오류가 아니므로 False를 반환해야 한다.Nr   r   r   r   r   r   z500 Internal Server Errori  r   Frh   r   r   r   r   r   r   r   s               r<   !test_http_error_500_returns_falsez2TestIsProxyError.test_http_error_500_returns_false  s+    $('|4''''|4''''''|'''|'''4'''''''89K	 #	*i0**=*84==4====4======|===|===*======8===8===4==========r>   rD   )rF   rG   rH   rI   rJ   r   r   r   r   r   r   r   rK   r>   r<   r   r      s    ,8 8 8 8 9 9 9 9 8 8 > > > >r>   r   c                   j    e Zd ZdZedd       Zedd       Zedd       Zedd       Zedd       Z	y)	TestFetchWithRetryu$   fetch_with_retry() 함수 테스트.Nc                    d}t         |u}|st        j                  d|fdt         |f      dt        j                         v st        j
                  t               rt        j                  t               ndt        j                  |      dz  }dd|iz  }t        t        j                  |            dx}}t               }d|_
        t               }||j                  _        t        |	      }t        j                  d
d|      }||k(  }|st        j                  d|fd||f      dt        j                         v st        j
                  |      rt        j                  |      nddt        j                         v st        j
                  |      rt        j                  |      nddz  }	dd|	iz  }
t        t        j                  |
            d}|j                  }|j                  }d}||k(  }|st        j                  d|fd||f      dt        j                         v st        j
                  |      rt        j                  |      ndt        j                  |      t        j                  |      t        j                  |      dz  }dd|iz  }t        t        j                  |            dx}x}x}}y)uE   첫 번째 시도에서 성공하면 Response를 반환해야 한다.Nr   r   r   r   r   r      return_valuehttps://example.comr|   max_retriesfetcher_classr   z%(py0)s == %(py2)srS   mock_responserT   rU   r!   r   zM%(py4)s
{%(py4)s = %(py2)s
{%(py2)s = %(py0)s.fetch
}.call_count
} == %(py7)smock_fetcher_instancer   r#   r$   )r   r&   r'   r(   r)   r*   r+   r,   r-   r   r   fetchr   fetch_with_retry
call_countr1   r2   r3   r4   r5   r   r   MockFetcherClassrS   rX   rY   r7   r8   r9   r:   r;   s                   r<   test_success_on_first_attemptz0TestFetchWithRetry.test_success_on_first_attempt  s    $('|4''''|4''''''|'''|'''4'''''''!$'! )3@##0$2GH..!*
 &&&&v&&&&&&v&&&v&&&&&&&&&&&&&&&&$**:*55::5::::5::::::$:::$:::*:::5::::::::::r>   c                    d}t         |u}|st        j                  d|fdt         |f      dt        j                         v st        j
                  t               rt        j                  t               ndt        j                  |      dz  }dd|iz  }t        t        j                  |            dx}}t               }d|_
        t               }t        d	      |g|j                  _        t        |
      }t        j                  dd|      }||k(  }|st        j                  d|fd||f      dt        j                         v st        j
                  |      rt        j                  |      nddt        j                         v st        j
                  |      rt        j                  |      nddz  }	dd|	iz  }
t        t        j                  |
            d}|j                  }|j                  }d}||k(  }|st        j                  d|fd||f      dt        j                         v st        j
                  |      rt        j                  |      ndt        j                  |      t        j                  |      t        j                  |      dz  }dd|iz  }t        t        j                  |            dx}x}x}}y)uO   연결 오류 후 재시도에서 성공하면 Response를 반환해야 한다.Nr   r   r   r   r   r   r   z
proxy downr   r   r|   r   r   r   rS   r   rT   rU   r!   r}   r   r   r   r#   r$   )r   r&   r'   r(   r)   r*   r+   r,   r-   r   r   r   r   side_effectr   r   r   s                   r<   .test_retries_on_connection_error_then_succeedszATestFetchWithRetry.test_retries_on_connection_error_then_succeeds/  s    $('|4''''|4''''''|'''|'''4'''''''!$'! )L)3
##/
 %2GH..!*
 &&&&v&&&&&&v&&&v&&&&&&&&&&&&&&&&$**:*55::5::::5::::::$:::$:::*:::5::::::::::r>   c                 .   d}t         |u}|st        j                  d|fdt         |f      dt        j                         v st        j
                  t               rt        j                  t               ndt        j                  |      dz  }dd|iz  }t        t        j                  |            dx}}t               }t        d      |j                  _        t        |	      }t        j                  t              5  t        j                  d
d|       ddd       |j                  }|j                   }d}||k(  }	|	st        j                  d|	fd||f      dt        j                         v st        j
                  |      rt        j                  |      ndt        j                  |      t        j                  |      t        j                  |      dz  }
dd|
iz  }t        t        j                  |            dx}x}x}	}y# 1 sw Y   xY w)uC   모든 재시도 실패 시 마지막 예외를 raise해야 한다.Nr   r   r   r   r   r   zalways failsr   r   r|   r   r   r   r   r   r#   r$   )r   r&   r'   r(   r)   r*   r+   r,   r-   r   r   r   r   pytestraisesr   r   )r1   r2   r3   r4   r5   r   r   r7   r8   r9   r:   r;   s               r<   1test_raises_last_exception_after_all_retries_failzDTestFetchWithRetry.test_raises_last_exception_after_all_retries_failG  sK    $('|4''''|4''''''|'''|'''4''''''' )2A.2Q##/$2GH]]?+ 	))%.	 %**:*55::5::::5::::::$:::$:::*:::5::::::::::	 	s   ?HHc                 0   d}t         |u}|st        j                  d|fdt         |f      dt        j                         v st        j
                  t               rt        j                  t               ndt        j                  |      dz  }dd|iz  }t        t        j                  |            dx}}t               }d|_
        t               }t        d	      |g|j                  _        t        |
      }t               }d|j                  _        t        j                   dd||       |j                  }|j"                  }	d}
|	|
k\  }|st        j                  d|fd|	|
f      dt        j                         v st        j
                  |      rt        j                  |      ndt        j                  |      t        j                  |	      t        j                  |
      dz  }dd|iz  }t        t        j                  |            dx}x}	x}}
y)u@   재시도 시 ProxyRotator.get_next()가 호출되어야 한다.Nr   r   r   r   r   r   r   zfirst proxy downr   zhttp://new-proxy:8080r   r|   r   proxy_rotatorr   r   )>=)zP%(py4)s
{%(py4)s = %(py2)s
{%(py2)s = %(py0)s.get_next
}.call_count
} >= %(py7)smock_rotatorr   r#   r$   )r   r&   r'   r(   r)   r*   r+   r,   r-   r   r   r   r   r   r/   r   r   r   )r1   r2   r3   r4   r5   r   r   r   r   r7   r8   r9   r:   r;   s                 r<    test_proxy_rotator_used_on_retryz3TestFetchWithRetry.test_proxy_rotator_used_on_retryY  sZ    $('|4''''|4''''''|'''|'''4'''''''!$'! )./3
##/
 %2GH {-D*%%!&*		
 $$4$//414/14444/1444444|444|444$444/44414444444r>   c                    d}t         |u}|st        j                  d|fdt         |f      dt        j                         v st        j
                  t               rt        j                  t               ndt        j                  |      dz  }dd|iz  }t        t        j                  |            dx}}t               }d|_
        t               }t        d	      |g|j                  _        t        |
      }t               }d}	|	|j                  _        t        j                   dd||       |j"                  j%                          y)uU   프록시 오류 발생 시 해당 프록시가 rotator에서 제거되어야 한다.Nr   r   r   r   r   r   r   zproxy errorr   zhttp://bad-proxy:8080r   r|   r   )r   r&   r'   r(   r)   r*   r+   r,   r-   r   r   r   r   r   r/   r   r   r~   assert_called)
r1   r2   r3   r4   r5   r   r   r   r   	bad_proxys
             r<   !test_proxy_removed_on_proxy_errorz4TestFetchWithRetry.test_proxy_removed_on_proxy_errort  s     $('|4''''|4''''''|'''|'''4'''''''!$'! )M*3
##/
 %2GH {+	-6*%%!&*		
 	))+r>   rD   )
rF   rG   rH   rI   rJ   r   r   r   r   r   rK   r>   r<   r   r     sg    .; ;( ; ;. ; ;" 5 54 , ,r>   r   c                       e Zd ZdZedd       Zedd       Zedd       Zedd       Zedd       Z	edd       Z
edd	       Zedd
       Zy)TestGetResourceBlockTypesu,   get_resource_block_types() 함수 테스트.Nc                 J
   d}t         |u}|st        j                  d|fdt         |f      dt        j                         v st        j
                  t               rt        j                  t               ndt        j                  |      dz  }dd|iz  }t        t        j                  |            dx}}t        j                  d      }t        |t              }|sd	d
t        j                         v st        j
                  t              rt        j                  t              nd
dt        j                         v st        j
                  |      rt        j                  |      nddt        j                         v st        j
                  t              rt        j                  t              ndt        j                  |      dz  }t        t        j                  |            d}d}||v }|st        j                  d|fd||f      t        j                  |      dt        j                         v st        j
                  |      rt        j                  |      nddz  }dd|iz  }t        t        j                  |            dx}}d}||v }|st        j                  d|fd||f      t        j                  |      dt        j                         v st        j
                  |      rt        j                  |      nddz  }dd|iz  }t        t        j                  |            dx}}d}||v }|st        j                  d|fd||f      t        j                  |      dt        j                         v st        j
                  |      rt        j                  |      nddz  }dd|iz  }t        t        j                  |            dx}}d}||v }|st        j                  d|fd||f      t        j                  |      dt        j                         v st        j
                  |      rt        j                  |      nddz  }dd|iz  }t        t        j                  |            dx}}d}||v }|st        j                  d|fd||f      t        j                  |      dt        j                         v st        j
                  |      rt        j                  |      nddz  }dd|iz  }t        t        j                  |            dx}}y)uR   default 프리셋은 image, font, media, stylesheet, other를 포함해야 한다.Nr   r   r   r   r   r   default5assert %(py4)s
{%(py4)s = %(py0)s(%(py1)s, %(py2)s)
}
isinstancerS   r^   r   rp   r    r!   imagerQ   z%(py1)s in %(py3)srp   r   fontmedia
stylesheetotherr   r&   r'   r(   r)   r*   r+   r,   r-   get_resource_block_typesr   r^   	r1   r2   r3   r4   r5   rS   r7   rY   @py_assert0s	            r<   *test_default_preset_returns_expected_typeszDTestGetResourceBlockTypes.test_default_preset_returns_expected_types  s    $('|4''''|4''''''|'''|'''4'''''''66yA&#&&&&&&&&z&&&z&&&&&&&&&&&&&&&&&#&&&#&&&&&&&&&& w&    w&   w      &   &       vvv w&    w&   w      &   &       %|v%%%%|v%%%|%%%%%%v%%%v%%%%%%% w&    w&   w      &   &       r>   c                    d}t         |u}|st        j                  d|fdt         |f      dt        j                         v st        j
                  t               rt        j                  t               ndt        j                  |      dz  }dd|iz  }t        t        j                  |            dx}}t        j                  d      }d	}||v}|st        j                  d
|fd||f      t        j                  |      dt        j                         v st        j
                  |      rt        j                  |      nddz  }dd|iz  }t        t        j                  |            dx}}y)uC   default 프리셋에는 websocket이 포함되지 않아야 한다.Nr   r   r   r   r   r   r   	websocketnot inz%(py1)s not in %(py3)srS   r   
r   r&   r'   r(   r)   r*   r+   r,   r-   r   r1   r2   r3   r4   r5   rS   r   s          r<   &test_default_preset_excludes_websocketz@TestGetResourceBlockTypes.test_default_preset_excludes_websocket  s     $('|4''''|4''''''|'''|'''4'''''''66yA({&(((({&((({((((((&(((&(((((((r>   c                 v   d}t         |u}|st        j                  d|fdt         |f      dt        j                         v st        j
                  t               rt        j                  t               ndt        j                  |      dz  }dd|iz  }t        t        j                  |            dx}}t        j                  d      }d	}||v }|st        j                  d
|fd||f      t        j                  |      dt        j                         v st        j
                  |      rt        j                  |      nddz  }dd|iz  }t        t        j                  |            dx}}d}||v }|st        j                  d
|fd||f      t        j                  |      dt        j                         v st        j
                  |      rt        j                  |      nddz  }dd|iz  }t        t        j                  |            dx}}d}||v }|st        j                  d
|fd||f      t        j                  |      dt        j                         v st        j
                  |      rt        j                  |      nddz  }dd|iz  }t        t        j                  |            dx}}y)u?   minimal 프리셋은 image, font, media만 포함해야 한다.Nr   r   r   r   r   r   minimalr   rQ   r   rS   r   r   r   r   r   s          r<   *test_minimal_preset_returns_expected_typeszDTestGetResourceBlockTypes.test_minimal_preset_returns_expected_types  sx    $('|4''''|4''''''|'''|'''4'''''''66yA w&    w&   w      &   &       vvv w&    w&   w      &   &       r>   c                    d}t         |u}|st        j                  d|fdt         |f      dt        j                         v st        j
                  t               rt        j                  t               ndt        j                  |      dz  }dd|iz  }t        t        j                  |            dx}}t        j                  d      }d	}||v}|st        j                  d
|fd||f      t        j                  |      dt        j                         v st        j
                  |      rt        j                  |      nddz  }dd|iz  }t        t        j                  |            dx}}y)uD   minimal 프리셋에는 stylesheet가 포함되지 않아야 한다.Nr   r   r   r   r   r   r   r   r   r   rS   r   r   r   s          r<   'test_minimal_preset_excludes_stylesheetzATestGetResourceBlockTypes.test_minimal_preset_excludes_stylesheet  s     $('|4''''|4''''''|'''|'''4'''''''66yA)|6))))|6)))|))))))6)))6)))))))r>   c                    d}t         |u}|st        j                  d|fdt         |f      dt        j                         v st        j
                  t               rt        j                  t               ndt        j                  |      dz  }dd|iz  }t        t        j                  |            dx}}t        j                  d      }t        |t              }|sd	d
t        j                         v st        j
                  t              rt        j                  t              nd
dt        j                         v st        j
                  |      rt        j                  |      nddt        j                         v st        j
                  t              rt        j                  t              ndt        j                  |      dz  }t        t        j                  |            d}d}||v }|st        j                  d|fd||f      t        j                  |      dt        j                         v st        j
                  |      rt        j                  |      nddz  }dd|iz  }t        t        j                  |            dx}}d}||v }|st        j                  d|fd||f      t        j                  |      dt        j                         v st        j
                  |      rt        j                  |      nddz  }dd|iz  }t        t        j                  |            dx}}d}||v }|st        j                  d|fd||f      t        j                  |      dt        j                         v st        j
                  |      rt        j                  |      nddz  }dd|iz  }t        t        j                  |            dx}}d}||v }|st        j                  d|fd||f      t        j                  |      dt        j                         v st        j
                  |      rt        j                  |      nddz  }dd|iz  }t        t        j                  |            dx}}d}||v }|st        j                  d|fd||f      t        j                  |      dt        j                         v st        j
                  |      rt        j                  |      nddz  }dd|iz  }t        t        j                  |            dx}}d}||v }|st        j                  d|fd||f      t        j                  |      dt        j                         v st        j
                  |      rt        j                  |      nddz  }dd|iz  }t        t        j                  |            dx}}d}||v }|st        j                  d|fd||f      t        j                  |      dt        j                         v st        j
                  |      rt        j                  |      nddz  }dd|iz  }t        t        j                  |            dx}}y)u[   aggressive 프리셋은 websocket, manifest를 포함한 모든 타입을 가져야 한다.Nr   r   r   r   r   r   
aggressiver   r   rS   r^   r   r   rQ   r   r   r   r   r   r   r   manifestr   r   s	            r<   )test_aggressive_preset_includes_all_typeszCTestGetResourceBlockTypes.test_aggressive_preset_includes_all_types  sd    $('|4''''|4''''''|'''|'''4'''''''66|D&#&&&&&&&&z&&&z&&&&&&&&&&&&&&&&&#&&&#&&&&&&&&&& w&    w&   w      &   &       vvv w&    w&   w      &   &       %|v%%%%|v%%%|%%%%%%v%%%v%%%%%%% w&    w&   w      &   &       ${f$$$${f$$${$$$$$$f$$$f$$$$$$$#zV####zV###z######V###V#######r>   c           	      
   d}t         |u}|st        j                  d|fdt         |f      dt        j                         v st        j
                  t               rt        j                  t               ndt        j                  |      dz  }dd|iz  }t        t        j                  |            dx}}t        j                  d      }t        j                  d	      }t        |      }t        |      }||kD  }|st        j                  d
|fd||f      dt        j                         v st        j
                  t              rt        j                  t              nddt        j                         v st        j
                  |      rt        j                  |      ndt        j                  |      dt        j                         v st        j
                  t              rt        j                  t              ndd	t        j                         v st        j
                  |      rt        j                  |      nd	t        j                  |      dz  }	dd|	iz  }
t        t        j                  |
            dx}x}}y)uO   aggressive 프리셋은 default보다 더 많은 타입을 포함해야 한다.Nr   r   r   r   r   r   r   r   )>)zM%(py3)s
{%(py3)s = %(py0)s(%(py1)s)
} > %(py8)s
{%(py8)s = %(py5)s(%(py6)s)
}rn   )r   rp   r   r   r_   r`   r   r   )r   r&   r'   r(   r)   r*   r+   r,   r-   r   rn   )r1   r2   r3   r4   r5   r   r   rb   rt   rc   r   s              r<   +test_aggressive_has_more_types_than_defaultzETestGetResourceBlockTypes.test_aggressive_has_more_types_than_default  sK    $('|4''''|4''''''|'''|'''4'''''''!::<H
77	B:-W-----------s---s------:---:------------------W---W----------r>   c                 ^   d}t         |u}|st        j                  d|fdt         |f      dt        j                         v st        j
                  t               rt        j                  t               ndt        j                  |      dz  }dd|iz  }t        t        j                  |            dx}}t        j                         }t        j                  d      }||k(  }|st        j                  d	|fd
||f      dt        j                         v st        j
                  |      rt        j                  |      nddt        j                         v st        j
                  |      rt        j                  |      nddz  }dd|iz  }t        t        j                  |            d}y)uX   인자 없이 호출하면 default 프리셋과 동일한 결과를 반환해야 한다.Nr   r   r   r   r   r   r   r   r   result_no_argresult_defaultrT   rU   r!   r   )	r1   r2   r3   r4   r5   r   r   rX   rY   s	            r<   test_no_arg_uses_default_presetz9TestGetResourceBlockTypes.test_no_arg_uses_default_preset  s     $('|4''''|4''''''|'''|'''4'''''''$==?%>>yI....}......}...}................r>   c                    d}t         |u}|st        j                  d|fdt         |f      dt        j                         v st        j
                  t               rt        j                  t               ndt        j                  |      dz  }dd|iz  }t        t        j                  |            dx}}dD ]Q  }t        j                  |      }t        |t              }|s$t        j                  | d	      d
z   dt        j                         v st        j
                  t              rt        j                  t              nddt        j                         v st        j
                  |      rt        j                  |      nddt        j                         v st        j
                  t              rt        j                  t              ndt        j                  |      dz  }t        t        j                  |            d}T y)u2   반환값은 반드시 set 타입이어야 한다.Nr   r   r   r   r   r   )r   r   r   u,    프리셋의 반환 타입이 set이 아님z7
>assert %(py4)s
{%(py4)s = %(py0)s(%(py1)s, %(py2)s)
}r   rS   r^   r   )r   r&   r'   r(   r)   r*   r+   r,   r-   r   r   r^   _format_assertmsg)	r1   r2   r3   r4   r5   presetrS   r7   rY   s	            r<   test_returns_set_typez/TestGetResourceBlockTypes.test_returns_set_type  s    $('|4''''|4''''''|'''|'''4''''''': 	dF!::6BFfc*c*ccvh6b,ccccccc:ccc:ccccccfcccfcccccccccccccc*cccccc	dr>   rD   )rF   rG   rH   rI   rJ   r   r   r   r   r   r   r   r  rK   r>   r<   r   r     s    6
! 
! ) ) ! ! * * $ $ . . / / d dr>   r   c                       e Zd ZdZedd       Zedd       Zedd       Zedd       Zedd       Z	edd       Z
edd	       Zedd
       Zedd       Zedd       Zy)TestHtmlToMarkdownu$   html_to_markdown() 함수 테스트.Nc                 0   d}t         |u}|st        j                  d|fdt         |f      dt        j                         v st        j
                  t               rt        j                  t               ndt        j                  |      dz  }dd|iz  }t        t        j                  |            dx}}d}t        j                  |      }d	}||v }|st        j                  d
|fd||f      t        j                  |      dt        j                         v st        j
                  |      rt        j                  |      nddz  }dd|iz  }t        t        j                  |            dx}}d}||v }|st        j                  d
|fd||f      t        j                  |      dt        j                         v st        j
                  |      rt        j                  |      nddz  }dd|iz  }t        t        j                  |            dx}}y)u<   h1 태그는 Markdown # 제목으로 변환되어야 한다.Nr   r   r   r   r   r   u   <h1>제목</h1>u   제목rQ   r   rS   r   #
r   r&   r'   r(   r)   r*   r+   r,   r-   html_to_markdownr1   r2   r3   r4   r5   htmlrS   r   s           r<   test_basic_heading_conversionz0TestHtmlToMarkdown.test_basic_heading_conversion  s#    $('|4''''|4''''''|'''|'''4''''''' ..t4!x6!!!!x6!!!x!!!!!!6!!!6!!!!!!!sf}sfsffr>   c                    d}t         |u}|st        j                  d|fdt         |f      dt        j                         v st        j
                  t               rt        j                  t               ndt        j                  |      dz  }dd|iz  }t        t        j                  |            dx}}d}t        j                  |      }d	}||v }|st        j                  d
|fd||f      t        j                  |      dt        j                         v st        j
                  |      rt        j                  |      nddz  }dd|iz  }t        t        j                  |            dx}}y)u:   p 태그의 텍스트는 결과에 포함되어야 한다.Nr   r   r   r   r   r   u!   <p>본문 텍스트입니다.</p>u   본문 텍스트입니다.rQ   r   rS   r   r	  r  s           r<   test_paragraph_conversionz,TestHtmlToMarkdown.test_paragraph_conversion  s     $('|4''''|4''''''|'''|'''4'''''''2..t4+5+v5555+v555+555555v555v5555555r>   c                 0   d}t         |u}|st        j                  d|fdt         |f      dt        j                         v st        j
                  t               rt        j                  t               ndt        j                  |      dz  }dd|iz  }t        t        j                  |            dx}}d}t        j                  |      }d	}||v }|st        j                  d
|fd||f      t        j                  |      dt        j                         v st        j
                  |      rt        j                  |      nddz  }dd|iz  }t        t        j                  |            dx}}d}||v }|st        j                  d
|fd||f      t        j                  |      dt        j                         v st        j
                  |      rt        j                  |      nddz  }dd|iz  }t        t        j                  |            dx}}y)u@   a 태그는 Markdown 링크 형식으로 변환되어야 한다.Nr   r   r   r   r   r   (   <a href="https://example.com">링크</a>u   링크rQ   r   rS   r   r   r	  r  s           r<   test_anchor_tag_conversionz-TestHtmlToMarkdown.test_anchor_tag_conversion  s'    $('|4''''|4''''''|'''|'''4'''''''9..t4!x6!!!!x6!!!x!!!!!!6!!!6!!!!!!!$.$....$...$................r>   c                 4   d}t         |u}|st        j                  d|fdt         |f      dt        j                         v st        j
                  t               rt        j                  t               ndt        j                  |      dz  }dd|iz  }t        t        j                  |            dx}}d}t        j                  |d	
      }d}||v}|st        j                  d|fd||f      t        j                  |      dt        j                         v st        j
                  |      rt        j                  |      nddz  }dd|iz  }t        t        j                  |            dx}}d}||v }|st        j                  d|fd||f      t        j                  |      dt        j                         v st        j
                  |      rt        j                  |      nddz  }dd|iz  }t        t        j                  |            dx}}y)uE   remove_noise=True 시 script 태그 내용이 제거되어야 한다.Nr   r   r   r   r   r   u+   <p>본문</p><script>alert('xss');</script>Tremove_noisealertr   r   rS   r   u   본문rQ   r   r	  r  s           r<   %test_remove_noise_removes_script_tagsz8TestHtmlToMarkdown.test_remove_noise_removes_script_tags  s&    $('|4''''|4''''''|'''|'''4'''''''<..t$G$wf$$$$wf$$$w$$$$$$f$$$f$$$$$$$!x6!!!!x6!!!x!!!!!!6!!!6!!!!!!!r>   c                 4   d}t         |u}|st        j                  d|fdt         |f      dt        j                         v st        j
                  t               rt        j                  t               ndt        j                  |      dz  }dd|iz  }t        t        j                  |            dx}}d}t        j                  |d	
      }d}||v}|st        j                  d|fd||f      t        j                  |      dt        j                         v st        j
                  |      rt        j                  |      nddz  }dd|iz  }t        t        j                  |            dx}}d}||v }|st        j                  d|fd||f      t        j                  |      dt        j                         v st        j
                  |      rt        j                  |      nddz  }dd|iz  }t        t        j                  |            dx}}y)uD   remove_noise=True 시 style 태그 내용이 제거되어야 한다.Nr   r   r   r   r   r   u0   <p>내용</p><style>.cls { color: red; }</style>Tr  z
color: redr   r   rS   r   u   내용rQ   r   r	  r  s           r<   $test_remove_noise_removes_style_tagsz7TestHtmlToMarkdown.test_remove_noise_removes_style_tags  s&    $('|4''''|4''''''|'''|'''4'''''''A..t$G)|6))))|6)))|))))))6)))6)))))))!x6!!!!x6!!!x!!!!!!6!!!6!!!!!!!r>   c                    d}t         |u}|st        j                  d|fdt         |f      dt        j                         v st        j
                  t               rt        j                  t               ndt        j                  |      dz  }dd|iz  }t        t        j                  |            dx}}d}t        j                  |d	
      }d}||v}|st        j                  d|fd||f      t        j                  |      dt        j                         v st        j
                  |      rt        j                  |      nddz  }dd|iz  }t        t        j                  |            dx}}y)uG   remove_noise=True 시 noscript 태그 내용이 제거되어야 한다.Nr   r   r   r   r   r   u.   <p>텍스트</p><noscript>JS 필요</noscript>Tr  u	   JS 필요r   r   rS   r   r	  r  s           r<   'test_remove_noise_removes_noscript_tagsz:TestHtmlToMarkdown.test_remove_noise_removes_noscript_tags(  s     $('|4''''|4''''''|'''|'''4'''''''?..t$G({&(((({&((({((((((&(((&(((((((r>   c                 4   d}t         |u}|st        j                  d|fdt         |f      dt        j                         v st        j
                  t               rt        j                  t               ndt        j                  |      dz  }dd|iz  }t        t        j                  |            dx}}d}t        j                  |d	
      }d}||v}|st        j                  d|fd||f      t        j                  |      dt        j                         v st        j
                  |      rt        j                  |      nddz  }dd|iz  }t        t        j                  |            dx}}d}||v}|st        j                  d|fd||f      t        j                  |      dt        j                         v st        j
                  |      rt        j                  |      nddz  }dd|iz  }t        t        j                  |            dx}}y)uB   remove_noise=True 시 svg 태그 내용이 제거되어야 한다.Nr   r   r   r   r   r   u+   <p>텍스트</p><svg><path d='M0 0'/></svg>Tr  <svgr   r   rS   r   z<pathr	  r  s           r<   "test_remove_noise_removes_svg_tagsz5TestHtmlToMarkdown.test_remove_noise_removes_svg_tags2  s&    $('|4''''|4''''''|'''|'''4'''''''<..t$G#vV####vV###v######V###V#######$wf$$$$wf$$$w$$$$$$f$$$f$$$$$$$r>   c                    d}t         |u}|st        j                  d|fdt         |f      dt        j                         v st        j
                  t               rt        j                  t               ndt        j                  |      dz  }dd|iz  }t        t        j                  |            dx}}d}t        j                  |d	
      }d}||v }|st        j                  d|fd||f      t        j                  |      dt        j                         v st        j
                  |      rt        j                  |      nddz  }dd|iz  }t        t        j                  |            dx}}y)uD   remove_noise=False 시 script 태그 내용이 유지될 수 있다.Nr   r   r   r   r   r   u(   <p>본문</p><script>var x = 1;</script>Fr  z	var x = 1rQ   r   rS   r   r	  r  s           r<   $test_remove_noise_false_keeps_scriptz7TestHtmlToMarkdown.test_remove_noise_false_keeps_script=  s     $('|4''''|4''''''|'''|'''4'''''''9..t%H${f$$$${f$$${$$$$$$f$$$f$$$$$$$r>   c                    d}t         |u}|st        j                  d|fdt         |f      dt        j                         v st        j
                  t               rt        j                  t               ndt        j                  |      dz  }dd|iz  }t        t        j                  |            dx}}d}t        j                  |      }d	}||v}|st        j                  d
|fd||f      t        j                  |      dt        j                         v st        j
                  |      rt        j                  |      nddz  }dd|iz  }t        t        j                  |            dx}}y)uI   결과에 연속된 빈 줄이 3개 이상 이어지지 않아야 한다.Nr   r   r   r   r   r   z<p>A</p><p>B</p><p>C</p>z


r   r   rS   r   r	  r  s           r<   test_no_excessive_blank_linesz0TestHtmlToMarkdown.test_no_excessive_blank_linesG  s     $('|4''''|4''''''|'''|'''4''''''')..t4 %xv%%%%xv%%%x%%%%%%v%%%v%%%%%%%r>   c                    d}t         |u}|st        j                  d|fdt         |f      dt        j                         v st        j
                  t               rt        j                  t               ndt        j                  |      dz  }dd|iz  }t        t        j                  |            dx}}t        j                  d      }t        |t              }|sd	d
t        j                         v st        j
                  t              rt        j                  t              nd
dt        j                         v st        j
                  |      rt        j                  |      nddt        j                         v st        j
                  t              rt        j                  t              ndt        j                  |      dz  }t        t        j                  |            d}yu(   반환값은 str 타입이어야 한다.Nr   r   r   r   r   r   z<p>hello</p>r   r   rS   strr   )r   r&   r'   r(   r)   r*   r+   r,   r-   r
  r   r%  r1   r2   r3   r4   r5   rS   r7   rY   s           r<   test_returns_stringz&TestHtmlToMarkdown.test_returns_stringR  s     $('|4''''|4''''''|'''|'''4'''''''..~>&#&&&&&&&&z&&&z&&&&&&&&&&&&&&&&&#&&&#&&&&&&&&&&r>   rD   )rF   rG   rH   rI   rJ   r  r  r  r  r  r  r  r   r"  r'  rK   r>   r<   r  r    s    .  6 6 / / " " " " ) ) % % % % & & ' 'r>   r  c                       e Zd ZdZedd       Zedd       Zedd       Zedd       Zedd       Z	edd       Z
edd	       Zedd
       Zedd       Zedd       Zy)TestCleanHtmlu   clean_html() 함수 테스트.Nc                 0   d}t         |u}|st        j                  d|fdt         |f      dt        j                         v st        j
                  t               rt        j                  t               ndt        j                  |      dz  }dd|iz  }t        t        j                  |            dx}}d}t        j                  |      }d	}||v}|st        j                  d
|fd||f      t        j                  |      dt        j                         v st        j
                  |      rt        j                  |      nddz  }dd|iz  }t        t        j                  |            dx}}d}||v}|st        j                  d
|fd||f      t        j                  |      dt        j                         v st        j
                  |      rt        j                  |      nddz  }dd|iz  }t        t        j                  |            dx}}y)u6   script 태그와 그 내용이 제거되어야 한다.Nr   r   r   r   r   r   u%   <p>본문</p><script>evil();</script>z<scriptr   r   rS   r   zevil()
r   r&   r'   r(   r)   r*   r+   r,   r-   
clean_htmlr  s           r<   test_removes_script_tagsz&TestCleanHtml.test_removes_script_tagsc  s$    $('|4''''|4''''''|'''|'''4'''''''6((.&y&&&&y&&&y&&&&&&&&&&&&&&&&%xv%%%%xv%%%x%%%%%%v%%%v%%%%%%%r>   c                 0   d}t         |u}|st        j                  d|fdt         |f      dt        j                         v st        j
                  t               rt        j                  t               ndt        j                  |      dz  }dd|iz  }t        t        j                  |            dx}}d}t        j                  |      }d	}||v}|st        j                  d
|fd||f      t        j                  |      dt        j                         v st        j
                  |      rt        j                  |      nddz  }dd|iz  }t        t        j                  |            dx}}d}||v}|st        j                  d
|fd||f      t        j                  |      dt        j                         v st        j
                  |      rt        j                  |      nddz  }dd|iz  }t        t        j                  |            dx}}y)u5   style 태그와 그 내용이 제거되어야 한다.Nr   r   r   r   r   r   u/   <p>내용</p><style>body { margin: 0; }</style>z<styler   r   rS   r   z	margin: 0r+  r  s           r<   test_removes_style_tagsz%TestCleanHtml.test_removes_style_tagsn  s$    $('|4''''|4''''''|'''|'''4'''''''@((.%xv%%%%xv%%%x%%%%%%v%%%v%%%%%%%({&(((({&((({((((((&(((&(((((((r>   c                 0   d}t         |u}|st        j                  d|fdt         |f      dt        j                         v st        j
                  t               rt        j                  t               ndt        j                  |      dz  }dd|iz  }t        t        j                  |            dx}}d}t        j                  |      }d	}||v}|st        j                  d
|fd||f      t        j                  |      dt        j                         v st        j
                  |      rt        j                  |      nddz  }dd|iz  }t        t        j                  |            dx}}d}||v}|st        j                  d
|fd||f      t        j                  |      dt        j                         v st        j
                  |      rt        j                  |      nddz  }dd|iz  }t        t        j                  |            dx}}y)u8   noscript 태그와 그 내용이 제거되어야 한다.Nr   r   r   r   r   r   u=   <div><noscript>Please enable JS</noscript><p>본문</p></div>z	<noscriptr   r   rS   r   zPlease enable JSr+  r  s           r<   test_removes_noscript_tagsz(TestCleanHtml.test_removes_noscript_tagsy  s'    $('|4''''|4''''''|'''|'''4'''''''N((.({&(((({&((({((((((&(((&(((((((!/!////!///!////////////////r>   c                    d}t         |u}|st        j                  d|fdt         |f      dt        j                         v st        j
                  t               rt        j                  t               ndt        j                  |      dz  }dd|iz  }t        t        j                  |            dx}}d}t        j                  |      }d	}||v}|st        j                  d
|fd||f      t        j                  |      dt        j                         v st        j
                  |      rt        j                  |      nddz  }dd|iz  }t        t        j                  |            dx}}y)u3   svg 태그와 그 내용이 제거되어야 한다.Nr   r   r   r   r   r   u6   <div><svg><circle r='10'/></svg><p>텍스트</p></div>r  r   r   rS   r   r+  r  s           r<   test_removes_svg_tagsz#TestCleanHtml.test_removes_svg_tags  s     $('|4''''|4''''''|'''|'''4'''''''G((.#vV####vV###v######V###V#######r>   c                 0   d}t         |u}|st        j                  d|fdt         |f      dt        j                         v st        j
                  t               rt        j                  t               ndt        j                  |      dz  }dd|iz  }t        t        j                  |            dx}}d}t        j                  |      }d	}||v}|st        j                  d
|fd||f      t        j                  |      dt        j                         v st        j
                  |      rt        j                  |      nddz  }dd|iz  }t        t        j                  |            dx}}d}||v }|st        j                  d|fd||f      t        j                  |      dt        j                         v st        j
                  |      rt        j                  |      nddz  }dd|iz  }t        t        j                  |            dx}}y)u)   onclick 속성이 제거되어야 한다.Nr   r   r   r   r   r   z.<button onclick="doSomething()">Click</button>onclickr   r   rS   r   ClickrQ   r   r+  r  s           r<   test_removes_onclick_attributez,TestCleanHtml.test_removes_onclick_attribute  s$    $('|4''''|4''''''|'''|'''4'''''''?((.&y&&&&y&&&y&&&&&&&&&&&&&&&& w&    w&   w      &   &       r>   c                 0   d}t         |u}|st        j                  d|fdt         |f      dt        j                         v st        j
                  t               rt        j                  t               ndt        j                  |      dz  }dd|iz  }t        t        j                  |            dx}}d}t        j                  |      }d	}||v}|st        j                  d
|fd||f      t        j                  |      dt        j                         v st        j
                  |      rt        j                  |      nddz  }dd|iz  }t        t        j                  |            dx}}d}||v }|st        j                  d|fd||f      t        j                  |      dt        j                         v st        j
                  |      rt        j                  |      nddz  }dd|iz  }t        t        j                  |            dx}}y)u1   style 인라인 속성이 제거되어야 한다.Nr   r   r   r   r   r   u5   <p style="color: red; font-size: 14px;">텍스트</p>zstyle="r   r   rS   r   u	   텍스트rQ   r   r+  r  s           r<   test_removes_style_attributez*TestCleanHtml.test_removes_style_attribute  s$    $('|4''''|4''''''|'''|'''4'''''''F((.&y&&&&y&&&y&&&&&&&&&&&&&&&&${f$$$${f$$${$$$$$$f$$$f$$$$$$$r>   c                    d}t         |u}|st        j                  d|fdt         |f      dt        j                         v st        j
                  t               rt        j                  t               ndt        j                  |      dz  }dd|iz  }t        t        j                  |            dx}}d}t        j                  |      }d	}||v }|st        j                  d
|fd||f      t        j                  |      dt        j                         v st        j
                  |      rt        j                  |      nddz  }dd|iz  }t        t        j                  |            dx}}y)u=   태그 제거 후 텍스트 내용은 보존되어야 한다.Nr   r   r   r   r   r   u,   <div><p>중요한 내용입니다.</p></div>u   중요한 내용입니다.rQ   r   rS   r   r+  r  s           r<   test_preserves_text_contentz)TestCleanHtml.test_preserves_text_content  s     $('|4''''|4''''''|'''|'''4'''''''=((.+5+v5555+v555+555555v555v5555555r>   c                 0   d}t         |u}|st        j                  d|fdt         |f      dt        j                         v st        j
                  t               rt        j                  t               ndt        j                  |      dz  }dd|iz  }t        t        j                  |            dx}}d}t        j                  |      }d	}||v }|st        j                  d
|fd||f      t        j                  |      dt        j                         v st        j
                  |      rt        j                  |      nddz  }dd|iz  }t        t        j                  |            dx}}d}||v }|st        j                  d
|fd||f      t        j                  |      dt        j                         v st        j
                  |      rt        j                  |      nddz  }dd|iz  }t        t        j                  |            dx}}y)u-   href 속성은 제거되지 않아야 한다.Nr   r   r   r   r   r   r  hrefrQ   r   rS   r   r   r+  r  s           r<   test_preserves_href_attributez+TestCleanHtml.test_preserves_href_attribute  s'    $('|4''''|4''''''|'''|'''4'''''''9((.vvv$.$....$...$................r>   c                    d}t         |u}|st        j                  d|fdt         |f      dt        j                         v st        j
                  t               rt        j                  t               ndt        j                  |      dz  }dd|iz  }t        t        j                  |            dx}}t        j                  d      }t        |t              }|sd	d
t        j                         v st        j
                  t              rt        j                  t              nd
dt        j                         v st        j
                  |      rt        j                  |      nddt        j                         v st        j
                  t              rt        j                  t              ndt        j                  |      dz  }t        t        j                  |            d}yr$  r   r&   r'   r(   r)   r*   r+   r,   r-   r,  r   r%  r&  s           r<   r'  z!TestCleanHtml.test_returns_string  s     $('|4''''|4''''''|'''|'''4'''''''((8&#&&&&&&&&z&&&z&&&&&&&&&&&&&&&&&#&&&#&&&&&&&&&&r>   c                    d}t         |u}|st        j                  d|fdt         |f      dt        j                         v st        j
                  t               rt        j                  t               ndt        j                  |      dz  }dd|iz  }t        t        j                  |            dx}}t        j                  d      }t        |t              }|sd	d
t        j                         v st        j
                  t              rt        j                  t              nd
dt        j                         v st        j
                  |      rt        j                  |      nddt        j                         v st        j
                  t              rt        j                  t              ndt        j                  |      dz  }t        t        j                  |            d}y)u.   빈 HTML 입력도 str을 반환해야 한다.Nr   r   r   r   r   r    r   r   rS   r%  r   r@  r&  s           r<   test_empty_html_returns_stringz,TestCleanHtml.test_empty_html_returns_string  s     $('|4''''|4''''''|'''|'''4'''''''((,&#&&&&&&&&z&&&z&&&&&&&&&&&&&&&&&#&&&#&&&&&&&&&&r>   rD   )rF   rG   rH   rI   rJ   r-  r/  r1  r3  r7  r9  r;  r>  r'  rC  rK   r>   r<   r)  r)  `  s    (& & ) ) 0 0 $ $ ! ! % % 6 6 / / ' ' ' 'r>   r)  )'rI   builtinsr(   _pytest.assertion.rewrite	assertionrewriter&   syspathlibr   typesr   typingr   unittest.mockr   r   pathinsertr%  __file__parentr   __annotations__crawl_utilsModuleNotFoundError_MISSINGmarkskipifrJ   r
   rM   rf   rz   r   r   r   r  r)  rK   r>   r<   <module>rW     s    
    #  3tH~,,334 5 &*hz" )	& 4;;%%P &   >  >P. .D* *B8! 8!@?> ?>Nw, w,~Qd Qdri' i'bg' g'U  		s   8C8 8D ?D 