"""
통합 테스트: test_crawl_integration.py

browser.py + crawl_utils + insurance_crawler + insurance_spider 모듈 간
통합 호환성을 검증하는 테스트 스위트.

주의:
- 외부 네트워크 호출 금지 (모든 테스트는 로컬 HTML fixture 사용).
- 합법적 공개 데이터(보험사 공시 페이지 등)만을 대상으로 합니다.
"""

import asyncio
import importlib.util
import json
import sys
import tempfile
from pathlib import Path
from typing import Any, Optional

_WORKSPACE = Path(__file__).resolve().parent.parent.parent
sys.path.insert(0, str(_WORKSPACE / "scripts"))

# ────────────────────────────────────────────────────────
# browser.py — loaded via importlib (the file does not fit the usual module-name rules)
# ────────────────────────────────────────────────────────

_browser_mod: Optional[Any] = None
try:
    _spec = importlib.util.spec_from_file_location("browser", str(_WORKSPACE / "scripts" / "browser.py"))
    if _spec is not None and _spec.loader is not None:
        _browser_mod = importlib.util.module_from_spec(_spec)
        _spec.loader.exec_module(_browser_mod)  # type: ignore[union-attr]
except Exception:
    pass
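
# Note: the loaded module is not registered in sys.modules, so this load stays
# isolated from anything else on sys.path named "browser". A variant (a sketch,
# not something these tests rely on) that would make it visible to a later
# `import browser` statement:
#
#     sys.modules["browser"] = _browser_mod
#     _spec.loader.exec_module(_browser_mod)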

# ────────────────────────────────────────────────────────
# Remaining module imports
# ────────────────────────────────────────────────────────

import crawl_utils  # pyright: ignore[reportMissingImports]
import insurance_crawler as _insurance_crawler_mod  # pyright: ignore[reportMissingImports]
import insurance_spider as _insurance_spider_mod  # pyright: ignore[reportMissingImports]
from crawl_utils import (  # pyright: ignore[reportMissingImports]
    ProxyRotator,
    clean_html,
    fetch_with_retry,
    get_resource_block_types,
    html_to_markdown,
    is_proxy_error,
)
from insurance_crawler import InsuranceCrawler  # pyright: ignore[reportMissingImports]
from insurance_spider import InsuranceSpider, ResponseHistory  # pyright: ignore[reportMissingImports]

# ────────────────────────────────────────────────────────
# Shared HTML fixtures
# ────────────────────────────────────────────────────────

HTML_PRODUCTS = """
<html>
<body>
  <div class="product">
    <span class="name">화재보험</span>
    <span class="price">50000</span>
  </div>
  <div class="product">
    <span class="name">자동차보험</span>
    <span class="price">100000</span>
  </div>
  <div class="product">
    <span class="name">생명보험</span>
    <span class="price">30000</span>
  </div>
  <a class="next-page" href="/products?page=2">다음</a>
</body>
</html>
"""

HTML_TABLE = """
<html>
<body>
  <table>
    <tr><th>보험명</th><th>보험료</th><th>가입기간</th></tr>
    <tr><td>화재보험</td><td>50000</td><td>1년</td></tr>
    <tr><td>자동차보험</td><td>100000</td><td>1년</td></tr>
  </table>
</body>
</html>
"""

HTML_WITH_SCRIPT = """
<html>
<body>
  <script>alert('xss');</script>
  <p>보험 공시 정보</p>
  <div class="product"><span class="name">화재보험</span></div>
</body>
</html>
"""

HTML_SIMPLE = "<html><body><p>보험 정보</p></body></html>"
HTML_EMPTY = ""


def _make_mock_response(
    html: str,
    url: str = "https://example.com",
    status: int = 200,
) -> Any:
    """Selector를 Response 대용으로 사용 (Response는 Selector 상속)."""
    from scrapling.parser import Selector

    sel = Selector(html, url=url, adaptive=False)
    sel.status = status  # type: ignore[attr-defined]
    sel.meta = {}  # type: ignore[attr-defined]
    return sel
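
# Example usage (a sketch; the URL is illustrative only):
#
#     response = _make_mock_response(HTML_PRODUCTS, url="https://example.com/p")
#     assert response.status == 200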


async def _collect_parse_results(spider: Any, response: Any) -> list[Any]:
    """parse() async generator의 결과를 리스트로 수집."""
    results: list[Any] = []
    async for item in spider.parse(response):
        results.append(item)
    return results
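
# Typical call pattern used throughout the tests:
#
#     results = asyncio.run(_collect_parse_results(spider, response))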


# ════════════════════════════════════════════════════════
# 1. TestImportCompatibility — all modules import without conflicts
# ════════════════════════════════════════════════════════


class TestImportCompatibility:
    """모든 모듈이 충돌 없이 import 가능한지 검증한다."""

    def test_browser_module_loaded(self) -> None:
        """browser.py가 importlib으로 정상 로드되어야 한다."""
        assert _browser_mod is not None, "browser.py를 로드하지 못했습니다."

    def test_browser_stealth_args_exists(self) -> None:
        """browser.py에 STEALTH_ARGS 변수가 존재해야 한다."""
        assert _browser_mod is not None
        assert hasattr(_browser_mod, "STEALTH_ARGS")

    def test_browser_harmful_args_exists(self) -> None:
        """browser.py에 HARMFUL_ARGS 변수가 존재해야 한다."""
        assert _browser_mod is not None
        assert hasattr(_browser_mod, "HARMFUL_ARGS")

    def test_browser_generate_stealth_headers_callable(self) -> None:
        """browser.py의 generate_stealth_headers가 호출 가능해야 한다."""
        assert _browser_mod is not None
        assert callable(_browser_mod.generate_stealth_headers)

    def test_browser_get_google_referer_callable(self) -> None:
        """browser.py의 get_google_referer가 호출 가능해야 한다."""
        assert _browser_mod is not None
        assert callable(_browser_mod.get_google_referer)

    def test_browser_create_resource_blocker_callable(self) -> None:
        """browser.py의 create_resource_blocker가 호출 가능해야 한다."""
        assert _browser_mod is not None
        assert callable(_browser_mod.create_resource_blocker)

    def test_browser_blocked_resource_types_exists(self) -> None:
        """browser.py에 BLOCKED_RESOURCE_TYPES 변수가 존재해야 한다."""
        assert _browser_mod is not None
        assert hasattr(_browser_mod, "BLOCKED_RESOURCE_TYPES")

    def test_crawl_utils_proxy_rotator_importable(self) -> None:
        """crawl_utils.ProxyRotator가 import 가능해야 한다."""
        assert ProxyRotator is not None

    def test_crawl_utils_is_proxy_error_importable(self) -> None:
        """crawl_utils.is_proxy_error가 import 가능해야 한다."""
        assert callable(is_proxy_error)

    def test_crawl_utils_fetch_with_retry_importable(self) -> None:
        """crawl_utils.fetch_with_retry가 import 가능해야 한다."""
        assert callable(fetch_with_retry)

    def test_crawl_utils_get_resource_block_types_importable(self) -> None:
        """crawl_utils.get_resource_block_types가 import 가능해야 한다."""
        assert callable(get_resource_block_types)

    def test_crawl_utils_html_to_markdown_importable(self) -> None:
        """crawl_utils.html_to_markdown가 import 가능해야 한다."""
        assert callable(html_to_markdown)

    def test_crawl_utils_clean_html_importable(self) -> None:
        """crawl_utils.clean_html이 import 가능해야 한다."""
        assert callable(clean_html)

    def test_insurance_crawler_importable(self) -> None:
        """insurance_crawler.InsuranceCrawler가 import 가능해야 한다."""
        assert InsuranceCrawler is not None

    def test_insurance_spider_importable(self) -> None:
        """insurance_spider.InsuranceSpider가 import 가능해야 한다."""
        assert InsuranceSpider is not None

    def test_response_history_importable(self) -> None:
        """insurance_spider.ResponseHistory가 import 가능해야 한다."""
        assert ResponseHistory is not None

    def test_no_name_conflict_between_modules(self) -> None:
        """crawl_utils와 insurance_crawler 사이에 이름 충돌이 없어야 한다."""
        # ProxyRotator는 crawl_utils에 정의되고 insurance_crawler가 import해서 사용
        cu_ProxyRotator = crawl_utils.ProxyRotator
        ic_ProxyRotator = _insurance_crawler_mod.ProxyRotator  # type: ignore[attr-defined]
        assert cu_ProxyRotator is ic_ProxyRotator

    def test_all_modules_distinct_objects(self) -> None:
        """각 모듈은 서로 다른 모듈 객체여야 한다."""
        assert _insurance_crawler_mod is not _insurance_spider_mod
        assert _insurance_crawler_mod is not crawl_utils
        assert _insurance_spider_mod is not crawl_utils


# ════════════════════════════════════════════════════════
# 2. TestInterfaceConsistency — function interface consistency
# ════════════════════════════════════════════════════════


class TestInterfaceConsistency:
    """함수 인터페이스가 모듈 간에 일관성 있게 연결되는지 검증한다."""

    def test_stealth_args_is_tuple(self) -> None:
        """browser.py의 STEALTH_ARGS는 tuple이어야 한다."""
        assert _browser_mod is not None
        assert isinstance(_browser_mod.STEALTH_ARGS, tuple)

    def test_harmful_args_is_tuple(self) -> None:
        """browser.py의 HARMFUL_ARGS는 tuple이어야 한다."""
        assert _browser_mod is not None
        assert isinstance(_browser_mod.HARMFUL_ARGS, tuple)

    def test_blocked_resource_types_is_set(self) -> None:
        """browser.py의 BLOCKED_RESOURCE_TYPES는 set이어야 한다."""
        assert _browser_mod is not None
        assert isinstance(_browser_mod.BLOCKED_RESOURCE_TYPES, set)

    def test_insurance_crawler_uses_proxy_rotator_from_crawl_utils(self) -> None:
        """InsuranceCrawler가 proxy_list로 생성할 때 crawl_utils.ProxyRotator를 사용해야 한다."""
        proxies = ["http://proxy1:8080", "http://proxy2:8080"]
        crawler = InsuranceCrawler(proxy_list=proxies)
        assert crawler.proxy_rotator is not None
        assert isinstance(crawler.proxy_rotator, ProxyRotator)

    def test_insurance_crawler_clean_html_and_html_to_markdown_pipeline(self) -> None:
        """InsuranceCrawler.to_llm_input()은 clean_html→html_to_markdown 파이프라인을 사용한다."""
        crawler = InsuranceCrawler()
        html = "<p>보험 공시 정보</p><script>evil();</script>"
        result = crawler.to_llm_input(html)
        # Pipeline result: script stripped, text preserved
        assert isinstance(result, str)
        assert "보험 공시 정보" in result
        assert "evil()" not in result

    def test_insurance_spider_has_crawler_attribute(self) -> None:
        """InsuranceSpider 인스턴스에 _crawler 속성(InsuranceCrawler)이 있어야 한다."""
        spider = InsuranceSpider(start_urls=["https://example.com"])
        assert hasattr(spider, "_crawler")
        assert isinstance(spider._crawler, InsuranceCrawler)

    def test_insurance_spider_has_response_history_attribute(self) -> None:
        """InsuranceSpider 인스턴스에 response_history 속성(ResponseHistory)이 있어야 한다."""
        spider = InsuranceSpider(start_urls=["https://example.com"])
        assert hasattr(spider, "response_history")
        assert isinstance(spider.response_history, ResponseHistory)

    def test_response_history_record_method_signature(self) -> None:
        """ResponseHistory.record()는 url, status, redirects, headers 인자를 받아야 한다."""
        history = ResponseHistory()
        # A call using every argument should run without raising
        history.record(
            "https://example.com",
            200,
            redirects=["https://old.example.com"],
            headers={"Content-Type": "text/html"},
        )
        result = history.get_history()
        assert len(result) == 1

    def test_response_history_save_signature(self) -> None:
        """ResponseHistory.save()는 str 또는 Path 인자를 받아야 한다."""
        history = ResponseHistory()
        history.record("https://example.com", 200)
        with tempfile.TemporaryDirectory() as tmpdir:
            # str path
            path_str = str(Path(tmpdir) / "history_str.json")
            history.save(path_str)
            assert Path(path_str).exists()
            # Path object
            path_obj = Path(tmpdir) / "history_path.json"
            history.save(path_obj)
            assert path_obj.exists()

    def test_get_resource_block_types_matches_blocked_resource_types(self) -> None:
        """crawl_utils.get_resource_block_types('default')와 browser.BLOCKED_RESOURCE_TYPES가 같아야 한다."""
        assert _browser_mod is not None
        default_types = get_resource_block_types("default")
        browser_types: set[str] = _browser_mod.BLOCKED_RESOURCE_TYPES
        assert default_types == browser_types

    def test_insurance_spider_extract_with_selector_delegation(self) -> None:
        """InsuranceSpider.parse()에서 css 모드는 _crawler.extract_with_selector를 호출해야 한다."""
        config = {
            "mode": "css",
            "css_selector": ".product",
            "fields": {"name": ".name"},
        }
        spider = InsuranceSpider(
            start_urls=["https://example.com"],
            extraction_config=config,
        )
        response = _make_mock_response(HTML_PRODUCTS, url="https://example.com")
        results = asyncio.run(_collect_parse_results(spider, response))
        items = [r for r in results if isinstance(r, dict)]
        assert len(items) >= 1

    def test_insurance_spider_extract_table_delegation(self) -> None:
        """InsuranceSpider.parse()에서 table 모드는 _crawler.extract_table을 호출해야 한다."""
        config = {"mode": "table", "table_selector": "table"}
        spider = InsuranceSpider(
            start_urls=["https://example.com"],
            extraction_config=config,
        )
        response = _make_mock_response(HTML_TABLE, url="https://example.com")
        results = asyncio.run(_collect_parse_results(spider, response))
        items = [r for r in results if isinstance(r, dict)]
        assert len(items) >= 1


# ════════════════════════════════════════════════════════
# 3. TestDataFlowIntegration — data flow integration (no external network)
# ════════════════════════════════════════════════════════


class TestDataFlowIntegration:
    """외부 네트워크 없이 HTML에서 데이터까지의 플로우를 검증한다."""

    def test_html_parse_extract_with_selector_returns_dict(self) -> None:
        """HTML → parse() → extract_with_selector() → 결과 dict 플로우가 동작해야 한다."""
        crawler = InsuranceCrawler(adaptive=False)
        page = crawler.parse(HTML_PRODUCTS)
        result = crawler.extract_with_selector(
            page,
            css_selector=".product",
            fields={"name": ".name", "price": ".price"},
        )
        assert isinstance(result, list)
        assert len(result) == 3
        assert isinstance(result[0], dict)
        assert result[0]["name"] == "화재보험"
        assert result[0]["price"] == "50000"

    def test_html_parse_extract_table_returns_dict(self) -> None:
        """HTML → parse() → extract_table() → 결과 dict 플로우가 동작해야 한다."""
        crawler = InsuranceCrawler(adaptive=False)
        page = crawler.parse(HTML_TABLE)
        result = crawler.extract_table(page)
        assert isinstance(result, list)
        assert len(result) == 2
        assert isinstance(result[0], dict)
        assert result[0]["보험명"] == "화재보험"
        assert result[0]["보험료"] == "50000"

    def test_html_to_llm_input_returns_markdown_string(self) -> None:
        """HTML → to_llm_input() → markdown 문자열 플로우가 동작해야 한다."""
        crawler = InsuranceCrawler()
        result = crawler.to_llm_input(HTML_PRODUCTS)
        assert isinstance(result, str)
        assert len(result) > 0
        # The text content should be preserved
        assert "화재보험" in result or "보험" in result

    def test_response_history_record_get_history_get_chain_save(self) -> None:
        """record → get_history → get_chain → save(tmpfile) → 파일 존재 확인."""
        history = ResponseHistory()
        # record
        history.record(
            "https://example.com/final",
            200,
            redirects=["https://example.com/old"],
        )
        history.record("https://example.com/other", 301)
        # get_history
        all_hist = history.get_history()
        assert len(all_hist) == 2
        # get_chain: final URL
        chain_final = history.get_chain("https://example.com/final")
        assert len(chain_final) >= 1
        assert chain_final[0]["url"] == "https://example.com/final"
        # get_chain: also resolvable via a redirect URL
        chain_old = history.get_chain("https://example.com/old")
        assert len(chain_old) >= 1
        # save
        with tempfile.TemporaryDirectory() as tmpdir:
            path = Path(tmpdir) / "history.json"
            history.save(path)
            assert path.exists()
            data = json.loads(path.read_text(encoding="utf-8"))
            assert isinstance(data, list)
            assert len(data) == 2

    def test_clean_html_then_html_to_markdown_pipeline(self) -> None:
        """clean_html() → html_to_markdown() 파이프라인이 정상 동작해야 한다."""
        raw_html = "<p>보험 정보</p><script>evil();</script><style>.a{}</style>"
        cleaned = clean_html(raw_html)
        assert "evil()" not in cleaned
        markdown = html_to_markdown(cleaned, remove_noise=False)
        assert isinstance(markdown, str)
        assert "보험 정보" in markdown

    def test_proxy_rotator_round_trip_with_crawler(self) -> None:
        """InsuranceCrawler에 proxy_list 전달 시 ProxyRotator가 올바르게 동작해야 한다."""
        proxies = ["http://p1:8080", "http://p2:8080"]
        crawler = InsuranceCrawler(proxy_list=proxies)
        assert crawler.proxy_rotator is not None
        first = crawler.proxy_rotator.get_next()
        second = crawler.proxy_rotator.get_next()
        assert first == "http://p1:8080"
        assert second == "http://p2:8080"

    def test_spider_parse_css_mode_full_flow(self) -> None:
        """Spider parse CSS 모드 전체 플로우: HTML → parse → dict items."""
        config = {
            "mode": "css",
            "css_selector": ".product",
            "fields": {"name": ".name", "price": ".price"},
        }
        spider = InsuranceSpider(
            start_urls=["https://example.com"],
            extraction_config=config,
        )
        response = _make_mock_response(HTML_PRODUCTS, url="https://example.com/products")
        results = asyncio.run(_collect_parse_results(spider, response))
        items = [r for r in results if isinstance(r, dict)]
        assert len(items) >= 1
        # Check the _source_url metadata
        assert all("_source_url" in item for item in items)
        assert items[0]["_source_url"] == "https://example.com/products"


# ════════════════════════════════════════════════════════
# 4. TestSpiderCrawlerIntegration — Spider-Crawler integration
# ════════════════════════════════════════════════════════


class TestSpiderCrawlerIntegration:
    """InsuranceSpider와 InsuranceCrawler의 연동을 검증한다."""

    def test_spider_init_creates_insurance_crawler_internally(self) -> None:
        """InsuranceSpider 초기화 시 InsuranceCrawler가 내부 생성되어야 한다."""
        spider = InsuranceSpider(start_urls=["https://example.com"])
        assert hasattr(spider, "_crawler")
        assert isinstance(spider._crawler, InsuranceCrawler)

    def test_spider_crawler_adaptive_false(self) -> None:
        """InsuranceSpider 내부의 _crawler는 adaptive=False로 초기화되어야 한다."""
        spider = InsuranceSpider(start_urls=["https://example.com"])
        assert spider._crawler.adaptive is False

    def test_spider_extraction_config_css_mode_selects_extract_with_selector(self) -> None:
        """extraction_config mode=css 전달 시 parse()에서 extract_with_selector가 선택되어야 한다."""
        config = {
            "mode": "css",
            "css_selector": ".product",
            "fields": {"name": ".name"},
        }
        spider = InsuranceSpider(
            start_urls=["https://example.com"],
            extraction_config=config,
        )
        assert spider.extraction_config is not None
        assert spider.extraction_config["mode"] == "css"
        response = _make_mock_response(HTML_PRODUCTS)
        results = asyncio.run(_collect_parse_results(spider, response))
        items = [r for r in results if isinstance(r, dict)]
        assert len(items) >= 1
        assert "name" in items[0]

    def test_spider_extraction_config_table_mode_selects_extract_table(self) -> None:
        """extraction_config mode=table 전달 시 parse()에서 extract_table이 선택되어야 한다."""
        config = {"mode": "table", "table_selector": "table"}
        spider = InsuranceSpider(
            start_urls=["https://example.com"],
            extraction_config=config,
        )
        response = _make_mock_response(HTML_TABLE)
        results = asyncio.run(_collect_parse_results(spider, response))
        items = [r for r in results if isinstance(r, dict)]
        assert len(items) >= 1
        # Table header keys should be present
        assert "보험명" in items[0] or "col_0" in items[0]

    def test_spider_extraction_config_similar_mode(self) -> None:
        """extraction_config mode=similar 전달 시 parse()에서 extract_similar가 선택되어야 한다."""
        config = {
            "mode": "similar",
            "reference_selector": ".product",
        }
        spider = InsuranceSpider(
            start_urls=["https://example.com"],
            extraction_config=config,
        )
        response = _make_mock_response(HTML_PRODUCTS)
        results = asyncio.run(_collect_parse_results(spider, response))
        assert isinstance(results, list)

    def test_spider_response_history_records_on_parse(self) -> None:
        """parse() 호출 시 ResponseHistory에 URL이 기록되어야 한다."""
        config = {"mode": "css", "css_selector": ".product", "fields": {"name": ".name"}}
        spider = InsuranceSpider(
            start_urls=["https://example.com"],
            extraction_config=config,
        )
        response = _make_mock_response(HTML_PRODUCTS, url="https://example.com/insurance")
        asyncio.run(_collect_parse_results(spider, response))
        history = spider.response_history.get_history()
        assert len(history) >= 1
        assert any(e["url"] == "https://example.com/insurance" for e in history)

    def test_spider_on_start_creates_output_dir(self) -> None:
        """on_start() 실행 후 output_dir이 생성되어야 한다."""
        with tempfile.TemporaryDirectory() as tmpdir:
            output_dir = Path(tmpdir) / "spider_out"
            spider = InsuranceSpider(
                start_urls=["https://example.com"],
                output_dir=str(output_dir),
            )
            asyncio.run(spider.on_start(resuming=False))
            assert output_dir.exists()

    def test_spider_on_close_saves_history_file(self) -> None:
        """on_close() 실행 후 response_history.json 파일이 output_dir에 저장되어야 한다."""
        with tempfile.TemporaryDirectory() as tmpdir:
            spider = InsuranceSpider(
                start_urls=["https://example.com"],
                output_dir=tmpdir,
            )
            spider.response_history.record("https://example.com", 200)
            asyncio.run(spider.on_start(resuming=False))
            asyncio.run(spider.on_close())
            history_file = Path(tmpdir) / "response_history.json"
            assert history_file.exists()


# ════════════════════════════════════════════════════════
# 5. TestEdgeCases — edge cases
# ════════════════════════════════════════════════════════


class TestEdgeCases:
    """경계 조건 및 예외 상황에서 각 모듈이 안전하게 동작하는지 검증한다."""

    def test_empty_html_extract_with_selector_returns_empty_list(self) -> None:
        """빈 HTML에서 extract_with_selector()는 빈 리스트를 반환해야 한다."""
        crawler = InsuranceCrawler(adaptive=False)
        page = crawler.parse(HTML_EMPTY)
        result = crawler.extract_with_selector(
            page,
            css_selector=".product",
            fields={"name": ".name"},
        )
        assert result == []

    def test_empty_html_extract_table_returns_empty_list(self) -> None:
        """빈 HTML에서 extract_table()은 빈 리스트를 반환해야 한다."""
        crawler = InsuranceCrawler(adaptive=False)
        page = crawler.parse(HTML_EMPTY)
        result = crawler.extract_table(page)
        assert result == []

    def test_empty_html_to_llm_input_returns_empty_string(self) -> None:
        """빈 HTML에서 to_llm_input()은 빈 문자열을 반환해야 한다."""
        crawler = InsuranceCrawler()
        result = crawler.to_llm_input(HTML_EMPTY)
        assert isinstance(result, str)
        assert result.strip() == ""

    def test_none_proxy_list_crawler_proxy_rotator_is_none(self) -> None:
        """proxy_list=None이면 InsuranceCrawler.proxy_rotator는 None이어야 한다."""
        crawler = InsuranceCrawler(proxy_list=None)
        assert crawler.proxy_rotator is None

    def test_adaptive_false_disables_smart_matching(self) -> None:
        """adaptive=False이면 InsuranceCrawler.adaptive가 False여야 한다."""
        crawler = InsuranceCrawler(adaptive=False)
        assert crawler.adaptive is False

    def test_empty_html_spider_parse_returns_no_items(self) -> None:
        """빈 HTML에서 Spider.parse()는 아이템을 yield하지 않아야 한다."""
        config = {"mode": "css", "css_selector": ".product", "fields": {"name": ".name"}}
        spider = InsuranceSpider(
            start_urls=["https://example.com"],
            extraction_config=config,
        )
        response = _make_mock_response(HTML_EMPTY, url="https://example.com")
        results = asyncio.run(_collect_parse_results(spider, response))
        items = [r for r in results if isinstance(r, dict)]
        assert len(items) == 0

    def test_nonexistent_css_selector_returns_empty_list(self) -> None:
        """존재하지 않는 CSS 셀렉터로 extract_with_selector() 호출 시 빈 리스트를 반환해야 한다."""
        crawler = InsuranceCrawler(adaptive=False)
        page = crawler.parse(HTML_PRODUCTS)
        result = crawler.extract_with_selector(
            page,
            css_selector=".does-not-exist",
            fields={"name": ".name"},
        )
        assert result == []

    def test_no_table_returns_empty_list(self) -> None:
        """테이블이 없는 HTML에서 extract_table()은 빈 리스트를 반환해야 한다."""
        crawler = InsuranceCrawler(adaptive=False)
        page = crawler.parse(HTML_SIMPLE)
        result = crawler.extract_table(page)
        assert result == []

    def test_response_history_empty_get_chain_returns_empty(self) -> None:
        """빈 ResponseHistory에서 get_chain()은 빈 리스트를 반환해야 한다."""
        history = ResponseHistory()
        result = history.get_chain("https://example.com")
        assert result == []

    def test_response_history_clear_then_save_empty_file(self) -> None:
        """clear() 후 save()하면 빈 배열([]) JSON 파일이 생성되어야 한다."""
        history = ResponseHistory()
        history.record("https://example.com", 200)
        history.clear()
        with tempfile.TemporaryDirectory() as tmpdir:
            path = Path(tmpdir) / "empty.json"
            history.save(path)
            assert path.exists()
            data = json.loads(path.read_text(encoding="utf-8"))
            assert data == []

    def test_generate_stealth_headers_returns_dict(self) -> None:
        """browser.generate_stealth_headers()는 dict를 반환해야 한다."""
        assert _browser_mod is not None
        result = _browser_mod.generate_stealth_headers()
        assert isinstance(result, dict)

    def test_get_google_referer_returns_google_url(self) -> None:
        """browser.get_google_referer()는 Google URL 문자열을 반환해야 한다."""
        assert _browser_mod is not None
        result = _browser_mod.get_google_referer()
        assert isinstance(result, str)
        assert "google.com" in result

    def test_create_resource_blocker_returns_callable(self) -> None:
        """browser.create_resource_blocker()는 호출 가능한 함수를 반환해야 한다."""
        assert _browser_mod is not None
        handler = _browser_mod.create_resource_blocker()
        assert callable(handler)

    def test_extract_similar_nonexistent_reference_returns_empty(self) -> None:
        """존재하지 않는 reference_selector로 extract_similar() 호출 시 빈 리스트를 반환해야 한다."""
        crawler = InsuranceCrawler(adaptive=False)
        page = crawler.parse(HTML_PRODUCTS)
        result = crawler.extract_similar(page, reference_selector=".nonexistent-ref")
        assert result == []

    def test_proxy_rotator_empty_list_returns_none(self) -> None:
        """빈 proxy_list로 ProxyRotator 생성 시 get_next()는 None을 반환해야 한다."""
        rotator = ProxyRotator([])
        assert rotator.get_next() is None

    def test_spider_extraction_config_none_yields_nothing(self) -> None:
        """extraction_config=None이면 parse()는 아이템을 yield하지 않아야 한다."""
        spider = InsuranceSpider(
            start_urls=["https://example.com"],
            extraction_config=None,
        )
        response = _make_mock_response(HTML_PRODUCTS)
        results = asyncio.run(_collect_parse_results(spider, response))
        items = [r for r in results if isinstance(r, dict)]
        assert len(items) == 0
