"""
통합 테스트: test_crawl_integration.py

browser.py + crawl_utils + insurance_crawler + insurance_spider 모듈 간
통합 호환성을 검증하는 테스트 스위트.

주의:
- 외부 네트워크 호출 금지 (모든 테스트는 로컬 HTML fixture 사용).
- 합법적 공개 데이터(보험사 공시 페이지 등)만을 대상으로 합니다.
"""

import asyncio
import importlib.util
import json
import sys
import tempfile
from pathlib import Path
from typing import Any, Optional

_WORKSPACE = Path(__file__).resolve().parent.parent.parent
sys.path.insert(0, str(_WORKSPACE / "scripts"))

# ────────────────────────────────────────────────────────
# browser.py — loaded via importlib (the file does not fit the usual module-name rules)
# ────────────────────────────────────────────────────────

_browser_mod: Optional[Any] = None
try:
    _spec = importlib.util.spec_from_file_location("browser", str(_WORKSPACE / "scripts" / "browser.py"))
    if _spec is not None and _spec.loader is not None:
        _browser_mod = importlib.util.module_from_spec(_spec)
        _spec.loader.exec_module(_browser_mod)  # type: ignore[union-attr]
except Exception:
    pass
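
# Note: the loaded module is not registered in sys.modules, so this load stays
# isolated from anything else on sys.path named "browser". A variant (a sketch,
# not something these tests rely on) that would make it visible to a later
# `import browser` statement:
#
#     sys.modules["browser"] = _browser_mod
#     _spec.loader.exec_module(_browser_mod)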

# ────────────────────────────────────────────────────────
# Remaining module imports
# ────────────────────────────────────────────────────────

import crawl_utils  # pyright: ignore[reportMissingImports]
import insurance_crawler as _insurance_crawler_mod  # pyright: ignore[reportMissingImports]
import insurance_spider as _insurance_spider_mod  # pyright: ignore[reportMissingImports]
from crawl_utils import (  # pyright: ignore[reportMissingImports]
    ProxyRotator,
    clean_html,
    fetch_with_retry,
    get_resource_block_types,
    html_to_markdown,
    is_proxy_error,
)
from insurance_crawler import InsuranceCrawler  # pyright: ignore[reportMissingImports]
from insurance_spider import InsuranceSpider, ResponseHistory  # pyright: ignore[reportMissingImports]

# ────────────────────────────────────────────────────────
# Shared HTML fixtures
# ────────────────────────────────────────────────────────

HTML_PRODUCTS = """
<html>
<body>
  <div class="product">
    <span class="name">화재보험</span>
    <span class="price">50000</span>
  </div>
  <div class="product">
    <span class="name">자동차보험</span>
    <span class="price">100000</span>
  </div>
  <div class="product">
    <span class="name">생명보험</span>
    <span class="price">30000</span>
  </div>
  <a class="next-page" href="/products?page=2">다음</a>
</body>
</html>
"""

HTML_TABLE = """
<html>
<body>
  <table>
    <tr><th>보험명</th><th>보험료</th><th>가입기간</th></tr>
    <tr><td>화재보험</td><td>50000</td><td>1년</td></tr>
    <tr><td>자동차보험</td><td>100000</td><td>1년</td></tr>
  </table>
</body>
</html>
"""

HTML_WITH_SCRIPT = """
<html>
<body>
  <script>alert('xss');</script>
  <p>보험 공시 정보</p>
  <div class="product"><span class="name">화재보험</span></div>
</body>
</html>
"""

HTML_SIMPLE = "<html><body><p>보험 정보</p></body></html>"
HTML_EMPTY = ""


def _make_mock_response(
    html: str,
    url: str = "https://example.com",
    status: int = 200,
) -> Any:
    """Selector를 Response 대용으로 사용 (Response는 Selector 상속)."""
    from scrapling.parser import Selector

    sel = Selector(html, url=url, adaptive=False)
    sel.status = status  # type: ignore[attr-defined]
    sel.meta = {}  # type: ignore[attr-defined]
    return sel
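
# Example usage (a sketch; the URL is illustrative only):
#
#     response = _make_mock_response(HTML_PRODUCTS, url="https://example.com/p")
#     assert response.status == 200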


async def _collect_parse_results(spider: Any, response: Any) -> list[Any]:
    """parse() async generator의 결과를 리스트로 수집."""
    results: list[Any] = []
    async for item in spider.parse(response):
        results.append(item)
    return results
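
# Typical call pattern used throughout the tests:
#
#     results = asyncio.run(_collect_parse_results(spider, response))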


# ════════════════════════════════════════════════════════
# 1. TestImportCompatibility — all modules import without conflicts
# ════════════════════════════════════════════════════════


class TestImportCompatibility:
    """모든 모듈이 충돌 없이 import 가능한지 검증한다."""

    def test_browser_module_loaded(self) -> None:
        """browser.py가 importlib으로 정상 로드되어야 한다."""
        assert _browser_mod is not None, "browser.py를 로드하지 못했습니다."

    def test_browser_stealth_args_exists(self) -> None:
        """browser.py에 STEALTH_ARGS 변수가 존재해야 한다."""
        assert _browser_mod is not None
        assert hasattr(_browser_mod, "STEALTH_ARGS")

    def test_browser_harmful_args_exists(self) -> None:
        """browser.py에 HARMFUL_ARGS 변수가 존재해야 한다."""
        assert _browser_mod is not None
        assert hasattr(_browser_mod, "HARMFUL_ARGS")

    def test_browser_generate_stealth_headers_callable(self) -> None:
        """browser.py의 generate_stealth_headers가 호출 가능해야 한다."""
        assert _browser_mod is not None
        assert callable(_browser_mod.generate_stealth_headers)

    def test_browser_get_google_referer_callable(self) -> None:
        """browser.py의 get_google_referer가 호출 가능해야 한다."""
        assert _browser_mod is not None
        assert callable(_browser_mod.get_google_referer)

    def test_browser_create_resource_blocker_callable(self) -> None:
        """browser.py의 create_resource_blocker가 호출 가능해야 한다."""
        assert _browser_mod is not None
        assert callable(_browser_mod.create_resource_blocker)

    def test_browser_blocked_resource_types_exists(self) -> None:
        """browser.py에 BLOCKED_RESOURCE_TYPES 변수가 존재해야 한다."""
        assert _browser_mod is not None
        assert hasattr(_browser_mod, "BLOCKED_RESOURCE_TYPES")

    def test_crawl_utils_proxy_rotator_importable(self) -> None:
        """crawl_utils.ProxyRotator가 import 가능해야 한다."""
        assert ProxyRotator is not None

    def test_crawl_utils_is_proxy_error_importable(self) -> None:
        """crawl_utils.is_proxy_error가 import 가능해야 한다."""
        assert callable(is_proxy_error)

    def test_crawl_utils_fetch_with_retry_importable(self) -> None:
        """crawl_utils.fetch_with_retry가 import 가능해야 한다."""
        assert callable(fetch_with_retry)

    def test_crawl_utils_get_resource_block_types_importable(self) -> None:
        """crawl_utils.get_resource_block_types가 import 가능해야 한다."""
        assert callable(get_resource_block_types)

    def test_crawl_utils_html_to_markdown_importable(self) -> None:
        """crawl_utils.html_to_markdown가 import 가능해야 한다."""
        assert callable(html_to_markdown)

    def test_crawl_utils_clean_html_importable(self) -> None:
        """crawl_utils.clean_html이 import 가능해야 한다."""
        assert callable(clean_html)

    def test_insurance_crawler_importable(self) -> None:
        """insurance_crawler.InsuranceCrawler가 import 가능해야 한다."""
        assert InsuranceCrawler is not None

    def test_insurance_spider_importable(self) -> None:
        """insurance_spider.InsuranceSpider가 import 가능해야 한다."""
        assert InsuranceSpider is not None

    def test_response_history_importable(self) -> None:
        """insurance_spider.ResponseHistory가 import 가능해야 한다."""
        assert ResponseHistory is not None

    def test_no_name_conflict_between_modules(self) -> None:
        """crawl_utils와 insurance_crawler 사이에 이름 충돌이 없어야 한다."""
        # ProxyRotator는 crawl_utils에 정의되고 insurance_crawler가 import해서 사용
        cu_ProxyRotator = crawl_utils.ProxyRotator
        ic_ProxyRotator = _insurance_crawler_mod.ProxyRotator  # type: ignore[attr-defined]
        assert cu_ProxyRotator is ic_ProxyRotator

    def test_all_modules_distinct_objects(self) -> None:
        """각 모듈은 서로 다른 모듈 객체여야 한다."""
        assert _insurance_crawler_mod is not _insurance_spider_mod
        assert _insurance_crawler_mod is not crawl_utils
        assert _insurance_spider_mod is not crawl_utils


# ════════════════════════════════════════════════════════
# 2. TestInterfaceConsistency — function interface consistency
# ════════════════════════════════════════════════════════


class TestInterfaceConsistency:
    """함수 인터페이스가 모듈 간에 일관성 있게 연결되는지 검증한다."""

    def test_stealth_args_is_tuple(self) -> None:
        """browser.py의 STEALTH_ARGS는 tuple이어야 한다."""
        assert _browser_mod is not None
        assert isinstance(_browser_mod.STEALTH_ARGS, tuple)

    def test_harmful_args_is_tuple(self) -> None:
        """browser.py의 HARMFUL_ARGS는 tuple이어야 한다."""
        assert _browser_mod is not None
        assert isinstance(_browser_mod.HARMFUL_ARGS, tuple)

    def test_blocked_resource_types_is_set(self) -> None:
        """browser.py의 BLOCKED_RESOURCE_TYPES는 set이어야 한다."""
        assert _browser_mod is not None
        assert isinstance(_browser_mod.BLOCKED_RESOURCE_TYPES, set)

    def test_insurance_crawler_uses_proxy_rotator_from_crawl_utils(self) -> None:
        """InsuranceCrawler가 proxy_list로 생성할 때 crawl_utils.ProxyRotator를 사용해야 한다."""
        proxies = ["http://proxy1:8080", "http://proxy2:8080"]
        crawler = InsuranceCrawler(proxy_list=proxies)
        assert crawler.proxy_rotator is not None
        assert isinstance(crawler.proxy_rotator, ProxyRotator)

    def test_insurance_crawler_clean_html_and_html_to_markdown_pipeline(self) -> None:
        """InsuranceCrawler.to_llm_input()은 clean_html→html_to_markdown 파이프라인을 사용한다."""
        crawler = InsuranceCrawler()
        html = "<p>보험 공시 정보</p><script>evil();</script>"
        result = crawler.to_llm_input(html)
        # Pipeline result: script stripped, text preserved
        assert isinstance(result, str)
        assert "보험 공시 정보" in result
        assert "evil()" not in result

    def test_insurance_spider_has_crawler_attribute(self) -> None:
        """InsuranceSpider 인스턴스에 _crawler 속성(InsuranceCrawler)이 있어야 한다."""
        spider = InsuranceSpider(start_urls=["https://example.com"])
        assert hasattr(spider, "_crawler")
        assert isinstance(spider._crawler, InsuranceCrawler)

    def test_insurance_spider_has_response_history_attribute(self) -> None:
        """InsuranceSpider 인스턴스에 response_history 속성(ResponseHistory)이 있어야 한다."""
        spider = InsuranceSpider(start_urls=["https://example.com"])
        assert hasattr(spider, "response_history")
        assert isinstance(spider.response_history, ResponseHistory)

    def test_response_history_record_method_signature(self) -> None:
        """ResponseHistory.record()는 url, status, redirects, headers 인자를 받아야 한다."""
        history = ResponseHistory()
        # A call using every argument should run without raising
        history.record(
            "https://example.com",
            200,
            redirects=["https://old.example.com"],
            headers={"Content-Type": "text/html"},
        )
        result = history.get_history()
        assert len(result) == 1

    def test_response_history_save_signature(self) -> None:
        """ResponseHistory.save()는 str 또는 Path 인자를 받아야 한다."""
        history = ResponseHistory()
        history.record("https://example.com", 200)
        with tempfile.TemporaryDirectory() as tmpdir:
            # str path
            path_str = str(Path(tmpdir) / "history_str.json")
            history.save(path_str)
            assert Path(path_str).exists()
            # Path object
            path_obj = Path(tmpdir) / "history_path.json"
            history.save(path_obj)
            assert path_obj.exists()

    def test_get_resource_block_types_matches_blocked_resource_types(self) -> None:
        """crawl_utils.get_resource_block_types('default')와 browser.BLOCKED_RESOURCE_TYPES가 같아야 한다."""
        assert _browser_mod is not None
        default_types = get_resource_block_types("default")
        browser_types: set[str] = _browser_mod.BLOCKED_RESOURCE_TYPES
        assert default_types == browser_types

    def test_insurance_spider_extract_with_selector_delegation(self) -> None:
        """InsuranceSpider.parse()에서 css 모드는 _crawler.extract_with_selector를 호출해야 한다."""
        config = {
            "mode": "css",
            "css_selector": ".product",
            "fields": {"name": ".name"},
        }
        spider = InsuranceSpider(
            start_urls=["https://example.com"],
            extraction_config=config,
        )
        response = _make_mock_response(HTML_PRODUCTS, url="https://example.com")
        results = asyncio.run(_collect_parse_results(spider, response))
        items = [r for r in results if isinstance(r, dict)]
        assert len(items) >= 1

    def test_insurance_spider_extract_table_delegation(self) -> None:
        """InsuranceSpider.parse()에서 table 모드는 _crawler.extract_table을 호출해야 한다."""
        config = {"mode": "table", "table_selector": "table"}
        spider = InsuranceSpider(
            start_urls=["https://example.com"],
            extraction_config=config,
        )
        response = _make_mock_response(HTML_TABLE, url="https://example.com")
        results = asyncio.run(_collect_parse_results(spider, response))
        items = [r for r in results if isinstance(r, dict)]
        assert len(items) >= 1


# ════════════════════════════════════════════════════════
# 3. TestDataFlowIntegration — data flow integration (no external network)
# ════════════════════════════════════════════════════════


class TestDataFlowIntegration:
    """외부 네트워크 없이 HTML에서 데이터까지의 플로우를 검증한다."""

    def test_html_parse_extract_with_selector_returns_dict(self) -> None:
        """HTML → parse() → extract_with_selector() → 결과 dict 플로우가 동작해야 한다."""
        crawler = InsuranceCrawler(adaptive=False)
        page = crawler.parse(HTML_PRODUCTS)
        result = crawler.extract_with_selector(
            page,
            css_selector=".product",
            fields={"name": ".name", "price": ".price"},
        )
        assert isinstance(result, list)
        assert len(result) == 3
        assert isinstance(result[0], dict)
        assert result[0]["name"] == "화재보험"
        assert result[0]["price"] == "50000"

    def test_html_parse_extract_table_returns_dict(self) -> None:
        """HTML → parse() → extract_table() → 결과 dict 플로우가 동작해야 한다."""
        crawler = InsuranceCrawler(adaptive=False)
        page = crawler.parse(HTML_TABLE)
        result = crawler.extract_table(page)
        assert isinstance(result, list)
        assert len(result) == 2
        assert isinstance(result[0], dict)
        assert result[0]["보험명"] == "화재보험"
        assert result[0]["보험료"] == "50000"

    def test_html_to_llm_input_returns_markdown_string(self) -> None:
        """HTML → to_llm_input() → markdown 문자열 플로우가 동작해야 한다."""
        crawler = InsuranceCrawler()
        result = crawler.to_llm_input(HTML_PRODUCTS)
        assert isinstance(result, str)
        assert len(result) > 0
        # The text content should be preserved
        assert "화재보험" in result or "보험" in result

    def test_response_history_record_get_history_get_chain_save(self) -> None:
        """record → get_history → get_chain → save(tmpfile) → 파일 존재 확인."""
        history = ResponseHistory()
        # record
        history.record(
            "https://example.com/final",
            200,
            redirects=["https://example.com/old"],
        )
        history.record("https://example.com/other", 301)
        # get_history
        all_hist = history.get_history()
        assert len(all_hist) == 2
        # get_chain: final URL
        chain_final = history.get_chain("https://example.com/final")
        assert len(chain_final) >= 1
        assert chain_final[0]["url"] == "https://example.com/final"
        # get_chain: also resolvable via a redirect URL
        chain_old = history.get_chain("https://example.com/old")
        assert len(chain_old) >= 1
        # save
        with tempfile.TemporaryDirectory() as tmpdir:
            path = Path(tmpdir) / "history.json"
            history.save(path)
            assert path.exists()
            data = json.loads(path.read_text(encoding="utf-8"))
            assert isinstance(data, list)
            assert len(data) == 2

    def test_clean_html_then_html_to_markdown_pipeline(self) -> None:
        """clean_html() → html_to_markdown() 파이프라인이 정상 동작해야 한다."""
        raw_html = "<p>보험 정보</p><script>evil();</script><style>.a{}</style>"
        cleaned = clean_html(raw_html)
        assert "evil()" not in cleaned
        markdown = html_to_markdown(cleaned, remove_noise=False)
        assert isinstance(markdown, str)
        assert "보험 정보" in markdown

    def test_proxy_rotator_round_trip_with_crawler(self) -> None:
        """InsuranceCrawler에 proxy_list 전달 시 ProxyRotator가 올바르게 동작해야 한다."""
        proxies = ["http://p1:8080", "http://p2:8080"]
        crawler = InsuranceCrawler(proxy_list=proxies)
        assert crawler.proxy_rotator is not None
        first = crawler.proxy_rotator.get_next()
        second = crawler.proxy_rotator.get_next()
        assert first == "http://p1:8080"
        assert second == "http://p2:8080"

    def test_spider_parse_css_mode_full_flow(self) -> None:
        """Spider parse CSS 모드 전체 플로우: HTML → parse → dict items."""
        config = {
            "mode": "css",
            "css_selector": ".product",
            "fields": {"name": ".name", "price": ".price"},
        }
        spider = InsuranceSpider(
            start_urls=["https://example.com"],
            extraction_config=config,
        )
        response = _make_mock_response(HTML_PRODUCTS, url="https://example.com/products")
        results = asyncio.run(_collect_parse_results(spider, response))
        items = [r for r in results if isinstance(r, dict)]
        assert len(items) >= 1
        # Check the _source_url metadata
        assert all("_source_url" in item for item in items)
        assert items[0]["_source_url"] == "https://example.com/products"


# ════════════════════════════════════════════════════════
# 4. TestSpiderCrawlerIntegration — Spider-Crawler integration
# ════════════════════════════════════════════════════════


class TestSpiderCrawlerIntegration:
    """InsuranceSpider와 InsuranceCrawler의 연동을 검증한다."""

    def test_spider_init_creates_insurance_crawler_internally(self) -> None:
        """InsuranceSpider 초기화 시 InsuranceCrawler가 내부 생성되어야 한다."""
        spider = InsuranceSpider(start_urls=["https://example.com"])
        assert hasattr(spider, "_crawler")
        assert isinstance(spider._crawler, InsuranceCrawler)

    def test_spider_crawler_adaptive_false(self) -> None:
        """InsuranceSpider 내부의 _crawler는 adaptive=False로 초기화되어야 한다."""
        spider = InsuranceSpider(start_urls=["https://example.com"])
        assert spider._crawler.adaptive is False

    def test_spider_extraction_config_css_mode_selects_extract_with_selector(self) -> None:
        """extraction_config mode=css 전달 시 parse()에서 extract_with_selector가 선택되어야 한다."""
        config = {
            "mode": "css",
            "css_selector": ".product",
            "fields": {"name": ".name"},
        }
        spider = InsuranceSpider(
            start_urls=["https://example.com"],
            extraction_config=config,
        )
        assert spider.extraction_config is not None
        assert spider.extraction_config["mode"] == "css"
        response = _make_mock_response(HTML_PRODUCTS)
        results = asyncio.run(_collect_parse_results(spider, response))
        items = [r for r in results if isinstance(r, dict)]
        assert len(items) >= 1
        assert "name" in items[0]

    def test_spider_extraction_config_table_mode_selects_extract_table(self) -> None:
        """extraction_config mode=table 전달 시 parse()에서 extract_table이 선택되어야 한다."""
        config = {"mode": "table", "table_selector": "table"}
        spider = InsuranceSpider(
            start_urls=["https://example.com"],
            extraction_config=config,
        )
        response = _make_mock_response(HTML_TABLE)
        results = asyncio.run(_collect_parse_results(spider, response))
        items = [r for r in results if isinstance(r, dict)]
        assert len(items) >= 1
        # Table header keys should be present
        assert "보험명" in items[0] or "col_0" in items[0]

    def test_spider_extraction_config_similar_mode(self) -> None:
        """extraction_config mode=similar 전달 시 parse()에서 extract_similar가 선택되어야 한다."""
        config = {
            "mode": "similar",
            "reference_selector": ".product",
        }
        spider = InsuranceSpider(
            start_urls=["https://example.com"],
            extraction_config=config,
        )
        response = _make_mock_response(HTML_PRODUCTS)
        results = asyncio.run(_collect_parse_results(spider, response))
        assert isinstance(results, list)

    def test_spider_response_history_records_on_parse(self) -> None:
        """parse() 호출 시 ResponseHistory에 URL이 기록되어야 한다."""
        config = {"mode": "css", "css_selector": ".product", "fields": {"name": ".name"}}
        spider = InsuranceSpider(
            start_urls=["https://example.com"],
            extraction_config=config,
        )
        response = _make_mock_response(HTML_PRODUCTS, url="https://example.com/insurance")
        asyncio.run(_collect_parse_results(spider, response))
        history = spider.response_history.get_history()
        assert len(history) >= 1
        assert any(e["url"] == "https://example.com/insurance" for e in history)

    def test_spider_on_start_creates_output_dir(self) -> None:
        """on_start() 실행 후 output_dir이 생성되어야 한다."""
        with tempfile.TemporaryDirectory() as tmpdir:
            output_dir = Path(tmpdir) / "spider_out"
            spider = InsuranceSpider(
                start_urls=["https://example.com"],
                output_dir=str(output_dir),
            )
            asyncio.run(spider.on_start(resuming=False))
            assert output_dir.exists()

    def test_spider_on_close_saves_history_file(self) -> None:
        """on_close() 실행 후 response_history.json 파일이 output_dir에 저장되어야 한다."""
        with tempfile.TemporaryDirectory() as tmpdir:
            spider = InsuranceSpider(
                start_urls=["https://example.com"],
                output_dir=tmpdir,
            )
            spider.response_history.record("https://example.com", 200)
            asyncio.run(spider.on_start(resuming=False))
            asyncio.run(spider.on_close())
            history_file = Path(tmpdir) / "response_history.json"
            assert history_file.exists()


# ════════════════════════════════════════════════════════
# 5. TestEdgeCases — edge cases
# ════════════════════════════════════════════════════════


class TestEdgeCases:
    """경계 조건 및 예외 상황에서 각 모듈이 안전하게 동작하는지 검증한다."""

    def test_empty_html_extract_with_selector_returns_empty_list(self) -> None:
        """빈 HTML에서 extract_with_selector()는 빈 리스트를 반환해야 한다."""
        crawler = InsuranceCrawler(adaptive=False)
        page = crawler.parse(HTML_EMPTY)
        result = crawler.extract_with_selector(
            page,
            css_selector=".product",
            fields={"name": ".name"},
        )
        assert result == []

    def test_empty_html_extract_table_returns_empty_list(self) -> None:
        """빈 HTML에서 extract_table()은 빈 리스트를 반환해야 한다."""
        crawler = InsuranceCrawler(adaptive=False)
        page = crawler.parse(HTML_EMPTY)
        result = crawler.extract_table(page)
        assert result == []

    def test_empty_html_to_llm_input_returns_empty_string(self) -> None:
        """빈 HTML에서 to_llm_input()은 빈 문자열을 반환해야 한다."""
        crawler = InsuranceCrawler()
        result = crawler.to_llm_input(HTML_EMPTY)
        assert isinstance(result, str)
        assert result.strip() == ""

    def test_none_proxy_list_crawler_proxy_rotator_is_none(self) -> None:
        """proxy_list=None이면 InsuranceCrawler.proxy_rotator는 None이어야 한다."""
        crawler = InsuranceCrawler(proxy_list=None)
        assert crawler.proxy_rotator is None

    def test_adaptive_false_disables_smart_matching(self) -> None:
        """adaptive=False이면 InsuranceCrawler.adaptive가 False여야 한다."""
        crawler = InsuranceCrawler(adaptive=False)
        assert crawler.adaptive is False

    def test_empty_html_spider_parse_returns_no_items(self) -> None:
        """빈 HTML에서 Spider.parse()는 아이템을 yield하지 않아야 한다."""
        config = {"mode": "css", "css_selector": ".product", "fields": {"name": ".name"}}
        spider = InsuranceSpider(
            start_urls=["https://example.com"],
            extraction_config=config,
        )
        response = _make_mock_response(HTML_EMPTY, url="https://example.com")
        results = asyncio.run(_collect_parse_results(spider, response))
        items = [r for r in results if isinstance(r, dict)]
        assert len(items) == 0

    def test_nonexistent_css_selector_returns_empty_list(self) -> None:
        """존재하지 않는 CSS 셀렉터로 extract_with_selector() 호출 시 빈 리스트를 반환해야 한다."""
        crawler = InsuranceCrawler(adaptive=False)
        page = crawler.parse(HTML_PRODUCTS)
        result = crawler.extract_with_selector(
            page,
            css_selector=".does-not-exist",
            fields={"name": ".name"},
        )
        assert result == []

    def test_no_table_returns_empty_list(self) -> None:
        """테이블이 없는 HTML에서 extract_table()은 빈 리스트를 반환해야 한다."""
        crawler = InsuranceCrawler(adaptive=False)
        page = crawler.parse(HTML_SIMPLE)
        result = crawler.extract_table(page)
        assert result == []

    def test_response_history_empty_get_chain_returns_empty(self) -> None:
        """빈 ResponseHistory에서 get_chain()은 빈 리스트를 반환해야 한다."""
        history = ResponseHistory()
        result = history.get_chain("https://example.com")
        assert result == []

    def test_response_history_clear_then_save_empty_file(self) -> None:
        """clear() 후 save()하면 빈 배열([]) JSON 파일이 생성되어야 한다."""
        history = ResponseHistory()
        history.record("https://example.com", 200)
        history.clear()
        with tempfile.TemporaryDirectory() as tmpdir:
            path = Path(tmpdir) / "empty.json"
            history.save(path)
            assert path.exists()
            data = json.loads(path.read_text(encoding="utf-8"))
            assert data == []

    def test_generate_stealth_headers_returns_dict(self) -> None:
        """browser.generate_stealth_headers()는 dict를 반환해야 한다."""
        assert _browser_mod is not None
        result = _browser_mod.generate_stealth_headers()
        assert isinstance(result, dict)

    def test_get_google_referer_returns_google_url(self) -> None:
        """browser.get_google_referer()는 Google URL 문자열을 반환해야 한다."""
        assert _browser_mod is not None
        result = _browser_mod.get_google_referer()
        assert isinstance(result, str)
        assert "google.com" in result

    def test_create_resource_blocker_returns_callable(self) -> None:
        """browser.create_resource_blocker()는 호출 가능한 함수를 반환해야 한다."""
        assert _browser_mod is not None
        handler = _browser_mod.create_resource_blocker()
        assert callable(handler)

    def test_extract_similar_nonexistent_reference_returns_empty(self) -> None:
        """존재하지 않는 reference_selector로 extract_similar() 호출 시 빈 리스트를 반환해야 한다."""
        crawler = InsuranceCrawler(adaptive=False)
        page = crawler.parse(HTML_PRODUCTS)
        result = crawler.extract_similar(page, reference_selector=".nonexistent-ref")
        assert result == []

    def test_proxy_rotator_empty_list_returns_none(self) -> None:
        """빈 proxy_list로 ProxyRotator 생성 시 get_next()는 None을 반환해야 한다."""
        rotator = ProxyRotator([])
        assert rotator.get_next() is None

    def test_spider_extraction_config_none_yields_nothing(self) -> None:
        """extraction_config=None이면 parse()는 아이템을 yield하지 않아야 한다."""
        spider = InsuranceSpider(
            start_urls=["https://example.com"],
            extraction_config=None,
        )
        response = _make_mock_response(HTML_PRODUCTS)
        results = asyncio.run(_collect_parse_results(spider, response))
        items = [r for r in results if isinstance(r, dict)]
        assert len(items) == 0
