"""
Lightpanda 크롤러 래퍼 테스트

단위 테스트: mock 사용 (네트워크 불필요)
통합 테스트: @pytest.mark.integration — 실제 lightpanda 서버(ws://127.0.0.1:9333) 필요
"""

from __future__ import annotations

import asyncio
from dataclasses import fields
from unittest.mock import AsyncMock, MagicMock, patch

import pytest

# 아직 구현 전이므로 import 실패해도 수집 단계에서만 에러남
try:
    from tools.lightpanda_crawler import CrawlError, CrawlResult, LightpandaCrawler
except ImportError:
    CrawlResult = None  # type: ignore[assignment, misc]
    LightpandaCrawler = None  # type: ignore[assignment]
    CrawlError = None  # type: ignore[assignment]


# ---------------------------------------------------------------------------
# helpers
# ---------------------------------------------------------------------------


def _make_mock_page(
    title: str = "Test Page",
    content: str = "<html><head><title>Test Page</title></head><body><h1>Hello</h1><a href='https://example.com/link'>link</a></body></html>",
) -> MagicMock:
    """playwright Page 모의 객체 생성"""
    page = MagicMock()
    page.title = AsyncMock(return_value=title)
    page.content = AsyncMock(return_value=content)
    page.goto = AsyncMock(return_value=MagicMock(status=200))
    page.close = AsyncMock()
    page.url = "https://example.com"
    page.evaluate = AsyncMock(return_value="Test Page")
    page.query_selector = AsyncMock(return_value=None)
    page.query_selector_all = AsyncMock(return_value=[])
    return page


def _make_mock_browser(page: MagicMock | None = None) -> MagicMock:
    """playwright Browser 모의 객체 생성"""
    if page is None:
        page = _make_mock_page()
    browser = MagicMock()
    browser.new_page = AsyncMock(return_value=page)
    browser.close = AsyncMock()
    browser.is_connected = MagicMock(return_value=True)
    return browser


# ---------------------------------------------------------------------------
# 1. 인스턴스 생성 / 기본값 확인
# ---------------------------------------------------------------------------


class TestLightpandaCrawlerInit:
    """Construction defaults and overrides of LightpandaCrawler."""

    def test_default_cdp_endpoint(self) -> None:
        """Default CDP endpoint is ws://127.0.0.1:9333."""
        assert LightpandaCrawler is not None, "LightpandaCrawler import 실패"
        assert LightpandaCrawler().cdp_endpoint == "ws://127.0.0.1:9333"

    def test_custom_cdp_endpoint(self) -> None:
        """A user-supplied CDP endpoint is stored verbatim."""
        assert LightpandaCrawler is not None
        custom = LightpandaCrawler(cdp_endpoint="ws://localhost:9999")
        assert custom.cdp_endpoint == "ws://localhost:9999"

    def test_default_timeout(self) -> None:
        """Default timeout is 30000 ms."""
        assert LightpandaCrawler is not None
        assert LightpandaCrawler().timeout_ms == 30000

    def test_chrome_fallback_endpoint(self) -> None:
        """Default Chrome fallback endpoint is ws://127.0.0.1:9222."""
        assert LightpandaCrawler is not None
        assert LightpandaCrawler().chrome_endpoint == "ws://127.0.0.1:9222"


# ---------------------------------------------------------------------------
# 2. CrawlResult 데이터클래스 필드 검증
# ---------------------------------------------------------------------------


class TestCrawlResultDataclass:
    """Shape and construction of the CrawlResult dataclass."""

    def test_required_fields_exist(self) -> None:
        """Every mandatory field is declared on CrawlResult."""
        assert CrawlResult is not None, "CrawlResult import 실패"
        field_names = {f.name for f in fields(CrawlResult)}
        required = {"url", "title", "text", "html", "links", "meta", "status", "elapsed_ms", "engine"}
        assert required.issubset(field_names), f"누락 필드: {required - field_names}"

    def test_instantiation(self) -> None:
        """A fully-populated CrawlResult can be constructed and read back."""
        assert CrawlResult is not None
        sample = CrawlResult(
            url="https://example.com",
            title="Example",
            text="Hello world",
            html="<html></html>",
            links=["https://example.com/page"],
            meta={"description": "test"},
            status=200,
            elapsed_ms=100.0,
            engine="lightpanda",
        )
        assert sample.url == "https://example.com"
        assert sample.engine == "lightpanda"
        assert isinstance(sample.links, list)
        assert isinstance(sample.meta, dict)

    def test_engine_field_values(self) -> None:
        """The engine field accepts both 'lightpanda' and 'chrome'."""
        assert CrawlResult is not None
        for engine_name in ("lightpanda", "chrome"):
            row = CrawlResult(
                url="https://x.com",
                title="",
                text="",
                html="",
                links=[],
                meta={},
                status=200,
                elapsed_ms=0.0,
                engine=engine_name,
            )
            assert row.engine == engine_name


# ---------------------------------------------------------------------------
# 3. fetch() 단위 테스트 (mock)
# ---------------------------------------------------------------------------


class TestFetchUnit:
    """Unit tests for fetch() against a fully mocked playwright stack."""

    @pytest.fixture
    def mock_playwright_connect(self):
        """Patch async_playwright so connect_over_cdp hands back a mock browser."""
        with patch("tools.lightpanda_crawler.async_playwright") as pw_factory:
            pw = AsyncMock()
            pw_factory.return_value.__aenter__ = AsyncMock(return_value=pw)
            pw_factory.return_value.__aexit__ = AsyncMock(return_value=False)

            mock_page = _make_mock_page()
            mock_browser = _make_mock_browser(mock_page)
            pw.chromium.connect_over_cdp = AsyncMock(return_value=mock_browser)

            yield pw_factory, mock_browser, mock_page

    @pytest.mark.asyncio
    async def test_fetch_returns_crawl_result(self, mock_playwright_connect) -> None:
        """fetch() must produce a CrawlResult instance."""
        assert LightpandaCrawler is not None

        async with LightpandaCrawler() as crawler:
            outcome = await crawler.fetch("https://example.com")

        assert isinstance(outcome, CrawlResult)

    @pytest.mark.asyncio
    async def test_fetch_result_has_required_fields(self, mock_playwright_connect) -> None:
        """The fetch() result carries title/text/html/links attributes."""
        assert LightpandaCrawler is not None

        async with LightpandaCrawler() as crawler:
            outcome = await crawler.fetch("https://example.com")

        for attr in ("title", "text", "html", "links"):
            assert hasattr(outcome, attr)

    @pytest.mark.asyncio
    async def test_fetch_url_stored_correctly(self, mock_playwright_connect) -> None:
        """The requested URL is echoed back on the result."""
        assert LightpandaCrawler is not None

        async with LightpandaCrawler() as crawler:
            outcome = await crawler.fetch("https://example.com")

        assert outcome.url == "https://example.com"

    @pytest.mark.asyncio
    async def test_fetch_elapsed_ms_positive(self, mock_playwright_connect) -> None:
        """elapsed_ms is never negative."""
        assert LightpandaCrawler is not None

        async with LightpandaCrawler() as crawler:
            outcome = await crawler.fetch("https://example.com")

        assert outcome.elapsed_ms >= 0


# ---------------------------------------------------------------------------
# 4. fetch_many() 병렬 크롤링 단위 테스트 (mock)
# ---------------------------------------------------------------------------


class TestFetchManyUnit:
    """Unit tests for fetch_many() parallel crawling with mocks."""

    @pytest.fixture
    def mock_playwright_connect(self):
        """Patch async_playwright so connect_over_cdp hands back a mock browser."""
        with patch("tools.lightpanda_crawler.async_playwright") as pw_factory:
            pw = AsyncMock()
            pw_factory.return_value.__aenter__ = AsyncMock(return_value=pw)
            pw_factory.return_value.__aexit__ = AsyncMock(return_value=False)

            mock_page = _make_mock_page()
            mock_browser = _make_mock_browser(mock_page)
            pw.chromium.connect_over_cdp = AsyncMock(return_value=mock_browser)

            yield pw_factory, mock_browser, mock_page

    @pytest.mark.asyncio
    async def test_fetch_many_returns_list(self, mock_playwright_connect) -> None:
        """fetch_many() returns a list."""
        assert LightpandaCrawler is not None
        targets = ["https://example.com", "https://example.org", "https://example.net"]

        async with LightpandaCrawler() as crawler:
            batch = await crawler.fetch_many(targets)

        assert isinstance(batch, list)

    @pytest.mark.asyncio
    async def test_fetch_many_count_matches_input(self, mock_playwright_connect) -> None:
        """fetch_many() yields exactly one result per input URL."""
        assert LightpandaCrawler is not None
        targets = ["https://example.com", "https://example.org", "https://example.net"]

        async with LightpandaCrawler() as crawler:
            batch = await crawler.fetch_many(targets)

        assert len(batch) == 3

    @pytest.mark.asyncio
    async def test_fetch_many_all_crawl_results(self, mock_playwright_connect) -> None:
        """Every element of the batch is a CrawlResult."""
        assert LightpandaCrawler is not None
        targets = ["https://example.com", "https://example.org", "https://example.net"]

        async with LightpandaCrawler() as crawler:
            batch = await crawler.fetch_many(targets)

        assert all(isinstance(item, CrawlResult) for item in batch)

    @pytest.mark.asyncio
    async def test_fetch_many_empty_list(self, mock_playwright_connect) -> None:
        """An empty URL list maps to an empty result list."""
        assert LightpandaCrawler is not None

        async with LightpandaCrawler() as crawler:
            batch = await crawler.fetch_many([])

        assert batch == []

    @pytest.mark.asyncio
    async def test_fetch_many_concurrency_param(self, mock_playwright_connect) -> None:
        """The concurrency keyword is accepted without error."""
        assert LightpandaCrawler is not None
        targets = ["https://example.com", "https://example.org"]

        async with LightpandaCrawler() as crawler:
            batch = await crawler.fetch_many(targets, concurrency=5)

        assert len(batch) == 2


# ---------------------------------------------------------------------------
# 5. evaluate() JS 실행 단위 테스트 (mock)
# ---------------------------------------------------------------------------


class TestEvaluateUnit:
    """Unit tests for evaluate() JS execution with mocks."""

    @pytest.fixture
    def mock_playwright_connect(self):
        """Patched playwright whose page.evaluate resolves to 'Example Domain'."""
        with patch("tools.lightpanda_crawler.async_playwright") as pw_factory:
            pw = AsyncMock()
            pw_factory.return_value.__aenter__ = AsyncMock(return_value=pw)
            pw_factory.return_value.__aexit__ = AsyncMock(return_value=False)

            mock_page = _make_mock_page()
            mock_page.evaluate = AsyncMock(return_value="Example Domain")
            mock_browser = _make_mock_browser(mock_page)
            pw.chromium.connect_over_cdp = AsyncMock(return_value=mock_browser)

            yield pw_factory, mock_browser, mock_page

    @pytest.mark.asyncio
    async def test_evaluate_returns_value(self, mock_playwright_connect) -> None:
        """evaluate() returns whatever the page-side JS produced."""
        assert LightpandaCrawler is not None

        async with LightpandaCrawler() as crawler:
            value = await crawler.evaluate("https://example.com", "document.title")

        assert value == "Example Domain"

    @pytest.mark.asyncio
    async def test_evaluate_calls_page_evaluate(self, mock_playwright_connect) -> None:
        """evaluate() delegates to page.evaluate exactly once."""
        assert LightpandaCrawler is not None
        mock_page = mock_playwright_connect[2]

        async with LightpandaCrawler() as crawler:
            await crawler.evaluate("https://example.com", "document.title")

        mock_page.evaluate.assert_called_once()


# ---------------------------------------------------------------------------
# 6. extract_structured() CSS 셀렉터 추출 단위 테스트 (mock)
# ---------------------------------------------------------------------------


class TestExtractStructuredUnit:
    """Unit tests for extract_structured() CSS-selector extraction."""

    @pytest.fixture
    def mock_playwright_connect(self):
        """Patched playwright whose page yields a stub element for any selector."""
        with patch("tools.lightpanda_crawler.async_playwright") as pw_factory:
            pw = AsyncMock()
            pw_factory.return_value.__aenter__ = AsyncMock(return_value=pw)
            pw_factory.return_value.__aexit__ = AsyncMock(return_value=False)

            element = MagicMock()
            element.inner_text = AsyncMock(return_value="Hello World")
            element.get_attribute = AsyncMock(return_value="attr_value")

            mock_page = _make_mock_page()
            mock_page.query_selector = AsyncMock(return_value=element)
            mock_page.query_selector_all = AsyncMock(return_value=[element, element])
            mock_browser = _make_mock_browser(mock_page)
            pw.chromium.connect_over_cdp = AsyncMock(return_value=mock_browser)

            yield pw_factory, mock_browser, mock_page, element

    @pytest.mark.asyncio
    async def test_extract_structured_returns_dict(self, mock_playwright_connect) -> None:
        """extract_structured() returns a dict."""
        assert LightpandaCrawler is not None

        async with LightpandaCrawler() as crawler:
            data = await crawler.extract_structured("https://example.com", {"title": "h1"})

        assert isinstance(data, dict)

    @pytest.mark.asyncio
    async def test_extract_structured_key_present(self, mock_playwright_connect) -> None:
        """Requested keys come back in the result."""
        assert LightpandaCrawler is not None

        async with LightpandaCrawler() as crawler:
            data = await crawler.extract_structured("https://example.com", {"title": "h1"})

        assert "title" in data

    @pytest.mark.asyncio
    async def test_extract_structured_attr_syntax(self, mock_playwright_connect) -> None:
        """The ::attr(name) selector suffix extracts element attributes."""
        assert LightpandaCrawler is not None

        async with LightpandaCrawler() as crawler:
            data = await crawler.extract_structured("https://example.com", {"href": "a::attr(href)"})

        assert "href" in data

    @pytest.mark.asyncio
    async def test_extract_structured_plural_key_returns_list(self, mock_playwright_connect) -> None:
        """Plural keys such as 'links' come back as lists."""
        assert LightpandaCrawler is not None

        async with LightpandaCrawler() as crawler:
            data = await crawler.extract_structured("https://example.com", {"links": "a"})

        assert isinstance(data.get("links"), list)


# ---------------------------------------------------------------------------
# 7. Chrome fallback 단위 테스트 (mock)
# ---------------------------------------------------------------------------


class TestChromeFallback:
    """Engine selection: Lightpanda first, Chrome as the fallback."""

    @pytest.mark.asyncio
    async def test_chrome_fallback_on_lightpanda_failure(self) -> None:
        """When Lightpanda refuses the connection, the crawler falls back to Chrome."""
        assert LightpandaCrawler is not None

        with patch("tools.lightpanda_crawler.async_playwright") as pw_factory:
            pw = AsyncMock()
            pw_factory.return_value.__aenter__ = AsyncMock(return_value=pw)
            pw_factory.return_value.__aexit__ = AsyncMock(return_value=False)

            chrome_browser = _make_mock_browser(_make_mock_page())

            # First connect attempt (Lightpanda) blows up; second (Chrome) succeeds.
            pw.chromium.connect_over_cdp = AsyncMock(
                side_effect=[
                    Exception("Lightpanda connection refused"),
                    chrome_browser,
                ]
            )

            async with LightpandaCrawler() as crawler:
                assert crawler._engine == "chrome"

    @pytest.mark.asyncio
    async def test_engine_is_lightpanda_on_success(self) -> None:
        """A successful Lightpanda connection reports engine 'lightpanda'."""
        assert LightpandaCrawler is not None

        with patch("tools.lightpanda_crawler.async_playwright") as pw_factory:
            pw = AsyncMock()
            pw_factory.return_value.__aenter__ = AsyncMock(return_value=pw)
            pw_factory.return_value.__aexit__ = AsyncMock(return_value=False)

            pw.chromium.connect_over_cdp = AsyncMock(
                return_value=_make_mock_browser(_make_mock_page())
            )

            async with LightpandaCrawler() as crawler:
                assert crawler._engine == "lightpanda"

    @pytest.mark.asyncio
    async def test_crawl_result_engine_matches_crawler(self) -> None:
        """CrawlResult.engine mirrors the engine the crawler actually used."""
        assert LightpandaCrawler is not None

        with patch("tools.lightpanda_crawler.async_playwright") as pw_factory:
            pw = AsyncMock()
            pw_factory.return_value.__aenter__ = AsyncMock(return_value=pw)
            pw_factory.return_value.__aexit__ = AsyncMock(return_value=False)

            pw.chromium.connect_over_cdp = AsyncMock(
                return_value=_make_mock_browser(_make_mock_page())
            )

            async with LightpandaCrawler() as crawler:
                result = await crawler.fetch("https://example.com")
                assert result.engine == crawler._engine


# ---------------------------------------------------------------------------
# 8. 에러 핸들링 단위 테스트 (mock)
# ---------------------------------------------------------------------------


class TestErrorHandling:
    """Error paths: CrawlError propagation and unimplemented features."""

    def test_crawl_error_exists(self) -> None:
        """The custom CrawlError exception type is importable and an Exception."""
        assert CrawlError is not None, "CrawlError import 실패"
        assert issubclass(CrawlError, Exception)

    @pytest.mark.asyncio
    async def test_both_endpoints_fail_raises_crawl_error(self) -> None:
        """CrawlError is raised when both Lightpanda and Chrome are unreachable."""
        assert LightpandaCrawler is not None
        assert CrawlError is not None

        with patch("tools.lightpanda_crawler.async_playwright") as pw_factory:
            pw = AsyncMock()
            pw_factory.return_value.__aenter__ = AsyncMock(return_value=pw)
            pw_factory.return_value.__aexit__ = AsyncMock(return_value=False)

            # Every connect attempt fails, for both endpoints.
            pw.chromium.connect_over_cdp = AsyncMock(side_effect=Exception("Connection refused"))

            with pytest.raises(CrawlError):
                async with LightpandaCrawler():
                    pass

    @pytest.mark.asyncio
    async def test_screenshot_raises_not_implemented(self) -> None:
        """screenshot() is unsupported and raises NotImplementedError."""
        assert LightpandaCrawler is not None

        with patch("tools.lightpanda_crawler.async_playwright") as pw_factory:
            pw = AsyncMock()
            pw_factory.return_value.__aenter__ = AsyncMock(return_value=pw)
            pw_factory.return_value.__aexit__ = AsyncMock(return_value=False)

            pw.chromium.connect_over_cdp = AsyncMock(
                return_value=_make_mock_browser(_make_mock_page())
            )

            async with LightpandaCrawler() as crawler:
                with pytest.raises(NotImplementedError) as exc_info:
                    await crawler.screenshot("https://example.com")  # type: ignore[attr-defined]
                assert "Playwright+Chrome" in str(exc_info.value) or "스크린샷" in str(exc_info.value)

    @pytest.mark.asyncio
    async def test_pdf_raises_not_implemented(self) -> None:
        """pdf() is unsupported and raises NotImplementedError."""
        assert LightpandaCrawler is not None

        with patch("tools.lightpanda_crawler.async_playwright") as pw_factory:
            pw = AsyncMock()
            pw_factory.return_value.__aenter__ = AsyncMock(return_value=pw)
            pw_factory.return_value.__aexit__ = AsyncMock(return_value=False)

            pw.chromium.connect_over_cdp = AsyncMock(
                return_value=_make_mock_browser(_make_mock_page())
            )

            async with LightpandaCrawler() as crawler:
                with pytest.raises(NotImplementedError):
                    await crawler.pdf("https://example.com")  # type: ignore[attr-defined]


# ---------------------------------------------------------------------------
# 9. 통합 테스트 — 실제 Lightpanda 서버 필요
# ---------------------------------------------------------------------------


@pytest.mark.integration
class TestIntegration:
    """End-to-end tests — require a live Lightpanda server (ws://127.0.0.1:9333)."""

    @pytest.mark.asyncio
    async def test_fetch_example_com(self) -> None:
        """Crawl the real example.com and sanity-check the result fields."""
        assert LightpandaCrawler is not None
        async with LightpandaCrawler() as crawler:
            result = await crawler.fetch("https://example.com")

        assert isinstance(result, CrawlResult)
        assert result.title  # non-empty title
        assert result.html  # some HTML came back
        assert result.url == "https://example.com"
        assert isinstance(result.links, list)
        assert result.elapsed_ms > 0
        assert result.engine in ("lightpanda", "chrome")

    @pytest.mark.asyncio
    async def test_fetch_many_three_urls(self) -> None:
        """Crawl three URLs concurrently and verify the result count."""
        assert LightpandaCrawler is not None
        targets = [
            "https://example.com",
            "https://example.org",
            "https://example.net",
        ]
        async with LightpandaCrawler() as crawler:
            batch = await crawler.fetch_many(targets, concurrency=3)

        assert len(batch) == 3
        assert all(isinstance(item, CrawlResult) for item in batch)

    @pytest.mark.asyncio
    async def test_evaluate_document_title(self) -> None:
        """evaluate() returns document.title as a non-empty string."""
        assert LightpandaCrawler is not None
        async with LightpandaCrawler() as crawler:
            title = await crawler.evaluate("https://example.com", "document.title")

        assert isinstance(title, str)
        assert len(title) > 0

    @pytest.mark.asyncio
    async def test_extract_structured_css(self) -> None:
        """Extract structured data from example.com via CSS selectors."""
        assert LightpandaCrawler is not None
        async with LightpandaCrawler() as crawler:
            data = await crawler.extract_structured(
                "https://example.com",
                {"heading": "h1", "links": "a"},
            )

        assert isinstance(data, dict)
        assert "heading" in data
        assert "links" in data
        assert isinstance(data["links"], list)
