#!/usr/bin/env python3
"""utils/robots_policy.py 테스트 스위트"""

import sys
import time
from pathlib import Path
from unittest.mock import MagicMock, patch

import pytest

sys.path.insert(0, str(Path(__file__).parent.parent.parent))

from utils.robots_policy import _robots_cache, check_robots_txt


class TestCheckRobotsTxt:
    """Basic behavior tests for check_robots_txt()."""

    # Dotted path of the parser-fetching helper that every test replaces.
    _FETCH_TARGET = "utils.robots_policy._fetch_robots_parser"

    @staticmethod
    def _make_parser(allowed):
        """Return a stand-in parser whose can_fetch() always answers *allowed*."""
        parser = MagicMock()
        parser.can_fetch.return_value = allowed
        return parser

    def setup_method(self):
        """Empty the shared robots cache before each test."""
        _robots_cache.clear()

    def test_allowed_url_returns_true(self):
        """A path the parser allows yields True."""
        with patch(self._FETCH_TARGET, return_value=self._make_parser(True)):
            outcome = check_robots_txt("https://example.com/page")

        assert outcome is True

    def test_disallowed_url_returns_false(self):
        """A path the parser blocks yields False."""
        with patch(self._FETCH_TARGET, return_value=self._make_parser(False)):
            outcome = check_robots_txt("https://example.com/private")

        assert outcome is False

    def test_custom_user_agent_passed_to_parser(self):
        """The user_agent argument is forwarded to can_fetch()."""
        parser = self._make_parser(True)

        with patch(self._FETCH_TARGET, return_value=parser):
            check_robots_txt("https://example.com/page", user_agent="MyBot")

        parser.can_fetch.assert_called_once_with("MyBot", "https://example.com/page")

    def test_default_user_agent_is_wildcard(self):
        """Without an explicit user_agent, can_fetch() receives '*'."""
        parser = self._make_parser(True)

        with patch(self._FETCH_TARGET, return_value=parser):
            check_robots_txt("https://example.com/page")

        parser.can_fetch.assert_called_once_with("*", "https://example.com/page")

    def test_network_error_returns_true(self):
        """An OSError while fetching falls back to allow (True)."""
        failing_fetch = patch(self._FETCH_TARGET, side_effect=OSError("timeout"))

        with failing_fetch:
            outcome = check_robots_txt("https://broken.example.com/page")

        assert outcome is True

    def test_exception_returns_true(self):
        """A generic Exception while fetching falls back to allow (True)."""
        failing_fetch = patch(self._FETCH_TARGET, side_effect=Exception("unexpected"))

        with failing_fetch:
            outcome = check_robots_txt("https://example.com/page")

        assert outcome is True


class TestRobotsCache:
    """Tests for the caching behavior of check_robots_txt()."""

    @staticmethod
    def _patched_fetch():
        """Return a patcher that swaps the fetch helper for an allow-all parser."""
        permissive = MagicMock()
        permissive.can_fetch.return_value = True
        return patch("utils.robots_policy._fetch_robots_parser", return_value=permissive)

    def setup_method(self):
        """Empty the shared robots cache before each test."""
        _robots_cache.clear()

    def test_cache_populated_after_first_call(self):
        """A second lookup on the same origin reuses the cached parser."""
        with self._patched_fetch() as fetch_mock:
            for url in ("https://example.com/page", "https://example.com/other"):
                check_robots_txt(url)

        # robots.txt is per-origin, so two calls must trigger only one fetch.
        assert fetch_mock.call_count == 1

    def test_different_domains_fetch_separately(self):
        """Each distinct domain triggers its own fetch."""
        with self._patched_fetch() as fetch_mock:
            for url in ("https://example.com/page", "https://other.com/page"):
                check_robots_txt(url)

        assert fetch_mock.call_count == 2

    def test_cache_respects_ttl(self):
        """An entry older than the TTL is fetched again."""
        origin = "https://example.com"

        with self._patched_fetch() as fetch_mock:
            check_robots_txt("https://example.com/page")
            # Backdate the cached entry by 3700 s so it looks expired; this
            # assumes the module's TTL is 3600 s and that timestamps come from
            # time.monotonic() -- confirm against utils.robots_policy.
            cached_parser, _ = _robots_cache[origin]
            _robots_cache[origin] = (cached_parser, time.monotonic() - 3700)
            check_robots_txt("https://example.com/page")

        assert fetch_mock.call_count == 2

    def test_cache_key_is_scheme_plus_netloc(self):
        """Cache entries are keyed by 'scheme://netloc'."""
        with self._patched_fetch():
            check_robots_txt("https://example.com/a/b/c")

        assert "https://example.com" in _robots_cache


class TestInvalidUrls:
    """Tests for how check_robots_txt() treats malformed URLs."""

    def setup_method(self):
        """Empty the shared robots cache before each test."""
        _robots_cache.clear()

    def test_empty_url_returns_true(self):
        """An empty URL string is treated as allowed (True)."""
        assert check_robots_txt("") is True

    def test_url_without_scheme_returns_true(self):
        """A URL lacking a scheme is treated as allowed (True)."""
        assert check_robots_txt("example.com/page") is True
