"""Utility to convert DevTools "Copy as cURL" commands to Scrapling Fetcher arguments."""

import shlex
from typing import Optional

from scrapling.core.utils._shell import _CookieParser, _ParseHeaders

# Flags that consume a value argument
_VALUE_FLAGS: dict[str, str] = {
    "-H": "header",
    "--header": "header",
    "-b": "cookie",
    "--cookie": "cookie",
    "-X": "method",
    "--request": "method",
    "-d": "data",
    "--data": "data",
    "--data-raw": "data",
    "--data-binary": "data",
    "-x": "proxy",
    "--proxy": "proxy",
}

# Flags that are no-ops (single token, no value)
_IGNORED_FLAGS: frozenset[str] = frozenset(["--compressed"])


def _normalize_multiline(curl_command: str) -> str:
    """Join backslash-continued lines into a single line."""
    return curl_command.replace("\\\n", " ")


def _tokenize(curl_command: str) -> list[str]:
    """Split a curl command into shell-style tokens, honoring quoting.

    Line continuations are collapsed first so shlex sees a single line.
    """
    return shlex.split(_normalize_multiline(curl_command))


def _is_flag(token: str) -> bool:
    """Return True if the token looks like a CLI flag."""
    return token.startswith("-")


def parse_curl(curl_command: str) -> dict[str, object]:
    """Parse a curl command string into Scrapling Fetcher call arguments.

    Args:
        curl_command: A curl command string, optionally with backslash line
            continuations (as produced by browser DevTools "Copy as cURL").

    Returns:
        A dict with keys:
            - ``url`` (str)
            - ``method`` (str): the uppercased ``-X``/``--request`` value when
              given, otherwise "POST" when a body is present, else "GET"
            - ``headers`` (dict[str, str]): HTTP headers (Cookie header excluded)
            - ``cookies`` (dict[str, str]): parsed cookies
            - ``data`` (str | None): request body, present only when provided;
              repeated data flags are joined with "&" (matching curl)
            - ``proxy`` (str | None): proxy URL, present only when provided

    Raises:
        ValueError: If the command is empty, does not start with ``curl``,
            or contains no URL.
    """
    stripped = curl_command.strip()
    if not stripped:
        raise ValueError("curl command must not be empty")

    try:
        tokens = _tokenize(stripped)
    except ValueError as exc:
        raise ValueError(f"Failed to tokenize curl command: {exc}") from exc

    if not tokens or tokens[0] != "curl":
        first = repr(tokens[0]) if tokens else "'(empty)'"
        raise ValueError(f"Command must start with 'curl', got: {first}")

    # Parse state accumulated while walking the token list
    raw_header_lines: list[str] = []
    raw_cookie_strings: list[str] = []
    data_parts: list[str] = []  # curl concatenates repeated data flags with "&"
    explicit_method: Optional[str] = None
    proxy: Optional[str] = None
    url_candidates: list[str] = []

    def record(key: str, value: str) -> None:
        """Store a flag's value according to its kind (keys of _VALUE_FLAGS)."""
        nonlocal explicit_method, proxy
        if key == "header":
            raw_header_lines.append(value)
        elif key == "cookie":
            raw_cookie_strings.append(value)
        elif key == "method":
            explicit_method = value.upper()
        elif key == "data":
            data_parts.append(value)
        elif key == "proxy":
            proxy = value

    idx = 1  # skip "curl"
    while idx < len(tokens):
        token = tokens[idx]

        if token in _IGNORED_FLAGS:
            idx += 1
            continue

        # Flag with its value as the next token (e.g. -H "Accept: text/html")
        if token in _VALUE_FLAGS:
            if idx + 1 >= len(tokens):
                raise ValueError(f"Flag {token!r} requires a value but got none")
            record(_VALUE_FLAGS[token], tokens[idx + 1])
            idx += 2
            continue

        # Compound flag=value (e.g. --data=foo)
        if "=" in token and _is_flag(token):
            flag, _, value = token.partition("=")
            if flag in _VALUE_FLAGS:
                record(_VALUE_FLAGS[flag], value)
                idx += 1
                continue

        # Unknown flags (single-token, no value) — skip silently.
        # NOTE(review): an unknown flag that *does* consume a value (e.g. -A,
        # -u, -e) leaves its value token behind, where it is then mistaken for
        # a URL candidate — extend _VALUE_FLAGS/_IGNORED_FLAGS if that bites.
        if _is_flag(token):
            idx += 1
            continue

        # Non-flag token: URL candidate (the first one wins below)
        url_candidates.append(token)
        idx += 1

    if not url_candidates:
        raise ValueError("No URL found in curl command")

    url = url_candidates[0]

    # Parse headers; _ParseHeaders separates Cookie header into cookies dict
    headers, cookies_from_headers = _ParseHeaders(raw_header_lines, parse_cookies=True)

    # Parse cookies from -b / --cookie flags
    cookies_from_b: dict[str, str] = {}
    for cookie_str in raw_cookie_strings:
        cookies_from_b.update(dict(_CookieParser(cookie_str)))

    # Merge cookies; -b/--cookie values win over Cookie-header duplicates
    cookies: dict[str, str] = {**cookies_from_headers, **cookies_from_b}

    # curl joins repeated -d/--data/--data-raw/--data-binary values with "&"
    raw_data: Optional[str] = "&".join(data_parts) if data_parts else None

    # Explicit -X wins; otherwise a body implies POST (curl's own default)
    if explicit_method is not None:
        method = explicit_method
    elif raw_data is not None:
        method = "POST"
    else:
        method = "GET"

    result: dict[str, object] = {
        "url": url,
        "method": method,
        "headers": headers,
        "cookies": cookies,
    }

    if raw_data is not None:
        result["data"] = raw_data

    if proxy is not None:
        result["proxy"] = proxy

    return result
