"""
hybrid_server.py - FastAPI-based PDF conversion server using docling.

Endpoints:
    POST /v1/convert/file  - Upload a PDF and receive DoclingDocument as JSON
    GET  /health           - Health check
"""

import asyncio
import logging
import os
import sys
import tempfile
import threading
from contextlib import asynccontextmanager
from typing import Any, Dict

# Ensure the per-user site-packages directory is importable (docling is
# installed under ~/.local).  The "pythonX.Y" path component is taken from the
# running interpreter instead of being hard-coded, so the directory that gets
# added always matches the Python version executing this module.
_LOCAL_SITE = os.path.expanduser(
    f"~/.local/lib/python{sys.version_info.major}.{sys.version_info.minor}"
    "/site-packages"
)
if _LOCAL_SITE not in sys.path:
    # Prepended so this directory is searched before the other path entries.
    sys.path.insert(0, _LOCAL_SITE)

from docling.document_converter import DocumentConverter
from fastapi import FastAPI, File, HTTPException, UploadFile
from fastapi.responses import JSONResponse

# ---------------------------------------------------------------------------
# Logging
# ---------------------------------------------------------------------------
# Configure root logging at import time: INFO threshold, and each record is
# rendered as "<timestamp> [<LEVEL>] <logger name>: <message>".
logging.basicConfig(
    format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
    level=logging.INFO,
)
# Module-wide logger used by every function in this file.
logger = logging.getLogger("hybrid_server")

# ---------------------------------------------------------------------------
# Constants
# ---------------------------------------------------------------------------
# Upper bound on an accepted upload: 100 MB, expressed in bytes.
MAX_UPLOAD_BYTES = 100 * 1024**2

# ---------------------------------------------------------------------------
# Singleton converter with thread-safe lazy initialisation
# ---------------------------------------------------------------------------
_converter: DocumentConverter | None = None
_converter_lock = threading.Lock()


def get_converter() -> DocumentConverter:
    """Return the shared DocumentConverter, building it on the first call.

    The instance is stored in the module-level ``_converter`` variable.
    Creation happens while holding ``_converter_lock``, and the variable is
    re-checked after the lock is acquired so that only one thread constructs
    the converter.
    """
    global _converter

    # Fast path: already built, no locking needed.
    if _converter is not None:
        return _converter

    with _converter_lock:
        # Another thread may have finished construction while we waited.
        if _converter is None:
            logger.info("Initialising DocumentConverter (model loading) …")
            _converter = DocumentConverter()
            logger.info("DocumentConverter ready.")
        return _converter


# ---------------------------------------------------------------------------
# Lifespan: warm up converter at startup
# ---------------------------------------------------------------------------
@asynccontextmanager
async def lifespan(app: FastAPI):  # type: ignore[type-arg]
    """Startup/shutdown hook handed to the FastAPI application.

    Before yielding, the converter is created via ``get_converter`` in a
    worker thread so that the event loop is not blocked while it is built.
    After the ``yield`` resumes, a shutdown message is logged.
    """
    logger.info("Server starting up – warming up DocumentConverter …")
    warm_up = asyncio.to_thread(get_converter)
    await warm_up
    logger.info("Startup complete.")

    # Control returns here once the application is shutting down.
    yield

    logger.info("Server shutting down.")


# ---------------------------------------------------------------------------
# App
# ---------------------------------------------------------------------------
# The application object; `lifespan` is registered so the converter is warmed
# up at startup rather than on the first request.
app = FastAPI(
    lifespan=lifespan,
    title="docling-fast hybrid server",
    version="1.0.0",
    description="Converts PDF files to structured JSON using docling.",
)


# ---------------------------------------------------------------------------
# Routes
# ---------------------------------------------------------------------------
@app.get("/health")
async def health() -> Dict[str, Any]:
    """Return service health status."""
    # Static payload: no dependency (converter, disk) is probed here.
    payload: Dict[str, Any] = dict(status="ok", service="hybrid_server")
    return payload


def _convert_pdf(tmp_path: str) -> Dict[str, Any]:
    """Convert the file at *tmp_path* with docling and return it as a dict.

    This is a blocking call; ``convert_file`` runs it via
    ``asyncio.to_thread``.  The ``convert`` call is made while holding
    ``_converter_lock``, so concurrent callers of this function run their
    conversions one at a time.
    """
    # Fetch the converter *before* taking the lock: get_converter() may need
    # to acquire the same (non-reentrant) lock itself on first use.
    docling_converter = get_converter()

    _converter_lock.acquire()
    try:
        conversion = docling_converter.convert(tmp_path)
    finally:
        _converter_lock.release()

    # Export happens outside the lock.
    return conversion.document.export_to_dict()


@app.post("/v1/convert/file")
async def convert_file(file: UploadFile = File(...)) -> JSONResponse:
    """
    Accept a PDF upload and return the DoclingDocument as JSON.

    - Maximum file size: 100 MB (larger uploads are rejected with 413)
    - Empty uploads are rejected with 400
    - Conversion failures are reported with 500
    - Processing is delegated to a thread pool to avoid blocking the event loop
    """
    # Granularity used when copying the upload into the temp file.
    chunk_size = 1024 * 1024

    # Keep the client's file extension on the temp file, defaulting to ".pdf".
    suffix = os.path.splitext(file.filename or "upload")[1] or ".pdf"
    tmp_file = tempfile.NamedTemporaryFile(
        delete=False, suffix=suffix, prefix="hybrid_upload_"
    )
    tmp_path = tmp_file.name
    try:
        # ---- stream upload to temp file, enforcing the size limit ----------
        # Copying chunk by chunk means the whole upload is never held in
        # memory at once, and an oversized upload is rejected as soon as the
        # limit is crossed instead of after reading all of it.
        received = 0
        with tmp_file:  # closes the handle even if a read/write fails
            while chunk := await file.read(chunk_size):
                received += len(chunk)
                if received > MAX_UPLOAD_BYTES:
                    raise HTTPException(
                        status_code=413,
                        detail=f"File too large. Maximum allowed size is {MAX_UPLOAD_BYTES // (1024 * 1024)} MB.",
                    )
                tmp_file.write(chunk)

        # An empty body cannot be a valid document; report it as a client
        # error rather than letting the converter fail with a 500.
        if received == 0:
            raise HTTPException(
                status_code=400,
                detail="Uploaded file is empty.",
            )

        logger.info("Received file: %s (%d bytes)", file.filename, received)

        # ---- convert in a thread (non-blocking) ----------------------------
        try:
            doc_dict = await asyncio.to_thread(_convert_pdf, tmp_path)
        except Exception as exc:
            logger.exception("Conversion failed for %s", file.filename)
            raise HTTPException(
                status_code=500,
                detail=f"Conversion error: {exc}",
            ) from exc
    finally:
        # ---- clean up temp file --------------------------------------------
        # Best effort: a failed removal must not mask the real outcome of the
        # request, but it is logged so leaked files can be noticed.
        try:
            os.unlink(tmp_path)
        except OSError:
            logger.warning(
                "Could not remove temporary file %s", tmp_path, exc_info=True
            )

    return JSONResponse(content=doc_dict)
