"""OpenAI 호환 API 서버 — FastAPI 기반.

엔드포인트:
  POST /v1/chat/completions  — OpenAI Chat Completions 형식
  GET  /v1/models            — 사용 가능 모델 목록
  GET  /health               — 서버 상태

실행:
  uvicorn services.openai_compat_server:app --port 8080

의존성: fastapi, uvicorn, pydantic (미설치 시 ImportError)
"""

import json
import logging
import time
import uuid
from typing import Any, AsyncGenerator, Dict, List, Optional

try:
    from fastapi import FastAPI
    from fastapi.responses import JSONResponse, StreamingResponse
    from pydantic import BaseModel
except ImportError as _err:
    raise ImportError(
        "fastapi, pydantic이 필요합니다. "
        "pip install fastapi uvicorn pydantic 으로 설치하세요."
    ) from _err

# Module-level logger; configuration is inherited from the host application.
logger = logging.getLogger(__name__)
app = FastAPI(title="OpenAI Compatible API", version="1.0.0")

# Static model catalogue served by GET /v1/models, shaped like OpenAI "model"
# objects. NOTE(review): hard-coded — not validated against any real backend.
_SUPPORTED_MODELS: List[Dict[str, Any]] = [
    {"id": "gpt-4o",            "object": "model", "owned_by": "openai"},
    {"id": "gpt-4o-mini",       "object": "model", "owned_by": "openai"},
    {"id": "claude-sonnet-4-6", "object": "model", "owned_by": "anthropic"},
    {"id": "claude-opus-4-6",   "object": "model", "owned_by": "anthropic"},
    {"id": "claude-haiku-4-5",  "object": "model", "owned_by": "anthropic"},
    {"id": "gemini-2.5-pro",    "object": "model", "owned_by": "google"},
    {"id": "gemini-2.5-flash",  "object": "model", "owned_by": "google"},
    {"id": "deepseek-chat",     "object": "model", "owned_by": "deepseek"},
]


# ---------------------------------------------------------------------------
# Pydantic 모델
# ---------------------------------------------------------------------------


class ChatCompletionRequest(BaseModel):
    """Request body for POST /v1/chat/completions (OpenAI-compatible subset)."""

    model: str  # target model id (see _SUPPORTED_MODELS)
    messages: List[Dict[str, Any]]  # chat history; presumably {"role": ..., "content": ...} items — confirm against clients
    temperature: Optional[float] = None  # accepted but unused by the stub
    max_tokens: Optional[int] = None  # accepted but unused by the stub
    stream: Optional[bool] = False  # True → SSE streaming response; False/None → single JSON


class ChatCompletionResponse(BaseModel):
    """Non-streaming response body in OpenAI chat.completion format."""

    id: str  # e.g. "chatcmpl-<12 hex chars>"
    object: str  # always "chat.completion" for this endpoint
    created: int  # Unix timestamp (seconds)
    model: str  # echoed from the request
    choices: List[Dict[str, Any]]  # [{"index", "message", "finish_reason"}]
    usage: Optional[Dict[str, Any]] = None  # token counts; zeros in the stub


# ---------------------------------------------------------------------------
# SSE 스트리밍 헬퍼
# ---------------------------------------------------------------------------


async def _stream_response(req: ChatCompletionRequest) -> AsyncGenerator[str, None]:
    """Stream stub completion chunks in OpenAI Server-Sent Events format.

    Yields ``data: <json>\\n\\n`` lines: one chunk per token, then a final
    chunk with ``finish_reason="stop"``, then the ``data: [DONE]`` terminator.

    Fix over previous version: per the OpenAI streaming format, the assistant
    ``role`` is announced only in the FIRST chunk's delta; subsequent deltas
    carry content only. The old code repeated the role in every chunk.

    TODO: stub — replace with a real LLM streaming call
    (use req.model, req.messages).
    """
    rid = f"chatcmpl-{uuid.uuid4().hex[:12]}"
    created = int(time.time())

    def _sse(delta: Dict[str, Any], finish_reason: Optional[str]) -> str:
        # One chunk = one SSE event; shared id/created/model across all chunks.
        payload = {
            "id": rid, "object": "chat.completion.chunk", "created": created,
            "model": req.model,
            "choices": [{"index": 0, "delta": delta,
                         "finish_reason": finish_reason}],
        }
        return f"data: {json.dumps(payload)}\n\n"

    # TODO: replace with real LLM streaming (req.model, req.messages).
    tokens = ["This", " is", " a", " stub", " response", "."]
    for i, token in enumerate(tokens):
        if i == 0:
            # Role appears once, in the first delta (OpenAI convention).
            yield _sse({"role": "assistant", "content": token}, None)
        else:
            yield _sse({"content": token}, None)
    # Final chunk: empty delta, finish_reason marks normal completion.
    yield _sse({}, "stop")
    yield "data: [DONE]\n\n"


# ---------------------------------------------------------------------------
# 엔드포인트
# ---------------------------------------------------------------------------


@app.get("/health")
async def health() -> Dict[str, str]:
    """서버 상태 확인."""
    return {"status": "ok"}


@app.get("/v1/models")
async def list_models() -> Dict[str, Any]:
    """사용 가능한 모델 목록 반환 (OpenAI 형식)."""
    return {"object": "list", "data": _SUPPORTED_MODELS}


@app.post("/v1/chat/completions")
async def chat_completions(request: ChatCompletionRequest) -> Any:
    """OpenAI Chat Completions 호환 엔드포인트.

    stream=True  → SSE(Server-Sent Events) 스트리밍 응답
    stream=False → 단일 JSON 응답 (기본)

    TODO: stub — 실제 LLM 호출로 교체.
    """
    if request.stream:
        return StreamingResponse(
            _stream_response(request),
            media_type="text/event-stream",
            headers={"Cache-Control": "no-cache", "X-Accel-Buffering": "no"},
        )
    # TODO: 실제 LLM 호출로 교체 (request.model, request.messages 활용)
    resp = ChatCompletionResponse(
        id=f"chatcmpl-{uuid.uuid4().hex[:12]}",
        object="chat.completion",
        created=int(time.time()),
        model=request.model,
        choices=[{
            "index": 0,
            "message": {
                "role": "assistant",
                "content": "This is a stub response. TODO: connect real LLM backend.",
            },
            "finish_reason": "stop",
        }],
        usage={"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0},
    )
    return JSONResponse(content=resp.model_dump())
