"""YouTube 요약 파이프라인 — 메인 오케스트레이터

크론 6시간마다 실행:
  YouTube API 신규 영상 → 전사 → 요약 → Drive 업로드 → Firestore 저장 → Telegram 보고
"""

import logging
import subprocess
import time
from dataclasses import dataclass, field
from datetime import datetime, timezone

from . import config
from .drive_uploader import (
    build_summary_markdown,
    build_transcript_markdown,
    upload_markdown,
    upload_processing_log,
)
from .firestore_writer import (
    _get_db,
    is_video_processed,
    save_insurance_chunk,
    save_youtube_knowledge,
)
from .summarizer import generate_embedding, generate_summary
from .transcriber import transcribe
from .youtube_api import (
    get_active_channels,
    get_new_videos,
    get_uploads_playlist_id,
    get_video_duration,
)
from .youtube_api import update_last_crawled as yt_update_last_crawled

# Configure root logging at import time: INFO and above, with a timestamp,
# level and logger name on every record.
logging.basicConfig(
    format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
    level=logging.INFO,
)
# Named logger shared by every function in this module.
logger = logging.getLogger("youtube-pipeline")


@dataclass
class VideoLog:
    """Per-video processing record collected during one pipeline run.

    One instance is created for every new video seen by ``run_pipeline`` and is
    later rendered into the Markdown report by ``build_processing_log``.
    """

    video_id: str  # video identifier taken from the video listing
    title: str  # video title; used as the report heading
    channel_name: str  # display name of the owning channel
    transcription_source: str = ""  # copied from `transcript.source`; empty until transcription ran
    transcript_length: int = 0  # len() of the transcript text
    summary_length: int = 0  # len() of the generated summary; 0 when no summary was produced
    status: str = "pending"  # set by run_pipeline to 'completed' | 'skipped' | 'failed'
    error: str = ""  # str() of the exception when status is 'failed'
    drive_summary_url: str = ""  # value returned by upload_markdown for the summary file
    drive_transcript_url: str = ""  # value returned by upload_markdown for the transcript file
    processing_time_ms: int = 0  # wall-clock time spent on this video, in milliseconds
    has_numeric_warnings: bool = False  # copied from the summarizer result


@dataclass
class RunStats:
    """Aggregate counters and per-video logs for one pipeline run."""

    total_channels: int = 0  # number of active channels loaded for this run
    total_videos: int = 0  # new videos encountered across all channels
    processed: int = 0  # videos that finished with status 'completed'
    skipped: int = 0  # videos skipped because they were already processed
    failed: int = 0  # videos whose processing raised an exception
    # One VideoLog per encountered video, in processing order.
    video_logs: list[VideoLog] = field(default_factory=list)


def check_whisper_health() -> bool:
    """Report whether the Whisper service answers its health endpoint.

    Sends a GET request to ``config.WHISPER_HEALTH_ENDPOINT`` with a 5-second
    timeout.

    Returns:
        True only when the response status code is 200. Any other status, or
        any exception raised while making the request (logged as a warning),
        yields False.
    """
    # Imported lazily so that merely importing this module does not require
    # the `requests` package.
    import requests

    try:
        response = requests.get(config.WHISPER_HEALTH_ENDPOINT, timeout=5)
        is_healthy = response.status_code == 200
    except Exception as exc:
        logger.warning("Whisper health check 실패: %s", exc)
        is_healthy = False
    return is_healthy


def build_processing_log(stats: RunStats, start_time: datetime) -> str:
    """Render the run statistics as a Markdown report.

    Args:
        stats: Counters and per-video logs accumulated during the run.
        start_time: Timezone-aware moment the run started; used both for the
            displayed start time and to compute the elapsed seconds.

    Returns:
        The report as a single newline-joined string: a header with the
        totals, a horizontal rule, then one section per video log.
    """
    elapsed_seconds = (datetime.now(timezone.utc) - start_time).total_seconds()
    started_at = start_time.strftime('%Y-%m-%d %H:%M:%S')

    report = [
        "# YouTube 파이프라인 처리 로그",
        "",
        f"- **실행 시각**: {started_at} UTC",
        f"- **소요 시간**: {elapsed_seconds:.1f}초",
        f"- **채널 수**: {stats.total_channels}",
        f"- **영상 수**: {stats.total_videos}",
        f"- **처리**: {stats.processed} | **스킵**: {stats.skipped} | **실패**: {stats.failed}",
        "",
        "---",
        "",
    ]

    for entry in stats.video_logs:
        # Anything that is neither completed nor skipped is shown as a failure.
        icon = (
            "✅"
            if entry.status == "completed"
            else "⏭️"
            if entry.status == "skipped"
            else "❌"
        )
        report.extend(
            (
                f"### {icon} {entry.title}",
                f"- **채널**: {entry.channel_name}",
                f"- **영상 ID**: {entry.video_id}",
                f"- **전사**: {entry.transcription_source} ({entry.transcript_length:,}자)",
            )
        )
        # Optional lines appear only when the corresponding field is set.
        if entry.summary_length:
            report.append(f"- **요약**: {entry.summary_length:,}자")
        if entry.has_numeric_warnings:
            report.append("- ⚠️ **수치 포함 경고**")
        if entry.error:
            report.append(f"- **에러**: {entry.error}")
        report.extend((f"- **처리시간**: {entry.processing_time_ms}ms", ""))

    return "\n".join(report)


def send_telegram_report(stats: RunStats, start_time: datetime) -> None:
    """Send the run report to Telegram as a Markdown file attachment.

    The report produced by ``build_processing_log`` is written to
    ``/tmp/youtube-pipeline-<YYYY-MM-DD>.md`` and handed to the ``cokacdir``
    CLI. Reporting is best-effort: a failure to write the file or to run the
    CLI is logged and never raised to the caller.

    Args:
        stats: Counters and per-video logs accumulated during the run.
        start_time: Moment the run started; also determines the file name.
    """
    log_content = build_processing_log(stats, start_time)
    date_str = start_time.strftime("%Y-%m-%d")
    log_path = f"/tmp/youtube-pipeline-{date_str}.md"

    try:
        # Writing the file is part of the best-effort step: an unwritable /tmp
        # must not abort the pipeline after all videos were already processed.
        with open(log_path, "w", encoding="utf-8") as f:
            f.write(log_content)

        subprocess.run(
            [
                "/usr/local/bin/cokacdir",
                "--sendfile",
                log_path,
                "--chat",
                config.TELEGRAM_CHAT_ID,
                "--key",
                config.COKACDIR_KEY,
            ],
            check=True,
            capture_output=True,
            text=True,
        )
    except subprocess.CalledProcessError as e:
        # str(e) embeds the full argv, which contains COKACDIR_KEY. Log only
        # the exit code and the captured stderr so the secret stays out of logs.
        logger.error(
            "Telegram 보고 실패: exit code %d, stderr: %s",
            e.returncode,
            (e.stderr or "").strip(),
        )
    except Exception as e:
        logger.error("Telegram 보고 실패: %s", e)
    else:
        logger.info("Telegram 보고 완료")


def _resolve_cutoff(last_crawled_at: object) -> datetime:
    """Convert a stored ``lastCrawledAt`` value into an aware UTC datetime.

    Args:
        last_crawled_at: A ``datetime`` (naive values are taken to be UTC), any
            object exposing a ``timestamp()`` method returning POSIX seconds,
            or ``None`` / an unsupported value.

    Returns:
        The cutoff in UTC; 2000-01-01 UTC when no usable value is stored.
    """
    if isinstance(last_crawled_at, datetime):
        # This check must come before the duck-typed branch below: every
        # datetime also has .timestamp(), and calling it on a naive value would
        # interpret that value in the machine's local time zone, not in UTC.
        if last_crawled_at.tzinfo is None:
            return last_crawled_at.replace(tzinfo=timezone.utc)
        return last_crawled_at.astimezone(timezone.utc)

    if hasattr(last_crawled_at, "timestamp"):
        # Timestamp-like object that is not a datetime.
        return datetime.fromtimestamp(last_crawled_at.timestamp(), tz=timezone.utc)

    return datetime(2000, 1, 1, tzinfo=timezone.utc)


def _process_video(
    *,
    vlog: "VideoLog",
    channel_id: str,
    published_at: str,
    video_date: str,
    description: str,
    db,
    video_start: float,
) -> bool:
    """Transcribe, summarise, upload and persist a single video.

    Progress details (transcript source/length, summary length, Drive URLs,
    processing time, numeric-warning flag) are recorded on *vlog* as each step
    finishes, so they survive even if a later step raises.

    Args:
        vlog: Log record for the video; supplies video ID, title and channel name.
        channel_id: ID of the owning channel, stored with the metadata.
        published_at: Publication timestamp string from the video listing.
        video_date: First 10 characters of *published_at*.
        description: Video description passed to the transcriber.
        db: Database handle passed through to the Firestore helpers.
        video_start: ``time.time()`` value taken when processing began.

    Returns:
        False when the video was already processed and nothing was done;
        True when every step completed.

    Raises:
        Exception: Whatever any of the processing steps raises; the caller
            records it as a failure.
    """
    video_id = vlog.video_id
    title = vlog.title
    channel_name = vlog.channel_name

    # Duplicate check
    if is_video_processed(video_id, db):
        logger.info("스킵 (이미 처리됨): %s", title)
        return False

    duration = get_video_duration(video_id, config.YOUTUBE_API_KEY)

    transcript = transcribe(video_id, title, description)
    vlog.transcription_source = transcript.source
    vlog.transcript_length = len(transcript.text)

    # When the transcriber only had title/description to work with, no
    # summary, transcript upload or insurance chunk is produced.
    is_title_only = transcript.source == "title_description"

    summary_text = ""
    embedding: list[float] = []
    has_numeric_warnings = False

    if not is_title_only:
        summary_result = generate_summary(transcript.text)
        summary_text = summary_result.summary_text
        has_numeric_warnings = summary_result.has_numeric_warnings
        vlog.summary_length = len(summary_text)
        vlog.has_numeric_warnings = has_numeric_warnings

        # Only the first 8000 characters of the summary are embedded.
        embedding = generate_embedding(summary_text[:8000])

    # Drive uploads
    drive_summary_url = ""
    drive_transcript_url = ""

    if summary_text:
        md_summary = build_summary_markdown(
            summary_text,
            title,
            channel_name,
            video_id,
            published_at,
            duration,
            transcript.source,
            len(transcript.text),
        )
        drive_summary_url = upload_markdown(
            md_summary, channel_name, title, video_date, "_요약"
        )
        vlog.drive_summary_url = drive_summary_url

    if not is_title_only:
        md_transcript = build_transcript_markdown(
            transcript.text,
            title,
            channel_name,
            video_id,
            published_at,
            duration,
            transcript.source,
        )
        drive_transcript_url = upload_markdown(
            md_transcript, channel_name, title, video_date, "_전사"
        )
        vlog.drive_transcript_url = drive_transcript_url

    # The insurance chunk is stored only when there is both a summary and an
    # embedding for it.
    if not is_title_only and summary_text and embedding:
        save_insurance_chunk(
            video_id=video_id,
            channel_name=channel_name,
            title=title,
            summary=summary_text,
            embedding=embedding,
            video_date=video_date,
            drive_url=drive_summary_url,
            db=db,
        )

    # The metadata record is always written, even for title-only videos.
    processing_time_ms = int((time.time() - video_start) * 1000)
    vlog.processing_time_ms = processing_time_ms

    save_youtube_knowledge(
        data={
            "videoId": video_id,
            "channelId": channel_id,
            "channelName": channel_name,
            "title": title,
            "publishedAt": published_at,
            "chunkText": summary_text[:2000] if summary_text else "",
            "embedding": embedding,
            "relatedCompanyIds": [],
            "relatedProductIds": [],
            "conflictsWithPolicy": False,
            "sourceType": "youtube",
            "driveUrl": drive_summary_url,
            "hasTranscript": not is_title_only,
            "transcriptionSource": transcript.source,
            "driveTranscriptUrl": drive_transcript_url,
            "summaryStatus": "done" if summary_text else "skipped",
            "transcriptionLength": len(transcript.text),
            "processingTimeMs": processing_time_ms,
            "hasNumericWarnings": has_numeric_warnings,
        },
        db=db,
    )

    logger.info(
        "처리 완료: %s (%s, %d자)",
        title,
        transcript.source,
        len(transcript.text),
    )
    return True


def run_pipeline() -> RunStats:
    """Run the whole pipeline once and return its statistics.

    Steps: Whisper health check, load active channels, process every new video
    of every channel, upload the processing log to Drive, then send the
    Telegram report. A failure in one video or one channel is logged and does
    not stop the remaining work.

    Returns:
        The counters and per-video logs collected during the run.

    Raises:
        Exception: Errors from the database initialisation or the channel
            listing are not caught here and propagate to the caller.
    """
    start_time = datetime.now(timezone.utc)
    stats = RunStats()

    # 0. Whisper health check.
    # NOTE(review): the result is only logged; it is not passed on to
    # transcribe(), so nothing visible here actually disables the fallback.
    if check_whisper_health():
        logger.info("Whisper 서비스 정상")
    else:
        logger.warning("Whisper 서비스 미응답 — Whisper fallback 비활성")

    # 1. Initialise the database handle and load the active channels.
    db = _get_db()
    channels = get_active_channels(db)

    if not channels:
        logger.info("등록된 활성 채널 없음")
        send_telegram_report(stats, start_time)
        return stats

    stats.total_channels = len(channels)
    logger.info("활성 채널 %d개 로드", len(channels))

    # 2. Per-channel processing.
    for channel in channels:
        channel_id: str = channel.get("channelId", "")
        channel_name: str = channel.get("channelName", "알수없는채널")
        last_crawled_at = channel.get("lastCrawledAt")
        doc_id: str = channel.get("_doc_id", "")

        logger.info("채널 처리 시작: %s (%s)", channel_name, channel_id)

        try:
            cutoff = _resolve_cutoff(last_crawled_at)

            uploads_id = get_uploads_playlist_id(channel_id, config.YOUTUBE_API_KEY)
            if not uploads_id:
                logger.warning("채널 %s: uploads playlist 없음", channel_name)
                continue

            new_videos = get_new_videos(uploads_id, cutoff, config.YOUTUBE_API_KEY)
            logger.info("채널 %s: 신규 영상 %d개", channel_name, len(new_videos))

            for video_info in new_videos:
                video_id: str = video_info["video_id"]
                title: str = video_info["title"]
                published_at: str = video_info["published_at"]
                description: str = video_info.get("description", "")
                video_date: str = published_at[:10]

                stats.total_videos += 1
                vlog = VideoLog(
                    video_id=video_id, title=title, channel_name=channel_name
                )
                video_start = time.time()

                try:
                    completed = _process_video(
                        vlog=vlog,
                        channel_id=channel_id,
                        published_at=published_at,
                        video_date=video_date,
                        description=description,
                        db=db,
                        video_start=video_start,
                    )
                except Exception as e:
                    # One bad video must not stop the rest of the channel.
                    vlog.status = "failed"
                    vlog.error = str(e)
                    vlog.processing_time_ms = int((time.time() - video_start) * 1000)
                    stats.failed += 1
                    logger.error("영상 처리 실패 (%s): %s", title, e)
                else:
                    if completed:
                        vlog.status = "completed"
                        stats.processed += 1
                    else:
                        vlog.status = "skipped"
                        stats.skipped += 1

                stats.video_logs.append(vlog)

            # NOTE(review): lastCrawledAt is advanced even when some videos in
            # this batch failed; whether those videos are ever retried depends
            # on how update_last_crawled sets the timestamp — confirm.
            if new_videos and doc_id:
                yt_update_last_crawled(db, doc_id)
                logger.info("채널 %s lastCrawledAt 갱신 완료", channel_name)

        except Exception as e:
            # One bad channel must not stop the remaining channels.
            logger.error("채널 처리 오류 (%s): %s", channel_name, e)

    # 3. Upload the processing log to Drive (only when something was seen).
    if stats.video_logs:
        try:
            log_content = build_processing_log(stats, start_time)
            date_str = start_time.strftime("%Y-%m-%d")
            upload_processing_log(log_content, date_str)
            logger.info("처리 로그 Drive 업로드 완료")
        except Exception as e:
            logger.error("처리 로그 업로드 실패: %s", e)

    # 4. Telegram report.
    send_telegram_report(stats, start_time)

    return stats


def main() -> None:
    """CLI entry point: run the pipeline once and log the final counters."""
    logger.info("YouTube 파이프라인 시작")
    result = run_pipeline()

    # Channel count, video count, then the processed / skipped / failed split.
    totals = (
        result.total_channels,
        result.total_videos,
        result.processed,
        result.skipped,
        result.failed,
    )
    logger.info(
        "파이프라인 완료 — 채널: %d, 영상: %d (처리: %d, 스킵: %d, 실패: %d)",
        *totals,
    )


# Run the pipeline only when this module is executed as the main program.
# NOTE: the module uses relative imports (`from . import config`), so it has
# to be launched as part of its package, e.g. `python -m <package>.<module>`.
if __name__ == "__main__":
    main()
