#!/usr/bin/env python3
"""IDS Phase 3 — Mobile Prototype Korean OCR Validator.

Loads a rendered prototype PNG, extracts Korean text via pytesseract (kor+eng),
and verifies that all expected scenario keywords are present (100% match).

Exit codes:
    0 — all keywords matched
    1 — one or more keywords missing, or the image file was not found
    2 — pytesseract / dependency missing
"""

from __future__ import annotations

import argparse
import sys
from pathlib import Path

# Expected keywords per prototype scenario.
# Keys are the scenario ids accepted by the --scenario command-line option;
# each value lists the strings that must all appear (as plain substrings) in
# the OCR output of that scenario's image for validation to pass.
SCENARIO_KEYWORDS: dict[str, list[str]] = {
    "signup_step1": ["전화번호", "휴대폰", "인증번호"],
    "signup_step2": ["인증번호", "다시", "확인"],
    "signup_step3": ["약관", "동의", "필수"],
    "dashboard": ["보험", "월", "원", "추천"],
    "insurance_compare": ["보험료", "비교", "현대해상"],
    "cardnews_publish": ["카드뉴스", "발행", "미리보기"],
    "ai_analysis": ["AI", "분석", "점수"],
}


def parse_args(argv: list[str] | None = None) -> argparse.Namespace:
    """Parse the validator's command-line options.

    Args:
        argv: Argument strings to parse. When ``None`` (the default),
            argparse reads ``sys.argv[1:]``, so existing zero-argument
            callers behave exactly as before. Passing an explicit list
            allows programmatic invocation.

    Returns:
        Namespace with ``image`` (path string), ``scenario`` (one of the
        keys of ``SCENARIO_KEYWORDS``) and ``lang`` (Tesseract language
        code, ``"kor+eng"`` unless overridden).

    Raises:
        SystemExit: Raised by argparse on invalid or missing arguments.
    """
    parser = argparse.ArgumentParser(description="Korean OCR validator")
    parser.add_argument("--image", required=True, help="Absolute path to PNG")
    parser.add_argument(
        "--scenario",
        required=True,
        # Sorted so the choices shown in --help and error output are stable.
        choices=sorted(SCENARIO_KEYWORDS),
        help="Scenario id to look up expected keywords",
    )
    parser.add_argument(
        "--lang",
        default="kor+eng",
        help="Tesseract language code (default kor+eng)",
    )
    return parser.parse_args(argv)


def extract_text(image_path: Path, lang: str) -> str:
    """Run Tesseract OCR on an image file and return the recognised text.

    The OCR dependencies are imported lazily so that a missing installation
    is reported with a dedicated exit code instead of an import-time
    traceback.

    Args:
        image_path: Location of the image to read.
        lang: Tesseract language code(s), e.g. ``"kor+eng"``.

    Returns:
        The text produced by ``pytesseract.image_to_string``.

    Raises:
        SystemExit: With code 2 when pytesseract, Pillow or the tesseract
            executable is unavailable; with code 1 when ``image_path`` is
            not an existing file.
    """
    try:
        import pytesseract  # type: ignore[import-untyped]
        from PIL import Image  # type: ignore[import-untyped]
    except ImportError as exc:
        print(
            f"pytesseract/Pillow unavailable: {exc}. "
            "Install with: pip install pytesseract pillow && apt install tesseract-ocr-kor",
            file=sys.stderr,
        )
        sys.exit(2)

    if not image_path.is_file():
        print(f"Image not found: {image_path}", file=sys.stderr)
        sys.exit(1)

    try:
        # Image.open keeps the file handle open for lazy loading; the
        # context manager guarantees it is closed once OCR has finished.
        with Image.open(image_path) as image:
            return pytesseract.image_to_string(image, lang=lang)
    except pytesseract.TesseractNotFoundError as exc:
        # The Python wrapper is installed but the tesseract binary is not:
        # report it as a missing dependency (exit code 2), not a traceback.
        print(
            f"tesseract executable unavailable: {exc}. "
            "Install with: apt install tesseract-ocr tesseract-ocr-kor",
            file=sys.stderr,
        )
        sys.exit(2)


def main() -> int:
    """Validate one prototype image against its scenario keywords.

    Parses the command line, OCRs the requested image and checks that every
    keyword registered for the chosen scenario occurs as a substring of the
    OCR output.

    Returns:
        0 when all keywords are found; 1 when at least one is missing (the
        missing keywords and the full OCR text are written to stderr).
    """
    options = parse_args()
    ocr_output = extract_text(Path(options.image), options.lang)
    keywords = SCENARIO_KEYWORDS[options.scenario]

    # Collect, in declaration order, every keyword the OCR text lacks.
    absent = []
    for keyword in keywords:
        if keyword not in ocr_output:
            absent.append(keyword)

    if not absent:
        print(f"OK: all {len(keywords)} keywords matched in {options.image}")
        return 0

    # Dump the raw OCR text so a failure can be diagnosed from the log alone.
    print(f"FAIL: missing keywords: {absent}", file=sys.stderr)
    print("--- OCR text ---", file=sys.stderr)
    print(ocr_output, file=sys.stderr)
    return 1


# Script entry point: propagate main()'s return value as the process exit code.
if __name__ == "__main__":
    raise SystemExit(main())
