"""IDS Phase 0.5 — Lite Evaluator (5항목, OCR 비의존, 정적 분석).

목적:
    Phase 0 SSOT(mapping-tables.md #5-A + target-audience.md §7)를
    LayoutMeta(JSON Schema 강제)로 받아 5항목을 즉시 PASS/WARN/FAIL 평가.

mapping #4(scripts/quality_evaluator.py: PIL 회귀 게이트)와 책임 분리:
    - mapping #4 = 27 PNG 렌더 결과의 정량 회귀(silent corruption 차단).
    - mapping #5(본 모듈) = LayoutMeta + PIL Image 정적 분석으로 디자인 품질 게이트.

5항목 (외부 명명 = task-2446 task.md / 내부 알고리즘 = mapping-tables 5-A):
    L1 Contrast      — 글리프 픽셀 contrast 분포 5th/95th percentile (mapping 5-A L1)
    L2 Margin        — safe-area(SSOT 6.7%=72px) 침범 검사 (mapping 5-A L2)
    L3 Hierarchy     — 헤딩/서브헤드/본문 비율 + dq-rules absolute_min(40) (mapping 5-A L3 + dq-rules font_sizes)
    L4 Color Token   — palette 4색 이내 + AI 퍼플 검출 (mapping 5-A L5 + dq-rules color)
    L5 Typography    — Pretendard / Noto Sans KR + weight 600+ + 자간/행간 (dq-rules font_pairing)

참조:
    - SSOT: memory/plans/ids-phase4-design-system/mapping-tables.md (lines 430~620)
    - SSOT: memory/plans/ids-phase4-design-system/target-audience.md §7 (lines 238~337)
    - dq-rules: memory/specs/dq-rules.json
"""

from __future__ import annotations

import json
import math
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any

import numpy as np
from PIL import Image


# ---------------------------------------------------------------------------
# SSOT 상수 — Phase 0 §7 / mapping #5-A 직결
# ---------------------------------------------------------------------------

# Mapping version this evaluator is pinned to; _check_mapping_version() rejects
# any LayoutMeta whose "mappingVersion" differs.
SSOT_MAPPING_VERSION: str = "v1.0"
"""Phase 0 SSOT 시점 버전. LayoutMeta.mappingVersion 미일치 시 진입 차단."""

# §7.3 safe-area / grid
SAFE_AREA_RATIO: float = 0.067  # 6.7% of the longer canvas side (72px @ 1080)
GRID_BASELINE: int = 8           # expected grid baseline in px
# Roles allowed to use the 4px sub-grid per the SSOT note.
# NOTE(review): not referenced by any code visible in this module.
GRID_SUBGRID_WHITELIST_ROLES: tuple[str, ...] = ("icon",)

# §7.2 contrast thresholds (WCAG-style ratios)
CONTRAST_BODY_MIN_AA: float = 4.5     # hard floor applied to the 5th percentile
CONTRAST_BODY_RECOMMENDED: float = 7.0  # AAA recommendation applied to the 95th percentile
CONTRAST_LARGE_MIN_AA: float = 3.0    # large text (24px+ bold or 18.5px+ any weight per SSOT)
CTA_CONTRAST_MIN_AA: float = 4.5      # SSOT §7.2: CTA stays at 4.5 even when large

# Key thresholds mirrored from memory/specs/dq-rules.json
DQ_FONT_ABSOLUTE_MIN: int = 40
DQ_FONT_HEADLINE_MIN: int = 84
DQ_FONT_SUBHEAD_MIN: int = 64
DQ_FONT_CTA_MIN: int = 40
DQ_FONT_DISCLAIMER_MIN: int = 40
DQ_HEAD_SUB_MIN_RATIO: float = 1.3   # font_ratio.min_head_sub_ratio
DQ_COLOR_MAX_PALETTE: int = 4         # max_brand_colors=3 + max_accent_colors=1
DQ_FONT_MIN_FAMILIES: int = 2
DQ_FONT_MAX_FAMILIES: int = 3

# L4 "AI purple" detection (Supanova LR_NO_AI_PURPLE_GRADIENT)
AI_PURPLE_HUE_MIN: float = 270.0
AI_PURPLE_HUE_MAX: float = 300.0
AI_PURPLE_SAT_MIN: float = 0.5
AI_PURPLE_RATIO_FAIL: float = 0.10  # FAIL once more than 10% of sampled pixels match

# L5 typography: only these families pass (task.md L5)
PRIMARY_KOREAN_FAMILIES: frozenset[str] = frozenset({"Pretendard", "Noto Sans KR"})

# Phase 0 §7.1 preset palette tokens (mapping-tables #3 brand matching)
# theme-fa-fintech: Brex based — Brex Orange / Near Black / Warm Cream
# theme-consumer-warm: Apple based — Apple Blue / Off White / Charcoal
PRESET_PALETTE_TOKENS: dict[str, list[str]] = {
    "theme-fa-fintech": [
        "#FF6E2B",  # Brex Orange (primary)
        "#0B1E3F",  # Near Black (text)
        "#F8F4EE",  # Warm Cream (bg)
        "#1F1F1F",  # Body text fallback
        "#FFFFFF",  # White
    ],
    "theme-consumer-warm": [
        "#0071E3",  # Apple Blue (primary)
        "#1D1D1F",  # Charcoal (text)
        "#F5F5F7",  # Off White (bg)
        "#FFFFFF",  # White
        "#86868B",  # Light Gray (caption)
    ],
}
# Font families that always FAIL the typography check.
DQ_BANNED_FONTS: frozenset[str] = frozenset(
    {"궁서체", "궁서", "굴림", "굴림체", "바탕", "바탕체"}
)
# Font weights that always FAIL (Thin / ExtraLight / Light).
DQ_BANNED_WEIGHTS: frozenset[int] = frozenset({100, 200, 300})

# Max sRGB Euclidean distance from the declared fill for a pixel to count as glyph
GLYPH_COLOR_DELTA: float = 30.0


# ---------------------------------------------------------------------------
# 예외
# ---------------------------------------------------------------------------


class SchemaValidationError(Exception):
    """Raised when a LayoutMeta dict violates the input JSON Schema."""


class SSotMismatchError(Exception):
    """Raised when mappingVersion differs from SSOT_MAPPING_VERSION (evaluation is blocked)."""


# ---------------------------------------------------------------------------
# 데이터 클래스
# ---------------------------------------------------------------------------


@dataclass
class ItemResult:
    """Outcome of one of the five evaluation items."""

    code: str   # "L1" ~ "L5"
    name: str   # "Contrast" / "Margin" / "Hierarchy" / "Color Token" / "Typography"
    verdict: str  # "PASS" | "WARN" | "FAIL"
    score: int    # 0~100
    reason: str | None = None  # human-readable explanation; None when nothing to report
    details: dict[str, Any] = field(default_factory=dict)  # item-specific diagnostics


@dataclass
class EvalResult:
    """Aggregated Lite Evaluator result across all items."""

    overall: str  # "PASS" | "WARN" | "FAIL" — worst verdict among the items
    score: int    # rounded mean of the item scores
    items: list[ItemResult]
    fail_reasons: list[str] = field(default_factory=list)  # reasons of FAIL items only
    layout_meta_summary: dict[str, Any] = field(default_factory=dict)  # echo of key input fields


# ---------------------------------------------------------------------------
# Schema 로딩 + 검증
# ---------------------------------------------------------------------------


# Location of the LayoutMeta input schema, resolved relative to this module's directory.
_SCHEMA_PATH = Path(__file__).parent / "schemas" / "lite_evaluator_input.schema.json"


def load_schema() -> dict[str, Any]:
    """Read the LayoutMeta JSON Schema (Draft-07) from ``_SCHEMA_PATH`` and parse it.

    Returns:
        The decoded schema document.
    """
    schema_text = _SCHEMA_PATH.read_text(encoding="utf-8")
    return json.loads(schema_text)


def _validate_schema(layout_meta: dict[str, Any]) -> None:
    """Enforce the input contract with jsonschema's Draft7Validator.

    ``jsonschema`` is imported lazily so the module can be imported without it.

    Raises:
        RuntimeError: ``jsonschema`` is not installed.
        SchemaValidationError: ``layout_meta`` has one or more schema
            violations; the message lists each one with its path.
    """
    try:
        from jsonschema import Draft7Validator
    except ImportError as exc:
        raise RuntimeError(
            "lite_evaluator requires 'jsonschema'. "
            "Install: pip install jsonschema"
        ) from exc

    checker = Draft7Validator(load_schema())
    found = sorted(checker.iter_errors(layout_meta), key=lambda err: err.path)
    if not found:
        return

    report = []
    for err in found:
        # An empty path means the violation is on the document root.
        location = "/".join(str(part) for part in err.absolute_path) or "(root)"
        report.append(f"  - {location}: {err.message}")
    raise SchemaValidationError(
        "LayoutMeta JSON Schema validation FAIL:\n" + "\n".join(report)
    )


def _check_mapping_version(layout_meta: dict[str, Any]) -> None:
    """Block evaluation unless ``mappingVersion`` equals ``SSOT_MAPPING_VERSION``.

    Raises:
        SSotMismatchError: the declared version is missing or different.
    """
    declared = layout_meta.get("mappingVersion")
    if declared == SSOT_MAPPING_VERSION:
        return
    raise SSotMismatchError(
        f"mappingVersion mismatch: got '{declared}', "
        f"expected '{SSOT_MAPPING_VERSION}' (Phase 0 SSOT)."
    )


# ---------------------------------------------------------------------------
# 색상 유틸 (WCAG contrast / hex 파싱 / hue)
# ---------------------------------------------------------------------------


def _hex_to_rgb(hex_color: str) -> tuple[int, int, int]:
    h = hex_color.lstrip("#")
    return int(h[0:2], 16), int(h[2:4], 16), int(h[4:6], 16)


def _relative_luminance(rgb: tuple[int, int, int]) -> float:
    """WCAG 상대 휘도 (sRGB → linear → Y)."""

    def chan(c: int) -> float:
        v = c / 255.0
        return v / 12.92 if v <= 0.03928 else ((v + 0.055) / 1.055) ** 2.4

    r, g, b = rgb
    return 0.2126 * chan(r) + 0.7152 * chan(g) + 0.0722 * chan(b)


def _contrast_ratio(
    fg: tuple[int, int, int], bg: tuple[int, int, int]
) -> float:
    """Return the WCAG contrast ratio ``(L_light + 0.05) / (L_dark + 0.05)``.

    The result does not depend on which argument is the lighter color.
    """
    lum_fg = _relative_luminance(fg)
    lum_bg = _relative_luminance(bg)
    if lum_fg > lum_bg:
        return (lum_fg + 0.05) / (lum_bg + 0.05)
    return (lum_bg + 0.05) / (lum_fg + 0.05)


def _rgb_to_hsl(rgb: tuple[int, int, int]) -> tuple[float, float, float]:
    """RGB(0~255) → HSL(h: 0~360, s/l: 0~1)."""

    r, g, b = (c / 255.0 for c in rgb)
    mx, mn = max(r, g, b), min(r, g, b)
    light = (mx + mn) / 2.0
    if mx == mn:
        return 0.0, 0.0, light
    delta = mx - mn
    sat = delta / (2.0 - mx - mn) if light > 0.5 else delta / (mx + mn)
    if mx == r:
        hue = ((g - b) / delta) % 6
    elif mx == g:
        hue = (b - r) / delta + 2
    else:
        hue = (r - g) / delta + 4
    return hue * 60.0, sat, light


def _color_distance(
    a: tuple[int, int, int], b: tuple[int, int, int]
) -> float:
    """sRGB Euclidean distance."""

    return math.sqrt(sum((x - y) ** 2 for x, y in zip(a, b)))


# ---------------------------------------------------------------------------
# L1 Contrast — 글리프 픽셀 5th/95th percentile (mapping 5-A L1)
# ---------------------------------------------------------------------------


def _percentile(values: list[float], p: float) -> float:
    """선형 보간 percentile (numpy 미사용 시 호환)."""

    if not values:
        return 0.0
    s = sorted(values)
    n = len(s)
    k = (n - 1) * p / 100.0
    lo = math.floor(k)
    hi = math.ceil(k)
    if lo == hi:
        return s[int(k)]
    return s[lo] + (s[hi] - s[lo]) * (k - lo)


def _crop_bbox(arr: np.ndarray, bbox: list[float]) -> np.ndarray:
    x, y, w, h = bbox
    x, y, w, h = int(x), int(y), int(w), int(h)
    H, W = arr.shape[:2]
    x0, y0 = max(0, x), max(0, y)
    x1, y1 = min(W, x + w), min(H, y + h)
    if x1 <= x0 or y1 <= y0:
        return np.empty((0, 0, arr.shape[-1] if arr.ndim == 3 else 1))
    return arr[y0:y1, x0:x1]


def _glyph_pixel_contrasts(
    img_arr: np.ndarray,
    component: dict[str, Any],
) -> list[float]:
    """Return per-glyph-pixel contrast ratios inside a component's bbox.

    No text alpha mask is available, so pixels are classified by color: a
    pixel counts as glyph when its sRGB Euclidean distance to the component's
    declared ``fill`` is below ``GLYPH_COLOR_DELTA``; everything else in the
    bbox is background.

    For each glyph pixel (at most 4000, evenly sampled) the contrast is
    computed between that pixel's actual color and a local background: the
    mean of the background pixels in a square window around it. If the window
    holds no background pixel, the median of all background pixels in the
    bbox is used instead.

    Special cases:
        - bbox entirely outside the image -> ``[]``.
        - bbox contains only glyph pixels or only background pixels -> a
          single ratio of ``fill`` against the mean color of the whole region.

    NOTE(review): the previous docstring said both a local and a global
    contrast are computed and the worst one is kept. The code only uses the
    global background as a fallback — confirm which behaviour is intended.
    """
    fill_rgb = _hex_to_rgb(component["fill"])
    region = _crop_bbox(img_arr, component["bbox"])
    if region.size == 0:
        return []

    H, W = region.shape[:2]
    rgb = region[..., :3].astype(np.int32)
    flat = rgb.reshape(-1, 3)
    diff = flat - np.array(fill_rgb, dtype=np.int32)
    dist_flat = np.sqrt((diff * diff).sum(axis=1))
    glyph_mask_flat = dist_flat < GLYPH_COLOR_DELTA
    bg_mask_flat = ~glyph_mask_flat

    # Degenerate region (no glyph or no background): one ratio against the region mean.
    if not glyph_mask_flat.any() or not bg_mask_flat.any():
        mean_bg = flat.mean(axis=0).astype(int) if flat.size else np.array([255, 255, 255])
        ratio = _contrast_ratio(
            fill_rgb, (int(mean_bg[0]), int(mean_bg[1]), int(mean_bg[2]))
        )
        return [ratio]

    glyph_mask = glyph_mask_flat.reshape(H, W)
    bg_mask = bg_mask_flat.reshape(H, W)
    bg_pixels = rgb[bg_mask]
    # Region-wide background, used only as a fallback
    bg_median_global = np.median(bg_pixels, axis=0).astype(int)
    global_bg = (int(bg_median_global[0]), int(bg_median_global[1]), int(bg_median_global[2]))

    # Summed-area tables give the background sum/count of any square window in O(1)
    radius = max(8, min(H, W) // 16)  # local window radius (>= 8px)
    bg_only = rgb * bg_mask[..., None]
    bg_count = bg_mask.astype(np.int32)
    cum_rgb = np.cumsum(np.cumsum(bg_only, axis=0), axis=1)
    cum_cnt = np.cumsum(np.cumsum(bg_count, axis=0), axis=1)

    def _window_mean(yy: int, xx: int) -> tuple[int, int, int] | None:
        # Mean background color in the window centred on (yy, xx), clipped to
        # the region; None when the window contains no background pixel.
        y0 = max(0, yy - radius)
        x0 = max(0, xx - radius)
        y1 = min(H - 1, yy + radius)
        x1 = min(W - 1, xx + radius)
        # Inclusion-exclusion over the cumulative tables.
        s_rgb = cum_rgb[y1, x1].copy()
        s_cnt = int(cum_cnt[y1, x1])
        if y0 > 0:
            s_rgb -= cum_rgb[y0 - 1, x1]
            s_cnt -= int(cum_cnt[y0 - 1, x1])
        if x0 > 0:
            s_rgb -= cum_rgb[y1, x0 - 1]
            s_cnt -= int(cum_cnt[y1, x0 - 1])
        if y0 > 0 and x0 > 0:
            s_rgb += cum_rgb[y0 - 1, x0 - 1]
            s_cnt += int(cum_cnt[y0 - 1, x0 - 1])
        if s_cnt == 0:
            return None
        return (int(s_rgb[0] / s_cnt), int(s_rgb[1] / s_cnt), int(s_rgb[2] / s_cnt))

    # Per-glyph-pixel contrast — local background first, global as fallback
    contrasts: list[float] = []
    glyph_ys, glyph_xs = np.where(glyph_mask)
    # Evenly subsample when there are too many glyph pixels (performance)
    max_samples = 4000
    if len(glyph_ys) > max_samples:
        idx = np.linspace(0, len(glyph_ys) - 1, max_samples).astype(int)
        glyph_ys, glyph_xs = glyph_ys[idx], glyph_xs[idx]
    for yy, xx in zip(glyph_ys, glyph_xs):
        local_bg = _window_mean(int(yy), int(xx))
        bg = local_bg if local_bg is not None else global_bg
        # Use the pixel's actual rendered color (includes anti-aliasing) as foreground
        fg = (int(rgb[yy, xx, 0]), int(rgb[yy, xx, 1]), int(rgb[yy, xx, 2]))
        contrasts.append(_contrast_ratio(fg, bg))
    return contrasts


def evaluate_l1_contrast(
    img_arr: np.ndarray, layout_meta: dict[str, Any]
) -> ItemResult:
    """L1: judge each text component by the 5th/95th percentile of glyph contrast.

    Per component:
        - FAIL when p5 is below the required minimum (CTA: 4.5; headline or
          subhead with fontWeight >= 600: 3.0; everything else: 4.5).
        - WARN when p5 passes but p95 is below ``CONTRAST_BODY_RECOMMENDED``.

    A component whose bbox yields no pixels (e.g. it lies outside the image)
    cannot be measured. Such components are listed in
    ``details["unmeasured_components"]`` and force at least WARN, so the item
    never reports a clean PASS for text it did not actually inspect.

    Args:
        img_arr: rendered image as an array indexed ``[row, col, channel]``.
        layout_meta: LayoutMeta dict; reads ``components``.

    Returns:
        ItemResult with code "L1"; score is 40 (FAIL), 75 (WARN) or 100 (PASS),
        or 70 when there is no text component at all.
    """
    text_roles = {"headline", "subhead", "body", "caption", "cta", "disclaimer"}
    text_components = [
        c
        for c in layout_meta["components"]
        if c.get("isText", True) and c["role"] in text_roles
    ]
    if not text_components:
        return ItemResult(
            code="L1",
            name="Contrast",
            verdict="WARN",
            score=70,
            reason="text component 0건 — 평가 대상 없음",
            details={"component_count": 0},
        )

    per_component: list[dict[str, Any]] = []
    unmeasured: list[str] = []
    overall_fail = False
    overall_warn = False
    fail_reasons: list[str] = []
    warn_reasons: list[str] = []

    for c in text_components:
        contrasts = _glyph_pixel_contrasts(img_arr, c)
        if not contrasts:
            # Nothing to measure: remember it instead of silently passing.
            unmeasured.append(c["name"])
            continue
        p5 = _percentile(contrasts, 5)
        p95 = _percentile(contrasts, 95)
        is_cta = c["role"] == "cta"

        # An explicit null weight is treated like a missing one (regular, 400).
        weight = c.get("fontWeight")
        if weight is None:
            weight = 400
        is_large = c["role"] in {"headline", "subhead"} and weight >= 600

        if is_cta:
            min_required = CTA_CONTRAST_MIN_AA
        elif is_large:
            # Large bold text is allowed the relaxed 3:1 floor.
            min_required = CONTRAST_LARGE_MIN_AA
        else:
            min_required = CONTRAST_BODY_MIN_AA

        comp_fail = p5 < min_required
        comp_warn = (not comp_fail) and (p95 < CONTRAST_BODY_RECOMMENDED)
        if comp_fail:
            overall_fail = True
            fail_reasons.append(
                f"L1 {c['name']} contrast p5={p5:.2f} < {min_required:.1f} (절대 하한)"
            )
        elif comp_warn:
            overall_warn = True

        per_component.append(
            {
                "name": c["name"],
                "role": c["role"],
                "p5": round(p5, 3),
                "p95": round(p95, 3),
                "min_required": min_required,
                "is_cta": is_cta,
                "verdict": "FAIL" if comp_fail else ("WARN" if comp_warn else "PASS"),
            }
        )

    if unmeasured:
        overall_warn = True
        warn_reasons.append(
            f"L1 contrast 측정 불가 {len(unmeasured)}건 "
            f"(bbox가 이미지 영역 밖): {unmeasured}"
        )

    verdict = "FAIL" if overall_fail else ("WARN" if overall_warn else "PASS")
    score = 40 if overall_fail else (75 if overall_warn else 100)
    reasons = fail_reasons + warn_reasons
    return ItemResult(
        code="L1",
        name="Contrast",
        verdict=verdict,
        score=score,
        reason="; ".join(reasons) if reasons else None,
        details={
            "per_component": per_component,
            "unmeasured_components": unmeasured,
        },
    )


# ---------------------------------------------------------------------------
# L2 Margin — Safe-area 침범 (mapping 5-A L2 / SSOT §7.3 = 6.7% = 72px)
# ---------------------------------------------------------------------------


def evaluate_l2_margin(layout_meta: dict[str, Any]) -> ItemResult:
    """L2: check that text bboxes stay inside the declared safe area.

    Components flagged as text whose role is not logo / decoration / icon are
    checked against the four safe-area insets. Any intrusion is a FAIL. Two
    softer checks only WARN: the insets deviating by more than 4px from
    ``max(width, height) * SAFE_AREA_RATIO``, and a grid baseline other than
    ``GRID_BASELINE``.

    Returns:
        ItemResult with code "L2". FAIL score is ``60 - 10 * violations``
        (floored at 0); WARN is 75; PASS is 100.
    """
    canvas = layout_meta["canvas"]
    safe = layout_meta["safeArea"]
    canvas_w = canvas["width"]
    canvas_h = canvas["height"]
    top = safe["top"]
    bottom = safe["bottom"]
    left = safe["left"]
    right = safe["right"]

    exempt_roles = {"logo", "decoration", "icon"}
    checked = [
        comp
        for comp in layout_meta["components"]
        if comp.get("isText", True) and comp["role"] not in exempt_roles
    ]

    violations: list[dict[str, Any]] = []
    for comp in checked:
        bx, by, bw, bh = comp["bbox"]
        sides = {
            "top": by < top,
            "left": bx < left,
            "right": (bx + bw) > (canvas_w - right),
            "bottom": (by + bh) > (canvas_h - bottom),
        }
        if not any(sides.values()):
            continue
        violations.append(
            {
                "name": comp["name"],
                "role": comp["role"],
                "bbox": [bx, by, bw, bh],
                "violations": sides,
            }
        )

    # SSOT §7.3: each inset should be within 4px of the expected 6.7% value.
    expected = max(canvas_w, canvas_h) * SAFE_AREA_RATIO
    delta = max(abs(inset - expected) for inset in (top, bottom, left, right))
    ssot_aligned = delta <= 4

    # SSOT §7.3: the layout-wide grid baseline must be 8.
    baseline = int(layout_meta.get("grid", {}).get("baseline", 8))
    grid_violation = baseline != GRID_BASELINE

    fail_reasons: list[str] = []
    if violations:
        fail_reasons.append(f"safe-area 침범 {len(violations)}건")

    warn_reasons: list[str] = []
    if not ssot_aligned:
        warn_reasons.append(
            f"safe-area SSOT 불일치: 기대 {expected:.0f}px ±4 / 실제 top={top}, "
            f"bottom={bottom}, left={left}, right={right}"
        )
    if grid_violation:
        warn_reasons.append(
            f"grid baseline={baseline} ≠ SSOT 8 (4px는 sub-grid 화이트리스트만 허용)"
        )

    verdict = "PASS"
    score = 100
    reason = None
    if fail_reasons:
        verdict = "FAIL"
        score = max(0, 60 - len(violations) * 10)
        reason = "; ".join(fail_reasons + warn_reasons)
    elif warn_reasons:
        verdict = "WARN"
        score = 75
        reason = "; ".join(warn_reasons)

    details = {
        "safe_area": safe,
        "ssot_expected_px": round(expected, 1),
        "ssot_aligned": ssot_aligned,
        "grid_baseline": baseline,
        "grid_baseline_aligned": not grid_violation,
        "violations": violations,
        "violation_count": len(violations),
    }
    return ItemResult(
        code="L2",
        name="Margin",
        verdict=verdict,
        score=score,
        reason=reason,
        details=details,
    )


# ---------------------------------------------------------------------------
# L3 Hierarchy — heading/subhead/body 비율 + dq-rules absolute_min
# ---------------------------------------------------------------------------


def evaluate_l3_hierarchy(layout_meta: dict[str, Any]) -> ItemResult:
    """L3: validate font sizes against dq-rules minimums and the head/sub ratio.

    Three checks, each of which produces a FAIL:
        1. every text component is at least ``DQ_FONT_ABSOLUTE_MIN`` px;
        2. headline / subhead / cta / disclaimer meet their role minimum;
        3. when both headline and subhead exist, the largest headline divided
           by the largest subhead is at least ``DQ_HEAD_SUB_MIN_RATIO``.

    This item has no WARN outcome: it returns FAIL (score 30) or PASS (100).

    Returns:
        ItemResult with code "L3"; ``details`` holds the sizes grouped by
        role, both violation lists and the computed head/sub ratio (None when
        it could not be computed).
    """
    by_role: dict[str, list[float]] = {}
    abs_min_violators: list[dict[str, Any]] = []
    for c in layout_meta["components"]:
        if not c.get("isText", True):
            continue
        size = c["fontSize"]
        by_role.setdefault(c["role"], []).append(size)
        if size < DQ_FONT_ABSOLUTE_MIN:
            abs_min_violators.append(
                {"name": c["name"], "role": c["role"], "fontSize": size}
            )

    fail_reasons: list[str] = []
    if abs_min_violators:
        fail_reasons.append(
            f"absolute_min={DQ_FONT_ABSOLUTE_MIN}px 위반 {len(abs_min_violators)}건"
        )

    # Per-role minimum sizes
    role_min: dict[str, int] = {
        "headline": DQ_FONT_HEADLINE_MIN,
        "subhead": DQ_FONT_SUBHEAD_MIN,
        "cta": DQ_FONT_CTA_MIN,
        "disclaimer": DQ_FONT_DISCLAIMER_MIN,
    }
    role_violations: list[dict[str, Any]] = [
        {"role": role, "fontSize": fs, "min": min_size}
        for role, min_size in role_min.items()
        for fs in by_role.get(role, [])
        if fs < min_size
    ]
    if role_violations:
        fail_reasons.append(f"role 최소값 위반 {len(role_violations)}건")

    # Headline/subhead ratio, only when both roles are present
    head_sub_ratio: float | None = None
    headline_sizes = by_role.get("headline")
    subhead_sizes = by_role.get("subhead")
    if headline_sizes and subhead_sizes:
        sub_max = max(subhead_sizes)
        if sub_max > 0:  # guard against division by zero
            head_sub_ratio = max(headline_sizes) / sub_max
            if head_sub_ratio < DQ_HEAD_SUB_MIN_RATIO:
                fail_reasons.append(
                    f"head/sub ratio={head_sub_ratio:.2f} < {DQ_HEAD_SUB_MIN_RATIO}"
                )

    if fail_reasons:
        verdict = "FAIL"
        score = 30
    else:
        verdict = "PASS"
        score = 100
    return ItemResult(
        code="L3",
        name="Hierarchy",
        verdict=verdict,
        score=score,
        reason="; ".join(fail_reasons) if fail_reasons else None,
        details={
            "by_role_sizes": by_role,
            "abs_min_violators": abs_min_violators,
            "role_violations": role_violations,
            "head_sub_ratio": head_sub_ratio,
        },
    )


# ---------------------------------------------------------------------------
# L4 Color Token — palette 4색 이내 + AI 퍼플 검출 (mapping 5-A L5 + dq-rules color)
# ---------------------------------------------------------------------------


def _cluster_palette(arr: np.ndarray, k: int = 5) -> list[tuple[tuple[int, int, int], float]]:
    """간이 색상 클러스터링: 픽셀 다운샘플 → hue 기반 그룹화 → top-k weight.

    scikit-learn k-means 의존성 회피. 결정적 동작 (numpy seed 무관).
    """
    if arr.ndim != 3:
        return []
    rgb = arr[..., :3]
    h, w = rgb.shape[:2]
    # 다운샘플 (속도) — 256 grid
    step_y = max(1, h // 64)
    step_x = max(1, w // 64)
    sample = rgb[::step_y, ::step_x].reshape(-1, 3).astype(int)

    # 6×6×6 RGB bin
    binsize = 64
    bins = (sample // binsize).astype(int)
    keys = bins[:, 0] * 100 + bins[:, 1] * 10 + bins[:, 2]
    counts: dict[int, int] = {}
    sums: dict[int, np.ndarray] = {}
    for key, px in zip(keys, sample):
        counts[key] = counts.get(key, 0) + 1
        if key not in sums:
            sums[key] = px.astype(np.int64).copy()
        else:
            sums[key] += px
    total = sum(counts.values())
    palette = []
    for key, c in sorted(counts.items(), key=lambda kv: -kv[1])[:k]:
        avg = (sums[key] // c).astype(int)
        palette.append(
            (
                (int(avg[0]), int(avg[1]), int(avg[2])),
                c / total if total else 0.0,
            )
        )
    return palette


def _unify_hues(
    palette: list[tuple[tuple[int, int, int], float]],
    hue_tolerance: float = 15.0,
) -> list[dict[str, Any]]:
    """Δhue<15° 클러스터 통합 (그라데이션 카운트 부풀림 방지)."""

    groups: list[dict[str, Any]] = []
    for rgb, weight in palette:
        hue, sat, light = _rgb_to_hsl(rgb)
        merged = False
        for g in groups:
            dh = abs(hue - g["h"])
            if dh > 180:
                dh = 360 - dh
            if dh < hue_tolerance:
                g["weight"] += weight
                merged = True
                break
        if not merged:
            groups.append({"h": hue, "s": sat, "l": light, "weight": weight, "rgb": rgb})
    return groups


def evaluate_l4_color_token(
    img_arr: np.ndarray, layout_meta: dict[str, Any]
) -> ItemResult:
    """L4: check palette size, "AI purple" coverage and text-fill tokens.

    FAIL conditions:
        - more than ``DQ_COLOR_MAX_PALETTE`` hue groups each covering over 2%
          of the sampled image;
        - more than ``AI_PURPLE_RATIO_FAIL`` of sampled pixels with hue in
          [``AI_PURPLE_HUE_MIN``, ``AI_PURPLE_HUE_MAX``] and saturation above
          ``AI_PURPLE_SAT_MIN``.

    WARN condition:
        - a text ``fill`` that is not within sRGB distance 30 of any token of
          the layout's ``themePreset`` palette. The check is skipped when the
          preset is unknown (no tokens to compare against).

    Off-token fills are reported in sorted order so that the reason string
    and ``details`` are identical across runs.

    Returns:
        ItemResult with code "L4"; score 30 (FAIL), 75 (WARN) or 100 (PASS).
    """
    palette = _cluster_palette(img_arr, k=5)
    unified = _unify_hues(palette)
    significant = [g for g in unified if g["weight"] > 0.02]
    color_count = len(significant)

    # Share of "AI purple" pixels, measured on a subsampled grid for speed.
    rgb = img_arr[..., :3].astype(int)
    height, width = rgb.shape[:2]
    step = max(1, max(height, width) // 256)
    sample = rgb[::step, ::step].reshape(-1, 3)
    purple_count = 0
    for px in sample:
        hue, sat, _ = _rgb_to_hsl((int(px[0]), int(px[1]), int(px[2])))
        if AI_PURPLE_HUE_MIN <= hue <= AI_PURPLE_HUE_MAX and sat > AI_PURPLE_SAT_MIN:
            purple_count += 1
    purple_ratio = purple_count / len(sample) if len(sample) else 0.0

    # Text fills are compared with the declared preset tokens directly, not
    # with the palette estimated from the image.
    preset_tokens = PRESET_PALETTE_TOKENS.get(layout_meta.get("themePreset", ""), [])
    preset_rgbs = [_hex_to_rgb(token) for token in preset_tokens]
    # Sorted: iterating the raw set would make the output order vary per run.
    text_fills = sorted(
        {
            c["fill"].upper()
            for c in layout_meta["components"]
            if c.get("isText", True)
        }
    )
    off_token: list[str] = []
    for fill in text_fills:
        fill_rgb = _hex_to_rgb(fill)
        # Distance below 30 counts as the same token (rounding tolerance).
        if preset_rgbs and not any(
            _color_distance(fill_rgb, t) < 30 for t in preset_rgbs
        ):
            off_token.append(fill)

    fail_reasons: list[str] = []
    if color_count > DQ_COLOR_MAX_PALETTE:
        fail_reasons.append(
            f"palette {color_count}색 > 4 (brand 2~3 + accent 1)"
        )
    if purple_ratio > AI_PURPLE_RATIO_FAIL:
        fail_reasons.append(
            f"AI 퍼플 그라디언트 {purple_ratio:.1%} > 10% "
            f"(LR_NO_AI_PURPLE_GRADIENT)"
        )

    warn_reasons: list[str] = []
    if off_token:
        # Any off-token fill must surface as at least WARN.
        warn_reasons.append(
            f"off-token fill {len(off_token)}건: {off_token} "
            f"(themePreset='{layout_meta.get('themePreset')}' palette 외)"
        )

    if fail_reasons:
        verdict = "FAIL"
        score = 30
        reason = "; ".join(fail_reasons)
    elif warn_reasons:
        verdict = "WARN"
        score = 75
        reason = "; ".join(warn_reasons)
    else:
        verdict = "PASS"
        score = 100
        reason = None
    return ItemResult(
        code="L4",
        name="Color Token",
        verdict=verdict,
        score=score,
        reason=reason,
        details={
            "color_count": color_count,
            "ai_purple_ratio": round(purple_ratio, 4),
            "off_token_fills": off_token,
            "palette": [
                {"rgb": g["rgb"], "h": round(g["h"], 1), "weight": round(g["weight"], 4)}
                for g in significant
            ],
        },
    )


# ---------------------------------------------------------------------------
# L5 Typography — Pretendard / Noto Sans KR 강제 + 자간/행간 dq-rules
# ---------------------------------------------------------------------------


def evaluate_l5_typography(layout_meta: dict[str, Any]) -> ItemResult:
    """L5: validate font family, weight, letter spacing and line height.

    FAIL: missing ``fontFamily``, a banned family, a family outside
    ``PRIMARY_KOREAN_FAMILIES``, a banned weight, or more than
    ``DQ_FONT_MAX_FAMILIES`` distinct families.
    WARN: headline/cta weight below 600, letterSpacing outside ±100,
    positive lineHeight outside 0.8~3.0, or fewer than
    ``DQ_FONT_MIN_FAMILIES`` distinct families.

    Components without a family are reported and skipped for the remaining
    per-component checks.

    Returns:
        ItemResult with code "L5"; score 30 (FAIL), 75 (WARN) or 100 (PASS).
    """
    failures: list[str] = []
    warnings: list[str] = []
    seen_families: set[str] = set()
    weight_issues: list[dict[str, Any]] = []
    family_issues: list[dict[str, Any]] = []

    for comp in layout_meta["components"]:
        if not comp.get("isText", True):
            continue

        family = comp.get("fontFamily")
        if not family:
            failures.append(
                f"{comp['name']} fontFamily 미지정 (Pretendard/Noto Sans KR 강제)"
            )
            family_issues.append(
                {"name": comp["name"], "family": None, "type": "missing"}
            )
            continue
        seen_families.add(family)

        # Family: banned list first, then the allow-list.
        family_problem: tuple[str, str] | None = None
        if family in DQ_BANNED_FONTS:
            family_problem = (
                "banned",
                f"{comp['name']} fontFamily='{family}' (banned: 굴림/바탕/궁서)",
            )
        elif family not in PRIMARY_KOREAN_FAMILIES:
            family_problem = (
                "not_primary",
                f"{comp['name']} fontFamily='{family}' "
                f"(허용: Pretendard, Noto Sans KR — task.md L5 강제)",
            )
        if family_problem is not None:
            problem_type, message = family_problem
            failures.append(message)
            family_issues.append(
                {"name": comp["name"], "family": family, "type": problem_type}
            )

        # Weight: Thin / ExtraLight / Light are banned outright.
        weight = comp.get("fontWeight")
        if weight in DQ_BANNED_WEIGHTS:
            failures.append(
                f"{comp['name']} fontWeight={weight} (banned: 100/200/300)"
            )
            weight_issues.append(
                {"name": comp["name"], "weight": weight, "type": "banned"}
            )
        # Headline and CTA should be SemiBold (600) or heavier.
        needs_emphasis = comp["role"] in {"headline", "cta"}
        if needs_emphasis and weight is not None and weight < 600:
            warnings.append(
                f"{comp['name']} ({comp['role']}) fontWeight={weight} < 600 "
                f"(SemiBold 이상 권장)"
            )
            weight_issues.append(
                {"name": comp["name"], "weight": weight, "type": "weak_emphasis"}
            )

        # Letter spacing beyond ±100 is treated as an extreme value.
        spacing = comp.get("letterSpacing")
        if spacing is not None and (spacing < -100 or spacing > 100):
            warnings.append(f"{comp['name']} letterSpacing={spacing} 극단값")

        # Line height: only positive values are range-checked.
        line_height = comp.get("lineHeight")
        if line_height is not None and line_height > 0 and (
            line_height < 0.8 or line_height > 3.0
        ):
            warnings.append(
                f"{comp['name']} lineHeight={line_height} 권장 범위 0.8~3.0 벗어남"
            )

    # Distinct family count (dq-rules font_pairing)
    family_count = len(seen_families)
    if family_count < DQ_FONT_MIN_FAMILIES:
        warnings.append(
            f"families={family_count} < {DQ_FONT_MIN_FAMILIES} "
            f"(min_families 미달 — 동일 패밀리 내 weight 대비로 충족 가능)"
        )
    elif family_count > DQ_FONT_MAX_FAMILIES:
        failures.append(
            f"families={family_count} > {DQ_FONT_MAX_FAMILIES} "
            f"(max_families 초과)"
        )

    verdict = "PASS"
    score = 100
    reason = None
    if failures:
        verdict = "FAIL"
        score = 30
        reason = "; ".join(failures)
    elif warnings:
        verdict = "WARN"
        score = 75
        reason = "; ".join(warnings)

    return ItemResult(
        code="L5",
        name="Typography",
        verdict=verdict,
        score=score,
        reason=reason,
        details={
            "families_seen": sorted(seen_families),
            "family_count": family_count,
            "family_violations": family_issues,
            "weight_violations": weight_issues,
        },
    )


# ---------------------------------------------------------------------------
# 진입점
# ---------------------------------------------------------------------------


def evaluate(layout_meta: dict[str, Any]) -> EvalResult:
    """Run the Lite Evaluator on one layout.

    Steps:
        1. Validate ``layout_meta`` against the JSON Schema.
        2. Check ``mappingVersion`` against ``SSOT_MAPPING_VERSION``.
        3. Load the image at ``layout_meta["image_path"]`` as an RGB array.
        4. Run the L1~L5 item evaluators.
        5. Aggregate: overall verdict is the worst item verdict, score is the
           rounded mean of the item scores.

    Args:
        layout_meta: LayoutMeta dict conforming to the input schema.

    Returns:
        EvalResult with the overall verdict, the five item results, the
        reasons of all FAIL items and a short summary of the input.

    Raises:
        SchemaValidationError: the input violates the schema.
        SSotMismatchError: the mapping version does not match.
        FileNotFoundError: ``image_path`` does not exist.
    """
    _validate_schema(layout_meta)
    _check_mapping_version(layout_meta)

    img_path = Path(layout_meta["image_path"])
    if not img_path.exists():
        raise FileNotFoundError(f"image_path not found: {img_path}")
    # Image.open keeps the file handle open until closed; convert() returns an
    # independent in-memory image, so the source can be released right away.
    with Image.open(img_path) as source:
        img_arr = np.asarray(source.convert("RGB"))

    items = [
        evaluate_l1_contrast(img_arr, layout_meta),
        evaluate_l2_margin(layout_meta),
        evaluate_l3_hierarchy(layout_meta),
        evaluate_l4_color_token(img_arr, layout_meta),
        evaluate_l5_typography(layout_meta),
    ]

    fail_reasons = [
        f"{it.code} {it.name}: {it.reason}"
        for it in items
        if it.verdict == "FAIL" and it.reason
    ]
    if any(it.verdict == "FAIL" for it in items):
        overall = "FAIL"
    elif any(it.verdict == "WARN" for it in items):
        overall = "WARN"
    else:
        overall = "PASS"
    score = int(round(sum(it.score for it in items) / len(items)))
    return EvalResult(
        overall=overall,
        score=score,
        items=items,
        fail_reasons=fail_reasons,
        layout_meta_summary={
            "themePreset": layout_meta["themePreset"],
            "targetPersona": layout_meta["targetPersona"],
            "mappingVersion": layout_meta["mappingVersion"],
            "component_count": len(layout_meta["components"]),
        },
    )