#!/usr/bin/env python3
"""
Phase 4 Validation Script - Marketing System Upgrade
작성자: 프레이야 (프론트엔드)
대상: 마케팅 스킬 업그레이드 검증 5개 항목

Usage:
  python3 validate_marketing_upgrade.py --target all|evals|yaml|context|routing|integrity
    [--check-files file1,file2,...]
    [--context-check skill1,skill2,...]
    [--routing-check skill1,skill2,...]
    [--backup-dir /path/to/backup/]
"""

import argparse
import json
import os
import re
import sys
from pathlib import Path

# ─────────────────────────────────────────────
# 상수 정의
# ─────────────────────────────────────────────

# Root directory holding one sub-directory per skill; the verifiers look for
# <SKILLS_BASE>/<skill>/SKILL.md and <SKILLS_BASE>/<skill>/evals/evals.json.
SKILLS_BASE = "/home/jay/.claude/skills"
# Default value of the --backup-dir CLI option (V5 integrity check).
BACKUP_DIR_DEFAULT = "/home/jay/workspace/backups/marketing-upgrade-20260326"

# Skills whose evals/evals.json is validated by the V1 schema check.
EVAL_TARGET_SKILLS = [
    "ad-creative",
    "copywriting",
    "content-strategy",
    "seo-audit",
    "ai-seo",
    "social-content",
    "paid-ads",
    "analytics-tracking",
]

# Skills checked by V3 when --context-check is not supplied.
CONTEXT_CHECK_SKILLS_DEFAULT = [
    "blog-dominance",
    "geo-optimizer",
    "thread-architect",
    "naver-seo",
    "copywriting-prompt",
    "churn-prevention",
    "launch-strategy",
    "marketing-ideas",
]

# Skills checked by V4 when --routing-check is not supplied:
# every eval target plus four additional skills.
ROUTING_CHECK_SKILLS_DEFAULT = EVAL_TARGET_SKILLS + [
    "blog-dominance",
    "geo-optimizer",
    "thread-architect",
    "naver-seo",
]

# Keys every eval entry must contain, and the minimum number of entries per skill.
EVAL_REQUIRED_FIELDS = {"id", "prompt", "expected_output", "assertions", "files"}
EVAL_MIN_COUNT = 5
# Phrases that mark an eval as a boundary ("routing") test when they occur in
# its expected_output or assertions text.
# NOTE(review): "outside.*scope" is written like a regular expression; it only
# behaves as a pattern if the consumer applies it with the `re` module (a plain
# substring test looks for the literal characters) — confirm the intent.
ROUTING_KEYWORDS = [
    "라우팅",
    "defers to",
    "defer to",
    "should defer",
    "cross-reference",
    "Recognizes this as",
    "References or defers",
    "not ad creative generation",
    "not creative generation",
    "not an SEO audit",
    "not page copywriting",
    "should make clear that",
    "the right skill for",
    "see the",
    "범위를 벗어",
    "outside content-strategy scope",
    "outside.*scope",
    "→ paid-ads",
    "→ ad-creative",
    "→ email-sequence",
    "→ seo-audit",
    "→ schema-markup",
    "→ thread-architect",
    "→ geo-optimizer",
    "→ analytics-tracking",
    "→ copywriting",
    "→ blog-dominance",
    "→ revops",
    "→ cold-email",
    "→ sales-enablement",
]
# "## <name>" headings (English and Korean variants) that the V5 integrity
# check compares between the backup and the current SKILL.md.
INTEGRITY_SECTIONS = [
    "Core Principles",
    "Output Format",
    "핵심 원칙",
    "출력 형식",
]

# ─────────────────────────────────────────────
# YAML 파싱 (표준 라이브러리 + 정규식 폴백)
# ─────────────────────────────────────────────


def try_import_yaml():
    """Return the PyYAML module if it can be imported, otherwise ``None``."""
    try:
        import yaml
    except ImportError:
        # PyYAML is an optional dependency; absence is reported, not raised.
        return None
    return yaml


def extract_yaml_frontmatter(content: str):
    """
    Extract the first YAML frontmatter block (``--- ... ---``) from SKILL.md text.

    The opening fence must be on the very first line. A leading byte-order
    mark is ignored, and an empty block (``---`` immediately followed by
    ``---``) is returned as an empty string instead of being reported missing.

    Returns:
        ``(raw_yaml_str, None)`` when a block is found, otherwise
        ``(None, error_msg)``.
    """
    # A file saved as "UTF-8 with BOM" keeps U+FEFF as its first character when
    # decoded with encoding="utf-8"; left in place it defeats the ^--- anchor.
    text = content.lstrip("\ufeff")
    # The body group is optional and lazy so that a closing fence directly after
    # the opening one is recognised as an empty block.
    m = re.match(r"^---\s*\n(?:(.*?)\n)??---\s*(\n|$)", text, re.DOTALL)
    if not m:
        return None, "YAML frontmatter 블록(--- ... ---)을 찾을 수 없습니다"
    return m.group(1) or "", None


def parse_yaml_safe(yaml_str: str):
    """
    Parse frontmatter YAML into a dict.

    PyYAML's ``safe_load`` is used when the module is available; otherwise a
    small regex-based parser handles top-level ``key: value`` pairs and block
    scalars (``|``, ``>`` and their ``-``/``+`` chomping variants).

    Returns:
        ``(dict, None)`` on success or ``(None, error_msg)`` on failure. The
        first element is always a dict when the error is ``None``: an empty
        document yields ``{}`` and a non-mapping document (list, scalar) is
        reported as an error.
    """
    yaml_mod = try_import_yaml()
    if yaml_mod:
        try:
            data = yaml_mod.safe_load(yaml_str)
        except Exception as e:
            return None, f"yaml.safe_load 실패: {e}"
        if data is None:
            return {}, None
        if not isinstance(data, dict):
            # Callers use `in` / `.get` on the result; a list or scalar would
            # silently misbehave or raise further down.
            return None, f"frontmatter 최상위가 매핑이 아님: {type(data).__name__}"
        return data, None

    # Fallback: simple regex-based key/value parser (no nesting support).
    block_indicators = ("|", "|-", "|+", ">", ">-", ">+")
    data = {}
    lines = yaml_str.splitlines()
    i = 0
    while i < len(lines):
        m = re.match(r"^([A-Za-z_][A-Za-z0-9_-]*):\s*(.*)", lines[i])
        i += 1
        if not m:
            continue
        key = m.group(1)
        val = m.group(2).strip()
        if val in block_indicators:
            # Block scalar: collect the following blank or indented lines.
            block_lines = []
            while i < len(lines) and (lines[i] == "" or lines[i][0] in (" ", "\t")):
                block_lines.append(lines[i].strip())
                i += 1
            data[key] = "\n".join(block_lines)
        else:
            # Inline value: drop surrounding quotes only when they form a
            # matching pair, so a lone apostrophe at either end is preserved.
            if len(val) >= 2 and val[0] == val[-1] and val[0] in ('"', "'"):
                val = val[1:-1]
            data[key] = val
    return data, None


# ─────────────────────────────────────────────
# 섹션 추출 유틸리티
# ─────────────────────────────────────────────


def extract_section(content: str, section_name: str) -> str | None:
    """
    ## {section_name} 헤딩에서 다음 ## 헤딩까지의 텍스트를 반환.
    없으면 None.
    """
    pattern = rf"(## {re.escape(section_name)}\s*\n.*?)(?=\n## |\Z)"
    m = re.search(pattern, content, re.DOTALL)
    if m:
        return m.group(1).strip()
    return None


# ─────────────────────────────────────────────
# V1: evals.json 스키마 검증
# ─────────────────────────────────────────────


def _mentions_routing(text: str) -> bool:
    """Return True when *text* contains any entry of ROUTING_KEYWORDS."""
    for kw in ROUTING_KEYWORDS:
        if kw in text:
            return True
        # At least one entry ("outside.*scope") is written as a regular
        # expression, so a literal miss is retried as a pattern search.
        try:
            if re.search(kw, text):
                return True
        except re.error:
            # Entry is not a valid pattern; the literal test above stands.
            continue
    return False


def _missing_field_reports(evals: list) -> list[str]:
    """Describe every eval entry that is not a dict or lacks a required key."""
    reports = []
    for idx, ev in enumerate(evals):
        if not isinstance(ev, dict):
            reports.append(f"eval[{idx}]: dict 아님")
            continue
        missing = EVAL_REQUIRED_FIELDS - set(ev.keys())
        if missing:
            reports.append(f"eval[{idx}](id={ev.get('id', '?')}): 필드 누락 {sorted(missing)}")
    return reports


def verify_evals_schema() -> dict:
    """
    V1: validate evals/evals.json for every skill in EVAL_TARGET_SKILLS.

    Per skill the file must exist, parse as a JSON object, carry a
    ``skill_name`` key and an ``evals`` array with at least EVAL_MIN_COUNT
    entries, each a dict containing all EVAL_REQUIRED_FIELDS. A skill with no
    eval that mentions a routing keyword produces a warning, not a failure.

    Returns:
        ``{"verifier", "status", "details"}`` where status is FAIL if any skill
        has a schema issue, WARN if only warnings exist, PASS otherwise.
    """
    issues = []
    warnings = []
    passed = []

    for skill in EVAL_TARGET_SKILLS:
        eval_path = os.path.join(SKILLS_BASE, skill, "evals", "evals.json")

        if not os.path.exists(eval_path):
            issues.append(f"[{skill}] evals.json 파일 없음: {eval_path}")
            continue

        try:
            with open(eval_path, encoding="utf-8") as f:
                data = json.load(f)
        except json.JSONDecodeError as e:
            issues.append(f"[{skill}] JSON 파싱 오류: {e}")
            continue
        except (OSError, UnicodeDecodeError) as e:
            # A file that is not valid UTF-8 is reported per skill instead of
            # aborting the whole run.
            issues.append(f"[{skill}] 파일 읽기 오류: {e}")
            continue

        if not isinstance(data, dict):
            # e.g. a top-level JSON array; `.get` below would raise on it.
            issues.append(f"[{skill}] 최상위 JSON이 객체가 아님: {type(data).__name__}")
            continue

        skill_errors = []

        if "skill_name" not in data:
            skill_errors.append("skill_name 필드 없음")

        evals = data.get("evals")
        if not isinstance(evals, list):
            skill_errors.append("evals 필드가 배열이 아님 또는 없음")
            issues.append(f"[{skill}] " + ", ".join(skill_errors))
            continue

        if len(evals) < EVAL_MIN_COUNT:
            skill_errors.append(f"eval 수 부족: {len(evals)}개 (최소 {EVAL_MIN_COUNT}개 필요)")

        missing_fields_evals = _missing_field_reports(evals)
        if missing_fields_evals:
            skill_errors.append("필드 누락 - " + "; ".join(missing_fields_evals))

        # Boundary test: at least one eval should describe routing to another skill.
        has_routing_eval = any(
            _mentions_routing(str(ev.get("expected_output", "")) + str(ev.get("assertions", "")))
            for ev in evals
            if isinstance(ev, dict)
        )
        if not has_routing_eval:
            warnings.append(
                f"[{skill}] 경계 테스트(라우팅) eval 없음 "
                "(expected_output 또는 assertions에 '라우팅'/'defers to'/'→' 없음)"
            )

        if skill_errors:
            issues.append(f"[{skill}] " + " | ".join(skill_errors))
        else:
            passed.append(skill)

    if issues:
        status = "FAIL"
        detail_parts = [f"FAIL 스킬: {'; '.join(issues)}"]
    elif warnings:
        status = "WARN"
        detail_parts = [f"PASS 스킬: {', '.join(passed)}"]
    else:
        status = "PASS"
        detail_parts = [f"모든 스킬 통과: {', '.join(passed)}"]

    if warnings:
        detail_parts.append(f"WARN: {'; '.join(warnings)}")

    return {
        "verifier": "V1_evals_schema",
        "status": status,
        "details": " | ".join(detail_parts),
    }


# ─────────────────────────────────────────────
# V2: description YAML frontmatter 유효성
# ─────────────────────────────────────────────


def verify_yaml_frontmatter(check_files: list[str]) -> dict:
    """
    V2: check that each given SKILL.md has parseable YAML frontmatter with a
    non-empty ``description`` field.

    Args:
        check_files: paths of the SKILL.md files to validate.

    Returns:
        ``{"verifier", "status", "details"}``. Status is WARN when no files
        were given, FAIL when any file has a problem (missing, unreadable, no
        frontmatter, unparseable or non-mapping YAML, absent or empty
        description), PASS otherwise.
    """
    if not check_files:
        return {
            "verifier": "V2_yaml_valid",
            "status": "WARN",
            "details": "--check-files 옵션이 제공되지 않아 검증 대상 없음",
        }

    issues = []
    passed = []

    for fpath in check_files:
        fpath = fpath.strip()
        if not os.path.exists(fpath):
            issues.append(f"{fpath}: 파일 없음")
            continue

        try:
            with open(fpath, encoding="utf-8") as f:
                content = f.read()
        except (OSError, UnicodeDecodeError) as e:
            # Undecodable bytes are a per-file failure, not a crash of the run.
            issues.append(f"{fpath}: 읽기 오류 - {e}")
            continue

        yaml_str, err = extract_yaml_frontmatter(content)
        if err or yaml_str is None:
            issues.append(f"{fpath}: {err}")
            continue

        data, parse_err = parse_yaml_safe(yaml_str)
        if parse_err or data is None:
            issues.append(f"{fpath}: YAML 파싱 실패 - {parse_err}")
            continue
        if not isinstance(data, dict):
            # A list or scalar document cannot carry a description key, and
            # `in` on some scalars (e.g. int) would raise TypeError.
            issues.append(f"{fpath}: YAML 파싱 실패 - frontmatter 최상위가 매핑이 아님 ({type(data).__name__})")
            continue

        if "description" not in data:
            issues.append(f"{fpath}: description 필드 없음")
            continue

        desc = data["description"]
        if not desc or (isinstance(desc, str) and not desc.strip()):
            issues.append(f"{fpath}: description 필드가 비어 있음")
            continue

        # Report by the containing directory name; fall back to the raw path
        # when the file has no parent directory component.
        passed.append(os.path.basename(os.path.dirname(fpath)) or fpath)

    if issues:
        status = "FAIL"
        details = f"실패: {'; '.join(issues)}"
        if passed:
            details += f" | 통과: {', '.join(passed)}"
    else:
        status = "PASS"
        details = f"모든 파일 통과 ({len(passed)}개): {', '.join(passed)}"

    return {
        "verifier": "V2_yaml_valid",
        "status": status,
        "details": details,
    }


# ─────────────────────────────────────────────
# V3: Before Starting 참조 존재 확인
# ─────────────────────────────────────────────


def verify_context_reference(context_skills: list[str]) -> dict:
    """
    V3: check that each skill's SKILL.md mentions ``product-marketing-context``.

    Args:
        context_skills: skill directory names under SKILLS_BASE; blank entries
            are ignored.

    Returns:
        ``{"verifier", "status", "details"}``. Status is WARN when there is
        nothing to check, FAIL when any SKILL.md is missing, unreadable or
        lacks the reference string, PASS otherwise.
    """
    skills = [name.strip() for name in context_skills if name.strip()]
    if not skills:
        # Nothing was checked, so reporting PASS would be misleading.
        return {
            "verifier": "V3_context_ref",
            "status": "WARN",
            "details": "검증 대상 스킬이 지정되지 않음",
        }

    issues = []
    passed = []

    for skill in skills:
        skill_path = os.path.join(SKILLS_BASE, skill, "SKILL.md")

        if not os.path.exists(skill_path):
            issues.append(f"[{skill}] SKILL.md 없음: {skill_path}")
            continue

        try:
            with open(skill_path, encoding="utf-8") as f:
                content = f.read()
        except (OSError, UnicodeDecodeError) as e:
            # Undecodable bytes are a per-skill failure, not a crash of the run.
            issues.append(f"[{skill}] 읽기 오류: {e}")
            continue

        if "product-marketing-context" not in content:
            issues.append(f"[{skill}] 'product-marketing-context' 문자열 없음")
        else:
            passed.append(skill)

    if issues:
        status = "FAIL"
        details = f"참조 누락: {'; '.join(issues)}"
        if passed:
            details += f" | 통과: {', '.join(passed)}"
    else:
        status = "PASS"
        details = f"모든 스킬에 product-marketing-context 참조 존재: {', '.join(passed)}"

    return {
        "verifier": "V3_context_ref",
        "status": status,
        "details": details,
    }


# ─────────────────────────────────────────────
# V4: 라우팅 패턴 일관성 검증
# ─────────────────────────────────────────────


def verify_routing_consistency(routing_skills: list[str]) -> dict:
    """
    V4: check that every ``→ <skill-name>`` target declared by a skill exists
    as a directory under SKILLS_BASE.

    Targets are searched in the SKILL.md frontmatter, or in the whole file when
    no frontmatter is found. Captured names shorter than three characters are
    ignored. A skill without a SKILL.md is skipped with a warning.

    Args:
        routing_skills: skill directory names to inspect; blank entries are
            ignored.

    Returns:
        ``{"verifier", "status", "details"}``. Status is FAIL when a target
        does not exist or a file cannot be read, WARN when only warnings were
        raised (or nothing was given to check), PASS otherwise.
    """
    skills = [name.strip() for name in routing_skills if name.strip()]
    if not skills:
        # Nothing was checked, so reporting PASS would be misleading.
        return {
            "verifier": "V4_routing_consistency",
            "status": "WARN",
            "details": "검증 대상 스킬이 지정되지 않음",
        }

    issues = []
    warnings = []
    passed = []

    # Names of the skill directories that actually exist.
    existing_skills: set[str] = set()
    if os.path.isdir(SKILLS_BASE):
        with os.scandir(SKILLS_BASE) as entries:
            existing_skills = {entry.name for entry in entries if entry.is_dir()}

    # "→ skill-name": lowercase letter first, then letters/digits/hyphens.
    routing_pattern = re.compile(r"→\s*([a-z][a-z0-9-]{1,})")

    for skill in skills:
        skill_path = os.path.join(SKILLS_BASE, skill, "SKILL.md")

        if not os.path.exists(skill_path):
            warnings.append(f"[{skill}] SKILL.md 없음 (스킵)")
            continue

        try:
            with open(skill_path, encoding="utf-8") as f:
                content = f.read()
        except (OSError, UnicodeDecodeError) as e:
            issues.append(f"[{skill}] 읽기 오류: {e}")
            continue

        yaml_str, _ = extract_yaml_frontmatter(content)
        search_target = yaml_str if yaml_str else content

        # De-duplicate while keeping first-appearance order so the report is
        # identical from run to run (a plain set has no stable order).
        targets = list(dict.fromkeys(routing_pattern.findall(search_target)))

        if not targets:
            # No routing declared: passes without a warning.
            passed.append(skill)
            continue

        broken = [t for t in targets if len(t) >= 3 and t not in existing_skills]
        if broken:
            issues.append(f"[{skill}] 존재하지 않는 라우팅 타겟: {', '.join(broken)}")
        else:
            passed.append(f"{skill}(→{','.join(targets)})")

    if issues:
        status = "FAIL"
        details = f"깨진 라우팅: {'; '.join(issues)}"
        if warnings:
            # Keep skipped skills visible even when the run already failed.
            details += f" | WARN: {'; '.join(warnings)}"
        if passed:
            details += f" | 통과: {', '.join(passed)}"
    elif warnings:
        status = "WARN"
        details = f"WARN: {'; '.join(warnings)}"
        if passed:
            details += f" | 통과: {', '.join(passed)}"
    else:
        status = "PASS"
        details = f"모든 라우팅 유효: {', '.join(passed)}"

    return {
        "verifier": "V4_routing_consistency",
        "status": status,
        "details": details,
    }


# ─────────────────────────────────────────────
# V5: 핵심 로직 변경 무결성
# ─────────────────────────────────────────────


def verify_core_integrity(backup_dir: str) -> dict:
    """V5: 백업본 대비 SKILL.md 핵심 섹션 변경 여부 확인."""
    if not os.path.isdir(backup_dir):
        return {
            "verifier": "V5_core_integrity",
            "status": "FAIL",
            "details": f"백업 디렉토리 없음: {backup_dir}",
        }

    issues = []
    passed = []
    no_section_skills = []  # 섹션 없는 스킬은 건너뜀

    # 백업 디렉토리 내 스킬 목록 수집
    backup_skills = []
    try:
        for entry in os.scandir(backup_dir):
            if entry.is_dir():
                backup_skills.append(entry.name)
    except OSError as e:
        return {
            "verifier": "V5_core_integrity",
            "status": "FAIL",
            "details": f"백업 디렉토리 스캔 오류: {e}",
        }

    if not backup_skills:
        return {
            "verifier": "V5_core_integrity",
            "status": "WARN",
            "details": f"백업 디렉토리에 스킬 없음: {backup_dir}",
        }

    for skill in sorted(backup_skills):
        backup_skill_path = os.path.join(backup_dir, skill, "SKILL.md")
        current_skill_path = os.path.join(SKILLS_BASE, skill, "SKILL.md")

        if not os.path.exists(backup_skill_path):
            continue  # 백업에 SKILL.md 없으면 스킵

        if not os.path.exists(current_skill_path):
            issues.append(f"[{skill}] 현재 SKILL.md 없음 (백업에는 존재)")
            continue

        # 파일 읽기
        try:
            with open(backup_skill_path, encoding="utf-8") as f:
                backup_content = f.read()
            with open(current_skill_path, encoding="utf-8") as f:
                current_content = f.read()
        except OSError as e:
            issues.append(f"[{skill}] 파일 읽기 오류: {e}")
            continue

        # 각 핵심 섹션 비교
        skill_has_any_section = False
        skill_changed = []

        for section in INTEGRITY_SECTIONS:
            backup_sec = extract_section(backup_content, section)
            current_sec = extract_section(current_content, section)

            if backup_sec is None and current_sec is None:
                continue  # 두 버전 모두 없음 - 검사 불필요

            skill_has_any_section = True

            if backup_sec is None and current_sec is not None:
                skill_changed.append(f"'{section}' 섹션 새로 추가됨")
            elif backup_sec is not None and current_sec is None:
                skill_changed.append(f"'{section}' 섹션 삭제됨")
            elif backup_sec is not None and current_sec is not None and backup_sec != current_sec:
                # 실질적 내용 변경 여부 확인 (공백 정규화 후 비교)
                def normalize(text: str) -> str:
                    return re.sub(r"\s+", " ", text).strip()

                if normalize(backup_sec) != normalize(current_sec):
                    skill_changed.append(f"'{section}' 내용 변경됨")

        # 핵심 섹션 없는 스킬도 description 변경 여부를 확인
        if not skill_has_any_section:
            # description 필드 비교 (frontmatter 내)
            backup_yaml, _ = extract_yaml_frontmatter(backup_content)
            current_yaml, _ = extract_yaml_frontmatter(current_content)
            if backup_yaml and current_yaml:
                backup_data, _ = parse_yaml_safe(backup_yaml)
                current_data, _ = parse_yaml_safe(current_yaml)
                if backup_data and current_data:
                    b_desc = str(backup_data.get("description", ""))
                    c_desc = str(current_data.get("description", ""))
                    if b_desc != c_desc:
                        # description만 변경된 경우 — 허용 범위지만 추적
                        no_section_skills.append(f"{skill}(desc변경)")
                    else:
                        no_section_skills.append(skill)
                else:
                    no_section_skills.append(skill)
            else:
                no_section_skills.append(skill)
        elif skill_changed:
            issues.append(f"[{skill}] " + " | ".join(skill_changed))
        else:
            passed.append(skill)

    # 판정
    detail_parts = []
    if issues:
        status = "FAIL"
        detail_parts.append(f"변경 감지: {'; '.join(issues)}")
    else:
        status = "PASS"

    if passed:
        detail_parts.append(f"무결성 통과: {', '.join(passed)}")
    if no_section_skills:
        detail_parts.append(f"핵심 섹션 없음(스킵): {', '.join(no_section_skills)}")
    if not detail_parts:
        detail_parts.append("검증 대상 없음")

    return {
        "verifier": "V5_core_integrity",
        "status": status,
        "details": " | ".join(detail_parts),
    }


# ─────────────────────────────────────────────
# 결과 집계 및 출력
# ─────────────────────────────────────────────


def aggregate_results(results: list[dict]) -> dict:
    """
    Combine per-verifier results into one report.

    The overall status is the most severe one present (FAIL, then WARN, else
    PASS). The summary joins ``<code>:<status>`` pairs, where the code is the
    part of the verifier name before its first underscore (``V1_evals_schema``
    becomes ``V1``).
    """
    statuses = [entry["status"] for entry in results]
    overall = next((level for level in ("FAIL", "WARN") if level in statuses), "PASS")

    labels = []
    for entry in results:
        code, _, _ = entry["verifier"].partition("_")
        labels.append(f"{code}:{entry['status']}")

    return {
        "overall": overall,
        "results": results,
        "summary": " ".join(labels),
    }


def determine_exit_code(overall: str) -> int:
    """Map the overall status to a process exit code: PASS/WARN -> 0, FAIL -> 1, anything else -> 2."""
    # WARN counts as success; only FAIL yields 1.
    if overall in ("PASS", "WARN"):
        return 0
    return 1 if overall == "FAIL" else 2


# ─────────────────────────────────────────────
# CLI 진입점
# ─────────────────────────────────────────────


def parse_args(argv=None):
    """
    Build the CLI parser and parse the command line.

    Args:
        argv: argument list to parse; ``None`` (the default) makes argparse
            read ``sys.argv[1:]``.

    Returns:
        The ``argparse.Namespace`` with ``target``, ``check_files``,
        ``context_check``, ``routing_check`` and ``backup_dir``.

    Raises:
        SystemExit: raised by argparse on ``--help`` or invalid arguments.
    """
    parser = argparse.ArgumentParser(
        description="Phase 4 마케팅 업그레이드 검증 스크립트",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Target 옵션:
  all        - 모든 검증 실행 (V1~V5)
  evals      - V1: evals.json 스키마 검증
  yaml       - V2: description YAML frontmatter 유효성
  context    - V3: Before Starting 참조 확인
  routing    - V4: 라우팅 패턴 일관성 검증
  integrity  - V5: 핵심 로직 변경 무결성

예시:
  python3 validate_marketing_upgrade.py --target all
  python3 validate_marketing_upgrade.py --target yaml --check-files /path/to/SKILL.md
  python3 validate_marketing_upgrade.py --target context --context-check blog-dominance,geo-optimizer
  python3 validate_marketing_upgrade.py --target routing --routing-check ad-creative,copywriting
  python3 validate_marketing_upgrade.py --target integrity --backup-dir /path/to/backup/
        """,
    )
    parser.add_argument(
        "--target",
        required=True,
        choices=["all", "evals", "yaml", "context", "routing", "integrity"],
        help="실행할 검증 대상",
    )
    parser.add_argument(
        "--check-files",
        default="",
        help="V2 검증 대상 SKILL.md 파일 목록 (쉼표 구분)",
    )
    parser.add_argument(
        "--context-check",
        default="",
        # Derived from the constant so the help text cannot drift from the
        # defaults that are actually applied.
        help=f"V3 검증 대상 스킬 목록 (쉼표 구분, 기본: {','.join(CONTEXT_CHECK_SKILLS_DEFAULT)})",
    )
    parser.add_argument(
        "--routing-check",
        default="",
        help="V4 검증 대상 스킬 목록 (쉼표 구분, 기본: 전체 마케팅 스킬)",
    )
    parser.add_argument(
        "--backup-dir",
        default=BACKUP_DIR_DEFAULT,
        help=f"V5 백업 디렉토리 (기본: {BACKUP_DIR_DEFAULT})",
    )
    return parser.parse_args(argv)


def main():
    """
    CLI entry point: run the selected verifiers, print the aggregated report
    as JSON on stdout and exit with the matching status code.

    An unexpected exception inside a verifier is reported as an ``ERROR``
    JSON document and ends the process with exit code 2.
    """
    try:
        args = parse_args()
    except SystemExit as exc:
        # Re-exit with argparse's numeric code; anything non-numeric becomes 2.
        exit_text = str(exc)
        sys.exit(int(exit_text) if exit_text.isdigit() else 2)

    def split_csv(raw):
        # Comma-separated option value -> list of non-blank, trimmed items.
        return [item.strip() for item in raw.split(",") if item.strip()]

    check_files = split_csv(args.check_files) if args.check_files else []
    if args.context_check:
        context_skills = split_csv(args.context_check)
    else:
        context_skills = CONTEXT_CHECK_SKILLS_DEFAULT
    if args.routing_check:
        routing_skills = split_csv(args.routing_check)
    else:
        routing_skills = ROUTING_CHECK_SKILLS_DEFAULT
    backup_dir = args.backup_dir

    # Verifiers in report order, keyed by the --target value that selects them.
    steps = (
        ("evals", lambda: verify_evals_schema()),
        ("yaml", lambda: verify_yaml_frontmatter(check_files)),
        ("context", lambda: verify_context_reference(context_skills)),
        ("routing", lambda: verify_routing_consistency(routing_skills)),
        ("integrity", lambda: verify_core_integrity(backup_dir)),
    )

    target = args.target
    results = []

    try:
        for name, run in steps:
            if target in ("all", name):
                results.append(run())
    except Exception as e:
        # Unexpected failure: emit an error document and exit with code 2.
        error_output = {
            "overall": "ERROR",
            "results": [],
            "summary": f"예상치 못한 오류: {e}",
            "error": str(e),
        }
        print(json.dumps(error_output, ensure_ascii=False, indent=2))
        sys.exit(2)

    output = aggregate_results(results)
    print(json.dumps(output, ensure_ascii=False, indent=2))
    sys.exit(determine_exit_code(output["overall"]))


if __name__ == "__main__":
    main()