"""스킬 보안 스캔 모듈.

위협 패턴 스캔, 구조 검사, 보이지 않는 문자 탐지 및 설치 정책 판정.

Usage:
    from utils.skill_guard import scan_skill, should_allow_install
    result = scan_skill(Path("/path/to/skill"))
    allowed, msg = should_allow_install(result)
"""

import re
from dataclasses import dataclass, field
from datetime import datetime, timezone
from pathlib import Path

from utils.skill_guard_patterns import INVISIBLE_CHARS, THREAT_PATTERNS

# Size limit (10 MiB): larger files are not read for pattern scanning and are
# reported as oversized by the structure check.
_MAX_FILE_BYTES = 10 * 1024 * 1024
# Maximum number of regular files in a skill directory before a structure finding is raised.
_MAX_FILE_COUNT = 100
# File suffixes (compared lower-cased) that are never read as text.
_BINARY_EXT: set[str] = {".exe", ".dll", ".so", ".bin", ".pyc", ".pyo", ".o", ".a", ".lib", ".obj"}
# Install policy keyed by (source, verdict). Each value is (decision, message),
# where the decision is True = allow, False = block, None = user confirmation needed.
_POLICY: dict[tuple[str, str], tuple[bool | None, str]] = {
    ("official", "safe"): (True, "공식 스킬 - 안전"),
    ("official", "caution"): (True, "공식 스킬 - 주의 (경고)"),
    ("official", "dangerous"): (None, "공식 스킬 - 위험 패턴 발견, 확인 필요"),
    ("community", "safe"): (True, "커뮤니티 스킬 - 안전"),
    ("community", "caution"): (None, "커뮤니티 스킬 - 주의 (사용자 확인 필요)"),
    ("community", "dangerous"): (False, "커뮤니티 스킬 - 위험 (설치 차단)"),
    ("agent-created", "safe"): (True, "에이전트 스킬 - 안전"),
    ("agent-created", "caution"): (False, "에이전트 스킬 - 주의 (설치 차단)"),
    ("agent-created", "dangerous"): (False, "에이전트 스킬 - 위험 (설치 차단)"),
}


@dataclass
class SkillFinding:
    """A single issue reported by one of the scanner checks."""

    pattern_id: str  # identifier of the rule that fired, e.g. "INVIS-001" or "STRUCT-003"
    severity: str  # severity label; this file emits "critical", "high" and "medium"
    category: str  # rule category, e.g. "obfuscation" or "structure"
    file: str  # path of the offending file or directory, as a string
    line: int  # 1-based line number; structure findings use 0 because they have no line
    match: str  # matched text or a short detail string such as "count=..." / "size=..."
    description: str  # human-readable explanation of the finding


@dataclass
class SkillScanResult:
    """Aggregate outcome of scanning one skill file or directory."""

    skill_name: str  # directory name, or the file stem for a single-file skill
    source: str  # origin label; first half of the install-policy lookup key
    trust_level: str  # scan_skill currently stores the same value as `source` here
    verdict: str  # "safe", "caution" or "dangerous"
    findings: list[SkillFinding] = field(default_factory=list)  # every finding collected during the scan
    scanned_at: str = ""  # ISO-8601 timestamp; scan_skill fills in the current UTC time


def _mkf(pid: str, sev: str, cat: str, fp: str, ln: int, mt: str, desc: str) -> SkillFinding:
    """Build a ``SkillFinding`` from short positional arguments.

    Args:
        pid: Rule identifier.
        sev: Severity label.
        cat: Rule category.
        fp: Path of the affected file or directory.
        ln: Line number of the finding (0 when not line-based).
        mt: Matched text or detail string.
        desc: Human-readable description.

    Returns:
        The populated finding.
    """
    return SkillFinding(
        pattern_id=pid,
        severity=sev,
        category=cat,
        file=fp,
        line=ln,
        match=mt,
        description=desc,
    )


def _read_text(filepath: Path) -> str | None:
    """Return the text of *filepath*, or ``None`` when it should not be scanned.

    ``None`` is returned when the suffix is listed in ``_BINARY_EXT``, the path
    is not a regular file, the file is larger than ``_MAX_FILE_BYTES``, or the
    file cannot be inspected or read. Undecodable bytes are replaced instead of
    raising.

    Args:
        filepath: File to read.

    Returns:
        The decoded UTF-8 text, or ``None`` if the file is skipped.
    """
    # Pure string check first: no filesystem access needed to rule these out.
    if filepath.suffix.lower() in _BINARY_EXT:
        return None
    try:
        # is_file()/stat() can raise OSError too (permission denied, file removed
        # between the two calls), so they belong inside the guarded region.
        if not filepath.is_file() or filepath.stat().st_size > _MAX_FILE_BYTES:
            return None
        return filepath.read_text(encoding="utf-8", errors="replace")
    except OSError:
        return None


def scan_file(filepath: Path) -> list[SkillFinding]:
    """Scan a single file, line by line, against ``THREAT_PATTERNS``.

    Each pattern is searched case-insensitively in every line. Every pattern
    that matches a line produces its own finding; the matched text is truncated
    to 200 characters.

    Args:
        filepath: File to scan. Files that ``_read_text`` declines to read
            (missing, binary suffix, oversized, unreadable) yield no findings.

    Returns:
        Findings in line order, and in ``THREAT_PATTERNS`` order within a line.
    """
    content = _read_text(filepath)
    if content is None:
        return []
    path_str = str(filepath)
    out: list[SkillFinding] = []
    for no, line in enumerate(content.splitlines(), 1):
        for pat, pid, sev, cat, desc in THREAT_PATTERNS:
            m = re.search(pat, line, re.IGNORECASE)
            if m:
                # No early exit here: stopping at the first hit would let a
                # lower-severity pattern listed earlier hide a critical pattern
                # on the same line, and the overall verdict depends on the most
                # severe finding.
                out.append(_mkf(pid, sev, cat, path_str, no, m.group(0)[:200], desc))
    return out


def check_invisible_chars(filepath: Path) -> list[SkillFinding]:
    """Report lines containing characters listed in ``INVISIBLE_CHARS``.

    The file is decoded as UTF-8 with undecodable bytes replaced. At most one
    ``INVIS-001`` finding (severity "high") is produced per line; its match
    field is the ``repr`` of the first five offending characters on that line.

    Args:
        filepath: File to inspect.

    Returns:
        Findings in line order; empty when the path is not a file or cannot be
        read.
    """
    if not filepath.is_file():
        return []
    try:
        text = filepath.read_text(encoding="utf-8", errors="replace")
    except OSError:
        return []

    path_str = str(filepath)
    findings: list[SkillFinding] = []
    for lineno, text_line in enumerate(text.splitlines(), start=1):
        hidden = [ch for ch in text_line if ch in INVISIBLE_CHARS]
        if not hidden:
            continue
        finding = _mkf(
            "INVIS-001",
            "high",
            "obfuscation",
            path_str,
            lineno,
            repr(hidden[:5]),
            "보이지 않는 유니코드 문자",
        )
        findings.append(finding)
    return findings


def _check_symlinks(all_items: list[Path], base: str) -> list[SkillFinding]:
    """Report symlinks whose resolved target lies outside *base*.

    Args:
        all_items: Paths to inspect; entries that are not symlinks are ignored.
        base: Resolved path of the skill root directory, as a string.

    Returns:
        One critical ``STRUCT-003`` finding per escaping symlink, with the
        resolved target in the match field.
    """
    base_path = Path(base)
    out: list[SkillFinding] = []
    for item in all_items:
        if not item.is_symlink():
            continue
        try:
            target = item.resolve()
        except (OSError, RuntimeError):
            # Best effort: a link that cannot be resolved is skipped. Before
            # Python 3.13 a symlink loop raises RuntimeError rather than OSError.
            continue
        # Compare whole path components. A plain string-prefix test would treat
        # a sibling such as "<base>-evil/..." as being inside "<base>".
        if not target.is_relative_to(base_path):
            out.append(_mkf("STRUCT-003", "critical", "structure", str(item), 0, str(target), "심링크 탈출"))
    return out


def check_structure(skill_dir: Path) -> list[SkillFinding]:
    """Check a skill directory's file count, file sizes and symlink targets.

    Findings are emitted in this order: ``STRUCT-001`` when the directory holds
    more than ``_MAX_FILE_COUNT`` files, then ``STRUCT-003`` for symlinks that
    escape the directory, then ``STRUCT-002`` for each non-symlink file larger
    than ``_MAX_FILE_BYTES``.

    Args:
        skill_dir: Root directory of the skill.

    Returns:
        The structure findings; empty when *skill_dir* is not a directory.
    """
    if not skill_dir.is_dir():
        return []
    all_items = list(skill_dir.rglob("*"))
    fc = sum(1 for f in all_items if f.is_file())
    out: list[SkillFinding] = []
    if fc > _MAX_FILE_COUNT:
        out.append(
            _mkf(
                "STRUCT-001",
                "medium",
                "structure",
                str(skill_dir),
                0,
                f"count={fc}",
                f"파일 수 초과 (최대 {_MAX_FILE_COUNT}개)",
            )
        )
    out.extend(_check_symlinks(all_items, str(skill_dir.resolve())))
    for item in all_items:
        if item.is_symlink():
            continue
        try:
            if not item.is_file():
                continue
            # Stat once so the reported size is the same value that was compared.
            size = item.stat().st_size
        except OSError:
            # The file vanished or is unreadable; skip it instead of aborting
            # the whole structure check.
            continue
        if size > _MAX_FILE_BYTES:
            out.append(_mkf("STRUCT-002", "medium", "structure", str(item), 0, f"size={size}", "파일 크기 초과"))
    return out


def scan_skill(skill_path: Path, source: str = "community") -> SkillScanResult:
    """Run every check on a skill directory or single skill file.

    A directory gets the structure check, then each regular, non-symlink file
    under it is scanned for threat patterns and invisible characters. Any other
    path is scanned as a single file.

    NOTE(review): a path that does not exist yields no findings and therefore a
    "safe" verdict.

    Args:
        skill_path: Skill directory or file.
        source: Origin label stored in both ``source`` and ``trust_level``.

    Returns:
        The scan result with verdict "dangerous" if any finding is critical,
        "caution" if there is any other finding, otherwise "safe".
    """
    if skill_path.is_dir():
        skill_name = skill_path.name
        findings: list[SkillFinding] = list(check_structure(skill_path))
        # Lazy generator: the tree is walked while files are being scanned.
        targets = (p for p in skill_path.rglob("*") if p.is_file() and not p.is_symlink())
    else:
        skill_name = skill_path.stem
        findings = []
        targets = iter((skill_path,))

    for target in targets:
        findings += scan_file(target)
        findings += check_invisible_chars(target)

    if any(item.severity == "critical" for item in findings):
        verdict = "dangerous"
    elif findings:
        # Any finding below critical, whatever its severity, means caution.
        verdict = "caution"
    else:
        verdict = "safe"

    return SkillScanResult(
        skill_name=skill_name,
        source=source,
        trust_level=source,
        verdict=verdict,
        findings=findings,
        scanned_at=datetime.now(timezone.utc).isoformat(),
    )


def should_allow_install(result: SkillScanResult, force: bool = False) -> tuple[bool | None, str]:
    """Decide whether a scanned skill may be installed.

    Args:
        result: Scan result whose ``source`` and ``verdict`` select the policy.
        force: When true, installation is allowed regardless of the verdict.

    Returns:
        ``(decision, message)`` where the decision is ``True`` to allow,
        ``False`` to block, and ``None`` when user confirmation is needed.
        A (source, verdict) pair missing from ``_POLICY`` yields ``None``.
    """
    if not force:
        fallback = (None, f"알 수 없는 소스({result.source})")
        policy_key = (result.source, result.verdict)
        return _POLICY.get(policy_key, fallback)
    return True, f"[force] {result.skill_name} 강제 허용"


def format_scan_report(result: SkillScanResult) -> str:
    """Render a scan result as a plain-text report.

    The report is a header (skill name, source, verdict, scan time, finding
    count) framed by 60-character rules. When there are findings, a dashed
    rule and one line per finding follow, showing only the file's base name.

    Args:
        result: Scan result to render.

    Returns:
        The report as a newline-joined string without a trailing newline.
    """
    heavy_rule = "=" * 60
    report = [
        heavy_rule,
        f"[SKILL GUARD] {result.skill_name}",
        f"  소스: {result.source}  판정: {result.verdict} ({result.verdict.upper()})",
        f"  스캔: {result.scanned_at}  발견: {len(result.findings)}건",
    ]
    if result.findings:
        report.append("-" * 60)
        report.extend(
            f"  [{item.severity.upper()}] {item.pattern_id} @ {Path(item.file).name}:{item.line} | {item.description}"
            for item in result.findings
        )
    report.append(heavy_rule)
    return "\n".join(report)
