"""M-05 @ 참조 시스템.

텍스트 내 @file:, @folder:, @diff, @staged 참조를 파싱하고
실제 파일 내용 / git 출력으로 치환한다.

Usage:
    from utils.context_refs import parse_refs, resolve_refs

    refs = parse_refs("check @file:utils/config_loader.py and @diff")
    text = resolve_refs("review @file:README.md", base_dir="/home/jay/workspace")
"""

from __future__ import annotations

import re
import subprocess
from pathlib import Path
from typing import Any

try:
    from utils.injection_guard import scan_content as _scan_content

    _GUARD_AVAILABLE = True
except ImportError:
    _GUARD_AVAILABLE = False

# Per-file cap: _read_safe and _resolve_file refuse files larger than this.
_MAX_FILE_BYTES = 50 * 1024  # 50 KB
# Cap on the running byte total tracked in the `budget` list (see _check_budget).
_MAX_TOTAL_BYTES = 100 * 1024  # 100 KB
# @folder: lists and inlines at most this many files (first N in sorted order) ...
_MAX_FOLDER_FILES = 5
# ... and keeps at most this many leading lines of each of them.
_MAX_FOLDER_LINES = 200

# Matches either "@file:<path>" / "@folder:<path>", where <path> is a run of
# non-whitespace characters, or a bare "@diff" / "@staged" ending at a word
# boundary. Named groups: ref (whole match), type, path, bare.
# NOTE(review): <path> is greedy up to the next whitespace, so punctuation
# directly after a reference (e.g. "@file:README.md,") becomes part of the path.
_REF_PATTERN = re.compile(r"(?P<ref>" r"@(?P<type>file|folder):(?P<path>[^\s]+)" r"|@(?P<bare>diff|staged)\b" r")")


def parse_refs(text: str) -> list[dict[str, Any]]:
    """Extract every @ reference found in ``text``, in order of appearance.

    Args:
        text: Free-form text that may contain ``@file:<path>``,
            ``@folder:<path>``, ``@diff`` or ``@staged`` tokens.

    Returns:
        One dict per match with the keys ``"type"`` (``"file"``, ``"folder"``,
        ``"diff"`` or ``"staged"``), ``"path"`` (the referenced path, or
        ``None`` for the bare ``@diff`` / ``@staged`` forms) and ``"raw"``
        (the exact matched token).
    """
    return [
        {
            # Exactly one of the "type" / "bare" groups participates in a match.
            "type": match.group("type") or match.group("bare"),
            "path": match.group("path") if match.group("type") else None,
            "raw": match.group("ref"),
        }
        for match in _REF_PATTERN.finditer(text)
    ]


def _read_safe(path: Path, max_lines: int | None = None) -> str | None:
    """Read a text file defensively, returning ``None`` instead of raising.

    Args:
        path: File to read.
        max_lines: When given, only the first ``max_lines`` lines are kept
            (joined with ``"\\n"``).

    Returns:
        The file content decoded as UTF-8 (undecodable bytes are replaced),
        or ``None`` when ``path`` is not a regular file, is larger than
        ``_MAX_FILE_BYTES``, or any filesystem error occurs.
    """
    try:
        # The metadata probes live inside the try as well: is_file()/stat()
        # can raise OSError (e.g. permission denied on a parent directory, or
        # the file vanishing between the two calls), and the contract of this
        # helper is "None on any error".
        if not path.is_file() or path.stat().st_size > _MAX_FILE_BYTES:
            return None
        text = path.read_text(encoding="utf-8", errors="replace")
    except OSError:
        return None
    if max_lines is not None:
        text = "\n".join(text.splitlines()[:max_lines])
    return text


def _target(path_str: str, base_dir: Path | None) -> Path:
    """상대/절대 경로를 base_dir 기준으로 해석한다."""
    p = Path(path_str)
    if p.is_absolute() or base_dir is None:
        return p
    resolved = (base_dir / path_str).resolve()
    try:
        resolved.relative_to(base_dir.resolve())
    except ValueError:
        raise PermissionError(f"Path escape detected: {path_str}")
    return resolved


def _check_budget(budget: list[int], data: bytes) -> bool:
    """Report whether ``data`` still fits into the shared context budget.

    Args:
        budget: One-element list whose first item is the number of bytes
            consumed so far. It is only read here, never modified.
        data: Payload that the caller would like to add.

    Returns:
        ``True`` when the consumed bytes plus ``len(data)`` do not exceed
        ``_MAX_TOTAL_BYTES``, ``False`` otherwise.
    """
    projected_total = budget[0] + len(data)
    return projected_total <= _MAX_TOTAL_BYTES


def _resolve_file(raw: str, path_str: str, base_dir: Path | None, budget: list[int]) -> str:
    """Replace an ``@file:`` reference with the content of the file.

    Args:
        raw: The reference token exactly as it appeared in the text.
        path_str: Path part of the reference.
        base_dir: Base directory for relative paths (see ``_target``).
        budget: One-element list holding the bytes consumed so far; it is
            increased by the UTF-8 size of the content that gets inlined.

    Returns:
        ``"<!-- raw -->"`` followed by the file content on success, otherwise
        a single HTML-comment placeholder explaining why nothing was inlined
        (not found, too large, or context budget exceeded).

    Raises:
        PermissionError: Propagated from ``_target`` when a relative path
            escapes ``base_dir``.
    """
    target = _target(path_str, base_dir)
    not_found = f"<!-- {raw}: not found -->"
    try:
        if not target.is_file():
            return not_found
        size = target.stat().st_size
    except OSError:
        # Unreadable metadata is reported the same way as a missing file.
        return not_found
    if size > _MAX_FILE_BYTES:
        return f"<!-- {raw}: too large ({size} bytes, limit {_MAX_FILE_BYTES}) -->"
    content = _read_safe(target)
    if content is None:
        return not_found
    # Check and charge the budget with the same quantity: the UTF-8 size of
    # the decoded text. The on-disk size can differ from it (undecodable bytes
    # are replaced by a 3-byte U+FFFD), so checking against the on-disk size
    # would allow the total to overshoot the limit.
    chunk = content.encode("utf-8")
    if not _check_budget(budget, chunk):
        return f"<!-- {raw}: context budget exceeded -->"
    budget[0] += len(chunk)
    return f"<!-- {raw} -->\n{content}"


def _resolve_folder(raw: str, path_str: str, base_dir: Path | None, budget: list[int]) -> str:
    """Replace an ``@folder:`` reference with a file listing plus contents.

    Only regular files directly inside the folder are considered, in sorted
    order. The header reports how many files were found; the listing and the
    inlined contents cover at most the first ``_MAX_FOLDER_FILES`` of them,
    each truncated to ``_MAX_FOLDER_LINES`` lines.

    Args:
        raw: The reference token exactly as it appeared in the text.
        path_str: Path part of the reference.
        base_dir: Base directory for relative paths (see ``_target``).
        budget: One-element list holding the bytes consumed so far; it is
            increased for every file whose content gets inlined.

    Returns:
        The assembled listing, or a ``not found`` placeholder when the path
        is not a directory or cannot be listed.
    """
    folder = _target(path_str, base_dir)
    missing = f"<!-- {raw}: not found -->"
    if not (folder.exists() and folder.is_dir()):
        return missing
    try:
        entries = [entry for entry in folder.iterdir() if entry.is_file()]
    except OSError:
        return missing
    entries.sort()

    shown = entries[:_MAX_FOLDER_FILES]
    lines: list[str] = [f"<!-- {raw} — {len(entries)} file(s) -->", f"Files in {folder}:"]
    for entry in shown:
        lines.append(f"  - {entry.name}")

    for entry in shown:
        body = _read_safe(entry, _MAX_FOLDER_LINES)
        if body is None:
            lines.append(f"\n### {entry.name} (skipped — too large or unreadable)")
        else:
            encoded = body.encode("utf-8")
            if _check_budget(budget, encoded):
                budget[0] += len(encoded)
                lines.append(f"\n### {entry.name}\n{body}")
            else:
                lines.append(f"\n### {entry.name} (skipped — context budget exceeded)")
    return "\n".join(lines)


def _run_git(args: list[str], cwd: str | None = None) -> str:
    """git 명령을 실행하고 출력을 반환한다. 실패 시 빈 문자열."""
    try:
        return subprocess.run(["git"] + args, capture_output=True, text=True, timeout=10, cwd=cwd).stdout
    except Exception:
        return ""


def _resolve_git(raw: str, git_args: list[str], fallback: str, base_dir: Path | None, budget: list[int]) -> str:
    """Replace a git-backed reference with the output of a git command.

    Args:
        raw: The reference token exactly as it appeared in the text.
        git_args: Arguments handed to ``_run_git``.
        fallback: Text used in place of the output when git yields nothing.
        base_dir: Directory the command runs in; ``None`` keeps the current
            working directory.
        budget: One-element list holding the bytes consumed so far; it is
            increased by the UTF-8 size of the text that gets inlined.

    Returns:
        ``"<!-- raw -->"`` followed by the output (or ``fallback``), or a
        ``context budget exceeded`` placeholder when it does not fit.
    """
    workdir = None
    if base_dir:
        workdir = str(base_dir)

    text = _run_git(git_args, cwd=workdir)
    if not text:
        text = fallback

    encoded = text.encode("utf-8")
    if _check_budget(budget, encoded):
        budget[0] += len(encoded)
        return f"<!-- {raw} -->\n{text}"
    return f"<!-- {raw}: context budget exceeded -->"


def resolve_refs(text: str, base_dir: str | Path | None = None) -> str:
    """Return ``text`` with every @ reference replaced by its content.

    ``@file:`` and ``@folder:`` references are expanded from the filesystem;
    ``@diff`` is replaced by the output of ``git diff --staged`` and
    ``@staged`` by the output of ``git diff --staged --name-only``. All
    expansions within one call share a single byte budget.

    Args:
        text: Source text containing @ references.
        base_dir: Directory used to resolve relative paths and as the working
            directory for git. ``None`` leaves paths as written and runs git
            in the current working directory.

    Returns:
        The text with each reference substituted in place.

    Raises:
        PermissionError: Propagated from ``_target`` when a relative path
            resolves outside ``base_dir``.
    """
    root = None if base_dir is None else Path(base_dir)
    # Single mutable cell so the helpers can update the shared running total.
    used: list[int] = [0]

    def _substitute(match: re.Match[str]) -> str:
        token = match.group("ref")
        kind = match.group("type")
        if kind == "file":
            return _resolve_file(token, match.group("path"), root, used)
        if kind == "folder":
            return _resolve_folder(token, match.group("path"), root, used)
        if kind:
            return token

        bare = match.group("bare")
        # NOTE(review): @diff also reads the staged diff (--staged), so
        # unstaged changes never show up — confirm this is intended.
        if bare == "diff":
            return _resolve_git(token, ["diff", "--staged"], "(no staged diff or not a git repository)", root, used)
        if bare == "staged":
            return _resolve_git(
                token, ["diff", "--staged", "--name-only"], "(no staged files or not a git repository)", root, used
            )
        return token

    return _REF_PATTERN.sub(_substitute, text)