# pyright: reportMissingImports=false
"""
test_ci_sh_worktree_exclude_2549.py — task-2549 회귀 테스트

scripts/ci.sh의 1단계 syntax check (`find $WORKSPACE -name "*.py"`)에
non-source 디렉토리 (.worktrees / .venv / venv / .codegraph-venv /
node_modules / .git) 가지치기 (prune) 가 적용되어 스캔 시간 폭증이
재발하지 않도록 박제한다.

근본 원인: 80개의 .worktrees/* 디렉토리가 워크스페이스에 누적되어
find가 .py 파일 442,775개를 수집 → py_compile 단계 173+ 분 stuck.

추가로 워크스페이스 내 중첩된 venv(`tools/ai-image-gen/jaaz-app/server/venv/`)
및 `scripts/.codegraph-venv/`도 vendor 코드이므로 동일하게 가지치기 (스펙의
".venv 제외" intent 확장).

검증 포인트:
1. ci.sh source 자체에 prune 패턴 6종 (.worktrees/.venv/venv/.codegraph-venv/
   node_modules/.git)이 박혀 있다 — find 구문 순서에 강하게 결합되지 않게
   정규식으로 유연하게 매칭한다.
2. find 명령이 실제로 vendor 디렉토리 내부 .py 파일을 수집하지 않는다
   (드라이런 — `-print0` null-delimited 파싱으로 특수문자 안전).
3. find 결과가 비-필터 대비 압도적으로 감소한다 (>=4x reduction).
"""

import os
import pathlib
import re
import subprocess

import pytest

# Repository root: two directory levels above the directory that contains
# this file (original note: ".../task-2549-dev2").
ROOT = pathlib.Path(__file__).resolve().parents[2]
# Path of the CI script whose `find` invocation these tests guard.
CI_SH = ROOT / "scripts" / "ci.sh"


# ---------------------------------------------------------------------------
# Source verification — all six prune targets must be present in the
# stage-1 find block of ci.sh
# ---------------------------------------------------------------------------

# Directory names that the ci.sh `find` is expected to prune / exclude.
# Used both to parametrize the source check and to scan find output for leaks.
EXPECTED_PRUNE_NAMES = [
    ".worktrees",
    ".venv",
    "venv",
    ".codegraph-venv",
    "node_modules",
    ".git",
]


def test_ci_sh_exists():
    """Check that ``scripts/ci.sh`` is present at the resolved ``CI_SH`` path."""
    script_present = CI_SH.exists()
    assert script_present, f"ci.sh not found at {CI_SH}"


def test_ci_sh_still_finds_py_files():
    """Check that ci.sh still searches for ``*.py`` files.

    Guards the opposite regression direction: the prune change must not have
    removed the ``-name "*.py"`` (or single-quoted) test from the script.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the locale's
    # default encoding of the machine running the tests.
    content = CI_SH.read_text(encoding="utf-8")
    assert re.search(r"-name\s+[\"']\*\.py[\"']", content), (
        "find pattern '*.py' missing"
    )


def test_ci_sh_uses_prune_optimization():
    """Check that ci.sh contains the ``-prune`` action.

    ``-not -path X`` still walks the whole tree and only filters the output,
    so it remains slow when hundreds of thousands of files exist. ``-prune``
    stops find from descending into the directory at all. The presence of
    ``-prune`` is enforced here to prevent a regression to ``-not -path``.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the locale's
    # default encoding of the machine running the tests.
    content = CI_SH.read_text(encoding="utf-8")
    assert "-prune" in content, (
        "ci.sh가 -prune 최적화를 사용하지 않음 — `-not -path` 회귀 위험"
    )


@pytest.mark.parametrize("dir_name", EXPECTED_PRUNE_NAMES)
def test_ci_sh_prunes_vendor_dir(dir_name: str):
    """Check that ci.sh names each vendor directory in a quoted path pattern.

    Only the presence of a quoted ``*/<dir_name>`` (optionally followed by
    ``/*``) pattern in the script text is verified, so the test is not tightly
    coupled to ``-not -path`` vs ``-prune`` or to the order of find arguments.

    Args:
        dir_name: One entry of ``EXPECTED_PRUNE_NAMES``.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the locale's
    # default encoding of the machine running the tests.
    content = CI_SH.read_text(encoding="utf-8")
    # Accept either the "*/dir_name" or the "*/dir_name/*" form, in single or
    # double quotes. The opening quote + "*/" prefix keeps "venv" from being
    # satisfied by the ".venv" pattern.
    pat = re.compile(
        r"""[\"']\*/""" + re.escape(dir_name) + r"""(?:/\*)?[\"']"""
    )
    assert pat.search(content), (
        f"vendor path '*/{dir_name}' 패턴이 ci.sh에 없음 — "
        f"prune/exclude 누락 시 442,775 파일 스캔 회귀 위험"
    )


# ---------------------------------------------------------------------------
# Behaviour verification — does the real find command apply the prune?
# (dry run against a fake workspace)
# ---------------------------------------------------------------------------

@pytest.fixture
def fake_workspace(tmp_path):
    """Build a fake workspace: 3 regular .py files plus vendor-directory files.

    The tree lives under ``tmp_path / "ws"`` and contains .py files inside each
    of the six vendor directory kinds (some of them nested below other
    directories). Returns the workspace root path.
    """
    ws = tmp_path / "ws"
    ws.mkdir()

    # (path components relative to the workspace, file content)
    layout = [
        # Regular area — these must be collected.
        (("src", "a.py"), "print('a')\n"),
        (("src", "b.py"), "print('b')\n"),
        (("tests", "test_x.py"), "def test_x(): pass\n"),
        # .worktrees/ — prune target (recursive).
        ((".worktrees", "task-A", "src", "x.py"), "pass\n"),
        ((".worktrees", "task-B", "deep", "nested", "z.py"), "pass\n"),
        # .venv/ — prune target.
        ((".venv", "lib", "pkg.py"), "pass\n"),
        ((".venv", "site.py"), "pass\n"),
        # Nested venv/ without a leading dot — prune target (jaaz-app case).
        (("tools", "app", "server", "venv", "lib", "vendor.py"), "pass\n"),
        (("tools", "app", "server", "venv", "boot.py"), "pass\n"),
        # .codegraph-venv/ — prune target (scripts/.codegraph-venv case).
        (("scripts", ".codegraph-venv", "lib", "pkg.py"), "pass\n"),
        # node_modules/ — prune target, both top-level and nested.
        (("node_modules", "pkg", "bridge.py"), "pass\n"),
        (("frontend", "node_modules", "x", "y.py"), "pass\n"),
        # .git/ — prune target.
        ((".git", "hooks", "pre.py"), "pass\n"),
    ]

    for parts, body in layout:
        target = ws.joinpath(*parts)
        # Several files share a parent directory, hence exist_ok.
        target.parent.mkdir(parents=True, exist_ok=True)
        target.write_text(body)

    return ws


def _run_find_pruned(workspace: pathlib.Path) -> list[str]:
    """Run the pruning ``find`` command and return the matched paths.

    The command prunes the six vendor directory patterns and prints every
    remaining ``*.py`` match with ``-print0``. The output is split on NUL
    bytes, so names containing newlines, spaces or other special characters
    are separated correctly. The command is intended to mirror the one in
    ci.sh (not verifiable from this file).

    Args:
        workspace: Directory passed to ``find`` as the starting point.

    Returns:
        The matched paths decoded as UTF-8, in the order find printed them.

    Raises:
        subprocess.CalledProcessError: If ``find`` exits with a non-zero status.
    """
    prune_patterns = (
        "*/.worktrees",
        "*/.venv",
        "*/venv",
        "*/.codegraph-venv",
        "*/node_modules",
        "*/.git",
    )
    # Build: -path P1 -o -path P2 -o ... (no leading "-o").
    prune_expr: list[str] = []
    for pattern in prune_patterns:
        if prune_expr:
            prune_expr.append("-o")
        prune_expr.extend(["-path", pattern])

    argv = [
        "find", str(workspace),
        "(", *prune_expr, ")",
        "-prune", "-o", "-name", "*.py", "-print0",
    ]
    completed = subprocess.run(argv, capture_output=True, check=True)
    # NUL-delimited output ends with a trailing NUL; skip the empty chunks.
    chunks = completed.stdout.split(b"\0")
    return [chunk.decode("utf-8") for chunk in chunks if chunk]


def _run_find_unfiltered(workspace: pathlib.Path) -> list[str]:
    """Run ``find`` without any pruning (the pre-fix behaviour).

    Every ``*.py`` match under ``workspace`` is printed with ``-print0`` and
    the output is split on NUL bytes.

    Args:
        workspace: Directory passed to ``find`` as the starting point.

    Returns:
        The matched paths decoded as UTF-8, in the order find printed them.

    Raises:
        subprocess.CalledProcessError: If ``find`` exits with a non-zero status.
    """
    completed = subprocess.run(
        ["find", str(workspace), "-name", "*.py", "-print0"],
        capture_output=True,
        check=True,
    )
    paths: list[str] = []
    for raw in completed.stdout.split(b"\0"):
        # The trailing NUL yields an empty chunk; ignore it.
        if raw:
            paths.append(raw.decode("utf-8"))
    return paths


def test_find_prunes_all_vendor_dirs_in_fake_workspace(fake_workspace):
    """With pruning applied, no collected .py file lies inside a vendor dir.

    Each path returned by the pruned find is made relative to the workspace
    and its directory components are compared against every name in
    ``EXPECTED_PRUNE_NAMES``.
    """
    files = _run_find_pruned(fake_workspace)
    # Guard against a vacuous pass: with an empty result the loop below would
    # never execute a single assertion.
    assert files, "pruned find returned no files — fixture or find command broken"
    for f in files:
        # Look only at directories below the workspace root, so an ancestor of
        # the temporary directory that happens to be named like a vendor dir
        # (e.g. ".../venv/tmp/...") cannot trigger a false failure.
        rel_dirs = pathlib.PurePath(os.path.relpath(f, fake_workspace)).parent.parts
        for blocked in EXPECTED_PRUNE_NAMES:
            assert blocked not in rel_dirs, (
                f"{blocked} 내부 파일 누설: {f}"
            )


def test_find_collects_normal_py_files(fake_workspace):
    """The three regular .py files (src/a, src/b, tests/test_x) survive pruning.

    Checks both that the expected relative paths are present and that exactly
    three files were collected in total.
    """
    files = _run_find_pruned(fake_workspace)
    expected = {"src/a.py", "src/b.py", "tests/test_x.py"}

    rel = set()
    for path in files:
        rel.add(os.path.relpath(path, fake_workspace))

    assert expected <= rel, (
        f"정상 .py 누락 — 기대 {expected} ⊆ 실제 {rel}"
    )
    collected = len(files)
    assert collected == 3, (
        f"정상 .py 정확히 3개여야 하는데 실제 {len(files)}: {files}"
    )


def test_prune_significantly_reduces_count(fake_workspace):
    """Pruning shrinks the number of collected files by a factor of at least 4.

    Also sanity-checks the fixture (the unfiltered run must see at least 12
    files) and the pruned run (at most 3 files).
    """
    pruned_count = len(_run_find_pruned(fake_workspace))
    unfiltered_count = len(_run_find_unfiltered(fake_workspace))

    assert unfiltered_count >= 12, (
        f"unfiltered count too low: {unfiltered_count} — fixture broken"
    )
    assert pruned_count <= 3, (
        f"pruned count too high: {pruned_count} — prune not effective"
    )
    # max(..., 1) avoids a division by zero when nothing was collected.
    ratio = unfiltered_count / max(pruned_count, 1)
    assert ratio >= 4, (
        f"reduction ratio too low: {unfiltered_count} → {pruned_count}"
    )


def test_nested_venv_pruned(fake_workspace):
    """A venv nested below other directories (jaaz-app case) is pruned too."""
    files = _run_find_pruned(fake_workspace)
    marker = "tools/app/server/venv/"
    # First collected path that lies inside the nested venv, if any.
    leaked = next((path for path in files if marker in path), None)
    assert leaked is None, (
        f"중첩 venv 파일 누설: {leaked}"
    )


def test_nested_node_modules_pruned(fake_workspace):
    """A node_modules directory nested under frontend/ is pruned too."""
    files = _run_find_pruned(fake_workspace)
    marker = "frontend/node_modules/"
    # First collected path that lies inside the nested node_modules, if any.
    leaked = next((path for path in files if marker in path), None)
    assert leaked is None, (
        f"중첩 node_modules 파일 누설: {leaked}"
    )


def test_filenames_with_newline_safe(tmp_path):
    """File names containing a newline are split correctly by NUL parsing.

    Regression pin for Gemini review #3: ``splitlines()`` breaks on a newline
    embedded in a file name, whereas ``-print0`` plus splitting on NUL bytes
    keeps such a name intact.
    """
    ws = tmp_path / "ws"
    ws.mkdir()

    # One ordinary name and one with an embedded newline, which only
    # NUL-delimited parsing can keep in one piece.
    weird_name = "weird\nname.py"
    for name in ("normal.py", weird_name):
        (ws / name).write_text("pass\n")

    files = _run_find_pruned(ws)

    # Exactly two entries: the newline must not split the second name.
    assert len(files) == 2, (
        f"줄바꿈 포함 파일명 분리 실패: 기대 2개, 실제 {len(files)}: {files}"
    )
    names = set(map(os.path.basename, files))
    assert names == {"normal.py", weird_name}, (
        f"파일명 깨짐: {names}"
    )