#!/usr/bin/env python3
"""
modularity-check.py — 모듈화 수동 검증 도구 (아르고스 테스터)
Usage:
  python3 modularity-check.py scan [--json] [--exclude-tests]
  python3 modularity-check.py verify
  python3 modularity-check.py discover [--json] [--min-occurrences N]
"""

import argparse
import ast
import json
import re
import sys
from pathlib import Path

# ─── Path configuration ──────────────────────────────────────────────────────
# WORKSPACE is the repository root: this script lives one directory below it.
WORKSPACE = Path(__file__).resolve().parent.parent
REGISTRY_PATH = WORKSPACE / "config" / "module-registry.json"

# ─── Scan target patterns ─────────────────────────────────────────────────────
# Hard-coded values the scanner flags.  The trailing "# noqa: modularity"
# markers are functional: the scanner skips any line carrying that marker,
# so these pattern definitions do not flag themselves.
CHAT_ID_PATTERN = re.compile(r"6937032012")  # noqa: modularity
WORKSPACE_PATH_PATTERN = re.compile(r"/home/jay/workspace")  # noqa: modularity
DQ_FONT_SIZES_PATTERN = re.compile(r"\b(84px|64px|40px)\b")  # noqa: modularity
TEAM_NAME_PATTERN = re.compile(r"\bdev[1-8]-team\b")  # noqa: modularity

# Import forms that make DQ font-size literals acceptable within a file.
DQ_IMPORT_PATTERN = re.compile(r"^\s*(?:import\s+dq_rules|from\s+tools\.dq_rules\b)", re.MULTILINE)

# ─── Exclusions ───────────────────────────────────────────────────────────────
# Directory prefixes (relative to WORKSPACE, POSIX-style) that are never scanned.
EXCLUDE_DIRS = {
    "config",
    "hooks",
    "memory/specs",
    "memory/tasks",
    "memory/reports",
    "memory/events",
    ".worktrees",
    "__pycache__",
    ".git",
    "node_modules",
    "output",
}
# Documentation/data extensions that are never scanned.
EXCLUDE_EXTS = {".md", ".json", ".yaml", ".yml"}

# ─── False-positive exclusion patterns ────────────────────────────────────────
# Lines that merely supply an environment-variable fallback default.
_ENV_FALLBACK_RE = re.compile(
    r"os\.environ\.get\s*\(\s*[\"'][^\"']+[\"']\s*,\s*[\"']"
    r"|os\.getenv\s*\(\s*[\"'][^\"']+[\"']\s*,\s*[\"']"
    r"|default\s*=\s*os\.environ\.get\s*\("
)
# Lines that call a config getter combined with an if/or fallback expression.
_CONFIG_FALLBACK_RE = re.compile(r"(?:get_constant|get_path|get_config)\s*\(.*\)\s*(?:if|or)\b")

# ─── 유틸 함수 ─────────────────────────────────────────────────────────────────


def _is_excluded_path(rel: Path) -> bool:
    """Return True when *rel* is one of the excluded directories or lies inside one."""
    rel_posix = rel.as_posix()
    return any(
        rel_posix == excluded or rel_posix.startswith(excluded + "/")
        for excluded in EXCLUDE_DIRS
    )


def _is_excluded_ext(path: Path) -> bool:
    """Return True when the file's extension is in the documentation/data set."""
    suffix = path.suffix.lower()
    return suffix in EXCLUDE_EXTS


def _collect_py_files(workspace: Path):
    """Return the sorted list of .py files under *workspace* subject to scanning."""
    return sorted(
        candidate
        for candidate in workspace.rglob("*.py")
        if not _is_excluded_path(candidate.relative_to(workspace))
        and not _is_excluded_ext(candidate)
    )


def _collect_all_text_files(workspace: Path):
    """Collect every text file of interest (.py, .sh, .bash, .js, .ts, .jsx, .tsx)."""
    target_exts = {".py", ".sh", ".bash", ".js", ".ts", ".jsx", ".tsx"}
    found = []
    for candidate in workspace.rglob("*"):
        if not candidate.is_file() or candidate.suffix.lower() not in target_exts:
            continue
        if _is_excluded_path(candidate.relative_to(workspace)):
            continue
        found.append(candidate)
    found.sort()
    return found


def _read_lines(path: Path):
    try:
        return path.read_text(encoding="utf-8").splitlines()
    except Exception:
        return []


def _get_docstring_ranges(source: str):
    """
    Return the set of 1-indexed line numbers occupied by docstrings in *source*.

    Parsed with ast when possible; on a SyntaxError, falls back to a naive
    triple-quote line scanner.  The fallback is a heuristic and may
    over-match (e.g. ordinary triple-quoted string assignments), which is
    acceptable for its purpose of filtering scan hits.
    """
    ranges = set()
    try:
        tree = ast.parse(source)
        for node in ast.walk(tree):
            if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef, ast.Module)):
                # A docstring is a string constant appearing as the first
                # statement of a module/class/function body.
                if (
                    node.body
                    and isinstance(node.body[0], ast.Expr)
                    and isinstance(node.body[0].value, ast.Constant)
                    and isinstance(node.body[0].value.value, str)
                ):
                    ds_node = node.body[0]
                    # end_lineno can be None on very old ASTs; fall back to lineno.
                    end_ln = ds_node.end_lineno or ds_node.lineno
                    for ln in range(ds_node.lineno, end_ln + 1):
                        ranges.add(ln)
    except SyntaxError:
        # Fallback: track open/close of triple-quote blocks line by line.
        in_triple = False
        quote_char = None
        for i, line in enumerate(source.splitlines(), 1):
            stripped = line.strip()
            if not in_triple:
                for q in ('"""', "'''"):
                    if q in stripped:
                        count = stripped.count(q)
                        if count >= 2:
                            # Opened and closed on the same line.
                            ranges.add(i)
                            break
                        else:
                            in_triple = True
                            quote_char = q
                            ranges.add(i)
                            break
            else:
                ranges.add(i)
                # Same quote style closes the block.
                if quote_char and quote_char in stripped:
                    in_triple = False
                    quote_char = None
    return ranges


def _is_comment_line(line: str, file_ext: str) -> bool:
    stripped = line.strip()
    if file_ext == ".py":
        return stripped.startswith("#")
    elif file_ext in (".sh", ".bash"):
        return stripped.startswith("#")
    return False


def _has_noqa(line: str) -> bool:
    return "# noqa: modularity" in line


def _is_env_fallback(line: str) -> bool:
    """True for os.environ.get / os.getenv fallback-default patterns."""
    return _ENV_FALLBACK_RE.search(line) is not None


def _is_config_fallback(line: str) -> bool:
    """True for config-getter calls combined with an if/or fallback."""
    return _CONFIG_FALLBACK_RE.search(line) is not None


def _is_in_tests(rel: str) -> bool:
    """tests/ 디렉토리 내 파일이면 True."""
    return rel.startswith("tests/") or "/tests/" in rel


def _file_has_dq_import(path: Path) -> bool:
    """True when the file imports dq_rules, which allows DQ font-size literals."""
    try:
        text = path.read_text(encoding="utf-8")
    except Exception:
        return False
    return bool(DQ_IMPORT_PATTERN.search(text))


# ─── SCAN 커맨드 ───────────────────────────────────────────────────────────────


def run_scan(output_json: bool = False, exclude_tests: bool = False):
    """Scan the whole codebase for hard-coded values.

    FAIL categories: chat IDs, absolute workspace paths, and (in Python files
    without a dq_rules import) DQ font sizes.  WARN categories: team names,
    and every hit inside tests/ regardless of category.  Lines that are
    full-line comments, docstrings, '# noqa: modularity'-marked, or env/config
    fallback patterns are skipped.

    Returns 0 when no FAIL-level violation was found, 1 otherwise.
    """
    chat_id_violations = []
    abspath_violations = []
    dq_font_violations = []
    team_name_violations = []
    tests_violations = []  # (rel, lineno, content, category) — demoted to WARN

    skipped_env_fallback = 0
    skipped_config_fallback = 0

    py_files = _collect_py_files(WORKSPACE)
    all_files = _collect_all_text_files(WORKSPACE)

    # Chat ID, absolute path, and team name: checked in every text file.
    for fpath in all_files:
        rel = str(fpath.relative_to(WORKSPACE))
        ext = fpath.suffix.lower()
        lines = _read_lines(fpath)
        is_py = ext == ".py"
        in_tests = _is_in_tests(rel)

        # Python: precompute docstring line ranges so they can be skipped.
        if is_py:
            try:
                source = fpath.read_text(encoding="utf-8")
            except Exception:
                source = ""
            docstring_lines = _get_docstring_ranges(source)
        else:
            docstring_lines = set()

        for lineno, line in enumerate(lines, 1):
            # Skip full-line comments.
            if _is_comment_line(line, ext):
                continue
            # Skip lines carrying the noqa marker.
            if _has_noqa(line):
                continue
            # Skip docstring lines.
            if is_py and lineno in docstring_lines:
                continue

            # Skip ENV fallback patterns (counted for the statistics output).
            if _is_env_fallback(line):
                if (
                    CHAT_ID_PATTERN.search(line)
                    or WORKSPACE_PATH_PATTERN.search(line)
                    or TEAM_NAME_PATTERN.search(line)
                ):
                    skipped_env_fallback += 1
                continue

            # Skip config fallback patterns (counted for the statistics output).
            if _is_config_fallback(line):
                if (
                    CHAT_ID_PATTERN.search(line)
                    or WORKSPACE_PATH_PATTERN.search(line)
                    or TEAM_NAME_PATTERN.search(line)
                ):
                    skipped_config_fallback += 1
                continue

            stripped = line.strip()

            # Chat ID (FAIL, or WARN when inside tests/).
            if CHAT_ID_PATTERN.search(line):
                if in_tests:
                    tests_violations.append((rel, lineno, stripped, "chat_id"))
                else:
                    chat_id_violations.append((rel, lineno, stripped))

            # Absolute path (FAIL, or WARN when inside tests/).
            if WORKSPACE_PATH_PATTERN.search(line):
                if in_tests:
                    tests_violations.append((rel, lineno, stripped, "absolute_path"))
                else:
                    abspath_violations.append((rel, lineno, stripped))

            # Team name (WARN; also demoted when inside tests/).
            if TEAM_NAME_PATTERN.search(line):
                if in_tests:
                    tests_violations.append((rel, lineno, stripped, "team_name"))
                else:
                    team_name_violations.append((rel, lineno, stripped))

    # DQ font sizes: Python files only, and only those without a dq_rules import.
    for fpath in py_files:
        rel = str(fpath.relative_to(WORKSPACE))
        ext = fpath.suffix.lower()
        in_tests = _is_in_tests(rel)
        if _file_has_dq_import(fpath):
            continue  # importing dq_rules makes font-size literals acceptable
        lines = _read_lines(fpath)
        try:
            source = fpath.read_text(encoding="utf-8")
        except Exception:
            source = ""
        docstring_lines = _get_docstring_ranges(source)

        for lineno, line in enumerate(lines, 1):
            if _is_comment_line(line, ext):
                continue
            if _has_noqa(line):
                continue
            if lineno in docstring_lines:
                continue
            # Skip ENV fallback patterns.
            if _is_env_fallback(line):
                if DQ_FONT_SIZES_PATTERN.search(line):
                    skipped_env_fallback += 1
                continue
            # Skip config fallback patterns.
            if _is_config_fallback(line):
                if DQ_FONT_SIZES_PATTERN.search(line):
                    skipped_config_fallback += 1
                continue
            if DQ_FONT_SIZES_PATTERN.search(line):
                stripped = line.strip()
                if in_tests:
                    tests_violations.append((rel, lineno, stripped, "dq_font_size"))
                else:
                    dq_font_violations.append((rel, lineno, stripped))

    # ─── Output ────────────────────────────────────────────────────────────────
    if output_json:
        # NOTE: summary.tests_warnings always reports the raw tests/ hit count,
        # even with --exclude-tests; only the listing and totals honor the flag.
        tests_warn_count = 0 if exclude_tests else len(tests_violations)
        result = {
            "chat_id": [{"file": f, "line": ln, "content": c} for f, ln, c in chat_id_violations],
            "absolute_path": [{"file": f, "line": ln, "content": c} for f, ln, c in abspath_violations],
            "dq_font_size": [{"file": f, "line": ln, "content": c} for f, ln, c in dq_font_violations],
            "team_name": [{"file": f, "line": ln, "content": c} for f, ln, c in team_name_violations],
            "tests_violations": (
                [{"file": f, "line": ln, "content": c, "category": cat} for f, ln, c, cat in tests_violations]
                if not exclude_tests
                else []
            ),
            "summary": {
                "total_fail": len(chat_id_violations) + len(abspath_violations) + len(dq_font_violations),
                "total_warn": len(team_name_violations) + tests_warn_count,
                "total": len(chat_id_violations)
                + len(abspath_violations)
                + len(dq_font_violations)
                + len(team_name_violations)
                + tests_warn_count,
                "skipped_env_fallback": skipped_env_fallback,
                "skipped_config_fallback": skipped_config_fallback,
                "tests_warnings": len(tests_violations),
            },
        }
        print(json.dumps(result, ensure_ascii=False, indent=2))
    else:
        print("=== 모듈화 스캔 결과 ===\n")

        # Chat ID
        _print_violations("ChatID 하드코딩", chat_id_violations, severity="FAIL")

        # Absolute path
        _print_violations("절대경로 하드코딩", abspath_violations, severity="FAIL")

        # DQ font size
        _print_violations("DQ 폰트 사이즈 하드코딩", dq_font_violations, severity="FAIL")

        # Team name
        _print_violations("팀명 하드코딩", team_name_violations, severity="WARN")

        # tests/ violations (demoted to WARNING)
        if not exclude_tests and tests_violations:
            print(f"[WARN] tests/ 위반 → WARNING 전환 ({len(tests_violations)}건)")
            for fpath, lineno, content, category in tests_violations:
                print(f"  - [{category}] {fpath}:{lineno}: {content}")
            print()

        fail_total = len(chat_id_violations) + len(abspath_violations) + len(dq_font_violations)
        warn_total = len(team_name_violations) + (0 if exclude_tests else len(tests_violations))
        total = fail_total + warn_total

        print("=== 오탐 제외 통계 ===")
        print(f"  ENV fallback 패턴 스킵: {skipped_env_fallback}건")
        print(f"  Config fallback 패턴 스킵: {skipped_config_fallback}건")
        print(f"  tests/ → WARNING 전환: {len(tests_violations)}건")
        print()
        print(f"총 위반: {total}건 (FAIL: {fail_total}, WARN: {warn_total})")

    # Exit code: tests_violations are WARN, so they are excluded from the FAIL count.
    fail_count = len(chat_id_violations) + len(abspath_violations) + len(dq_font_violations)
    return 0 if fail_count == 0 else 1


def _print_violations(label: str, violations: list, severity: str, suffix: str = ""):
    count = len(violations)
    tag = f"[{severity}]"
    print(f"{tag} {label} ({count}건){suffix}")
    for fpath, lineno, content in violations:
        print(f"  - {fpath}:{lineno}: {content}")
    print()


# ─── VERIFY 커맨드 ─────────────────────────────────────────────────────────────

# Patterns that indicate a config reference (based on config-loader call styles).
# NOTE(review): not referenced anywhere in this file — the generic fallback was
# removed from _file_references_source; kept for reference / possible reuse.
CONFIG_REF_PATTERNS = [
    re.compile(r"get_config\s*\("),
    re.compile(r"get_constant\s*\("),
    re.compile(r"load_config\s*\("),
    re.compile(r"config\["),
    re.compile(r"constants\["),
    re.compile(r"from\s+config"),
    re.compile(r"import\s+config"),
    re.compile(r"config\.loader"),
    re.compile(r"loader\.load"),
]

# Per-source-key reference patterns (env vars, config-loader calls, and direct
# references are all accepted as evidence that a file uses the source).
SOURCE_REF_PATTERNS = {
    "chat_id": re.compile(r"(chat_id|CHAT_ID|COKACDIR_CHAT_ID)", re.IGNORECASE),
    "workspace_root": re.compile(
        r"(workspace_root|WORKSPACE_ROOT|roots\.workspace|/home/jay/workspace)", re.IGNORECASE
    ),
    "team_bot_mapping": re.compile(r'(team_bot_mapping|TEAM_BOT|get_constant\(["\']teams)', re.IGNORECASE),
    "dq_rules": re.compile(r"(dq_rules|dq-rules)", re.IGNORECASE),
    "design_palette": re.compile(r"(palette|design.system|teamColors|team_colors|category_colors)", re.IGNORECASE),
    "thresholds": re.compile(r"(threshold|idle_hours|ghost_hours|IDLE_THRESHOLD|GHOST_THRESHOLD)", re.IGNORECASE),
    "cokacdir_key": re.compile(r"(COKACDIR_KEY|cokacdir_key)", re.IGNORECASE),
    "team_to_bot": re.compile(r"(team_to_bot|TEAM_TO_BOT)", re.IGNORECASE),
    "bots": re.compile(r"(bots\b|bot_settings|BOT_SETTINGS)", re.IGNORECASE),
    "work_levels": re.compile(r"(work_levels|WORK_LEVELS)", re.IGNORECASE),
    "font_sizes": re.compile(r"(font_sizes|font_size|dq.rules|dq_rules)", re.IGNORECASE),
}


def _file_references_source(fpath: Path, source_key: str) -> bool:
    """Check whether *fpath* contains a reference to the given registry source key."""
    try:
        content = fpath.read_text(encoding="utf-8")
    except Exception:
        return False

    # Only the key-specific pattern counts — no generic fallback, to avoid
    # false positives; an unknown key therefore never matches.
    pattern = SOURCE_REF_PATTERNS.get(source_key)
    return bool(pattern and pattern.search(content))


def _find_unregistered_config_refs(registry: dict) -> list:
    """Locate get_constant/get_config calls whose key is absent from the registry.

    Returns a list of (relative_path, usage_string) tuples, one per call site
    found in non-comment, non-docstring, non-noqa lines.
    """
    sources = registry.get("sources", {})
    known_keys = set(sources.keys())
    # A source's explicit "key" field also counts as registered.
    for info in sources.values():
        extra_key = info.get("key")
        if extra_key:
            known_keys.add(extra_key)

    # Matches get_constant("xxx") / get_config('xxx') call sites.
    call_re = re.compile(r'(?:get_constant|get_config)\s*\(\s*["\'](\w+)["\']')

    findings = []
    for fpath in _collect_all_text_files(WORKSPACE):
        rel = str(fpath.relative_to(WORKSPACE))
        ext = fpath.suffix.lower()
        try:
            file_lines = fpath.read_text(encoding="utf-8").splitlines()
        except Exception:
            continue

        # Python files: compute docstring line ranges so they can be skipped.
        docstring_lines = set()
        if ext == ".py":
            try:
                docstring_lines = _get_docstring_ranges("\n".join(file_lines))
            except Exception:
                docstring_lines = set()

        for lineno, line in enumerate(file_lines, 1):
            # Skip comments and noqa-marked lines.
            if _is_comment_line(line, ext) or _has_noqa(line):
                continue
            # Skip docstring lines.
            if ext == ".py" and lineno in docstring_lines:
                continue
            for match in call_re.finditer(line):
                if match.group(1) not in known_keys:
                    findings.append((rel, match.group(0)))

    return findings


def run_verify():
    """Validate config/module-registry.json against the actual code.

    For each registered source, checks that every file listed in its
    "used_by" exists and actually references the source (via the
    key-specific pattern in SOURCE_REF_PATTERNS).  Glob-style used_by
    entries are counted as OK without individual verification.  Also
    reports get_constant/get_config calls whose key is not registered.

    Returns 0 when no reference check failed, 1 otherwise; warnings do
    not affect the exit code.
    """
    if not REGISTRY_PATH.exists():
        print(f"[ERROR] registry 파일을 찾을 수 없습니다: {REGISTRY_PATH}")
        return 1

    try:
        registry = json.loads(REGISTRY_PATH.read_text(encoding="utf-8"))
    except json.JSONDecodeError as e:
        print(f"[ERROR] registry JSON 파싱 실패: {e}")
        return 1

    sources = registry.get("sources", {})

    print("=== 레지스트리 검증 결과 ===\n")

    total_sources = len(sources)
    ok_count = 0
    warn_count = 0
    fail_count = 0

    for source_key, source_info in sources.items():
        used_by = source_info.get("used_by", [])
        print(f"[{source_key}]")

        for ub_rel in used_by:
            # Glob/summary entries (e.g. "scripts/*.py (27 files)") are not
            # individually verifiable — count them as OK and move on.
            if "*" in ub_rel or "(" in ub_rel:
                print(f"  ℹ️  {ub_rel} — 디렉토리 패턴 (개별 검증 생략)")
                ok_count += 1
                continue

            ub_path = WORKSPACE / ub_rel

            # Listed file no longer exists: warning, not failure.
            if not ub_path.exists():
                print(f"  ⚠️  {ub_rel} — 파일 미존재")
                warn_count += 1
                continue

            refs = _file_references_source(ub_path, source_key)
            if refs:
                print(f"  ✅ {ub_rel} — config 참조 확인")
                ok_count += 1
            else:
                print(f"  ❌ {ub_rel} — config 참조 미확인")
                fail_count += 1

        print()

    # Report config references that are not registered (warning only).
    unregistered = _find_unregistered_config_refs(registry)
    if unregistered:
        print("미등록 config 참조 (WARNING):")
        for rel, usage in unregistered:
            print(f"  - {rel}: {usage} 사용하나 registry 미등록")
        warn_count += len(unregistered)
        print()

    print(f"총 검증: {total_sources} sources, 정상: {ok_count}, 경고: {warn_count}, 실패: {fail_count}")

    return 0 if fail_count == 0 else 1


# ─── DISCOVER 커맨드 ────────────────────────────────────────────────────────────

# Common numbers excluded from magic-number detection (HTTP status codes,
# booleans, CSS pixel/breakpoint values, years, and other generic values).
# NOTE(review): a few values appear more than once (e.g. 576, 1200, 360, 400)
# — harmless in a set literal, but could be deduplicated for tidiness.
_COMMON_NUMBERS = {
    # Basic small numbers
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
    24, 25, 30, 32, 36, 40, 48, 50, 60, 64, 72, 80, 90, 96, 99,
    # HTTP status codes
    100, 200, 201, 202, 204, 206, 301, 302, 303, 304, 307, 308,
    400, 401, 402, 403, 404, 405, 406, 408, 409, 410, 413, 422, 423, 429,
    500, 501, 502, 503, 504,
    # CSS breakpoints and common pixel values
    120, 128, 144, 160, 176, 192, 240, 256, 280, 300, 320, 360, 375, 400,
    480, 512, 540, 576, 600, 640, 720, 768, 800, 900, 960, 1024, 1080,
    1200, 1280, 1366, 1440, 1600, 1920, 2048, 2560, 3840,
    # Common px/rem sizes
    700, 750, 576, 992, 1200,
    # Years (around the current one)
    2020, 2021, 2022, 2023, 2024, 2025, 2026, 2027,
    # Other generic values
    255, 360, 1000, 9999, 10000,
}

# Regexes used by the discover command
_URL_RE = re.compile(r'https?://[^\s\'"<>]+')
_PORT_RE = re.compile(r':(\d{2,5})\b')
_INT_RE = re.compile(r'\b(\d{4,})\b')  # integers of 4+ digits (reduces noise)
_COLOR_RE = re.compile(r'#([0-9a-fA-F]{6}|[0-9a-fA-F]{3})\b')
_EMAIL_RE = re.compile(r'\b[A-Za-z0-9._%+\-]+@[A-Za-z0-9.\-]+\.[A-Za-z]{2,}\b')
_IMPORT_LINE_RE = re.compile(r'^\s*(?:import\s|from\s)')


def _load_lint_patterns_values() -> set:
    """Return the regex strings already registered in config/lint-patterns.json.

    Used to avoid re-reporting values that an existing lint pattern covers.
    Returns an empty (or partial) set when the file is missing or unreadable.
    """
    lint_path = WORKSPACE / "config" / "lint-patterns.json"
    registered: set = set()
    if not lint_path.exists():
        return registered
    try:
        data = json.loads(lint_path.read_text(encoding="utf-8"))
        for entry in data.get("patterns", []):
            registered.add(entry.get("regex", ""))
    except Exception:
        pass
    return registered


def _extract_candidates_from_line(line: str, lineno: int, rel: str, collectors: dict):
    """Extract per-category hardcoding candidates from one line into *collectors*.

    collectors maps category -> { value: [(rel, lineno), ...] }.
    Import statements are skipped entirely.

    Fixes over the previous version: the line was scanned by _URL_RE twice
    (once for values, once for spans) — now scanned once and reused; and the
    magic-number comment wrongly said "3+ digits" while _INT_RE requires 4+.
    """
    # import/from lines are excluded from all categories.
    if _IMPORT_LINE_RE.match(line):
        return

    location = (rel, lineno)

    # API URLs — scan once; both the values and their spans are reused below.
    url_matches = list(_URL_RE.finditer(line))
    for m in url_matches:
        url = m.group(0).rstrip('.,;)')  # trim trailing punctuation
        collectors["api_url"].setdefault(url, []).append(location)

    # Standalone port numbers (ports embedded in a URL are already covered above).
    url_spans = [(m.start(), m.end()) for m in url_matches]
    for m in _PORT_RE.finditer(line):
        if any(start <= m.start() < end for start, end in url_spans):
            continue  # inside a URL match — skip
        port_num = int(m.group(1))
        # Only registered/dynamic port range; skip well-known (<1024) and invalid.
        if not (1024 <= port_num <= 65535):
            continue
        collectors["port"].setdefault(m.group(0), []).append(location)  # ":NNNN"

    # Magic numbers: integers of 4+ digits (per _INT_RE), excluding common
    # values and the whole HTTP status-code range.
    for m in _INT_RE.finditer(line):
        num = int(m.group(1))
        if num in _COMMON_NUMBERS or 100 <= num <= 599:
            continue
        collectors["magic_number"].setdefault(str(num), []).append(location)

    # Hex color codes, normalized to uppercase with a leading '#'.
    for m in _COLOR_RE.finditer(line):
        collectors["color"].setdefault("#" + m.group(1).upper(), []).append(location)

    # Email addresses / domains.
    for m in _EMAIL_RE.finditer(line):
        collectors["email"].setdefault(m.group(0), []).append(location)


def run_discover(output_json: bool = False, min_occurrences: int = 3):
    """Automatically identify repeated hard-coding candidates in the codebase.

    Collects URLs, standalone ports, large integers, hex colors, and email
    addresses from all target text files, then reports values occurring at
    *min_occurrences* or more distinct (file, line) locations that are not
    already covered by config/lint-patterns.json.  Always returns 0.
    """

    # Values already registered in lint-patterns.json (skipped as duplicates).
    known_patterns = _load_lint_patterns_values()

    all_files = _collect_all_text_files(WORKSPACE)

    # Extra path components excluded for discover only (matched at any depth).
    _DISCOVER_EXCLUDE_PARTS = {"node_modules", "__pycache__", ".git", "output", "venv", ".venv", "site-packages", "_backup"}

    def _is_excluded_for_discover(fpath: Path) -> bool:
        # True when any path component is in the discover exclusion set.
        parts = fpath.relative_to(WORKSPACE).parts
        for part in parts:
            if part in _DISCOVER_EXCLUDE_PARTS:
                return True
        return False

    # Per-category collectors: { value: [(rel, lineno), ...] }
    collectors: dict = {
        "api_url": {},
        "port": {},
        "magic_number": {},
        "color": {},
        "email": {},
    }

    for fpath in all_files:
        if _is_excluded_for_discover(fpath):
            continue
        rel = str(fpath.relative_to(WORKSPACE))
        ext = fpath.suffix.lower()
        lines = _read_lines(fpath)
        is_py = ext == ".py"

        # Python: precompute docstring line ranges so they can be skipped.
        if is_py:
            try:
                source = fpath.read_text(encoding="utf-8")
            except Exception:
                source = ""
            docstring_lines = _get_docstring_ranges(source)
        else:
            docstring_lines = set()

        for lineno, line in enumerate(lines, 1):
            # Skip full-line comments.
            if _is_comment_line(line, ext):
                continue
            # Skip lines carrying the noqa marker.
            if _has_noqa(line):
                continue
            # Skip docstring lines.
            if is_py and lineno in docstring_lines:
                continue

            _extract_candidates_from_line(line, lineno, rel, collectors)

    # Display labels per category.
    category_labels = {
        "api_url": "API URL",
        "port": "포트번호",
        "magic_number": "매직넘버",
        "color": "색상코드",
        "email": "이메일/도메인",
    }

    # Filter candidates: min_occurrences or more locations, not already registered.
    candidates = []
    for category, value_map in collectors.items():
        for value, locations in value_map.items():
            # Deduplicate: each (file, line) pair counts once.
            unique_locations = list({(r, ln) for r, ln in locations})
            unique_locations.sort()

            if len(unique_locations) < min_occurrences:
                continue

            # Skip values matched by an already-registered lint pattern regex.
            skip = False
            for pat_regex in known_patterns:
                if pat_regex and re.search(pat_regex, value):
                    skip = True
                    break
            if skip:
                continue

            candidates.append({
                "category": category,
                "value": value,
                "occurrences": len(unique_locations),
                "locations": [{"file": r, "line": ln} for r, ln in unique_locations],
            })

    # Sort by occurrence count descending, then category/value for stability.
    candidates.sort(key=lambda x: (-x["occurrences"], x["category"], x["value"]))

    # ─── Output ────────────────────────────────────────────────────────────────
    if output_json:
        total_occurrences = sum(c["occurrences"] for c in candidates)
        result = {
            "candidates": candidates,
            "summary": {
                "total_candidates": len(candidates),
                "total_occurrences": total_occurrences,
            },
        }
        print(json.dumps(result, ensure_ascii=False, indent=2))
    else:
        print("=== 잠재적 하드코딩 후보 발견 ===\n")

        if not candidates:
            print(f"발견된 후보 없음 (기준: {min_occurrences}곳 이상)")
        else:
            for c in candidates:
                label = category_labels.get(c["category"], c["category"])
                print(f'[{label}] "{c["value"]}" — {c["occurrences"]}곳에서 발견')
                for loc in c["locations"]:
                    print(f'  - {loc["file"]}:{loc["line"]}')
                print()

        total_occurrences = sum(c["occurrences"] for c in candidates)
        print(f"총 후보: {len(candidates)}건, 총 등장: {total_occurrences}회 (기준: {min_occurrences}곳 이상)")

    return 0


# ─── MAIN ──────────────────────────────────────────────────────────────────────


def main():
    """CLI entry point: build the argument parser and dispatch to the sub-command."""
    parser = argparse.ArgumentParser(description="모듈화 수동 검증 도구 (아르고스 테스터)")
    subparsers = parser.add_subparsers(dest="command", required=True)

    # scan sub-command
    scan_parser = subparsers.add_parser("scan", help="전체 코드베이스에서 하드코딩된 값을 스캔합니다.")
    scan_parser.add_argument("--json", action="store_true", dest="output_json", help="결과를 JSON 형식으로 출력합니다.")
    scan_parser.add_argument(
        "--exclude-tests", action="store_true", dest="exclude_tests", help="tests/ 위반을 표시하지 않습니다."
    )

    # verify sub-command (no options)
    subparsers.add_parser("verify", help="module-registry.json과 실제 코드를 대조 검증합니다.")

    # discover sub-command
    discover_parser = subparsers.add_parser(
        "discover", help="코드베이스에서 반복 등장하는 잠재적 하드코딩 후보를 자동 식별합니다."
    )
    discover_parser.add_argument(
        "--json", action="store_true", dest="output_json", help="결과를 JSON 형식으로 출력합니다."
    )
    discover_parser.add_argument(
        "--min-occurrences",
        type=int,
        default=3,
        dest="min_occurrences",
        metavar="N",
        help="동일 값이 N곳 이상 등장해야 후보로 보고합니다. (기본값: 3)",
    )

    args = parser.parse_args()

    # Dispatch table keyed by sub-command name; each handler returns the exit code.
    dispatch = {
        "scan": lambda: run_scan(output_json=args.output_json, exclude_tests=args.exclude_tests),
        "verify": run_verify,
        "discover": lambda: run_discover(output_json=args.output_json, min_occurrences=args.min_occurrences),
    }
    handler = dispatch.get(args.command)
    if handler is None:
        # Unreachable with required=True, kept as a defensive fallback.
        parser.print_help()
        sys.exit(1)

    sys.exit(handler())


# Run the CLI only when executed as a script (not on import).
if __name__ == "__main__":
    main()
