#!/usr/bin/env python3
"""cleanup-workspace.py — 워크스페이스 파일 자동 정리 스크립트.

사용법:
  python3 cleanup-workspace.py --dry-run    # 삭제 후보 출력 (기본값 — 명시적 --execute 없으면 절대 삭제 안 함)
  python3 cleanup-workspace.py --execute    # 실제 삭제 (명시적으로 --execute 플래그 필요)
  python3 cleanup-workspace.py --report     # 디스크 사용량 보고

보호 정책:
  memory/specs/protection-list.json 에서 로드. 변경 시 회장 승인 필요 (task-2569 박제).
  누락 시 fallback 하드코딩 사용.

IMPORTANT: dry-run이 기본값. --execute 를 명시적으로 전달하지 않으면 실제 삭제가 발생하지 않는다.
"""

import argparse
import json
import os
import re
import shutil
import time
from datetime import datetime
from pathlib import Path
from typing import Any

# Fallback: protection-list.json 로드 실패 시 사용
_FALLBACK_PROTECTED_NAMES: list[str] = [
    "CLAUDE.md", "MEMORY.md", ".env", ".env.keys",
    "organization-structure.json", "task-timers.json", "token-ledger.json",
]
_FALLBACK_PROTECTED_DIRS: list[str] = [
    "memory/tasks", "memory/plans/tasks", "memory/reports",
    "memory/research", "memory/specs", "memory/meetings",
    "memory/plans", "memory/capabilities", ".tasks/locks", ".tasks/evidence",
]

# 모듈 레벨 캐시 (최초 로드 후 재사용)
_PROTECTION_CACHE: dict | None = None


def load_protection() -> dict:
    """memory/specs/protection-list.json 에서 보호 설정을 로드한다.

    로드 실패 시 fallback 하드코딩을 반환 (회복력 보장).
    """
    global _PROTECTION_CACHE
    if _PROTECTION_CACHE is not None:
        return _PROTECTION_CACHE
    cfg = Path(__file__).resolve().parent.parent / "memory" / "specs" / "protection-list.json"
    loaded: dict | None = None
    if cfg.exists():
        try:
            loaded = json.loads(cfg.read_text(encoding="utf-8"))
        except (json.JSONDecodeError, OSError):
            loaded = None
    if loaded is None:
        loaded = {
            "protected_paths": _FALLBACK_PROTECTED_DIRS,
            "protected_files": _FALLBACK_PROTECTED_NAMES,
            "preserved_lifecycle_paths": [],
        }
    _PROTECTION_CACHE = loaded
    return loaded


# Age of *p* in fractional days, measured from its last-modification time.
# Propagates OSError (e.g. FileNotFoundError) when *p* cannot be stat'ed.
def _age(p: Path) -> float:
    modified_at = p.stat().st_mtime
    elapsed_seconds = datetime.now().timestamp() - modified_at
    return elapsed_seconds / 86400  # 86400 seconds per day


def is_protected(path: Path, workspace: Path) -> bool:
    """Return True when *path* must not be deleted by the cleanup.

    A candidate is protected when any of the following holds:

    * its own name is listed under ``protected_files``;
    * its workspace-relative path equals, or lies below, an entry of
      ``protected_paths`` (trailing slashes on entries are ignored);
    * it is a real directory that contains something protected: either a
      descendant whose name is in ``protected_files`` or a ``protected_paths``
      entry located below it. Directory candidates are removed recursively,
      so protected content inside them would otherwise be lost.

    Args:
        path: Candidate file or directory.
        workspace: Workspace root that ``protected_paths`` entries are
            relative to. Candidates outside it are only checked by name and
            (for directories) by content.

    Returns:
        True if the candidate must be kept, False if it may be deleted.
    """
    cfg = load_protection()
    protected_files = cfg.get("protected_files", _FALLBACK_PROTECTED_NAMES)
    protected_paths = cfg.get("protected_paths", _FALLBACK_PROTECTED_DIRS)
    # A null or otherwise malformed value must not disable protection (or make
    # ``in`` perform a substring test on a plain string): use the fallback.
    if not isinstance(protected_files, list):
        protected_files = _FALLBACK_PROTECTED_NAMES
    if not isinstance(protected_paths, list):
        protected_paths = _FALLBACK_PROTECTED_DIRS

    if path.name in protected_files:
        return True

    # shutil.rmtree refuses symlinks, so only real directories can take their
    # contents down with them.
    is_real_dir = path.is_dir() and not path.is_symlink()

    try:
        rel = path.relative_to(workspace).as_posix()
    except ValueError:
        rel = None  # candidate lives outside the workspace

    if rel is not None:
        for entry in protected_paths:
            if not isinstance(entry, str):
                continue
            norm = entry.rstrip("/")
            # Prefix match on whole path components only.
            if rel == norm or rel.startswith(norm + "/"):
                return True
            # The candidate directory is an ancestor of a protected path.
            if is_real_dir and norm.startswith(rel + "/"):
                return True

    if is_real_dir:
        try:
            return any(child.name in protected_files for child in path.rglob("*"))
        except OSError:
            # The tree could not be inspected; keep it rather than risk
            # deleting a protected file.
            return True
    return False


def _audit_log(workspace: Path, candidate: str, action: str, reason: str) -> None:
    """Append one JSON line to ``<workspace>/memory/logs/cleanup-audit.jsonl``.

    Each record holds the current epoch time (``ts``) plus the given
    ``candidate``, ``action`` and ``reason``. The parent directory is created
    when missing.

    Action values used by this script:
      - "skipped_protected" : candidate is protected and was skipped
      - "dry_run_candidate" : candidate recorded in dry-run mode
      - "deleted"           : candidate was actually deleted
      - "delete_error"      : an error occurred while deleting
    """
    entry = {
        "ts": time.time(),
        "candidate": candidate,
        "action": action,
        "reason": reason,
    }
    serialized = json.dumps(entry, ensure_ascii=False) + "\n"
    audit_file = workspace / "memory" / "logs" / "cleanup-audit.jsonl"
    try:
        audit_file.parent.mkdir(parents=True, exist_ok=True)
        with audit_file.open("a", encoding="utf-8") as sink:
            sink.write(serialized)
    except OSError:
        # Best effort: a failing audit log must never block the cleanup itself.
        return


def _size(p: Path) -> int:
    """Return the size of *p* in bytes without ever raising ``OSError``.

    For a directory this is the sum of the sizes of all regular files found
    recursively below it; for anything else it is the ``st_size`` of *p*.
    Entries that cannot be stat'ed (for example because they disappeared
    while the tree was being walked) count as 0 bytes, matching the
    behaviour for a single file that cannot be stat'ed.
    """
    if not p.is_dir():
        try:
            return p.stat().st_size
        except OSError:
            return 0

    total = 0
    try:
        for entry in p.rglob("*"):
            try:
                if entry.is_file():
                    total += entry.stat().st_size
            except OSError:
                continue  # entry vanished or is unreadable: skip it
    except OSError:
        pass  # the walk itself failed part-way; report what was summed so far
    return total


def find_tmp_candidates(workspace: Path) -> list[Path]:
    """Return files anywhere below ``<workspace>/tmp`` older than 7 days.

    Age is taken from each file's modification time. An empty list is
    returned when the directory does not exist.
    """
    tmp_dir = workspace / "tmp"
    # Existence is checked once up front instead of once per visited file.
    if not tmp_dir.exists():
        return []
    return [f for f in tmp_dir.rglob("*") if f.is_file() and _age(f) > 7]


def find_events_candidates(workspace: Path) -> list[Path]:
    """Return finished event markers in ``<workspace>/memory/events`` older than 30 days.

    Only regular files directly inside the directory whose name ends with
    ``.done.clear`` or ``.done.acked`` are considered. An empty list is
    returned when the directory does not exist.
    """
    events_dir = workspace / "memory" / "events"
    if not events_dir.exists():
        return []

    finished_suffixes = (".done.clear", ".done.acked")
    stale: list[Path] = []
    for entry in events_dir.iterdir():
        if not entry.is_file():
            continue
        if not entry.name.endswith(finished_suffixes):
            continue
        if _age(entry) > 30:
            stale.append(entry)
    return stale


def find_dispatch_candidates(workspace: Path) -> list[Path]:
    """Return ``dispatch-*.md`` files in ``<workspace>/memory/tasks`` older than 90 days.

    Only regular files directly inside the directory are considered. An
    empty list is returned when the directory does not exist.

    NOTE(review): ``memory/tasks`` is on the hard-coded fallback protection
    list, so with that list in effect these candidates are skipped as
    protected later on; confirm this is intended.
    """
    tasks_dir = workspace / "memory" / "tasks"
    if not tasks_dir.exists():
        return []

    stale: list[Path] = []
    for entry in tasks_dir.iterdir():
        if not entry.is_file():
            continue
        name = entry.name
        if not (name.startswith("dispatch-") and name.endswith(".md")):
            continue
        if _age(entry) > 90:
            stale.append(entry)
    return stale


def find_logs_candidates(workspace: Path) -> list[Path]:
    """Return files anywhere below ``<workspace>/logs`` older than 60 days.

    Files whose name starts with ``cleanup-`` are never returned. An empty
    list is returned when the directory does not exist.
    """
    logs_dir = workspace / "logs"
    if not logs_dir.exists():
        return []

    stale: list[Path] = []
    for entry in logs_dir.rglob("*"):
        if not entry.is_file():
            continue
        if entry.name.startswith("cleanup-"):
            continue
        if _age(entry) > 60:
            stale.append(entry)
    return stale


def find_backups_candidates(workspace: Path) -> list[Path]:
    """Return files anywhere below ``<workspace>/memory/backups`` older than 90 days.

    Age is taken from each file's modification time. An empty list is
    returned when the directory does not exist.
    """
    backups_dir = workspace / "memory" / "backups"
    # Existence is checked once up front instead of once per visited file.
    if not backups_dir.exists():
        return []
    return [f for f in backups_dir.rglob("*") if f.is_file() and _age(f) > 90]


def find_team_task_candidates(workspace: Path) -> list[Path]:
    """Return reported team task directories older than 30 days.

    A candidate is a directory ``<workspace>/teams/dev*/task-*`` whose
    modification time is more than 30 days old and whose name occurs in the
    name of at least one entry directly inside ``<workspace>/memory/reports``.

    The name must not be followed by another digit in the report name, so
    ``task-1`` is not considered reported merely because a report for
    ``task-10`` exists. The directory name is matched literally.

    Returns an empty list when the teams or reports directory is missing.
    """
    teams_dir = workspace / "teams"
    reports_dir = workspace / "memory" / "reports"
    if not teams_dir.exists() or not reports_dir.is_dir():
        # Without reports no task can qualify.
        return []

    # Listed once; reused for every task directory.
    report_names = [entry.name for entry in reports_dir.iterdir()]

    result: list[Path] = []
    for team in teams_dir.glob("dev*"):
        for task_dir in team.glob("task-*"):
            if not task_dir.is_dir():
                continue
            # re.escape: match the directory name literally.
            # (?!\d): reject a longer task number sharing this prefix.
            mentioned = re.compile(re.escape(task_dir.name) + r"(?!\d)")
            if not any(mentioned.search(name) for name in report_names):
                continue
            if _age(task_dir) > 30:
                result.append(task_dir)
    return result


def find_cokacdir_candidates(cokacdir_ws: Path) -> list[Path]:
    """Return files older than 30 days inside the sub-directories of *cokacdir_ws*.

    Every directory directly below *cokacdir_ws* is searched recursively;
    files lying directly in *cokacdir_ws* itself are ignored. An empty list
    is returned when *cokacdir_ws* does not exist.
    """
    if not cokacdir_ws.exists():
        return []

    stale: list[Path] = []
    for project in cokacdir_ws.iterdir():
        if not project.is_dir():
            continue
        for entry in project.rglob("*"):
            if entry.is_file() and _age(entry) > 30:
                stale.append(entry)
    return stale


def _all_candidates(workspace: Path, cokacdir_ws: Path) -> dict[str, list[Path]]:
    """Run every candidate finder and return the results keyed by category.

    Categories appear in this order: tmp, events, dispatch, logs, backups,
    team_tasks (all searched in *workspace*) and cokacdir (searched in
    *cokacdir_ws*).
    """
    workspace_finders = (
        ("tmp", find_tmp_candidates),
        ("events", find_events_candidates),
        ("dispatch", find_dispatch_candidates),
        ("logs", find_logs_candidates),
        ("backups", find_backups_candidates),
        ("team_tasks", find_team_task_candidates),
    )
    found: dict[str, list[Path]] = {}
    for category, finder in workspace_finders:
        found[category] = finder(workspace)
    found["cokacdir"] = find_cokacdir_candidates(cokacdir_ws)
    return found


def run_cleanup(workspace: Path, dry_run: bool, cokacdir_ws: Path) -> None:
    """List or delete every unprotected cleanup candidate.

    Args:
        workspace: Workspace root to clean.
        dry_run: When True nothing is deleted; candidates are only printed
            and recorded in the audit log. This parameter has no default
            here; ``main`` passes True unless ``--execute`` is given.
        cokacdir_ws: Additional directory searched for the "cokacdir"
            category.

    For each candidate one audit record is written: ``skipped_protected``,
    ``dry_run_candidate``, ``deleted`` or ``delete_error``. The reason of
    ``dry_run_candidate`` and ``deleted`` records is the candidate's age in
    days, measured before deletion. In execute mode the printed lines are
    also appended to the daily cleanup log. A summary line is printed at the
    end in both modes.
    """
    cats = _all_candidates(workspace, cokacdir_ws)
    candidates = [p for items in cats.values() for p in items]
    tag = "[DRY-RUN]" if dry_run else "[DELETE]"
    log_lines: list[str] = []
    deleted = 0
    skipped_protected = 0

    for p in candidates:
        if is_protected(p, workspace):
            # Protected: record it in the audit log and move on.
            _audit_log(workspace, str(p), "skipped_protected", "protected_path in protection-list")
            skipped_protected += 1
            continue

        # Size and age can only be read while the candidate still exists, so
        # both are captured before any deletion. A candidate that vanished or
        # became unreadable since the scan must not abort the whole run.
        try:
            size = _size(p)
        except OSError:
            size = 0
        try:
            age_reason = f"age>{_age(p):.1f}d"
        except OSError:
            age_reason = "age>unknown"

        line = f"{tag} {p}  ({size:,} bytes)"
        print(line)
        log_lines.append(line)

        if dry_run:
            _audit_log(workspace, str(p), "dry_run_candidate", age_reason)
            continue

        try:
            if p.is_dir():
                shutil.rmtree(p)
            else:
                p.unlink()
        except OSError as e:
            err = f"[ERROR] {p}: {e}"
            print(err)
            log_lines.append(err)
            _audit_log(workspace, str(p), "delete_error", str(e))
        else:
            deleted += 1
            _audit_log(workspace, str(p), "deleted", age_reason)

    total_candidates = len(candidates) - skipped_protected
    if not dry_run:
        _write_log(workspace, log_lines)
        print(f"\n삭제 완료: {deleted}/{total_candidates} 항목 (보호 스킵: {skipped_protected}개)")
    else:
        print(f"\n[DRY-RUN] 삭제 후보: {total_candidates} 항목 (실제 삭제 없음, 보호 스킵: {skipped_protected}개)")


def _write_log(workspace: Path, lines: list[str]) -> None:
    """Append *lines* to today's ``<workspace>/logs/cleanup-YYYY-MM-DD.log``.

    The ``logs`` directory is created when missing. Each call writes a
    header line carrying the current ISO timestamp, followed by the given
    lines joined with newlines and a final newline.
    """
    target_dir = workspace / "logs"
    target_dir.mkdir(parents=True, exist_ok=True)
    day_stamp = datetime.now().strftime('%Y-%m-%d')
    target = target_dir / f"cleanup-{day_stamp}.log"
    with target.open("a", encoding="utf-8") as sink:
        header = f"\n=== cleanup-workspace {datetime.now().isoformat()} ===\n"
        body = "\n".join(lines) + "\n"
        sink.write(header)
        sink.write(body)


def build_report(workspace: Path, cokacdir_ws: Path) -> dict[str, Any]:
    """Summarise how much the cleanup could remove, per category and in total.

    Protected candidates are excluded from the figures.

    Returns:
        A dict with ``total_candidates`` (count), ``total_size_bytes`` and
        ``categories``, the latter mapping each category name to its own
        ``count`` and ``size_bytes``.
    """
    per_category: dict[str, dict[str, Any]] = {}
    grand_count = 0
    grand_bytes = 0
    for name, paths in _all_candidates(workspace, cokacdir_ws).items():
        deletable = [p for p in paths if not is_protected(p, workspace)]
        n_bytes = sum(_size(p) for p in deletable)
        n_items = len(deletable)
        per_category[name] = {"count": n_items, "size_bytes": n_bytes}
        grand_count += n_items
        grand_bytes += n_bytes
    return {
        "total_candidates": grand_count,
        "total_size_bytes": grand_bytes,
        "categories": per_category,
    }


def print_report(workspace: Path, cokacdir_ws: Path) -> None:
    """Print the cleanup report produced by ``build_report`` as a text table.

    One row is printed per category (count and size in MB), followed by a
    totals row. Sizes are converted with 1 MB = 1048576 bytes.
    """
    report = build_report(workspace, cokacdir_ws)
    heavy_rule = "=" * 60
    light_rule = "-" * 60

    print(heavy_rule)
    print(f"  워크스페이스 정리 보고서  |  기준: {workspace}")
    print(heavy_rule)
    for cat in report["categories"]:
        info = report["categories"][cat]
        print(f"  {cat:<15} {info['count']:>5}개   {info['size_bytes']/1048576:>8.2f} MB")
    print(light_rule)
    print(f"  {'합계':<15} {report['total_candidates']:>5}개   {report['total_size_bytes']/1048576:>8.2f} MB 정리 가능")
    print(heavy_rule)


def main() -> None:
    parser = argparse.ArgumentParser(description="워크스페이스 파일 자동 정리")
    g = parser.add_mutually_exclusive_group()
    g.add_argument("--dry-run", action="store_true", default=True, help="삭제 후보 출력 (기본값)")
    g.add_argument("--execute", action="store_true", help="실제 삭제")
    g.add_argument("--report", action="store_true", help="디스크 사용량 보고")
    parser.add_argument("--workspace", type=Path,
                        default=Path(os.environ.get("WORKSPACE_ROOT", "/home/jay/workspace")))
    args = parser.parse_args()
    ws: Path = args.workspace
    cokac = Path.home() / ".cokacdir" / "workspace"
    if args.report:
        print_report(ws, cokac)
    elif args.execute:
        run_cleanup(workspace=ws, dry_run=False, cokacdir_ws=cokac)
    else:
        run_cleanup(workspace=ws, dry_run=True, cokacdir_ws=cokac)


if __name__ == "__main__":
    main()
