"""memory-janitor.py - Letta #5 self_improvement 메모리 관리 도구.

MEMORY.md 크기 모니터링, memory/ 하위 파일 크기 리포트,
30일 이상 미참조 항목 감지, 중복 파일/유사 내용 감지.
"""

import argparse
import json
import os
import re
import sys
from datetime import datetime, timedelta
from pathlib import Path
from typing import Any

# 이 파일은 memory-janitor.py 이지만 import 시 memory_janitor 로 사용됨
# tests/test_memory_janitor.py 에서 sys.path 를 통해 import 함

# Maximum number of lines MEMORY.md may have before check_memory_md reports "WARNING".
LINE_LIMIT = 200
# Default age threshold, in days, used when looking for stale files.
STALE_DAYS_DEFAULT = 30
# Number of entries kept in the "largest_files" list of the size report.
TOP_FILES_COUNT = 10


def check_memory_md(memory_md_path: Path) -> dict[str, Any]:
    """Count the lines of MEMORY.md and classify the file against LINE_LIMIT.

    Args:
        memory_md_path: Location of the MEMORY.md file to inspect.

    Returns:
        A dict with the keys ``file``, ``line_count``, ``limit`` and ``status``.
        ``status`` is ``"OK"`` when the file has at most LINE_LIMIT lines,
        ``"WARNING"`` when it has more, and ``"NOT_FOUND"`` (with a
        ``line_count`` of 0) when the file is missing or cannot be read.
    """

    def _result(count: int, state: str) -> dict[str, Any]:
        # Single place that defines the shape of the returned record.
        return {
            "file": str(memory_md_path),
            "line_count": count,
            "limit": LINE_LIMIT,
            "status": state,
        }

    if not memory_md_path.exists():
        return _result(0, "NOT_FOUND")

    try:
        # Undecodable bytes are replaced rather than raising, so any file
        # that can be opened can also be counted.
        content = memory_md_path.read_text(encoding="utf-8", errors="replace")
    except OSError:
        # An unreadable file is reported the same way as a missing one.
        return _result(0, "NOT_FOUND")

    count = len(content.splitlines())
    return _result(count, "WARNING" if count > LINE_LIMIT else "OK")


def aggregate_memory_files(memory_dir: Path) -> dict[str, Any]:
    """Summarise the sizes of every regular file below ``memory_dir``.

    Args:
        memory_dir: Root directory to walk recursively.

    Returns:
        A dict with the keys:
        ``total_files`` (number of files counted),
        ``total_size_bytes`` (sum of their sizes),
        ``by_directory`` (maps each parent directory, relative to
        ``memory_dir``, to ``{"files": ..., "size_bytes": ...}``), and
        ``largest_files`` (up to TOP_FILES_COUNT ``{"path", "size_bytes"}``
        records, biggest first).
        All values are empty/zero when ``memory_dir`` is missing or is not a
        directory.
    """
    if not (memory_dir.exists() and memory_dir.is_dir()):
        return {
            "total_files": 0,
            "total_size_bytes": 0,
            "by_directory": {},
            "largest_files": [],
        }

    file_count = 0
    byte_total = 0
    per_dir: dict[str, dict[str, Any]] = {}
    records: list[dict[str, Any]] = []

    try:
        for candidate in memory_dir.rglob("*"):
            if not candidate.is_file():
                continue
            try:
                nbytes = candidate.stat().st_size
            except OSError:
                # Files that cannot be stat'ed are left out of every total.
                continue

            file_count += 1
            byte_total += nbytes

            # Bucket by the parent directory relative to memory_dir; files
            # directly inside memory_dir land in ".".
            try:
                bucket_key = str(candidate.relative_to(memory_dir).parent)
            except ValueError:
                bucket_key = "."

            bucket = per_dir.setdefault(bucket_key, {"files": 0, "size_bytes": 0})
            bucket["files"] += 1
            bucket["size_bytes"] += nbytes

            records.append({"path": str(candidate), "size_bytes": nbytes})
    except OSError:
        # Best effort: keep whatever was gathered before the walk failed.
        pass

    # Stable descending sort, then keep only the head of the list.
    records.sort(key=lambda rec: rec["size_bytes"], reverse=True)

    return {
        "total_files": file_count,
        "total_size_bytes": byte_total,
        "by_directory": per_dir,
        "largest_files": records[:TOP_FILES_COUNT],
    }


def detect_stale_files(memory_dir: Path, days: int = STALE_DAYS_DEFAULT) -> list[dict[str, Any]]:
    """List files under ``memory_dir`` that have not been modified recently.

    Files are only reported, never deleted. A file counts as stale when the
    number of whole days since its modification time is strictly greater
    than ``days``.

    Args:
        memory_dir: Root directory to walk recursively.
        days: Age threshold in days (defaults to STALE_DAYS_DEFAULT).

    Returns:
        One dict per stale file with the keys ``path``, ``last_modified``
        (formatted ``YYYY-MM-DD``) and ``days_stale``. Empty when
        ``memory_dir`` is missing or is not a directory.
    """
    if not (memory_dir.exists() and memory_dir.is_dir()):
        return []

    reference = datetime.now()
    findings: list[dict[str, Any]] = []

    try:
        for candidate in memory_dir.rglob("*"):
            if not candidate.is_file():
                continue
            try:
                stamp = candidate.stat().st_mtime
            except OSError:
                continue

            touched = datetime.fromtimestamp(stamp)
            # timedelta.days drops the fractional part of a day, so the
            # comparison below works on whole days only.
            age_days = (reference - touched).days
            if age_days <= days:
                continue

            findings.append(
                {
                    "path": str(candidate),
                    "last_modified": touched.strftime("%Y-%m-%d"),
                    "days_stale": age_days,
                }
            )
    except OSError:
        # Best effort: return whatever was found before the walk failed.
        pass

    return findings


def _normalize_name(name: str) -> str:
    """Return the comparison key used to spot near-identical file names.

    The last extension is dropped, hyphens and underscores are removed and
    the remainder is lower-cased, so ``foo-bar.md`` and ``Foo_Bar.md`` both
    become ``foobar``.
    """
    stem = Path(name).stem
    return re.sub(r"[-_]", "", stem).lower()


def detect_duplicates(scan_dirs: list[Path]) -> list[dict[str, Any]]:
    """Report groups of files that look like duplicates of each other.

    Two heuristics are applied to every regular file found below
    ``scan_dirs``:

    * ``similar_name``: files (in any directory) whose names differ but
      normalise to the same key, see ``_normalize_name``.
    * ``same_size``: files in the same directory with identical byte size.

    Each file is considered once even if ``scan_dirs`` overlap or repeat a
    directory, so a file is never reported as a duplicate of itself.

    Args:
        scan_dirs: Directories to walk recursively. Entries that are not
            existing directories are skipped.

    Returns:
        A list of dicts with the keys ``files`` (list of path strings) and
        ``reason`` (``"similar_name"`` or ``"same_size"``). Name-based groups
        come first. A set of files is reported at most once.
    """
    if not scan_dirs:
        return []

    all_files: list[Path] = []
    collected: set[str] = set()
    for scan_dir in scan_dirs:
        if not scan_dir.is_dir():
            continue
        try:
            for entry in scan_dir.rglob("*"):
                if not entry.is_file():
                    continue
                # Overlapping roots (e.g. a directory and one of its
                # sub-directories) yield the same file more than once;
                # keep only the first occurrence.
                identity = os.path.abspath(entry)
                if identity in collected:
                    continue
                collected.add(identity)
                all_files.append(entry)
        except OSError:
            # Best effort: an unreadable tree does not abort the other roots.
            continue

    duplicates: list[dict[str, Any]] = []
    # Sets of paths already reported, so the two heuristics never emit the
    # same group twice.
    seen_groups: set[frozenset[str]] = set()

    def _report(group: list[Path], reason: str) -> None:
        file_paths = [str(f) for f in group]
        key = frozenset(file_paths)
        if key not in seen_groups:
            seen_groups.add(key)
            duplicates.append({"files": file_paths, "reason": reason})

    # 1. Names that only differ by hyphen/underscore/case/extension.
    name_groups: dict[str, list[Path]] = {}
    for f in all_files:
        name_groups.setdefault(_normalize_name(f.name), []).append(f)

    for group in name_groups.values():
        if len(group) < 2:
            continue
        # Identical names in different directories are not "similar" names.
        if len({f.name for f in group}) < 2:
            continue
        _report(group, "similar_name")

    # 2. Files of identical size that share a directory.
    dir_files: dict[Path, list[Path]] = {}
    for f in all_files:
        dir_files.setdefault(f.parent, []).append(f)

    for files in dir_files.values():
        if len(files) < 2:
            continue
        size_groups: dict[int, list[Path]] = {}
        for f in files:
            try:
                size = f.stat().st_size
            except OSError:
                continue
            size_groups.setdefault(size, []).append(f)

        for group in size_groups.values():
            if len(group) >= 2:
                _report(group, "same_size")

    return duplicates


def generate_recommendations(
    memory_md_check: dict[str, Any],
    stale_files: list[dict[str, Any]],
    duplicates: list[dict[str, Any]],
) -> list[str]:
    """분석 결과를 바탕으로 권고사항 목록을 생성한다.

    Args:
        memory_md_check: check_memory_md() 반환값
        stale_files: detect_stale_files() 반환값
        duplicates: detect_duplicates() 반환값

    Returns:
        list of recommendation strings
    """
    recs: list[str] = []

    # MEMORY.md 상태
    status = memory_md_check.get("status", "NOT_FOUND")
    line_count = memory_md_check.get("line_count", 0)
    limit = memory_md_check.get("limit", LINE_LIMIT)

    if status == "OK":
        recs.append(f"MEMORY.md는 {line_count}줄로 {limit}줄 제한 이내입니다.")
    elif status == "WARNING":
        recs.append(f"MEMORY.md가 {line_count}줄로 {limit}줄 제한을 초과했습니다. 정리가 필요합니다.")
    elif status == "NOT_FOUND":
        recs.append("MEMORY.md 파일을 찾을 수 없습니다.")

    # stale 파일 권고
    if stale_files:
        count = len(stale_files)
        recs.append(f"30일 미참조 파일 {count}개: 아카이브를 고려하세요.")
    else:
        recs.append("30일 이상 미참조 파일이 없습니다.")

    # 중복 파일 권고
    similar = [d for d in duplicates if d["reason"] == "similar_name"]
    for dup in similar:
        files = dup["files"]
        names = [Path(f).name for f in files]
        recs.append(f"유사 파일명 발견: {' / '.join(names)}")

    same_size = [d for d in duplicates if d["reason"] == "same_size"]
    if same_size:
        recs.append(f"동일 크기 파일 {len(same_size)}쌍 발견: 중복 여부를 확인하세요.")

    return recs


def find_memory_md(workspace: Path) -> Path | None:
    """워크스페이스 내 .claude/projects/ 하위에서 MEMORY.md를 검색한다.

    Args:
        workspace: 워크스페이스 루트 경로

    Returns:
        첫 번째로 발견된 MEMORY.md 경로, 없으면 None
    """
    projects_dir = workspace / ".claude" / "projects"
    if not projects_dir.exists():
        # fallback: memory/ 하위 MEMORY.md
        fallback = workspace / "memory" / "MEMORY.md"
        if fallback.exists():
            return fallback
        return None

    try:
        for entry in projects_dir.rglob("MEMORY.md"):
            return entry
    except OSError:
        pass
    return None


def generate_report(
    memory_dir: Path,
    memory_md_path: Path | None,
    scan_dirs: list[Path],
    stale_days: int = STALE_DAYS_DEFAULT,
) -> dict[str, Any]:
    """Run every analysis step and bundle the results into one report dict.

    Args:
        memory_dir: Directory whose files are sized and checked for staleness.
        memory_md_path: MEMORY.md to check; ``None`` yields a ``NOT_FOUND``
            entry without touching the filesystem.
        scan_dirs: Directories handed to the duplicate detection.
        stale_days: Age threshold in days for the stale-file detection.

    Returns:
        A dict with the keys ``timestamp`` (local time,
        ``YYYY-MM-DDTHH:MM:SS``), ``memory_files``, ``memory_md_check``,
        ``stale_files``, ``duplicates`` and ``recommendations``.
    """
    if memory_md_path is None:
        md_status: dict[str, Any] = {
            "file": "",
            "line_count": 0,
            "limit": LINE_LIMIT,
            "status": "NOT_FOUND",
        }
    else:
        md_status = check_memory_md(memory_md_path)

    size_summary = aggregate_memory_files(memory_dir)
    stale_entries = detect_stale_files(memory_dir, days=stale_days)
    duplicate_groups = detect_duplicates(scan_dirs)

    # NOTE(review): stale_days is not forwarded here, so the wording of the
    # stale-file recommendation does not reflect a custom threshold.
    advice = generate_recommendations(md_status, stale_entries, duplicate_groups)

    report: dict[str, Any] = {
        "timestamp": datetime.now().strftime("%Y-%m-%dT%H:%M:%S"),
        "memory_files": size_summary,
        "memory_md_check": md_status,
        "stale_files": stale_entries,
        "duplicates": duplicate_groups,
        "recommendations": advice,
    }
    return report


def _print_text_report(report: dict[str, Any]) -> None:
    """Print the human-readable (``--output text``) rendering of a report.

    Stale files and duplicate groups are previewed with at most five entries
    each; the headline still shows the full count.
    """
    preview = 5

    print(f"=== Memory Janitor Report ({report['timestamp']}) ===")
    print()
    md = report["memory_md_check"]
    print(f"[MEMORY.md] {md['file']}: {md['line_count']}줄 / {md['limit']}줄 [{md['status']}]")
    print()
    mf = report["memory_files"]
    print(f"[memory/] 총 {mf['total_files']}개 파일, {mf['total_size_bytes']:,} bytes")
    print()
    stale = report["stale_files"]
    if stale:
        print(f"[Stale Files] {len(stale)}개:")
        for sf in stale[:preview]:
            print(f"  - {sf['path']} ({sf['days_stale']}일 전)")
    print()
    dups = report["duplicates"]
    if dups:
        print(f"[Duplicates] {len(dups)}쌍:")
        for dup in dups[:preview]:
            print(f"  - {dup['reason']}: {', '.join(dup['files'])}")
    print()
    print("[Recommendations]")
    for rec in report["recommendations"]:
        print(f"  - {rec}")


def main() -> None:
    """CLI entry point: parse arguments, build the report and print it.

    The report is written to stdout either as indented JSON (default) or as
    plain text, depending on ``--output``.
    """
    # Default workspace: $WORKSPACE_ROOT if set, otherwise the parent of the
    # directory that contains this script.
    default_workspace = Path(os.environ.get("WORKSPACE_ROOT", str(Path(__file__).resolve().parent.parent)))

    parser = argparse.ArgumentParser(description="memory-janitor: Letta #5 self_improvement 메모리 관리 도구")
    parser.add_argument(
        "--workspace",
        type=Path,
        default=default_workspace,
        # The help text previously named a fixed path that did not match the
        # actual default computed above.
        help="워크스페이스 루트 경로 (기본값: $WORKSPACE_ROOT 또는 이 스크립트가 있는 디렉토리의 상위 디렉토리)",
    )
    parser.add_argument(
        "--memory-file",
        type=Path,
        default=None,
        help="MEMORY.md 파일 직접 지정 (미지정 시 자동 탐색)",
    )
    parser.add_argument(
        "--memory-dir",
        type=Path,
        default=None,
        help="memory 디렉토리 경로 (기본값: --workspace/memory)",
    )
    parser.add_argument(
        "--stale-days",
        type=int,
        default=STALE_DAYS_DEFAULT,
        help=f"stale 기준 일수 (기본값: {STALE_DAYS_DEFAULT})",
    )
    parser.add_argument(
        "--output",
        choices=["json", "text"],
        default="json",
        help="출력 형식 (기본값: json)",
    )
    args = parser.parse_args()

    workspace: Path = args.workspace
    memory_dir: Path = args.memory_dir if args.memory_dir is not None else workspace / "memory"

    # An explicitly given MEMORY.md wins over auto-discovery.
    memory_md_path: Path | None
    if args.memory_file is not None:
        memory_md_path = args.memory_file
    else:
        memory_md_path = find_memory_md(workspace)

    # Duplicate detection covers the scripts directory and the memory
    # directory, skipping whichever does not exist.
    scan_dirs: list[Path] = [d for d in (workspace / "scripts", memory_dir) if d.exists()]

    report = generate_report(
        memory_dir=memory_dir,
        memory_md_path=memory_md_path,
        scan_dirs=scan_dirs,
        stale_days=args.stale_days,
    )

    if args.output == "json":
        print(json.dumps(report, ensure_ascii=False, indent=2))
    else:
        _print_text_report(report)


# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()