#!/usr/bin/env python3
"""
collect_metrics.py — 효과 측정 프레임워크 데이터 수집 스크립트
task-timers.json, git log, memory/reports/, memory/events/에서
메트릭을 수집하여 memory/daily/metrics-YYYY-MM-DD.json에 저장.

weekly-report.py에서 호출 가능하도록 함수 인터페이스 제공.
"""

import argparse
import json
import os
import subprocess
import sys
from collections import defaultdict
from datetime import datetime, date
from pathlib import Path
from typing import Optional

# Workspace root: taken from the WORKSPACE_ROOT environment variable when it is
# set, otherwise the parent of the directory that contains this script.
WORKSPACE_ROOT = os.environ.get(
    "WORKSPACE_ROOT",
    str(Path(__file__).resolve().parent.parent),
)

# Every input and output location used by this script lives under
# <workspace>/memory.
_MEMORY_DIR = Path(WORKSPACE_ROOT) / "memory"
TASK_TIMERS_PATH = _MEMORY_DIR / "task-timers.json"
REPORTS_DIR = _MEMORY_DIR / "reports"
EVENTS_DIR = _MEMORY_DIR / "events"
OUTPUT_DIR = _MEMORY_DIR / "daily"


def load_task_timers(path: Optional[Path] = None) -> dict:
    """Load the ``tasks`` mapping from task-timers.json.

    Args:
        path: File to read. Defaults to the module-level ``TASK_TIMERS_PATH``.

    Returns:
        The dict stored under the top-level ``"tasks"`` key, or an empty dict
        when that key is absent. An empty dict is also returned, with a
        warning on stderr, when the file is missing, unreadable, not valid
        UTF-8 JSON, or when the root / ``"tasks"`` value is not a JSON object.
    """
    timers_path = TASK_TIMERS_PATH if path is None else Path(path)
    if not timers_path.exists():
        print(f"[WARN] task-timers.json 파일 없음: {timers_path}", file=sys.stderr)
        return {}

    try:
        with open(timers_path, "r", encoding="utf-8") as f:
            raw = json.load(f)
    except (ValueError, OSError) as e:
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        print(f"[WARN] task-timers.json 로드 실패: {e}", file=sys.stderr)
        return {}

    # Callers iterate ``.values()`` on the result, so anything other than a
    # dict (null, list, scalar) must not leak out of this function.
    tasks = raw.get("tasks", {}) if isinstance(raw, dict) else None
    if not isinstance(tasks, dict):
        print(
            f"[WARN] task-timers.json 형식 오류 (tasks가 객체가 아님): {timers_path}",
            file=sys.stderr,
        )
        return {}
    return tasks


def _run_git_log(since: str, until: str) -> list[str]:
    """Run ``git log --all --oneline`` for a time window inside the workspace.

    Args:
        since: Value passed to git as ``--since=<since>``.
        until: Value passed to git as ``--until=<until>``.

    Returns:
        The non-empty, stripped output lines, each in the ``--oneline`` form
        ``"<hash> <subject>"``. An empty list is returned, with a warning on
        stderr, when git cannot be started or exits with a non-zero status.
    """
    cmd = [
        "git", "log", "--all", "--oneline",
        f"--since={since}",
        f"--until={until}",
    ]
    try:
        result = subprocess.run(
            cmd,
            cwd=WORKSPACE_ROOT,
            capture_output=True,
            text=True,
            # Decode explicitly as UTF-8 (git's default log output encoding)
            # instead of the locale encoding, and never raise on stray bytes:
            # a non-ASCII commit subject must not abort metric collection.
            encoding="utf-8",
            errors="replace",
            shell=False,
        )
    except FileNotFoundError:
        print("[WARN] git 명령어를 찾을 수 없습니다.", file=sys.stderr)
        return []
    except OSError as e:
        print(f"[WARN] git log 실행 오류: {e}", file=sys.stderr)
        return []

    if result.returncode != 0:
        print(f"[WARN] git log 실패 (returncode={result.returncode}): {result.stderr.strip()}", file=sys.stderr)
        return []
    return [line.strip() for line in result.stdout.splitlines() if line.strip()]


def collect_merge_conflicts(target_date: date, days: int = 1) -> dict:
    """M-1: count commits whose subject line mentions a merge conflict.

    Args:
        target_date: First day of the window.
        days: Number of consecutive days in the window, starting at
            ``target_date``.

    Returns:
        A dict with ``value`` (number of matching commits) and
        ``description``.
    """
    from datetime import timedelta

    # A date-only --since/--until lets git fill in the *current* time of day,
    # so the window would drift with the moment the script runs. Pin explicit
    # times so the window is exactly the requested calendar day(s).
    last_day = target_date + timedelta(days=days - 1)
    since = f"{target_date.isoformat()} 00:00:00"
    until = f"{last_day.isoformat()} 23:59:59"

    conflict_keywords = ("CONFLICT", "Merge conflict", "merge conflict")
    count = 0
    for line in _run_git_log(since, until):
        # --oneline output is "<hash> <subject>"; match on the subject only.
        _, sep, subject = line.partition(" ")
        msg = subject if sep else line
        if any(kw in msg for kw in conflict_keywords):
            count += 1

    return {
        "value": count,
        "description": "머지 충돌 발생 횟수",
    }


def collect_reverts(target_date: date, days: int = 1) -> dict:
    """M-2: count commits whose subject mentions a revert or cherry-pick.

    Args:
        target_date: First day of the window.
        days: Number of consecutive days in the window, starting at
            ``target_date``.

    Returns:
        A dict with ``value`` (matching commits), ``total_commits`` (all
        commits in the window), ``rate_pct`` (matching / total as a
        percentage rounded to two decimals, 0.0 when there are no commits)
        and ``description``.
    """
    from datetime import timedelta

    # A date-only --since/--until lets git fill in the *current* time of day,
    # so the window would drift with the moment the script runs. Pin explicit
    # times so the window is exactly the requested calendar day(s).
    last_day = target_date + timedelta(days=days - 1)
    since = f"{target_date.isoformat()} 00:00:00"
    until = f"{last_day.isoformat()} 23:59:59"

    lines = _run_git_log(since, until)
    total_commits = len(lines)
    revert_count = 0
    for line in lines:
        # --oneline output is "<hash> <subject>"; match case-insensitively on
        # the subject only.
        _, sep, subject = line.partition(" ")
        msg = (subject if sep else line).lower()
        if "revert" in msg or "cherry-pick" in msg:
            revert_count += 1

    rate_pct = round(revert_count / total_commits * 100, 2) if total_commits > 0 else 0.0

    return {
        "value": revert_count,
        "total_commits": total_commits,
        "rate_pct": rate_pct,
        "description": "기능 원복 발생률",
    }


def collect_daily_completed(target_date: date) -> dict:
    """M-3: count tasks in task-timers.json completed on ``target_date``.

    A task counts when its ``status`` is ``"completed"`` and the first ten
    characters of its ``end_time`` equal the ISO form of ``target_date``.

    Returns:
        A dict with ``value`` (total count), ``by_team`` (count per
        ``team_id``, ``"unknown"`` when the task has none) and
        ``description``.
    """
    wanted_day = target_date.isoformat()  # "YYYY-MM-DD"
    completed = 0
    per_team: dict[str, int] = defaultdict(int)

    for entry in load_task_timers().values():
        if entry.get("status") != "completed":
            continue
        finished_at = entry.get("end_time", "")
        if not finished_at:
            continue
        try:
            # The date part is the leading "YYYY-MM-DD" of end_time.
            if finished_at[:10] != wanted_day:
                continue
            completed += 1
            per_team[entry.get("team_id", "unknown")] += 1
        except (ValueError, TypeError):
            continue

    return {
        "value": completed,
        "by_team": dict(per_team),
        "description": "일일 완료 task 수",
    }


def collect_qc_fails(target_date: date) -> dict:
    """M-5: count QC FAIL reports and retry-count marker files for a day.

    Returns:
        A dict with ``value`` (report files for the day whose text contains
        ``"FAIL"``), ``retry_count_files`` (``*.retry_count`` files directly
        under the events directory modified that day) and ``description``.
    """
    day = target_date.isoformat()

    def modified_on_day(path: Path) -> bool:
        """Tell whether the file's local mtime falls on the target day."""
        try:
            stamp = datetime.fromtimestamp(path.stat().st_mtime)
        except OSError:
            return False
        return stamp.date().isoformat() == day

    # Reports: a file belongs to the day when its name contains the date,
    # otherwise when it was last modified on that date.
    failed_reports = 0
    if REPORTS_DIR.exists():
        for candidate in REPORTS_DIR.rglob("*"):
            if not candidate.is_file():
                continue
            if day not in candidate.name and not modified_on_day(candidate):
                continue
            try:
                text = candidate.read_text(encoding="utf-8", errors="replace")
            except OSError as e:
                print(f"[WARN] 보고서 파일 읽기 실패 ({candidate}): {e}", file=sys.stderr)
                continue
            if "FAIL" in text:
                failed_reports += 1

    # Events: count the *.retry_count marker files modified on the day.
    retry_markers = 0
    if EVENTS_DIR.exists():
        retry_markers = sum(
            1
            for marker in EVENTS_DIR.glob("*.retry_count")
            if marker.is_file() and modified_on_day(marker)
        )

    return {
        "value": failed_reports,
        "retry_count_files": retry_markers,
        "description": "QC FAIL 횟수",
    }


def collect_g1_fails(target_date: date) -> dict:
    """M-6: count ``*.g1-fail`` marker files modified on ``target_date``.

    Only files directly under the events directory are considered; files
    whose metadata cannot be read are skipped.

    Returns:
        A dict with ``value`` (number of matching files) and ``description``.
    """
    day = target_date.isoformat()
    hits = 0

    markers = EVENTS_DIR.glob("*.g1-fail") if EVENTS_DIR.exists() else ()
    for marker in markers:
        if not marker.is_file():
            continue
        try:
            modified = datetime.fromtimestamp(marker.stat().st_mtime).date()
        except OSError:
            continue
        if modified.isoformat() == day:
            hits += 1

    return {
        "value": hits,
        "description": "G1 게이트 FAIL 횟수",
    }


def collect_ai_cost(target_date: date) -> dict:
    """M-7: sum the estimated AI cost of tasks completed on ``target_date``.

    A task contributes when its ``status`` is ``"completed"``, the first ten
    characters of its ``end_time`` equal the ISO date, and its
    ``token_usage`` carries a ``cost_estimate_usd`` that converts to float.
    Values that fail to convert are reported on stderr and skipped.

    Returns:
        A dict with ``value`` (total rounded to six decimals), ``currency``,
        ``task_count`` (tasks that contributed a cost) and ``description``.
    """
    day = target_date.isoformat()
    spent = 0.0
    billed_tasks = 0

    for entry in load_task_timers().values():
        if entry.get("status") != "completed":
            continue
        finished_at = entry.get("end_time", "")
        if not finished_at:
            continue
        try:
            same_day = finished_at[:10] == day
        except (ValueError, TypeError):
            continue
        if not same_day:
            continue

        usage = entry.get("token_usage")
        raw_cost = usage.get("cost_estimate_usd") if usage else None
        if raw_cost is None:
            continue
        try:
            amount = float(raw_cost)
        except (ValueError, TypeError) as e:
            print(f"[WARN] 비용 파싱 오류 (task={entry.get('task_id', '?')}): {e}", file=sys.stderr)
            continue
        spent += amount
        billed_tasks += 1

    return {
        "value": round(spent, 6),
        "currency": "USD",
        "task_count": billed_tasks,
        "description": "외부 AI 총 비용",
    }


def collect_all(target_date: Optional[date] = None) -> dict:
    """Collect every metric for one day (callable from weekly-report.py).

    Args:
        target_date: Day to collect for; defaults to today.

    Returns:
        A dict with ``date``, ``collected_at`` (local timestamp, second
        precision), ``period`` (start and end both equal to the day) and
        ``metrics`` (one entry per collector, keyed by metric id).
    """
    day = date.today() if target_date is None else target_date
    iso_day = day.isoformat()
    # Stamp the collection time before the collectors run.
    collected_at = datetime.now().isoformat(timespec="seconds")

    metrics = {
        "M-1_merge_conflicts": collect_merge_conflicts(day),
        "M-2_revert_count": collect_reverts(day),
        "M-3_daily_completed_tasks": collect_daily_completed(day),
        "M-5_qc_fail_count": collect_qc_fails(day),
        "M-6_g1_fail_count": collect_g1_fails(day),
        "M-7_total_ai_cost": collect_ai_cost(day),
    }

    return {
        "date": iso_day,
        "collected_at": collected_at,
        "period": {"start": iso_day, "end": iso_day},
        "metrics": metrics,
    }


def save_metrics(
    data: dict,
    target_date: Optional[date] = None,
    output_dir: Optional[Path] = None,
) -> Path:
    """Write ``data`` as JSON to ``<output_dir>/metrics-YYYY-MM-DD.json``.

    The JSON is first written to a temporary file in the same directory and
    then moved over the target with ``os.replace``. If serialization or the
    write fails, an existing metrics file for that day is left untouched
    instead of being truncated.

    Args:
        data: JSON-serializable metrics payload.
        target_date: Day used in the file name; defaults to today.
        output_dir: Directory to write into; defaults to the module-level
            ``OUTPUT_DIR``. Created (with parents) when missing.

    Returns:
        Path of the written metrics file.

    Raises:
        TypeError, ValueError: ``data`` is not JSON-serializable.
        OSError: The directory or file could not be written.
    """
    if target_date is None:
        target_date = date.today()
    out_dir = OUTPUT_DIR if output_dir is None else Path(output_dir)
    out_dir.mkdir(parents=True, exist_ok=True)
    out_path = out_dir / f"metrics-{target_date.isoformat()}.json"

    # The pid keeps concurrent runs from sharing one temporary file.
    tmp_path = out_path.with_name(f"{out_path.name}.{os.getpid()}.tmp")
    try:
        with open(tmp_path, "w", encoding="utf-8") as f:
            json.dump(data, f, indent=2, ensure_ascii=False)
        os.replace(tmp_path, out_path)
    finally:
        # After a successful replace the temp file is already gone; after a
        # failure this removes the partial file.
        tmp_path.unlink(missing_ok=True)
    return out_path


def main():
    """Command-line entry point: collect one day's metrics and save or print.

    Options:
        --date: Target day as YYYY-MM-DD (default: today). A malformed value
            ends with an argparse usage error instead of a traceback.
        --days: Parsed for compatibility; the collection below always covers
            a single day, so any other value only triggers a warning.
        --dry-run: Print the collected JSON to stdout instead of saving it.
    """
    parser = argparse.ArgumentParser(description="효과 측정 프레임워크 데이터 수집")
    parser.add_argument("--date", type=str, default=None, help="수집 대상 날짜 (YYYY-MM-DD, 기본: 오늘)")
    parser.add_argument("--days", type=int, default=1, help="수집 기간 일수 (기본: 1)")
    parser.add_argument("--dry-run", action="store_true", help="수집만 하고 저장하지 않음")
    args = parser.parse_args()

    target = date.today()
    if args.date:
        try:
            target = date.fromisoformat(args.date)
        except ValueError:
            # parser.error prints the usage text and exits with status 2.
            parser.error(f"--date 형식 오류 (YYYY-MM-DD 필요): {args.date}")

    if args.days != 1:
        # collect_all() is called without a day count, so make it visible
        # that the option has no effect rather than ignoring it silently.
        print(
            f"[WARN] --days={args.days} 옵션은 현재 수집에 반영되지 않습니다 (1일 기준으로 수집).",
            file=sys.stderr,
        )

    data = collect_all(target)

    if args.dry_run:
        print(json.dumps(data, indent=2, ensure_ascii=False))
    else:
        path = save_metrics(data, target)
        print(f"[OK] 메트릭 저장 완료: {path}")


# Run the collector only when this file is executed as a script, so importing
# the module does not trigger a collection.
if __name__ == "__main__":
    main()
