#!/usr/bin/env python3
"""
bot_status_resolver.py — 봇 상태 라벨링 시스템
task-2375 (dev3/루)

배경: 2026-05-02 사고 — ps를 보지 않고 commits만 보고 봇 상태를 잘못 판단한 사건으로 인해
객관적 라벨로 자동 판정하는 도구 필요.
"""

import argparse
import json
import os
import subprocess
import time
from typing import Optional

# Workspace root used for git commands and the memory/ files read below.
# Overridable through the WORKSPACE_ROOT environment variable.
WORKSPACE_ROOT = os.getenv("WORKSPACE_ROOT", "/home/jay/workspace")


# ─────────────────────────────────────────────
# 순수 판정 함수 (테스트 친화적)
# ─────────────────────────────────────────────

def classify_status(
    ps_alive: bool,
    last_commit_age_min: Optional[int],
    pr_merged_at: Optional[str],
    pr_number: Optional[int],
    start_guard_fail: bool = False,
    *,
    alive_within_min: int = 5,
    stale_after_min: int = 30,
) -> str:
    """Classify a bot's state into one of five labels (pure function).

    Rules are evaluated in order; the first match wins:

    1. ``MERGED``  - ``pr_merged_at`` is truthy (overrides everything else).
    2. ``STALE``   - ``start_guard_fail`` is set (the task never started).
    3. ``ALIVE``   - process alive and last commit younger than
       ``alive_within_min`` minutes.
    4. ``IDLE``    - process alive otherwise (older or unknown commit age).
    5. ``STALE``   - process dead, last commit older than ``stale_after_min``
       minutes, and no PR exists.
    6. ``UNKNOWN`` - anything else.

    Args:
        ps_alive: Whether a matching process was found.
        last_commit_age_min: Age of the last commit in minutes, or None if
            no commit was found.
        pr_merged_at: Merge timestamp of the PR, or None/empty if not merged.
        pr_number: PR number, or None if there is no PR.
        start_guard_fail: Whether the start-guard failure marker is present.
        alive_within_min: Commit-age bound (exclusive) for ``ALIVE``.
        stale_after_min: Commit-age bound (exclusive) above which a dead
            process without a PR is ``STALE``.

    Returns:
        One of ``"MERGED"``, ``"ALIVE"``, ``"IDLE"``, ``"STALE"``, ``"UNKNOWN"``.
    """
    if pr_merged_at:
        return "MERGED"
    # A start-guard failure means the task never got going; the merged case
    # has already returned above, so no further check is needed here.
    if start_guard_fail:
        return "STALE"
    if ps_alive:
        recently_committed = (
            last_commit_age_min is not None
            and last_commit_age_min < alive_within_min
        )
        return "ALIVE" if recently_committed else "IDLE"
    # From here on the process is known to be dead.
    if (last_commit_age_min is not None
            and last_commit_age_min > stale_after_min
            and pr_number is None):
        return "STALE"
    return "UNKNOWN"


# Coarse verdict reported for each status label produced by classify_status.
VERDICT_MAP = dict(
    MERGED="completed",
    ALIVE="in_progress",
    IDLE="in_progress",
    STALE="stalled",
    UNKNOWN="unknown",
)


# ─────────────────────────────────────────────
# 헬퍼: task-timers.json에서 팀 추론
# ─────────────────────────────────────────────

def _infer_team_from_timers(task_id: str) -> Optional[str]:
    """Look up the team for *task_id* in ``memory/task-timers.json``.

    The file is read from under ``WORKSPACE_ROOT``. Its task table is taken
    from the top-level ``"tasks"`` key when present, otherwise the top-level
    object itself is treated as the table. A trailing ``"-team"`` on the
    stored ``team_id`` is stripped (``"dev3-team"`` -> ``"dev3"``).

    Args:
        task_id: Task identifier used as the key in the task table.

    Returns:
        The normalised team ID, or None when the file is missing, unreadable,
        not valid JSON, has an unexpected shape, or holds no non-empty string
        ``team_id`` for the task.
    """
    timers_path = os.path.join(WORKSPACE_ROOT, "memory", "task-timers.json")
    try:
        with open(timers_path, encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError):
        # OSError: missing/unreadable file. ValueError covers both
        # json.JSONDecodeError and UnicodeDecodeError.
        return None

    # Validate the shape explicitly instead of relying on a blanket except.
    if not isinstance(data, dict):
        return None
    tasks = data.get("tasks", data)
    if not isinstance(tasks, dict):
        return None
    entry = tasks.get(task_id)
    if not isinstance(entry, dict):
        return None

    team = entry.get("team_id", "")
    if not isinstance(team, str):
        return None
    if team.endswith("-team"):
        team = team[:-5]
    return team or None


# ─────────────────────────────────────────────
# 데이터 수집 함수들 (subprocess 기반)
# ─────────────────────────────────────────────

def _run(cmd, timeout: int = 10):
    """Run *cmd* without a shell and return ``(stdout, returncode)``.

    Args:
        cmd: Argument vector (list of str). It is executed with
            ``shell=False``, so values such as task IDs, team IDs or
            patterns embedded in it are never interpreted by a shell.
        timeout: Seconds to wait before giving up.

    Returns:
        A tuple of the stripped standard output and the process return code.
        stderr is discarded. Any exception (missing binary, timeout, ...)
        results in the fallback ``("", 1)``.
    """
    run_options = {
        "shell": False,
        "stdout": subprocess.PIPE,
        "stderr": subprocess.DEVNULL,
        "text": True,
        "timeout": timeout,
    }
    try:
        completed = subprocess.run(cmd, **run_options)
        output = completed.stdout.strip()
        return output, completed.returncode
    except Exception:
        # Best-effort helper: callers treat any failure as "no output".
        return "", 1


def _check_ps(task_id: str, team_id: str):
    """Search running processes for ones related to the task or team.

    Up to two patterns are passed to ``pgrep -a -f``: the task ID itself and
    ``claude.*<team_id>``. Each is skipped when its value is empty. The first
    whitespace-separated field of every output line is parsed as a PID;
    lines whose first field is not an integer are ignored.

    Args:
        task_id: Task identifier used verbatim as a pgrep pattern.
        team_id: Team identifier, searched as ``claude.*<team_id>``.

    Returns:
        ``(ps_alive, ps_pids)`` - the de-duplicated PIDs in discovery order,
        excluding this process and its parent, and whether that list is
        non-empty.
    """
    search_patterns = [task_id] if task_id else []
    if team_id:
        # e.g. "dev3" -> "claude.*dev3"
        search_patterns.append(f"claude.*{team_id}")

    # A dict keeps insertion order while de-duplicating PIDs.
    found = {}
    for pattern in search_patterns:
        # pgrep replaces a ps|grep pipeline so the call works with shell=False.
        output, _ = _run(["pgrep", "-a", "-f", pattern])
        for raw_line in output.splitlines():
            fields = raw_line.split(None, 1)
            if not fields:
                continue
            try:
                found.setdefault(int(fields[0]), None)
            except ValueError:
                continue

    # Drop self-matches: this interpreter and the process that launched it.
    excluded = {os.getpid(), os.getppid()}
    live_pids = [pid for pid in found if pid not in excluded]

    return bool(live_pids), live_pids


def _get_last_commit(task_id: str, team_id: str):
    """Find the most recent commit for the task via ``git log``.

    Refs are tried in this order, and the first one that yields a parseable
    commit wins:

    1. ``task/<task_id>-<team_id>`` (only when ``team_id`` is non-empty) -
       the newest commit on that ref, unfiltered.
    2. ``main`` - the newest commit whose message matches ``task_id``.
    3. ``HEAD`` - same message filter as ``main``.

    All git commands run against ``WORKSPACE_ROOT``.

    Args:
        task_id: Task identifier; part of the branch name and the
            ``--grep`` filter.
        team_id: Team identifier, or an empty string to skip the task branch.

    Returns:
        ``(last_commit_age_min, last_commit_sha)`` - the commit age in whole
        minutes and its abbreviated hash, or ``(None, None)`` if nothing
        was found.
    """
    shared_refs = ("main", "HEAD")
    refs = list(shared_refs)
    if team_id:
        refs.insert(0, f"task/{task_id}-{team_id}")

    reference_ts = int(time.time())

    for ref in refs:
        git_args = ["git", "-C", WORKSPACE_ROOT, "log", ref]
        if ref in shared_refs:
            # Shared refs hold many tasks, so filter by commit message.
            git_args.append(f"--grep={task_id}")
        git_args.extend(["--format=%ct %h", "-1"])

        output, status = _run(git_args)
        if status != 0 or not output:
            continue

        # Expected output: "<unix commit time> <abbreviated sha>"
        fields = output.strip().split()
        if len(fields) < 2:
            continue
        try:
            committed_ts = int(fields[0])
        except ValueError:
            continue
        return int((reference_ts - committed_ts) / 60), fields[1]

    return None, None


def _get_pr_info(task_id: str, team_id: str):
    """Query ``gh pr list`` for the PR opened from the task branch.

    The head branch is ``task/<task_id>-<team_id>``, or ``task/<task_id>``
    when ``team_id`` is empty. PRs in every state are requested and the
    first entry of the returned JSON array is used.

    NOTE(review): no working directory is passed to the command, so ``gh``
    runs in the caller's current directory, whereas the git lookups in this
    file use ``-C WORKSPACE_ROOT`` - confirm this difference is intended.

    Args:
        task_id: Task identifier used in the branch name.
        team_id: Team identifier, or an empty string.

    Returns:
        ``(pr_number, pr_state, pr_merged_at)``; all three are None when the
        command fails, prints nothing, prints invalid JSON, or lists no PR.
        ``pr_merged_at`` is None when the PR has no merge timestamp.
    """
    not_found = (None, None, None)

    head_branch = f"task/{task_id}"
    if team_id:
        head_branch = f"{head_branch}-{team_id}"

    output, status = _run(
        [
            "gh", "pr", "list", "--head", head_branch,
            "--json", "number,state,mergedAt", "--state", "all",
        ],
        timeout=15,
    )
    if status != 0 or not output:
        return not_found

    try:
        listed = json.loads(output)
        if listed:
            first = listed[0]
            # An empty mergedAt is normalised to None.
            return first.get("number"), first.get("state"), first.get("mergedAt") or None
    except (json.JSONDecodeError, IndexError, KeyError):
        pass

    return not_found


def _check_markers(task_id: str):
    """Report which marker files exist for the task.

    Each marker is a file named ``<task_id>.<suffix>`` inside
    ``<WORKSPACE_ROOT>/memory/events``.

    Args:
        task_id: Task identifier used as the file-name stem.

    Returns:
        A dict of booleans keyed by ``done``, ``work_done``, ``merged``,
        ``merge_failed`` and ``start_guard_fail`` (in that order).
    """
    events_dir = os.path.join(WORKSPACE_ROOT, "memory", "events")
    # (result key, file-name suffix) pairs; only the start-guard marker
    # carries a ".json" extension.
    marker_suffixes = (
        ("done", "done"),
        ("work_done", "work-done"),
        ("merged", "merged"),
        ("merge_failed", "merge-failed"),
        ("start_guard_fail", "start-guard-fail.json"),
    )
    return {
        key: os.path.exists(os.path.join(events_dir, f"{task_id}.{suffix}"))
        for key, suffix in marker_suffixes
    }


# ─────────────────────────────────────────────
# evidence 빌더
# ─────────────────────────────────────────────

def _build_evidence(ps_alive, ps_pids, last_commit_age_min, last_commit_sha,
                    pr_number, pr_state, pr_merged_at, markers):
    """Render the collected facts as a list of short evidence strings.

    The list is ordered: process entries, one commit entry, one PR entry,
    then one ``marker:<key>`` entry per marker that is set.

    Returns:
        list[str] of evidence lines.
    """
    # Process: one line per PID, or a single "dead" line. Nothing is added
    # when ps_alive is true but no PIDs were supplied.
    if ps_pids:
        lines = [f"ps:{pid} (alive)" for pid in ps_pids]
    elif not ps_alive:
        lines = ["ps:none (dead)"]
    else:
        lines = []

    # Commit: the age is only shown when it is known.
    if not last_commit_sha:
        lines.append("no_commit_found")
    elif last_commit_age_min is None:
        lines.append(f"last_commit:{last_commit_sha}")
    else:
        lines.append(f"last_commit:{last_commit_sha} ({last_commit_age_min}m ago)")

    # PR: a merge timestamp takes precedence over number/state.
    if pr_merged_at:
        pr_line = f"pr_merged:{pr_merged_at}"
    elif pr_number is not None:
        pr_line = f"pr:{pr_number} ({pr_state})"
    else:
        pr_line = "no_pr_yet"
    lines.append(pr_line)

    # Markers that are present.
    lines.extend(f"marker:{name}" for name, present in markers.items() if present)

    return lines


# ─────────────────────────────────────────────
# 메인 resolve 함수
# ─────────────────────────────────────────────

def resolve(task_id: str, team_id: Optional[str] = None):
    """Collect all signals for a task and return the full status report.

    Args:
        task_id: Task identifier to investigate.
        team_id: Team identifier; when empty or None it is inferred from
            task-timers.json (and may remain None).

    Returns:
        A dict with the task/team IDs, the status label, the raw process,
        commit, PR and marker data, the verdict and the evidence list.
    """
    # Fall back to the timers file when the caller gave no team.
    team_id = team_id or _infer_team_from_timers(task_id)
    team_arg = team_id or ""  # collectors expect a string, not None

    # Gather raw signals.
    ps_alive, ps_pids = _check_ps(task_id, team_arg)
    commit_age, commit_sha = _get_last_commit(task_id, team_arg)
    pr_number, pr_state, pr_merged_at = _get_pr_info(task_id, team_arg)
    markers = _check_markers(task_id)

    # Derive the label and its coarse verdict.
    label = classify_status(
        ps_alive, commit_age, pr_merged_at, pr_number, markers["start_guard_fail"]
    )

    report = {
        "task_id": task_id,
        "team_id": team_id,
        "bot_status": label,
        "ps_alive": ps_alive,
        "ps_pids": ps_pids,
        "last_commit_age_min": commit_age,
        "last_commit_sha": commit_sha,
        "pr_number": pr_number,
        "pr_state": pr_state,
        "pr_merged_at": pr_merged_at,
        "markers": markers,
        "verdict": VERDICT_MAP[label],
    }
    report["evidence"] = _build_evidence(
        ps_alive, ps_pids, commit_age, commit_sha,
        pr_number, pr_state, pr_merged_at, markers,
    )
    return report


# ─────────────────────────────────────────────
# 포맷터
# ─────────────────────────────────────────────

# 팀 ID → 한국어 이름 매핑 (알려진 것)
# Display names for the known team IDs; unknown IDs are shown as-is.
_TEAM_NAMES = {
    "dev1": "에리카", "dev2": "마르스", "dev3": "루",
    "dev4": "오시리스", "dev5": "이시스", "dev6": "페룬",
    "dev7": "아누", "dev8": "헤르메스",
}


def _format_human(result: dict) -> str:
    """Render a resolve() result as a one-line human-readable summary.

    Args:
        result: Report dict; ``bot_status``, ``task_id`` and ``verdict`` are
            required, ``team_id``, ``last_commit_age_min`` and ``pr_number``
            are optional.

    Returns:
        ``"<team_id> (<team name>): <STATUS> — <description>"``. A status
        outside the five known labels falls back to ``"<task_id> <verdict>"``
        as the description.
    """
    team_id = result.get("team_id") or "unknown"
    team_name = _TEAM_NAMES.get(team_id, team_id)
    status = result["bot_status"]
    task_id = result["task_id"]
    age = result.get("last_commit_age_min")
    pr_number = result.get("pr_number")
    verdict = result["verdict"]

    age_str = f"{age}분 전 commit" if age is not None else "commit 시간 불명"
    pr_str = f"PR #{pr_number}" if pr_number else "PR 생성 중"
    # STALE can also come from a start-guard failure while a PR exists, so
    # only claim "no PR" when there really is none.
    stale_pr_str = f"PR #{pr_number}" if pr_number else "PR 없음"

    status_desc = {
        "MERGED": f"{task_id} 완료 (머지됨)",
        "ALIVE": f"{task_id} 활발히 진행 중 ({age_str})",
        "IDLE": f"{task_id} 진행 중 ({age_str}, {pr_str})",
        "STALE": f"{task_id} 멈춤 ({age_str}, {stale_pr_str})",
        "UNKNOWN": f"{task_id} 상태 불명 ({age_str})",
    }

    desc = status_desc.get(status, f"{task_id} {verdict}")
    return f"{team_id} ({team_name}): {status} — {desc}"


# ─────────────────────────────────────────────
# CLI 진입점
# ─────────────────────────────────────────────

def main():
    """CLI entry point: parse arguments, resolve the task and print the report.

    ``--task-id`` is required; ``--team`` is optional and inferred when
    omitted; ``--format`` selects JSON (default) or the one-line human
    summary.
    """
    cli = argparse.ArgumentParser(
        description="봇 상태 라벨링 시스템 — ps/git/gh/marker 기반 객관적 판정"
    )
    cli.add_argument(
        "--task-id",
        required=True,
        help="조사할 task ID (예: task-2375)",
    )
    cli.add_argument(
        "--team",
        dest="team_id",
        default=None,
        help="팀 ID (예: dev3). 미지정 시 task-timers.json에서 추론",
    )
    cli.add_argument(
        "--format",
        choices=["json", "human"],
        default="json",
        help="출력 형식 (기본: json)",
    )
    options = cli.parse_args()

    report = resolve(options.task_id, options.team_id)

    rendered = (
        _format_human(report)
        if options.format == "human"
        else json.dumps(report, ensure_ascii=False, indent=2)
    )
    print(rendered)


if __name__ == "__main__":
    main()
