"""utils/recoverable_block_classifier.py — recoverable merge block 분류기.

task-2472 구현 3: RECOVERABLE_BLOCKED state 분류기.
branch protection merge block을 FAILED(terminal)가 아니라 RECOVERABLE_BLOCKED로 올바르게 분류.

매칭 우선순위: merge_state_status > unresolved_threads > error_message regex.
"""
from __future__ import annotations

import hashlib
import json
import os
import re
from datetime import datetime, timezone
from pathlib import Path
from typing import Optional

# --------------------------------------------------------------------------
# 상수 / 공개 패턴
# --------------------------------------------------------------------------

# Regex sources for merge failures that are treated as recoverable
# (task-2472 implementation spec). Order matters: the first pattern that
# matches an error message is the one reported as ``matched_pattern``.
RECOVERABLE_PATTERNS: list[str] = [
    r"base branch policy prohibits the merge",
    r"required_review_thread_resolution",
    r"unresolved review thread",
    r"required checks pending",
    r"mergeStateStatus[\s:=\"]+BLOCKED",
    r"review thread resolution required",
]

# Every recoverable pattern compiled case-insensitively, paired with the
# source string it was compiled from.
_COMPILED_PATTERNS: list[tuple[re.Pattern, str]] = [
    (re.compile(source, flags=re.IGNORECASE), source)
    for source in RECOVERABLE_PATTERNS
]

# Category rules as (compiled regex, category name), evaluated in list order.
_PATTERN_CATEGORIES: list[tuple[re.Pattern, str]] = [
    (re.compile(source, flags=re.IGNORECASE), label)
    for source, label in (
        (r"base branch policy", "BRANCH_PROTECTION"),
        (
            r"required_review_thread_resolution"
            r"|unresolved review thread"
            r"|review thread resolution required",
            "UNRESOLVED_THREAD",
        ),
        (r"required checks pending", "REQUIRED_CHECKS_PENDING"),
        (r"mergeStateStatus[\s:=\"]+BLOCKED", "MERGE_STATE_BLOCKED"),
    )
]

# Location of the audit JSONL file, relative to the workspace root.
AUDIT_JSONL_REL = Path("memory") / "orchestration-audit" / "recoverable-merge-block.jsonl"

# --------------------------------------------------------------------------
# 내부 헬퍼
# --------------------------------------------------------------------------


def _now_iso() -> str:
    """Return the current UTC time formatted as ``YYYY-MM-DDTHH:MM:SSZ``."""
    current = datetime.now(tz=timezone.utc)
    return current.strftime("%Y-%m-%dT%H:%M:%SZ")


def _evidence_hash_str(record: dict) -> str:
    """Return the SHA-256 hex digest of *record* serialized as JSON.

    Keys are sorted during serialization, so two dicts with the same
    content hash identically regardless of insertion order. Non-ASCII
    characters are kept as-is and encoded as UTF-8 before hashing.
    """
    canonical = json.dumps(record, ensure_ascii=False, sort_keys=True).encode("utf-8")
    digest = hashlib.sha256()
    digest.update(canonical)
    return digest.hexdigest()


def _workspace_root(workspace: Optional[Path] = None) -> Path:
    """Resolve the workspace root directory.

    A truthy *workspace* argument is returned unchanged. Otherwise the
    ``WORKSPACE_ROOT`` environment variable is used, falling back to
    ``/home/jay/workspace`` when the variable is not set.
    """
    if workspace:
        return workspace
    return Path(os.environ.get("WORKSPACE_ROOT", "/home/jay/workspace"))


def _classify_by_pattern(error_message: str) -> tuple[bool, str, Optional[str]]:
    """Match *error_message* against the recoverable merge-block patterns.

    The first entry of ``_COMPILED_PATTERNS`` that matches wins. Its
    category is the first entry of ``_PATTERN_CATEGORIES`` whose regex
    matches either the winning pattern's source text or the error message
    itself; if none does, the category is ``"UNKNOWN"``.

    Returns
    -------
    tuple
        ``(recoverable, category, matched_pattern)``. When nothing matches
        this is ``(False, "UNKNOWN", None)``.
    """
    # First recoverable pattern (by list order) found in the message.
    hit = next(
        (
            source
            for compiled, source in _COMPILED_PATTERNS
            if compiled.search(error_message)
        ),
        None,
    )
    if hit is None:
        return False, "UNKNOWN", None

    # A category rule applies if it matches the pattern source OR the message;
    # rules are tried in list order and the first applicable one is used.
    label = next(
        (
            name
            for probe, name in _PATTERN_CATEGORIES
            if probe.search(hit) or probe.search(error_message)
        ),
        "UNKNOWN",
    )
    return True, label, hit


# --------------------------------------------------------------------------
# 공개 API
# --------------------------------------------------------------------------


def classify_merge_block(
    error_message: str,
    *,
    merge_state_status: Optional[str] = None,
    unresolved_threads: Optional[list] = None,
) -> dict:
    """Classify the cause of a merge block and whether it is recoverable.

    The inputs are consulted in a fixed priority order (task-2472 spec) and
    the first decisive one determines the result:

    1. ``merge_state_status``: after upper-casing and stripping,
       ``"BLOCKED"`` gives a recoverable ``MERGE_STATE_BLOCKED`` result,
       while ``"UNKNOWN"`` or ``"DIRTY"`` gives a non-recoverable result
       immediately, without consulting the other inputs. Any other value
       falls through to the next step.
    2. ``unresolved_threads``: a non-empty list gives a recoverable
       ``UNRESOLVED_THREAD`` result.
    3. ``error_message``: a non-empty message is matched against the
       recoverable regex patterns via ``_classify_by_pattern``.

    If none of the steps decides, the block is reported as non-recoverable.

    Parameters
    ----------
    error_message:
        Merge failure message (for example the output of ``gh pr merge``).
    merge_state_status:
        GitHub ``mergeStateStatus`` value such as ``"BLOCKED"``,
        ``"MERGEABLE"`` or ``"BEHIND"``.
    unresolved_threads:
        Unresolved review threads. Dict items contribute their ``"id"``
        value (``"?"`` when missing) to the reason text; other items are
        stringified. Only the first five are listed in the reason.

    Returns
    -------
    dict
        ``{"recoverable": bool, "category": str,
        "matched_pattern": str | None, "reason": str}``.
    """

    def _result(
        recoverable: bool,
        category: str,
        matched_pattern: Optional[str],
        reason: str,
    ) -> dict:
        # Single place that fixes the key set and key order of the result.
        return {
            "recoverable": recoverable,
            "category": category,
            "matched_pattern": matched_pattern,
            "reason": reason,
        }

    # Priority 1: explicit merge state status.
    if merge_state_status is not None:
        normalized = str(merge_state_status).upper().strip()
        if normalized == "BLOCKED":
            return _result(
                True,
                "MERGE_STATE_BLOCKED",
                "mergeStateStatus=BLOCKED",
                "mergeStateStatus=BLOCKED → RECOVERABLE_BLOCKED (branch protection 또는 thread 미해결 상태)",
            )
        if normalized in ("UNKNOWN", "DIRTY"):
            return _result(
                False,
                "UNKNOWN",
                None,
                f"mergeStateStatus={normalized} → non-recoverable FAILED",
            )

    # Priority 2: unresolved review threads.
    thread_count = 0 if unresolved_threads is None else len(unresolved_threads)
    if thread_count > 0:
        sample_ids = []
        for thread in unresolved_threads[:5]:
            if isinstance(thread, dict):
                sample_ids.append(thread.get("id", "?"))
            else:
                sample_ids.append(str(thread))
        return _result(
            True,
            "UNRESOLVED_THREAD",
            "unresolved_threads",
            f"unresolved thread {thread_count}건 존재 → RECOVERABLE_BLOCKED "
            f"(threads: {sample_ids})",
        )

    # Priority 3: regex match on the error message.
    if error_message:
        is_recoverable, label, source = _classify_by_pattern(error_message)
        if is_recoverable:
            return _result(
                True,
                label,
                source,
                f"error_message에서 recoverable 패턴 매칭: '{source}'",
            )

    # Nothing matched: treat as a non-recoverable failure.
    return _result(
        False,
        "UNKNOWN",
        None,
        "알 수 없는 merge 실패 원인 → non-recoverable FAILED",
    )


def record_block_audit(
    *,
    task_id: str,
    pr_number: int,
    classification: dict,
    actor: str,
    input_state: str,
    output_state: str,
    error_message: str,
    workspace: Optional[Path] = None,
) -> Path:
    """Append one recoverable-merge-block audit record to the audit JSONL.

    The record is written as a single JSON line to
    ``memory/orchestration-audit/recoverable-merge-block.jsonl`` under the
    workspace root; missing parent directories are created. Each record
    carries ten fields: ``task_id``, ``pr_number``, ``actor``,
    ``input_state``, ``output_state``, ``classification``,
    ``error_message``, ``reason``, ``timestamp`` and ``evidence_hash``.
    ``reason`` is copied from ``classification["reason"]`` (empty string if
    absent) and ``evidence_hash`` is computed over the other nine fields.

    Parameters
    ----------
    task_id, pr_number, actor, input_state, output_state, error_message:
        Stored verbatim in the record.
    classification:
        Classification dict, e.g. the result of ``classify_merge_block``.
    workspace:
        Workspace root override; resolved by ``_workspace_root`` when omitted.

    Returns
    -------
    Path
        Path of the JSONL file the record was appended to.

    Raises
    ------
    OSError
        If the file cannot be opened or the line cannot be fully written.
    """
    timestamp = _now_iso()
    base_record: dict = {
        "task_id": task_id,
        "pr_number": pr_number,
        "actor": actor,
        "input_state": input_state,
        "output_state": output_state,
        "classification": classification,
        "error_message": error_message,
        "reason": classification.get("reason", ""),
        "timestamp": timestamp,
    }
    # The hash covers everything except itself, so it is computed first and
    # then attached as the final field.
    record = {**base_record, "evidence_hash": _evidence_hash_str(base_record)}

    target = _workspace_root(workspace) / AUDIT_JSONL_REL
    target.parent.mkdir(parents=True, exist_ok=True)

    payload = (json.dumps(record, ensure_ascii=False) + "\n").encode("utf-8")
    # O_APPEND makes every write land at the current end of the file.
    fd = os.open(str(target), os.O_WRONLY | os.O_APPEND | os.O_CREAT, 0o644)
    try:
        # os.write may write fewer bytes than requested; keep going until the
        # whole line is on disk so the JSONL never ends in a truncated record.
        remaining = memoryview(payload)
        while remaining:
            written = os.write(fd, remaining)
            if written <= 0:
                raise OSError(f"short write while appending audit record to {target}")
            remaining = remaining[written:]
    finally:
        os.close(fd)

    return target
