#!/usr/bin/env python3
"""gemini_review_gate.py — Gemini Review를 GitHub status check로 강제하는 게이트.

CI에서 호출:
    1) status=pending 으로 check run 생성/갱신 (리뷰 진행 중)
    2) Gemini API 호출 → blocking 패턴 detect
    3) status=success/failure 로 check run 갱신
    4) 결과를 stdout에 JSON으로 출력 + memory/logs/gemini-calls.jsonl append

Blocking patterns (case-insensitive; English matched on word boundaries, Korean as substrings):
    en: blocking, critical issue, must fix, request changes, blocker
    ko: 차단, 필수 수정, 심각, blocker

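Gate decision:
    - one or more of the patterns above in the Gemini response → conclusion="failure"
    - no match → conclusion="success"
    - missing Gemini API key or failed call → conclusion="failure"
      (conclusion="neutral" only with --allow-neutral; debugging aid, not for CI)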

Rate limit / debounce:
    - 같은 SHA 중복 호출 차단 (cache 파일)
    - PR별 debounce 120초

CLI:
    python3 scripts/gemini_review_gate.py \
        --pr-number 123 --commit-sha abc... [--status pending|success|failure|neutral]
        [--repo OWNER/REPO] [--diff-file path/to/diff] [--mode call|publish-only]
        [--summary TEXT] [--publish-check] [--force] [--allow-neutral]
"""

from __future__ import annotations

import argparse
import hashlib
import json
import os
import re
import subprocess
import sys
import time
from datetime import datetime, timezone
from pathlib import Path

# Workspace root: the WORKSPACE environment variable when set, otherwise the
# parent of the directory that contains this script.
WORKSPACE = Path(os.environ.get("WORKSPACE", str(Path(__file__).resolve().parent.parent)))
# Directory holding the per-SHA dedup records and the per-PR debounce timestamps.
CACHE_DIR = WORKSPACE / "memory" / "cache"
# JSONL file that receives one appended record per Gemini call.
LOG_PATH = WORKSPACE / "memory" / "logs" / "gemini-calls.jsonl"

# Phrases that mark a review as blocking. detect_blocking matches the English
# list on word boundaries and the Korean list as plain substrings; "blocker" is
# listed in both, so it is checked by both passes.
BLOCKING_PATTERNS_EN = ["blocking", "critical issue", "must fix", "request changes", "blocker"]
BLOCKING_PATTERNS_KO = ["차단", "필수 수정", "심각", "blocker"]

# Name used for the check run (and as its output title) when publishing to GitHub.
CHECK_NAME = "gemini-review-gate"

# Minimum number of seconds between two Gemini calls for the same PR.
DEBOUNCE_SECONDS = 120


def _now_iso() -> str:
    return datetime.now(timezone.utc).isoformat()


# Phrases that detect_blocking removes from the lowercased review text before
# pattern matching, so that statements such as "no blocking issues" are not
# counted as blocking findings. Entries must be lowercase-compatible because
# they are compared against lowercased text.
_NEGATION_PHRASES = (
    "no blocking",
    "no blocker",
    "not blocking",
    "no critical",
    "no must fix",
    "no request changes",
    "차단 없음",
    "필수 수정 없음",
    "심각한 문제 없음",
)


def detect_blocking(text: str) -> list[str]:
    """Return the blocking patterns found in *text*, each listed at most once.

    Matching is case-insensitive and works on a copy of the text from which
    the known negation phrases ("no blocking", "차단 없음", ...) have been
    removed, so that "LGTM — no blocking issues" does not trigger the gate.

    - English patterns must match on word boundaries.
    - Korean patterns are matched as plain substrings, because regex word
      boundaries do not work reliably for Hangul.

    Args:
        text: Review text returned by the model; empty or None yields [].

    Returns:
        Matched patterns in first-seen order (English list first). A pattern
        present in both lists, such as "blocker", is reported only once.
    """
    if not text:
        return []
    cleaned = text.lower()
    for neg in _NEGATION_PHRASES:
        cleaned = cleaned.replace(neg.lower(), " ")
    en_hits = [
        pat
        for pat in BLOCKING_PATTERNS_EN
        if re.search(rf"\b{re.escape(pat.lower())}\b", cleaned)
    ]
    ko_hits = [pat for pat in BLOCKING_PATTERNS_KO if pat.lower() in cleaned]
    # dict.fromkeys drops duplicates while keeping the first-seen order.
    return list(dict.fromkeys(en_hits + ko_hits))


def gh_api_json(args: list[str], input_data: str | None = None) -> tuple[int, str, str]:
    cmd = ["gh", "api"] + args
    proc = subprocess.run(cmd, capture_output=True, text=True, input=input_data, timeout=30)
    return proc.returncode, proc.stdout, proc.stderr


def should_call_gemini(pr_number: int, commit_sha: str) -> tuple[bool, str]:
    """Decide whether a Gemini call is allowed for this PR and commit.

    Two checks are applied in order: a per-commit dedup marker in the cache
    directory, then a per-PR debounce timestamp.

    Returns:
        ``(True, "ok")`` when the call may proceed, otherwise ``(False, reason)``.
    """
    CACHE_DIR.mkdir(parents=True, exist_ok=True)

    digest = hashlib.sha1(commit_sha.encode()).hexdigest()
    dedup_marker = CACHE_DIR / f"gemini-{digest}.json"
    if dedup_marker.exists():
        return False, f"duplicate SHA cached at {dedup_marker}"

    stamp_file = CACHE_DIR / f"gemini-pr-{pr_number}.lasttime"
    if not stamp_file.exists():
        return True, "ok"

    try:
        # An empty file counts as "never called".
        previous_call = float(stamp_file.read_text().strip() or "0")
    except ValueError:
        # Unparsable timestamp: treat it as "never called".
        previous_call = 0.0

    elapsed = time.time() - previous_call
    if elapsed < DEBOUNCE_SECONDS:
        return False, f"debounce ({DEBOUNCE_SECONDS}s)"
    return True, "ok"


def write_call_log(record: dict) -> None:
    """Append *record* as a single JSON line to the call log at LOG_PATH.

    The log directory is created when missing; non-ASCII characters are
    written unescaped in UTF-8.
    """
    log_dir = LOG_PATH.parent
    log_dir.mkdir(parents=True, exist_ok=True)
    with LOG_PATH.open(mode="a", encoding="utf-8") as log_file:
        serialized = json.dumps(record, ensure_ascii=False)
        log_file.write(serialized + "\n")


def call_gemini(diff_text: str, model: str | None = None) -> dict:
    """Lightweight Gemini API call. Stub-friendly; mock by setting GEMINI_REVIEW_MOCK=<json>.

    Returns:
        {"ok": bool, "text": str, "tokens_in": int, "tokens_out": int, "latency_ms": int, "error": str?}
    """
    mock_json = os.environ.get("GEMINI_REVIEW_MOCK")
    if mock_json:
        try:
            payload = json.loads(mock_json)
            return {
                "ok": True,
                "text": payload.get("text", ""),
                "tokens_in": payload.get("tokens_in", len(diff_text) // 4),
                "tokens_out": payload.get("tokens_out", len(payload.get("text", "")) // 4),
                "latency_ms": payload.get("latency_ms", 0),
            }
        except Exception as e:
            return {"ok": False, "text": "", "tokens_in": 0, "tokens_out": 0, "latency_ms": 0, "error": f"mock parse: {e}"}

    api_key = os.environ.get("GEMINI_API_KEY")
    if not api_key:
        return {"ok": False, "text": "", "tokens_in": 0, "tokens_out": 0, "latency_ms": 0, "error": "GEMINI_API_KEY missing"}
    try:
        import urllib.request
        import urllib.error
    except ImportError as e:
        return {"ok": False, "text": "", "tokens_in": 0, "tokens_out": 0, "latency_ms": 0, "error": f"urllib import: {e}"}

    use_model = model or os.environ.get("GEMINI_MODEL", "gemini-2.5-pro")
    url = f"https://generativelanguage.googleapis.com/v1beta/models/{use_model}:generateContent?key={api_key}"
    prompt = (
        "You are a strict code reviewer. Given the following diff, list any "
        "BLOCKING issues using the literal string 'blocking' or 'must fix' "
        "(English) or '차단'/'필수 수정' (Korean) in your response. "
        "If the diff is acceptable, respond with 'no blocking issues'.\n\n"
        f"DIFF:\n{diff_text[:32000]}"
    )
    body = json.dumps({"contents": [{"parts": [{"text": prompt}]}]}).encode("utf-8")
    req = urllib.request.Request(url, data=body, headers={"Content-Type": "application/json"})
    t0 = time.time()
    try:
        with urllib.request.urlopen(req, timeout=60) as resp:
            data = json.loads(resp.read().decode("utf-8"))
        latency_ms = int((time.time() - t0) * 1000)
        text = ""
        try:
            text = data["candidates"][0]["content"]["parts"][0]["text"]
        except (KeyError, IndexError):
            text = json.dumps(data)
        usage = data.get("usageMetadata", {}) or {}
        return {
            "ok": True,
            "text": text,
            "tokens_in": int(usage.get("promptTokenCount", 0)),
            "tokens_out": int(usage.get("candidatesTokenCount", 0)),
            "latency_ms": latency_ms,
        }
    except urllib.error.HTTPError as e:
        return {"ok": False, "text": "", "tokens_in": 0, "tokens_out": 0, "latency_ms": int((time.time() - t0) * 1000), "error": f"http {e.code}: {e.reason}"}
    except urllib.error.URLError as e:
        return {"ok": False, "text": "", "tokens_in": 0, "tokens_out": 0, "latency_ms": int((time.time() - t0) * 1000), "error": f"url: {e.reason}"}
    except Exception as e:  # noqa: BLE001
        return {"ok": False, "text": "", "tokens_in": 0, "tokens_out": 0, "latency_ms": int((time.time() - t0) * 1000), "error": f"call: {e}"}


def publish_check_run(repo: str, sha: str, status: str, conclusion: str | None = None, summary: str = "", details: str = "") -> dict:
    """Publish a gemini-review-gate check run for *sha* via ``gh api``.

    Every call POSTs to ``repos/{repo}/check-runs``.

    Args:
        repo: Repository in OWNER/REPO form.
        sha: Commit the check run is attached to (sent as ``head_sha``).
        status: queued | in_progress | completed
        conclusion: Only meaningful when completed: success | failure |
            neutral | cancelled | timed_out | action_required.
        summary: Short summary; falls back to the first 200 characters of
            *details* when empty.
        details: Full review text, sent as the output ``text`` when non-empty.

    Returns:
        {"rc": int, "stdout": str, "stderr": str} from the ``gh api`` call.
    """
    # GitHub rejects output.summary / output.text longer than 65535 characters,
    # so long reviews are truncated rather than failing the whole publish.
    max_len = 65535
    marker = "\n… [truncated]"

    def _clip(value: str) -> str:
        if len(value) <= max_len:
            return value
        return value[: max_len - len(marker)] + marker

    payload = {
        "name": CHECK_NAME,
        "head_sha": sha,
        "status": status,
        "output": {"title": CHECK_NAME, "summary": _clip(summary or details[:200])},
    }
    if conclusion:
        payload["conclusion"] = conclusion
    if details:
        payload["output"]["text"] = _clip(details)
    body = json.dumps(payload)
    # "--input -" makes gh read the JSON request body from stdin.
    args = ["-X", "POST", f"repos/{repo}/check-runs", "--input", "-"]
    rc, out, err = gh_api_json(args, input_data=body)
    return {"rc": rc, "stdout": out, "stderr": err}


def fetch_pr_diff(repo: str, pr_number: int) -> str:
    """Fetch the diff of a pull request with ``gh pr diff``, best effort.

    Args:
        repo: Repository in OWNER/REPO form.
        pr_number: Pull request number.

    Returns:
        The diff text, or "" when it cannot be obtained: ``gh`` is missing,
        the command exceeds 30 seconds, or it exits with a non-zero status.
        Bytes that are not valid UTF-8 are replaced instead of raising.
    """
    try:
        proc = subprocess.run(
            ["gh", "pr", "diff", str(pr_number), "--repo", repo],
            capture_output=True,
            encoding="utf-8",
            errors="replace",  # diffs may contain non-UTF-8 bytes
            timeout=30,
        )
    except (OSError, subprocess.SubprocessError):
        # Missing binary or timeout: honour the documented empty-string fallback.
        return ""
    return proc.stdout if proc.returncode == 0 else ""


def gate(args: argparse.Namespace) -> int:
    repo = args.repo or os.environ.get("GH_REPO", "JonghyukJeon/dev_workspace")
    pr_number = args.pr_number or 0
    commit_sha = args.commit_sha
    out: dict = {"name": CHECK_NAME, "pr": pr_number, "sha": commit_sha, "repo": repo, "ts": _now_iso()}

    if args.mode == "publish-only":
        if not args.status:
            print(json.dumps({"error": "--status required in publish-only mode"}), file=sys.stderr)
            return 2
        result = publish_check_run(
            repo, commit_sha,
            status="completed" if args.status in ("success", "failure", "neutral") else "in_progress",
            conclusion=args.status if args.status in ("success", "failure", "neutral") else None,
            summary=args.summary or args.status,
        )
        out["publish"] = result
        print(json.dumps(out, ensure_ascii=False))
        return 0 if result["rc"] == 0 else 1

    if not commit_sha:
        print(json.dumps({"error": "--commit-sha required"}), file=sys.stderr)
        return 2

    if args.status == "pending":
        publish_check_run(repo, commit_sha, status="in_progress", summary="Gemini review pending")
        out["state"] = "pending"
        print(json.dumps(out, ensure_ascii=False))
        return 0

    proceed, reason = should_call_gemini(pr_number, commit_sha)
    if not proceed and not args.force:
        out["state"] = "skipped"
        out["reason"] = reason
        publish_check_run(repo, commit_sha, status="completed", conclusion="neutral", summary=f"skipped: {reason}")
        print(json.dumps(out, ensure_ascii=False))
        return 0

    diff_text = ""
    if args.diff_file:
        try:
            diff_text = Path(args.diff_file).read_text(encoding="utf-8")
        except OSError as e:
            print(json.dumps({"warn": f"diff read: {e}"}), file=sys.stderr)
    if not diff_text and pr_number:
        diff_text = fetch_pr_diff(repo, pr_number)

    gemini_result = call_gemini(diff_text)
    matches = detect_blocking(gemini_result.get("text", ""))
    allow_neutral = getattr(args, "allow_neutral", False)
    if not gemini_result["ok"]:
        # task-2461 Phase 3 P1-1: gemini 호출 실패 시 failure로 강제
        # GEMINI_API_KEY missing / API timeout / network error 모두 차단 대상
        if allow_neutral:
            conclusion = "neutral"
            summary = f"gemini call failed (allow-neutral): {gemini_result.get('error', 'unknown')}"
        else:
            conclusion = "failure"
            summary = f"gemini call failed: {gemini_result.get('error', 'unknown')}"
            print(f"[GEMINI-GATE] FAIL — review not executed (reason: {gemini_result.get('error', 'unknown')})", file=sys.stderr)
    elif matches:
        conclusion = "failure"
        summary = f"blocking matches: {matches}"
    else:
        conclusion = "success"
        summary = "no blocking issues"

    record = {
        "timestamp": _now_iso(),
        "pr_number": pr_number,
        "commit_sha": commit_sha,
        "tokens_in": gemini_result.get("tokens_in", 0),
        "tokens_out": gemini_result.get("tokens_out", 0),
        "latency_ms": gemini_result.get("latency_ms", 0),
        "status": conclusion,
        "matches": matches,
        "ok": gemini_result["ok"],
    }
    write_call_log(record)
    if gemini_result["ok"]:
        sha_cache = CACHE_DIR / f"gemini-{hashlib.sha1(commit_sha.encode()).hexdigest()}.json"
        sha_cache.parent.mkdir(parents=True, exist_ok=True)
        sha_cache.write_text(json.dumps(record, ensure_ascii=False))
        debounce_path = CACHE_DIR / f"gemini-pr-{pr_number}.lasttime"
        debounce_path.write_text(str(time.time()))

    if args.publish_check:
        publish_check_run(repo, commit_sha, status="completed", conclusion=conclusion, summary=summary, details=gemini_result.get("text", ""))

    out["state"] = conclusion
    out["matches"] = matches
    out["latency_ms"] = gemini_result.get("latency_ms", 0)
    print(json.dumps(out, ensure_ascii=False))
    return 1 if conclusion == "failure" else 0


def main() -> int:
    """Parse the command line and run the gate; return its exit code."""
    default_repo = os.environ.get("GH_REPO", "JonghyukJeon/dev_workspace")
    parser = argparse.ArgumentParser(description="Gemini Review Gate — GitHub check run gate")

    # (flag, keyword arguments) pairs, registered in the order they appear in --help.
    option_specs = (
        ("--pr-number", {"type": int, "default": 0}),
        ("--commit-sha", {"required": False, "default": ""}),
        ("--repo", {"default": default_repo}),
        ("--status", {"default": "", "choices": ["", "pending", "success", "failure", "neutral"]}),
        ("--diff-file", {"default": ""}),
        ("--mode", {"default": "call", "choices": ["call", "publish-only"]}),
        ("--summary", {"default": ""}),
        ("--publish-check", {"action": "store_true", "help": "publish completion as a GitHub check run"}),
        ("--force", {"action": "store_true", "help": "bypass dedup/debounce"}),
        ("--allow-neutral", {"action": "store_true", "help": "DEBUG only — gemini fail 시 neutral 허용 (CI에서 사용 금지)"}),
    )
    for flag, options in option_specs:
        parser.add_argument(flag, **options)

    parsed = parser.parse_args()
    return gate(parsed)


# Script entry point: exit with the status code returned by main().
if __name__ == "__main__":
    sys.exit(main())
