# -*- coding: utf-8 -*-
"""tests.regression.test_ci_watch_handoff_runner_fixture_parametrized — task-2642.

Chairman's verbatim instruction (2026-05-23 19:38 KST), preserved 1:1 —
regression asserting that the 6 fixture scenarios PASS, parametrized.

Layer A / NO-CRON: zero calls to subprocess / cokacdir / merge / cron / live gh.

Every scenario drives the runner with the ci_status_sequence +
router_call_sequence + auto_remediation_outcome_sequence found in
fixtures/ci_watch_handoff_runner/<scenario>/evidence.json and asserts the
terminal_state / callback_fired / audit_event_sequence found in expected.json.
"""
from __future__ import annotations

import json
from pathlib import Path
from typing import Iterator

import pytest

from utils.ci_watch_handoff_audit import (
    AUDIT_SCHEMA,
    CiWatchHandoffAudit,
)
from utils.ci_watch_handoff_runner import (
    CALLBACK_ENVELOPE_BYTE_LIMIT,
    CANONICAL_ROOT,
    CiWatchHandoffRunner,
    CIStatusSnapshot,
    RouterCallResult,
)

# Repository root: three directory levels above this test module.
WORKSPACE = Path(__file__).resolve().parent.parent.parent
FIXTURE_ROOT = WORKSPACE / "tests" / "fixtures" / "ci_watch_handoff_runner"

# One fixture directory per scenario name under FIXTURE_ROOT.
SCENARIOS = (
    "merge_ready_clean_gemini_fresh",
    "gemini_stale_nudge_posted_re_poll_fresh",
    "gemini_stale_after_nudge_timeout",
    "ci_failure_auto_remediation_medium_fix",
    "forbidden_path_modification_detected",
    "loop_boundary_three_high_attempts",
)


def _load_fixture(scenario: str) -> tuple[dict, dict]:
    """Load the ``evidence.json`` / ``expected.json`` pair for *scenario*.

    Args:
        scenario: Name of a directory under ``FIXTURE_ROOT``.

    Returns:
        ``(evidence, expected)`` as parsed JSON objects.
    """
    fdir = FIXTURE_ROOT / scenario
    evidence = json.loads((fdir / "evidence.json").read_text(encoding="utf-8"))
    expected = json.loads((fdir / "expected.json").read_text(encoding="utf-8"))
    return evidence, expected


def _next_scripted(it: Iterator, sequence_name: str):
    """Return the next scripted value, failing loudly when the script runs dry.

    A bare ``StopIteration`` escaping from a callback can be silently absorbed
    by an enclosing iteration context, so exhaustion is converted into an
    ``AssertionError`` that names the fixture sequence at fault.

    Args:
        it: Iterator over the scripted values.
        sequence_name: Key of the sequence in ``evidence.json`` (for the message).

    Raises:
        AssertionError: If *it* has no more values.
    """
    try:
        return next(it)
    except StopIteration:
        raise AssertionError(
            f"fixture sequence {sequence_name!r} exhausted: the runner requested "
            "more values than evidence.json provides"
        ) from None


def _make_ci_fn(evidence: dict):
    """Build a CI-status callable that replays ``evidence["ci_status_sequence"]``.

    Each call of the returned function yields the next ``CIStatusSnapshot`` in
    fixture order; the handoff argument is ignored.
    """
    snaps: list[CIStatusSnapshot] = [
        CIStatusSnapshot(
            status=raw["status"],
            failing_checks=tuple(raw.get("failing_checks", [])),
            severity=raw.get("severity", ""),
            forbidden_path_touched=raw.get("forbidden_path_touched", False),
            same_function_high_repeated=raw.get("same_function_high_repeated", False),
        )
        for raw in evidence.get("ci_status_sequence", [])
    ]
    it: Iterator[CIStatusSnapshot] = iter(snaps)

    def fn(_handoff: dict) -> CIStatusSnapshot:
        return _next_scripted(it, "ci_status_sequence")

    return fn


def _make_router_fn(evidence: dict):
    """Build a router callable that replays ``evidence["router_call_sequence"]``.

    Each call of the returned function yields the next ``RouterCallResult`` in
    fixture order; the handoff argument is ignored.
    """
    calls: list[RouterCallResult] = [
        RouterCallResult(
            final_state=raw["final_state"],
            permission_diagnostics_present=raw.get(
                "permission_diagnostics_present", False
            ),
            reason=raw.get("reason", ""),
        )
        for raw in evidence.get("router_call_sequence", [])
    ]
    it: Iterator[RouterCallResult] = iter(calls)

    def fn(_handoff: dict) -> RouterCallResult:
        return _next_scripted(it, "router_call_sequence")

    return fn


def _make_remediate_fn(evidence: dict):
    """Build a remediation callable replaying ``auto_remediation_outcome_sequence``.

    Returns:
        ``None`` when the fixture scripts no outcomes; otherwise a function
        that returns the next scripted outcome on each call and ignores both
        of its arguments.
    """
    outcomes = list(evidence.get("auto_remediation_outcome_sequence", []))
    if not outcomes:
        return None
    it = iter(outcomes)

    def fn(_handoff: dict, _ci_snap: CIStatusSnapshot) -> str:
        return _next_scripted(it, "auto_remediation_outcome_sequence")

    return fn


@pytest.mark.parametrize("scenario", SCENARIOS)
def test_fixture_scenario_runs_runner_to_expected_terminal_state(tmp_path, scenario):
    """Run the runner against one fixture scenario and check it against expected.json.

    The runner is wired exclusively to scripted callables and an in-memory
    callback capture. Assertions cover the terminal decision, the callback
    envelope (size bounds and required fields), optional counters, the audit
    event sequence, and the per-record audit schema.
    """
    evidence, expected = _load_fixture(scenario)
    audit = CiWatchHandoffAudit(tmp_path)
    capture: list = []

    def callback_send_fn(envelope: str):
        # Record the envelope and report its UTF-8 size in bytes.
        capture.append(envelope)
        return len(envelope.encode("utf-8"))

    runner = CiWatchHandoffRunner(
        workspace_root=tmp_path,
        ci_status_fn=_make_ci_fn(evidence),
        gemini_router_call_fn=_make_router_fn(evidence),
        auto_remediation_fn=_make_remediate_fn(evidence),
        callback_send_fn=callback_send_fn,
        audit=audit,
        max_polls=20,
        loop_boundary_attempts=3,
    )
    result = runner.run(
        evidence["handoff"],
        task_id=evidence.get("task_id", ""),
        watcher_schedule_id="sched-fixture",
    )

    # terminal_state matches
    assert result.decision.terminal_state == expected["terminal_state"], (
        f"[{scenario}] terminal_state mismatch: "
        f"expected={expected['terminal_state']!r}, "
        f"got={result.decision.terminal_state!r}; reason={result.decision.reason!r}"
    )

    # router_final_state (when explicit)
    if "router_final_state" in expected:
        assert result.decision.router_final_state == expected["router_final_state"], (
            f"[{scenario}] router_final_state mismatch: "
            f"expected={expected['router_final_state']!r}, "
            f"got={result.decision.router_final_state!r}"
        )

    # ci_status (when explicit)
    if "ci_status" in expected:
        assert result.decision.ci_status == expected["ci_status"], (
            f"[{scenario}] ci_status mismatch: "
            f"expected={expected['ci_status']!r}, "
            f"got={result.decision.ci_status!r}"
        )

    # callback_fired
    assert result.callback_fired == expected["callback_fired"], (
        f"[{scenario}] callback_fired mismatch: "
        f"expected={expected['callback_fired']}, got={result.callback_fired}"
    )

    # callback_prompt_bytes within range
    if expected["callback_fired"]:
        bmin = expected["callback_prompt_bytes_min"]
        bmax = expected["callback_prompt_bytes_max"]
        assert bmin <= result.callback_prompt_bytes <= bmax, (
            f"[{scenario}] callback_prompt_bytes={result.callback_prompt_bytes} "
            f"out of [{bmin}, {bmax}]"
        )
        # hard envelope limit
        assert result.callback_prompt_bytes <= CALLBACK_ENVELOPE_BYTE_LIMIT
        # Guard the index below so a missing envelope fails with a labelled
        # assertion instead of an IndexError.
        assert capture, (
            f"[{scenario}] callback_fired expected but callback_send_fn "
            "captured no envelope"
        )
        envelope = capture[0]
        # capture matches measured
        assert len(envelope.encode("utf-8")) == result.callback_prompt_bytes
        # canonical_root present in envelope
        assert f"canonical_root={CANONICAL_ROOT}" in envelope
        # terminal_state present in envelope
        assert f"terminal_state={expected['terminal_state']}" in envelope

    # auto_remediation_attempts (when explicit)
    if "auto_remediation_attempts" in expected:
        assert result.decision.auto_remediation_attempts == expected[
            "auto_remediation_attempts"
        ], (
            f"[{scenario}] auto_remediation_attempts mismatch: "
            f"expected={expected['auto_remediation_attempts']}, "
            f"got={result.decision.auto_remediation_attempts}"
        )

    # loop_iterations (when explicit)
    if "loop_iterations" in expected:
        assert result.decision.loop_iterations == expected["loop_iterations"], (
            f"[{scenario}] loop_iterations mismatch: "
            f"expected={expected['loop_iterations']}, "
            f"got={result.decision.loop_iterations}"
        )

    # audit_event_sequence — parse each audit line exactly once.
    lines = audit.path.read_text(encoding="utf-8").strip().splitlines()
    records = [json.loads(line) for line in lines]
    actual_events = [rec["event"] for rec in records]
    assert actual_events == expected["audit_event_sequence"], (
        f"[{scenario}] audit event sequence mismatch:\n"
        f" expected: {expected['audit_event_sequence']}\n"
        f" actual: {actual_events}"
    )

    # audit schema everywhere
    for rec in records:
        assert rec["schema"] == AUDIT_SCHEMA
        # disallowed key sentinels never present
        assert "token" not in rec
        assert "authorization" not in rec

    # reason_must_contain assertions
    for needle in expected.get("reason_must_contain", []):
        assert needle in result.decision.reason, (
            f"[{scenario}] reason {result.decision.reason!r} missing needle {needle!r}"
        )