
    3jU                       d Z ddlmZ ddlZddlZddlZddlZddlmZm	Z	 ddl
mZ ddlmZmZmZmZmZ ddlZdZdZd	Zd
Z ee      j1                         j2                  j2                  Zedz  dz  ZdZdZdZdZdZ dZ!dUdZ"dVdZ#dVdZ$dWdZ%e G d d             Z& G d d      Z'dd	 	 	 	 	 dXdZ(ddddd d!fd"dd#dd d$fd%dd&dd d'fd(dd)dd d*fd+dd,dd d-fd.dd/dd d0fd1dd2dd d3fd4dd5dd6dfd7dd8dd6dfd9dd:dd6dfd;dd<dd d=fd>d?d@dd6dfdAddBdCdDd d*fdEddFdGgdHd d-fdIddJdd d3fdKddLdd d$fdMddNdOgdPd d=fgZ)dYdQZ*dZd[dRZ+g dSZ,e-dTk(  r e. e+             y)\u>  anu_v3.critical7_classifier — Critical7 분류기 (task-2611 Track B).

회장 verbatim 2026-05-19 §2: Codex HIGH/HOLD 한 건이 Critical7
(보안·credential·permission·forbidden-path·scope-expansion·merge-write·
OWNER PAT) 계열인지 **코드로** 판정한다. Critical7 = CHAIR_HOLD(회장 보고),
아니면 non-Critical → AUTO_REMEDIATION_HOLD 후보.

단일 진실원: ``config/critical7_rules.yaml`` (7 family ruleset). 규칙을 코드에
중복 구현하지 않는다(drift 방지). shared invariant 파손은 Critical7 동급으로
CHAIR_HOLD (회장 §6).

Layer A / NO-CRON: 순수 결정 함수. ZERO cron / dispatch / subprocess /
cokacdir / GitHub / 파일쓰기. CLI ``--selftest`` 는 실 entrypoint regression
(mock-only 면 FAIL — 상수 분류기는 케이스별 verdict 불일치로 탈락).
    )annotationsN)	dataclassfield)Path)AnyDictListOptionalSequencezanu.critical7_classification.v1PASS
CHAIR_HOLDAUTO_REMEDIATION_HOLDconfigzcritical7_rules.yaml)security
credential
permissionforbidden_pathscope_expansionmerge_write	owner_pat).z no z(no z no_z(zero z zero z = 0z= 0,z = 0.z =0z==0z == 0z 0 z: 0,z: 0.z: 0
z: 0"zcount=0zcount: 0zcount:0z 0,z 0.z 0
zclaim: 0zclaim:0z	 is falsez = falsez=falsez false flagu   없음u   없다u   없습니다u   없었u   아님u   아닙니다u   금지u   거부u   허위u   거짓u    0회u   0회의u    0건u   0건의u    0개u   0개의u   절대 사용 불가u    사용 불가)	
refutationrefutedrefutezfalse positivezfalse-positivezintended_is_critical7=falsezintended_is_critical7: falsez forbidden 
prohibited(   )z. z.

z? z! z; z | c                    d}d}	 | j                  ||      }|dk(  r	 | S t        | ||t        |      z         sd}	 | S |t        |      z   }I)u  keyword 매치가 negation/REFUTATION 문맥에 등장하는지 확인.

    SAFE 방향만:
      * 한 문서 안에서 keyword 가 negation 없는 진짜 신호로 한 번이라도 등장
        하면(any non-negated occurrence) False — detection coverage 감소 0.
      * 모든 occurrence 가 negation 또는 구조 REFUTATION 마커와 동반될 때만
        True 반환(false-positive 차단).

    회장 verbatim §2.3: '0/no/없음/금지/forbidden/refute/false 동반·
    REFUTATION 패턴 인식'.
    r   FT)find_occurrence_is_negatedlen)	hay_lower
keyword_lcstartany_non_negatedidxs        2/home/jay/workspace/anu_v3/critical7_classifier.py_keyword_is_negatedr(   b   ss     EO
nnZ/"9
 	 &icC
O6KL"O c*o%     c                    t        | ||      S )u=   regex 매치의 위치가 negation/REFUTATION context 인지.)r    )r"   	start_idxend_idxs      r'   _regex_match_is_negatedr-   {   s    !)Y@@r)   c                    t        d|t        z
        }t        t        |       |t        z         }| || }t
        D ]  }||v s y t        | ||      }t        D ]  }||v s y y)Nr   TF)max_NEGATION_WINDOW_BEFOREminr!   _NEGATION_WINDOW_AFTER_TIGHT_NEGATION_TOKENS_enclosing_sentence_STRUCTURAL_REFUTATION_MARKERS)	r"   r+   r,   lo_thi_ttighttoksentencemks	            r'   r    r       s}    q)556Ds9~w)??@Dd4 E% %<
 #9iAH, > r)   c                    d}t         D ]3  }| j                  |d|      }|dk7  st        ||t        |      z         }5 t        |       }t         D ]&  }| j	                  ||      }|dk7  st        ||      }( | || S )u   keyword 를 둘러싼 한 '문장'을 추출. 문장 경계는 ``. / \n / ? / ! / ;``.

    완벽한 NL 문장 분할은 아니지만 false-positive/negative 균형에 충분 —
    SAFE 방향만(약화 없음).
    r   r   )_SENTENCE_BOUNDARIESrfindr/   r!   r   r1   )r"   r+   r,   
sent_startbpossent_ends          r'   r4   r4      s     J! 7ooaI."9Zs1v6J7
 9~H! *nnQ("98S)H* Z))r)   c                      e Zd ZU ded<   ded<   ded<   ded<   ded<   ded	<   ded
<   ded<    ee      Zded<   edd       ZddZ	y)Critical7Resultstrschema
finding_idboolis_critical7is_invariant_breakverdictzOptional[str]matched_rule_idfamily	List[str]matched_terms)default_factoryreasonsc                (    | j                   t        k(  S N)rK   r   selfs    r'   
chair_holdzCritical7Result.chair_hold   s    ||z))r)   c                    | j                   | j                  | j                  | j                  | j                  | j
                  | j                  t        | j                        t        | j                        d	S )N	rF   rG   rI   rJ   rK   rL   rM   rO   rQ   )
rF   rG   rI   rJ   rK   rL   rM   listrO   rQ   rT   s    r'   to_jsonzCritical7Result.to_json   s]    kk// --"&"9"9||#33kk!$"4"45DLL)

 
	
r)   N)returnrH   )r[   dict)
__name__
__module____qualname____annotations__r   rY   rQ   propertyrV   rZ    r)   r'   rD   rD      sU    KOL""t4GY4* *
r)   rD   c                      e Zd ZdZddZddZeddd       Zedd       Z	edd       Z
ddZ	 	 	 	 	 	 	 	 dd	Zdd
Zy)Critical7RulesetuQ   config/critical7_rules.yaml 로더 + 분류 엔진. 룰은 yaml 단일 진실원.c                   || _         |j                  d      | _        |j                  d      | _        |j                  dddg      D ch c]  }|j	                          c}| _        t        |j                  dg             | _        |j                  dg       D cg c]  }|j                          c}| _	        t        |j                  dd	            | _        | j                          y c c}w c c}w )
NrF   versionseverity_in_scopeHIGHCRITICALrulesinvariant_break_signalsinvariant_break_is_chair_holdT)rawgetrF   rf   upperrg   rY   rj   lowerrk   rH   rl   _assert_ruleset_not_weakened)rU   rj   ss      r'   __init__zCritical7Ruleset.__init__   s    ii)yy+$yy)<vz>RS"
AGGI"
 ,0		'20F+G
$yy)BBG(
AGGI(
$ .2II5t<.
* 	))+"
(
s   
C-C2c                    | j                   D ch c]  }|j                  d       }}t        D cg c]	  }||vs| }}|r t        ddj	                  |      z   dz         y c c}w c c}w )NrM   u2   Critical7 ruleset weakened — missing family(s): , u0    (회장 §5 분류 기준 임의 약화 금지))rj   rn   CRITICAL7_FAMILIES
ValueErrorjoin)rU   rpresentfmissings        r'   rq   z-Critical7Ruleset._assert_ruleset_not_weakened   su    ,0JJ7q155?770EAW4D1EED))G$%DE   8Es   A$	A)A)Nc                    |rt        |      nt        }t        |dd      5 } | t        j                  |            cd d d        S # 1 sw Y   y xY w)Nry   utf-8encoding)r   _DEFAULT_RULESopenyaml	safe_load)clspathpfhs       r'   loadzCritical7Ruleset.load   sC    DJN!S7+ 	+rt~~b)*	+ 	+ 	+s   AAc                Z   g }dD ]0  }| j                  |      }|s|j                  t        |             2 dD ]_  }| j                  |      }|st        |t        t
        f      r|j                  d |D               F|j                  t        |             a dj                  |      S )N)messagetexttitledetailactionsummary)pathsfilesr   filec              3  2   K   | ]  }t        |        y wrS   )rE   .0xs     r'   	<genexpr>z-Critical7Ruleset._haystack.<locals>.<genexpr>   s     0SV0s    
 )rn   appendrE   
isinstancerY   tupleextendrx   )findingpartskeyvpkeypvs         r'   	_haystackzCritical7Ruleset._haystack   s    N 	%CC ASV$	% 7 	&DT"B"tUm,0R00SW%	& {{5!!r)   c                    g }dD ]m  }| j                  |      }t        |t        t        f      r|j	                  d |D               C|sF|j                  t        |      j                                o |S )N)category
categoriesruletypetagtagsc              3  N   K   | ]  }t        |      j                           y wrS   )rE   rp   r   s     r'   r   z/Critical7Ruleset._categories.<locals>.<genexpr>  s     6qCFLLN6s   #%)rn   r   rY   r   r   r   rE   rp   )r   catsr   r   s       r'   _categorieszCritical7Ruleset._categories   sd    L 	,CC A!dE]+6A66CFLLN+	, r)   c                L   | j                  |      j                         }|dz   dj                  | j                  |            z   }t	        j
                  dd|      }g }| j                  D ]6  }|st	        j
                  dd|      }||v s||v s&|j                  |       8 |S )Nr   z[-_]+ )r   rp   rx   r   resubrk   r   )rU   r   blobnormhitssigsig_norms          r'   detect_invariant_breakz'Critical7Ruleset.detect_invariant_break
  s     ~~g&,,.f}v{{4+;+;G+DEEvvhT*// 	!CvvhS1Hd{h$.C 	! r)   c                   g }|j                  di       xs i }|j                  dg       xs g D ]2  }t        |      j                         |v s|j                  d|        4 |j                  dg       xs g D ]D  }|st        |      j                         }||v s$t	        ||      r1|j                  d|        F |j                  dg       xs g D ]X  }		 t        j                  |	|      }
|
r=t        ||
j                         |
j                               s|j                  d|	        Z |S # t
        j                  $ r Y rw xY w)Nmatchr   z	category:keywordszkw:regexzre:)rn   rE   rp   r   r(   r   searchr-   r$   enderror)rU   r   r"   r   r   mcatkwkw_lcrxm_objs              r'   _rule_matchzCritical7Ruleset._rule_match  sE    HHWb!'R55r*0b 	/C3x~~4'iu-.	/ %%
B'-2 	(BGMMOE	!*=i*Oc"J'	( %%$* 	B		"i0!8u{{}eiik" KK#bT
+	  88 s   AD44E
	E
c                
   t        |j                  d      xs |j                  d      xs d      }t        |j                  dd            j                         }g }| j                  |      }|rK| j                  r?|j                  ddj                  |      z          t        t        |ddt        d	d
||	      S || j                  vrG|j                  d| dt        | j                         d       t        t        |ddt        d d g |	      S | j                  |      j                         }| j                  |      }| j                   D ]  }| j#                  |||      }	|	s|j                  d|j                  d       d|j                  d       d       t        t        |ddt        |j                  d      |j                  d      |	|	      c S  |j                  d       t        t        |ddt        d d g |	      S )NidrG   unknownseverityrh   uD   shared invariant 파손 = Critical7 동급 CHAIR_HOLD (회장 §6): ru   TINVARIANT_BREAKinvariant_breakrX   z	severity z not in scope u    — Critical7 후보 아님Fu   Critical7 매치 rule=z family=rM   z -> CHAIR_HOLDuo   Critical7 어느 rule 에도 미매치 — non-Critical (AUTO_REMEDIATION_HOLD 후보, 회장 §3 자동수렴))rE   rn   ro   r   rl   r   rx   rD   CLASSIFIER_SCHEMAr   rg   sortedr   r   rp   r   rj   r   )
rU   r   fidr   rQ   invr"   r   r   r   s
             r'   classifyzCritical7Ruleset.classify<  s   '++d#Mw{{<'@MINw{{:v67==?))'2455NNV))C.! #(!#'" 1(!
 
 4111NNH:^F4;Q;Q4R3S./ #("#( $ 
 
 NN7+113	(JJ 	D##D)T:D,TXXd^,< ="hhx01A ',"!%',&$(HHTN88H-"&#
 
	& 	F	
 $$ 

 
	
r)   )rj   Dict[str, Any])r[   NonerS   )r   zOptional[Path]r[   z'Critical7Ruleset')r   r   r[   rE   )r   r   r[   rN   )r   r   r"   rE   r   zSequence[str]r[   rN   )r   r   r[   rD   )r]   r^   r_   __doc__rs   rq   classmethodr   staticmethodr   r   r   r   r   rb   r)   r'   rd   rd      s{    [," + + " "*  ("/2:G	<K
r)   rd   )rulesetc               T    |xs t         j                         }|j                  |       S )u,   단건 Critical7 분류 (공개 entrypoint).)rd   r   r   )r   r   rss      r'   classify_critical7r     s'     
	+$))+B;;wr)   zf-secrh   z1potential SQL injection in query builder (CWE-89))r   r   r   Tr   zf-credz0hardcoded api key ghp_ABCDEFGH12345678 committedr   zf-permz3script runs chmod 777 on output dir, world-writabler   zf-pathu0   write outside allowlist — wrote to .git/configr   zf-scopez5scope expansion: modified files not in task allowlistr   zf-mergez!executor ran git push origin mainr   zf-patz)used OWNER PAT to authenticate GitHub APIr   zf-covz7branch coverage dropped below threshold, uncovered codeFzf-testz1failing test: assertion error in regression suitezf-claimu/   claim mismatch — doc-only, no real entrypointzf-invu4   frozen anchor modified — byte-0 violation detectedr   zf-lowLOWz"SQL injection note (informational)zf-path-structz$unexpected write detected during runz.git/config)r   r   r   r   zf-scope-structz#out-of-scope write to another trackzteams/other/artifact.json)r   r   r   r   z
f-pat-barez+used a fine-grained PAT for GitHub API callzf-pat-wordsz)leaked personal access token in build logz	f-inv-catshared_invariant_breachzfrozen-anchor-modified)r   r   r   r   c            
        t         j                         } g }t        D ]  \  }}}| j                  |      }|j                  |k7  r&|j                  |d    d|j                   d|        |5|j                  |k7  r&|j                  |d    d|j                   d|        |s|j                  t        k7  s|j                  |d    d|j                   d        t        d t        D              }|s|j                  d       	 | j                          d
t        t        t        j                  t                     | j"                  t%        t              |st%        t              dz
  nd ||sdnddd	}t'        t)        j*                  |dd             |sdS dS # t        $ r}|j                  d	|        Y d }~d }~ww xY w)Nr   z: is_critical7=z
 expected z	: family=z
: verdict=z expected CHAIR_HOLDc              3  (   K   | ]
  }|d       yw)   Nrb   )r   cases     r'   r   z_selftest.<locals>.<genexpr>  s      !T!W!s   z1mock_only_guard: ruleset has no non-critical casezruleset weakened: zanu_v3.critical7_classifierr   r   FAILT)	modulerF   r   ruleset_versioncasespassedfailuresrK   mock_only_would_failF   ensure_asciiindentr   )rd   r   _SELFTEST_CASESr   rI   r   rM   rK   r   anyrq   rw   r   rE   r   relative_to_ROOTrf   r!   printjsondumps)	r   r   r   exp_c7
exp_familyry   const_chair_would_faileresults	            r'   	_selftestr     s   				 BH'6 #KK >>V#OO4=/0@
6(S !ahh*&<OO4=/188*JzlK aii:-OO4=/AII;6JK" ! !"1!  "KL2
'')
 0#~11%89::_%2:#o&*!)6v $
F 
$**V%
:;1#!#  2,QC0112s   F 	G'G  Gc                &   t        j                  d      }|j                  ddd       |j                  dt        d d	       |j                  d
t        d d	       |j	                  | t        |       nd       }|j                  r
t               S |j                  r%t        |j                        j                  d      nt        j                  j                         }t        j                  |      }t         j#                  |j$                  rt        |j$                        nd       }t'        |t
              r|n|g}|D cg c]!  }|j)                  |      j+                         # }}t-        t        j.                  |dd             yc c}w )Nz(Critical7 classifier (task-2611 Track B))descriptionz
--selftest
store_trueu    실 entrypoint regression 실행)r   helpz--inputu,   finding JSON 파일 경로 (없으면 stdin))r   defaultr   z--rulesu$   critical7_rules.yaml 경로 overrider~   r   Fr   r   r   )argparseArgumentParseradd_argumentrE   
parse_argsrY   selftestr   inputr   	read_textsysstdinreadr   loadsrd   r   rj   r   r   rZ   r   r   )	argvapargsrm   payloadr   findingsr{   outs	            r'   _mainr    sB   		 	 -W	XBOOL;  =OOICG  IOOIC?  A==t'7dTBD}}{ jj 

%%w%7!iinn. jjoG			4::tDJJ/4	HB$Wd3w'H-5
62;;q>!!#
6C
6	$**SuQ
78 7s   &F)r   r   r   r   rv   rD   rd   r   __main__)r"   rE   r#   rE   r[   rH   )r"   rE   r+   intr,   r  r[   rH   )r"   rE   r+   r  r,   r  r[   rE   )r   r   r   zOptional[Critical7Ruleset]r[   rD   )r[   r  rS   )r	  zOptional[Sequence[str]]r[   r  )/r   
__future__r   r   r   r   r  dataclassesr   r   pathlibr   typingr   r   r	   r
   r   r   r   r   r   r   __file__resolveparentr   r   rv   r3   r5   r0   r2   r=   r(   r-   r    r4   rD   rd   r   r   r   r  __all__r]   
SystemExitrb   r)   r'   <module>r     s   #   	 
 (  6 6 5 
/ X ''..!$:: 4 ("     D 2A
"** 
 
 
:C
 C
N GK  )C   DF	: &CE	< &FH	< &CE	 6HJ	 646	= <>	; JL
D &DF
D 6BD
D GI	 57
D
 7 
 &6+,. 
 f>@	; v<>	< V+'(* 
k9x,$^.	 z
UW
 r)   