
    Kii)                    \   d Z ddlmZ ddlZddlZddlZddlZddlmZ ddl	m
Z ddlmZ ddlmZmZmZmZ  ej&                  e      Z eej.                  j1                  d	d
            Z ej4                  d      Z	 d	 	 	 	 	 ddZddZ	 	 	 	 	 	 ddZddZ	 	 	 d	 	 	 	 	 	 	 	 	 	 	 ddZ y)zChange impact analysis for code review.

Maps git diffs to affected functions, flows, communities, and test coverage
gaps. Produces risk-scored, priority-ordered review guidance.
    )annotationsN)Any   )SECURITY_KEYWORDS)get_affected_flows)	GraphNode
GraphStore_sanitize_namenode_to_dictCRG_GIT_TIMEOUT30z^[A-Za-z0-9_.~^/@{}\-]+$c           	        t         j                  |      st        j                  d|       i S 	 t	        j
                  ddd|dgdddd| t        	      }|j                  d
k7  r0t        j                  d|j                  |j                  dd        i S 	 t        |j                        S # t        t        j                  f$ r"}t        j                  d|       i cY d}~S d}~ww xY w)aO  Run ``git diff --unified=0`` and extract changed line ranges per file.

    Args:
        repo_root: Absolute path to the repository root.
        base: Git ref to diff against (default: ``HEAD~1``).

    Returns:
        Mapping of file paths to lists of ``(start_line, end_line)`` tuples.
        Returns an empty dict on error.
    zInvalid git ref rejected: %sgitdiffz--unified=0z--Tzutf-8replace)capture_outputtextencodingerrorscwdtimeoutr   zgit diff failed (rc=%d): %sN   zgit diff error: %s)_SAFE_GIT_REFmatchloggerwarning
subprocessrun_GIT_TIMEOUT
returncodestderrOSErrorSubprocessError_parse_unified_diffstdout)	repo_rootbaseresultexcs       e/home/jay/workspace/scripts/.codegraph-venv/lib/python3.12/site-packages/code_review_graph/changes.pyparse_git_diff_rangesr+      s     t$5t<	FM46 
 !NN8&:K:KV]][_\_M`aI " v}}--	 Z//0 +S1	s   A#B) )C$CC$C$c                   i }d}t        j                  d      }t        j                  d      }| j                         D ]  }|j                  |      }|r|j	                  d      }(|j                  |      }|s<|?t        |j	                  d            }|j	                  d      rt        |j	                  d            nd}	|	dk(  r|}
n||	z   dz
  }
|j                  |g       j                  ||
f        |S )zParse unified diff output into file -> line-range mappings.

    Handles the ``@@ -old,count +new,count @@`` hunk header format.
    Nz^\+\+\+ b/(.+)$z^@@ .+? \+(\d+)(?:,(\d+))? @@r      r   )recompile
splitlinesr   groupint
setdefaultappend)	diff_textrangescurrent_filefile_patternhunk_patternline
file_match
hunk_matchstartcountends              r*   r$   r$   D   s    
 02F#L ::01L::>?L$$& E!''-
%++A.L!''-
,2
((+,E0:0@0@0CC
((+,Ezema'lB/66s|DE" M    c                   t               }g }|j                         D ]  \  }}| j                  |      }|s8| j                  |      }|D ]"  }|j	                  | j                  |             $ |D ]  }	|	j
                  |v r|	j                  |	j                  +|D ]R  \  }
}|	j                  |k  s|	j                  |
k\  s&|j                  |	       |j                  |	j
                             |S )zFind graph nodes whose line ranges overlap the changed lines.

    Args:
        store: The graph store.
        changed_ranges: Mapping of file paths to ``(start, end)`` tuples.

    Returns:
        Deduplicated list of overlapping graph nodes.
    )
setitemsget_nodes_by_fileget_files_matchingextendqualified_name
line_startline_endr4   add)storechanged_rangesseenr(   	file_pathr6   nodesmatched_pathsmpnoder=   r?   s               r*   map_changes_to_nodesrS   j   s     UD F+113 	6''	2!44Y?M# :U44R89:  
	D""d*&$--*?$ 
s??c)dmmu.DMM$'HHT001	
	* Mr@   c                   d}| j                  |j                        }|r|t        t        |      d      z  }n-| j	                  |j                        }|t        |dz  d      z  }| j                  |j                        }|D cg c]  }|j                  dk(  s| }}d}| j                  |j                        }	|	O|rM|D 
cg c]  }
|
j                   }}
| j                  |      }|j                         D ]  }|||	k7  s|dz  } |t        |dz  d      z  }| j                  |j                        }t        |      }|dt        |d	z  d
      dz  z
  z  }|j                  j                         |j                  j                         t!        fdt"        D              r|dz  }t        |      }|t        |dz  d      z  }t%        t        t'        |d      d
      d      S c c}w c c}
w )a  Compute a risk score (0.0 - 1.0) for a single node.

    Scoring factors:
      - Flow participation: 0.05 per flow membership, capped at 0.25
      - Community crossing: 0.05 per caller from a different community, capped at 0.15
      - Test coverage: 0.30 (untested) scaling down to 0.05 (5+ TESTED_BY edges)
      - Security sensitivity: 0.20 if name matches security keywords
      - Caller count: callers / 20, capped at 0.10
            g      ?g?CALLSr   r   g333333?g333333?g      @g      ?c              3  2   K   | ]  }|v xs |v   y w)N ).0kw
name_lowerqn_lowers     r*   	<genexpr>z%compute_risk_score.<locals>.<genexpr>   s"     
K"2-rX~-
Ks   g?g      4@g?   )get_flow_criticalities_for_nodeidminsumcount_flow_membershipsget_edges_by_targetrG   kindget_node_community_idsource_qualified$get_community_ids_by_qualified_namesvaluesget_transitive_testslennamelowerany_SECURITY_KEYWORDSroundmax)rK   rR   scoreflow_criticalities
flow_countcallersecaller_edgescross_communitynode_cidedge
caller_qnscid_mapcidtransitive_tests
test_countcaller_countr[   r\   s                    @@r*   compute_risk_scorer      s    E >>twwGS+,d3311$'':
Z$&-- ''(;(;<G&<!!&&G*;A<L<O**4773H8DEd++E
E<<ZH>># 	%C3(?1$	% 
S4'..E 11$2E2EF%&J	TSc)3/$677E "J""((*H

K8J
KK |$L	S$d++ES_c*A..9 = Fs   G/G/G4c           	        ||t        ||      }|rt        | |      }n)g }|D ]"  }|j                  | j                  |             $ |D cg c]  }|j                  dv r| }}g }	|D ].  }
t        | |
      }|	j                  i t        |
      d|i       0 t        d |	D        d      }t        | |      }g }|D ]  }
|
j                  r| j                  |
j                        }t        d |D              r>|j                  t        |
j                        t        |
j                        |
j                   |
j"                  |
j$                  d        t'        |	d	 d
      dd }dt)        |       ddt)        |       dd|d    ddt)        |       dd|dg}|r8|dd D cg c]  }|d   	 }}|j                  ddj+                  |              dj+                  |      ||	|d   ||dS c c}w c c}w )a  Analyze changes and produce risk-scored review guidance.

    Args:
        store: The graph store.
        changed_files: List of changed file paths.
        changed_ranges: Optional pre-parsed diff ranges. If not provided and
            ``repo_root`` is given, they are computed via git.
        repo_root: Repository root (for git diff).
        base: Git ref to diff against.

    Returns:
        Dict with ``summary``, ``risk_score``, ``changed_functions``,
        ``affected_flows``, ``test_gaps``, and ``review_priorities``.
    N)FunctionTestClass
risk_scorec              3  &   K   | ]	  }|d      yw)r   NrX   )rY   nrs     r*   r]   z"analyze_changes.<locals>.<genexpr>  s     >R<(>s   rU   )defaultc              3  :   K   | ]  }|j                   d k(    yw)	TESTED_BYN)re   )rY   rv   s     r*   r]   z"analyze_changes.<locals>.<genexpr>  s     9Q166[(9s   )rl   rG   filerH   rI   c                    | d   S )Nr   rX   )xs    r*   <lambda>z!analyze_changes.<locals>.<lambda>  s
    < r@   T)keyreverse
   z	Analyzed z changed file(s):z  - z changed function(s)/class(es)totalz affected flow(s)z test gap(s)z  - Overall risk score: z.2f   rl   z  - Untested: z, 
affected_flows)summaryr   changed_functionsr   	test_gapsreview_priorities)r+   rS   rF   rD   re   r   r4   r   rq   r   is_testrd   rG   rn   r
   rl   rN   rH   rI   sortedrk   join)rK   changed_filesrL   r&   r'   changed_nodesfpnchanged_funcs
node_risksrR   riskoverall_riskaffectedr   testedr   summary_partsg	gap_namess                       r*   analyze_changesr      sq   , )"7.y$? ,UNC  	>B  !8!8!<=	>
 !6622 	
M  (*J !%. 
4 
$
 	 >:>LL "%7H ')I <<**4+>+>?9&99&tyy1"01D1D"E"oo MM  z/HRVWX[Y[\ C&''89
s=!""@A
x !!23
s9~l+
"<"45M (1"161QvY6	6~dii	.B-CDE 99]+"'"#34. e^ 7s   G:.G?)HEAD~1)r&   strr'   r   return dict[str, list[tuple[int, int]]])r5   r   r   r   )rK   r	   rL   r   r   zlist[GraphNode])rK   r	   rR   r   r   float)NNr   )rK   r	   r   z	list[str]rL   z'dict[str, list[tuple[int, int]]] | Noner&   z
str | Noner'   r   r   zdict[str, Any])!__doc__
__future__r   loggingosr.   r   typingr   	constantsr   ro   flowsr   graphr   r	   r
   r   	getLogger__name__r   r2   environgetr   r/   r   r+   r$   rS   r   r   rX   r@   r*   <module>r      s   #  	 	   > % F F			8	$2::>>"3T:;

67 ".".
". &".JL%%4% %Z2/z ?C \\\ <\ 	\
 \ \r@   