
    Ki2                        d Z ddlmZ ddlZddlmZmZ ddlmZm	Z	  ej                  e      ZdddZ	 d	 	 	 	 	 ddZdd	Z	 d	 	 	 	 	 dd
Z	 	 	 	 ddZy)zcGraph analysis: hub detection, bridge nodes, knowledge gaps,
surprise scoring, suggested questions.    )annotationsN)Counterdefaultdict   )
GraphStore_sanitize_namec                f   | j                         }t               }t               }|D ]0  }||j                  xx   dz  cc<   ||j                  xx   dz  cc<   2 | j	                  d      }| j                         }g }|D ]  }	|	j                  }
|j                  |
d      }|j                  |
d      }||z   }|dk(  r>|j                  t        |	j                        |	j                  |	j                  |	j                  ||||j                  |
      d        |j                  d d       |d| S )	zFind the most connected nodes (highest in+out degree), excluding File nodes.

    Returns list of dicts with: name, qualified_name, kind, file,
    in_degree, out_degree, total_degree, community_id
    r   Texclude_filesr   )namequalified_namekindfile	in_degree
out_degreetotal_degreecommunity_idc                    | d   S )Nr    xs    f/home/jay/workspace/scripts/.codegraph-venv/lib/python3.12/site-packages/code_review_graph/analysis.py<lambda>z find_hub_nodes.<locals>.<lambda>3   s    a/     keyreverseN)get_all_edgesr   source_qualifiedtarget_qualifiedget_all_nodesget_all_community_idsr   getappendr   r   r   	file_pathsort)storetop_nedgesr   r   enodescommunity_mapscorednqnindoutdtotals                 r   find_hub_nodesr3      s:    !E '	I!(J +1%%&!+&!$$%*%+
 d3E//1MF mmB"~~b!$d
A:"166*..FFKK!)--b1	
 		$ KK/K>&5>r   c                   ddl }| j                         }|j                         }|dkD  r!t        d|      }|j	                  ||d      }n|dkD  r|j	                  |d      }ng S | j                         }| j                  d      D ci c]  }|j                  | }	}g }
|j                         D ]  \  }}|dk  s||	vr|	|   }|j                  d	k(  r%|
j                  t        |j                        |j                  |j                  |j                  t        |d
      |j                  |      d        |
j!                  d d       |
d| S c c}w )a1  Find nodes with highest betweenness centrality.

    These are architectural chokepoints that sit on shortest paths
    between many node pairs. If they break, multiple communities
    lose connectivity.

    Returns list of dicts with: name, qualified_name, kind, file,
    betweenness, community_id
    r   Ni  i  T)k
normalized)r6   r
   File   )r   r   r   r   betweennessr   c                    | d   S )Nr9   r   r   s    r   r   z#find_bridge_nodes.<locals>.<lambda>i   s    q/ r   r   )networkx_build_networkx_graphnumber_of_nodesminbetweenness_centralityr"   r!   r   itemsr   r$   r   r   r%   roundr#   r&   )r'   r(   nxnxgn_nodesr5   bcr,   r.   node_mapresultsr/   scores                r   find_bridge_nodesrI   7   sp     
%
%
'C !!#G~W&&saD&A	1&&st&<	//1M $$4$8 	
!H 
 GXXZ 	EA:8+RL66V"166*..FFKK ?)--b1
 	 LL/L>6E?-s   
Ec                0   | j                         }| j                  d      }| j                         }t               }t	               }|D ][  }||j
                  xx   dz  cc<   ||j                  xx   dz  cc<   |j                  dk(  sA|j                  |j
                         ] g }|D ]l  }|j                  |j                  d      }	|	dk  s%|j                  t        |j                        |j                  |j                  |j                  |	d       n | j                         }
g }|
D ]P  }|j                  dd      dk  s|j                  |d	   |j                  d
d      |j                  dd      d       R g }|D ]  }|j                  |j                  d      }	|	dk\  s%|j                  |vs4|j                   rA|j                  t        |j                        |j                  |j                  |j                  |	d        |j#                  d d       t%        t              }|D ]>  }|j                  |j                        }|!||   j                  |j                         @ g }|
D ]  }|d	   }|j                  |t	                     }t'        |      dk(  s1|j                  dd      dk\  sG|j                  ||j                  d
d      |j                  dd      t)        t+        |            d        |dd ||dd |dS )aD  Identify structural weaknesses in the codebase graph.

    Returns dict with categories:
    - isolated_nodes: degree <= 1, disconnected from graph
    - thin_communities: fewer than 3 members
    - untested_hotspots: high-degree nodes with no TESTED_BY edges
    - single_file_communities: entire community in one file
    Tr
   r   	TESTED_BYr   )r   r   r   r   degreesize   idr    )r   r   rM      c                    | d   S )NrL   r   r   s    r   r   z%find_knowledge_gaps.<locals>.<lambda>   s
    ak r   r   N)r   r   rM   r   2      )isolated_nodesthin_communitiesuntested_hotspotssingle_file_communities)r   r!   r"   r   setr   r    r   addr#   r   r$   r   r   r%   get_communities_listis_testr&   r   lennextiter)r'   r)   r+   r,   rL   tested_nodesr*   isolatedr.   dcommunitiesthincrW   
comm_filescidsingle_filefiless                     r   find_knowledge_gapsrj   m   s    !Ed3E//1M %YF UL 1q!!"a'"q!!"a'"66[ Q//0	1 H 	JJq''+6OO&qvv."#"2"2 	 ,,.KD 55aKK !$fb)fa(   JJq''+F$$L8		$$&qvv."#"2"2&  !4  
 '2#&6J - 0 01?sO,-
 K 	gsCE*u:?quuVQ/14 #fb)fa(T%[)	  		 #3B- .s3#.	 r   c                   | j                         }| j                  d      }| j                         }|D ci c]  }|j                  | }}t	               }|D ]0  }||j
                  xx   dz  cc<   ||j                  xx   dz  cc<   2 |j                         D 	cg c]
  }	|	dkD  s	|	 }
}	|
sg S t        |
      t        |
      dz     }t        |dz  d      }g }|D ]w  }|j                  |j
                        }|j                  |j                        }|r|s?|j                  dk(  s|j                  dk(  r^d	}g }|j                  |j
                        }|j                  |j                        }||||k7  r|dz  }|j                  d       d|j                  v r|j                  j                  dd      d   nd}d|j                  v r|j                  j                  dd      d   nd}|r|r||k7  r|dz  }|j                  d       |j                  |j
                  d      }|j                  |j                  d      }|dk  r||k\  s
|dk  r||k\  r|dz  }|j                  d       |j                   |j                   k7  r%|j                  dk(  r|dz  }|j                  d       |j                  dk(  r%|j                  dk(  r|dz  }|j                  d       |dkD  s|j                  t#        |j$                        |j
                  t#        |j$                        |j                  |j                  t'        |d      |||d	       z |j)                  d d       |d
| S c c}w c c}	w )a~  Find edges with high surprise scores.

    Detects unexpected architectural coupling based on:
    - Cross-community: source and target in different communities
    - Cross-language: different file languages
    - Peripheral-to-hub: low-degree node to high-degree node
    - Cross-file-type: test calling production or vice versa
    - Non-standard edge kind for the node types
    Tr
   r   r      rN   
   r7   g        Ng333333?cross-community.rP   g?zcross-languagezperipheral-to-hubCALLSg333333?zcross-test-boundaryTypezunusual-edge-kind)	sourcer   targetr    	edge_kindsurprise_scorereasonssource_communitytarget_communityc                    | d   S )Nrv   r   r   s    r   r   z-find_surprising_connections.<locals>.<lambda>*  s    a() r   r   )r   r!   r"   r   r   r   r    valuessortedr]   maxr#   r   r$   r%   rsplitr\   r   r   rA   r&   )r'   r(   r)   r+   r,   r.   rF   rL   r*   rb   degrees
median_deghigh_deg_thresholdscored_edgessrctgtrH   rw   src_cidtgt_cidsrc_langtgt_langsrc_degtgt_degs                           r   find_surprising_connectionsr      s    !Ed3E//1M-23  !#3H3 %YF (q!!"a'"q!!"a'"(
 !--/3QQUq3G3	W!23JZ!^R0L ?ll1--.ll1--.#88vV!3  ##A$6$67##A$6$67'w&SLENN,-
 cmm# MM  a(,)+ 	 cmm# MM  a(,)+ 	 X%9SLENN+, **Q//3**Q//3\g);;qL#55SLENN./ ;;#++%!&&G*;TMENN01 66WV!3TMENN./19(2$%$6$6(2$%$6$6VV"'q/"$+$+
! 
k?B )4   i 4 4s   M4
M9)M9c                   g }t        | d      }|D ]"  }|j                  dd|d    d|d   dd	       $ t        | d      }| j                         }|D ch c]  }|j                  d
k(  r|j
                   }}|D ]0  }|d   |vs|j                  dd|d    d|d    d|d   dd	       2 t        | d      }	|	D ]<  }
d|
d   v s|j                  dd|
d    d|
d    d|
d    d|
d    d	|
d   dd	       > t        |       }|d   dd D ]+  }|j                  dd |d    d!|d"    d#d$|d%    d&d	       - |d'   dd D ](  }|j                  d(d|d    d|d)    d*|d   dd	       * |S c c}w )+am  Auto-generate review questions from graph analysis.

    Categories:
    - bridge_node: Why does X connect communities A and B?
    - isolated_node: Is X dead code or dynamically invoked?
    - low_cohesion: Should community X be split?
    - hub_risk: Does hub node X have adequate test coverage?
    - surprising: Why does A call B across community boundary?
    rN   )r(   bridge_node'r   z`' is a critical connector between multiple code regions. Is it adequately tested and documented?r   high)categoryquestionrt   priorityrK   hub_riskz
Hub node 'z' has r   z> connections but no direct test coverage. Should it be tested?rn   rw   surprising_connectionrs   z' (community rx   z	) calls 'rt   ry   z ). Is this coupling intentional?r   mediumrV   Nrl   thin_communityzCommunity 'z' has only rM   z0 member(s). Should it be merged with a neighbor?z
community:r   lowrW   untested_hotspotrL   z2 connections but no test coverage. Is this a risk?)rI   r$   r3   r   r   r   r   rj   )r'   	questionsbridgesbhubsr)   r*   testedh	surprisessgapsre   s                r   generate_suggested_questionsr   /  s~    I  Q/G 
%AfI; 4 5 ()	
 		
 %q)D!E !&&K/ 	
F   f,& 66() *
 ,-"
 
 ,E;I ),3(}M+,- .(}M+,- .,- ./$   u%D$%bq) 
(ai[V9+ *+ #1^#4"56	
 		
 %&r* 
*AfI;fQx[M 2" # () 	
 		
 }s   "E5)rm   )r'   r   r(   intreturn
list[dict])r'   r   r   zdict[str, list[dict]])   )r'   r   r   r   )__doc__
__future__r   loggingcollectionsr   r   graphr   r   	getLogger__name__loggerr3   rI   rj   r   r   r   r   r   <module>r      s   * #  , -			8	$&T %'33!33lXx %'d d !d d N]]]r   