
    Kio                    B   U d Z ddlmZ ddlZddlZddlmZmZ ddlm	Z	 ddl
mZmZmZmZ  ej                  e      Z	 ddlZdZd
dddddddZded<    eh d      Zd'dZd(dZd)dZd*dZd+dZ	 	 	 	 	 	 d,dZd-dZ 	 d.	 	 	 	 	 	 	 d/dZ!	 d.	 	 	 	 	 	 	 	 	 d0dZ"	 d.	 	 	 	 	 	 	 	 	 d0dZ#	 	 d1	 	 	 	 	 	 	 	 	 	 	 d2dZ$	 d3	 	 	 	 	 d4d Z%	 d3	 	 	 	 	 	 	 d5d!Z&	 	 	 	 	 	 d6d"Z'	 d7	 	 	 	 	 	 	 d8d#Z( ejR                  d$ejT                        Z+d9d%Z,d:d&Z-y# e$ r dZd	ZY w xY w);zCommunity/cluster detection for the code knowledge graph.

Detects communities of related code nodes using the Leiden algorithm (via igraph,
optional) with a file-based grouping fallback when igraph is not installed.
    )annotationsN)Counterdefaultdict)Any   )	GraphEdge	GraphNode
GraphStore_sanitize_nameTF      ?      ?g?gffffff?g333333?皙?g333333?)CALLSIMPORTS_FROMINHERITS
IMPLEMENTSCONTAINS	TESTED_BY
DEPENDS_ONzdict[str, float]EDGE_WEIGHTS>"   atbydoinismyofontoaddallandforgethasnewrunsetthefrominitmainmakenoneselftestthatthiswithbuildcreatedeleteremoveupdatec                   | sy| D cg c]  }|j                    }}t        |      }| D cg c]  }|j                  dk(  s|j                    }}|rPt	        |      }|j                  d      d   \  }}|t        |       dz  kD  r|r| dt        |       S t        |      S t        |       }|r|d   nd}	|r	|	r| d|	 S |r|S |	r|	S yc c}w c c}w )	a%  Generate a meaningful name for a community of nodes.

    Algorithm:
    1. Find most common module/file prefix among members
    2. If a dominant class exists (>40% of nodes), use its name
    3. Fallback: most frequent keyword in function/class names
    4. Format: "{prefix}-{keyword}"
    emptyClassr   r   r   - cluster)		file_path_extract_file_prefixkindnamer   most_commonlen_to_slug_extract_keywords)
membersm
file_pathsprefixclass_namesclass_counts	top_class	top_countkeywordskeywords
             i/home/jay/workspace/scripts/.codegraph-venv/lib/python3.12/site-packages/code_review_graph/communities.py_generate_community_namerR   :   s      (//!!++/J/!*-F $+@aaff.?166@K@{++77:1=	9s7|c)) 8I#6"788I&& !)H%hqk2G'7)$$/ 0 As   CCCc                N   | syg }| D ]o  }|j                  dd      j                  d      }t        |      dk\  r|j                  |d          G|d   j	                  dd      d	   }|j                  |       q t        |      }|j                  d      d	   \  }}t        |      S )
zDFind the most common short directory or module name from file paths.r=   \/   .r   r   )replacesplitrD   appendrsplitr   rC   rE   )rI   partsfpsegmentsstemcountstop_part_s           rQ   r@   r@   a   s    E ::dC(..s3x=ALL"&B<&&sA.q1DLL U^F$$Q'*KHaH    c                @   t               }| D ]b  }|j                  dv st        |j                        }|D ]7  }|j	                         }|t
        vst        |      dkD  s+||xx   dz  cc<   9 d |sg S |j                  d      D cg c]  \  }}|	 c}}S c c}}w )z@Extract the most frequent meaningful keywords from member names.)Functionr;   TestTyper      )r   rA   _split_namerB   lower_COMMON_WORDSrD   rC   )rG   word_countsrH   wordswwlrd   s          rQ   rF   rF   v   s     '	K )66::'E )WWY]*s2w{Oq(O)) 	%11!45$!QA555s   
Bc                    t        j                  dd|       }t        j                  d|      D cg c]  }|s|	 c}S c c}w )z0Split a camelCase or snake_case name into words.z([a-z])([A-Z])z\1_\2z	[_\-.\s]+)resubr[   )rB   sps      rQ   rk   rk      s8     	 (D1Axxa06!AA666s   ??c                p    t        j                  dd| j                               j                  d      dd S )z+Convert a string to a short lowercase slug.z
[^a-z0-9]+r<   N   )rs   rt   rl   strip)ru   s    rQ   rE   rE      s-    66-aggi066s;CR@@re   c                   i }t        |       D ]  \  }}|D ]  }|||<   	  t        |       }dg|z  }dg|z  }|D ]s  }	|j                  |	j                        }
|j                  |	j                        }|
|>|
|k(  r|
J ||
xx   dz  cc<   U|
||
xx   dz  cc<   |g||xx   dz  cc<   u g }t        |      D ]+  }||   ||   z   }|j                  |dkD  r||   |z  nd       - |S )a	  Compute cohesion for multiple communities in a single O(edges) pass.

    Builds a ``qualified_name -> community_index`` reverse map (each node
    appears in at most one community since all callers produce partitions),
    then walks every edge exactly once, bucketing it into internal/external
    counters per community.

    Total work: O(edges + sum(|members|)) instead of
    O(edges * communities) for naive per-community cohesion.

    Returns a list of cohesion scores aligned with ``community_member_qns``.
    r   r           )	enumeraterD   r$   source_qualifiedtarget_qualifiedranger\   )community_member_qns	all_edges	qn_to_idxidxrG   qnninternalexternalesctcresultsitotals                  rQ   _compute_cohesion_batchr      s7     !#I!"67  W 	 BIbM	   	 !AsQwHsQwH "]]1--.]]1--.:"*8>!>RLAL~!~!" G1X Bhqk)eaix{U*SAB Nre   c                    t        t              }| D ]R  }||j                     j                  |j                         ||j                     j                  |j                         T |S )z:Build adjacency list from edges (one pass over all edges).)r   listr}   r\   r~   )edgesadjr   s      rQ   _build_adjacencyr      s`     +D 1C ;A&&q'9'9:A&&q'9'9:; Jre   c                "    t        | g|      d   S )zCompute cohesion: internal_edges / (internal_edges + external_edges).

    For multiple communities, prefer :func:`_compute_cohesion_batch`, which
    runs in O(edges) total instead of O(edges) per community.
    r   )r   )
member_qnsr   r   s      rQ   _compute_cohesionr      s     #J<;A>>re   c                   t         g S i }i }t        |       D ]  \  }}|||j                  <   |||<    |sg S t        j	                  dt        |             t        j                  t        |      d      }g }	g }
t               }|D ]  }|j                  |j                        }|j                  |j                        }|<|?||k7  sEt        ||      t        ||      f}||vsb|j                  |       |	j                  |       |
j                  t        j                  |j                   d              |	st#        | |||      S |j%                  |	       |
|j&                  d<   ddl}|j+                         }t        d	d
|j-                  t        |d            z        }t        j	                  d|j+                         |j/                                |j1                  dd|d      }t        j	                  dt        |             g }|D ]c  }t        |      |k  r|D cg c]  }||v s||    }}t        |      |k  r8|D ch c]  }|j                   }}|j                  ||f       e t3        |D cg c]  }|d   	 c}|      }g }t5        ||      D ]  \  \  }}}t7        d |D              }|r|j9                  d      d   d   nd}t;        |      }|j                  |dt        |      t=        |d      |dt        |       d|D cg c]  }|j                   c}|d        t        j	                  dt        |             |S c c}w c c}w c c}w c c}w )a  Detect communities using Leiden algorithm via igraph.

    Caps Leiden at ``n_iterations=2`` (sufficient for code dependency graphs)
    and skips the recursive sub-community splitting pass that caused
    exponential blow-up on large repos (>100k nodes).
    Nz Building igraph with %d nodes...F)r   directedr   r   weightr   g?r   
   z'Running Leiden on %d nodes, %d edges...
modularityrV   )objective_functionweights
resolutionn_iterationsz;Leiden complete, found %d partitions. Computing cohesion...r   c              3  N   K   | ]  }|j                   s|j                     y wNlanguage.0rH   s     rQ   	<genexpr>z!_detect_leiden.<locals>.<genexpr>7       HQQZZajjH   %%r=      zCommunity of z nodesrB   levelsizecohesiondominant_languagedescriptionrG   r   z,Community detection complete: %d communities)igr|   qualified_nameloggerinforD   Graphr(   r$   r}   r~   minmaxr    r\   r   rA   _detect_file_based	add_edgesesmathvcountlog10ecountcommunity_leidenr   zipr   rC   rR   round) nodesr   min_sizer   r   idx_to_noder   nodeg	edge_listr   
seen_edgesr   src_idxtgt_idxpairr   n_nodesr   	partitionpendingcluster_idsrG   rH   r   rv   	cohesionscommunitiesr   lang_countsdominant_langrB   s                                    rQ   _detect_leidenr      sc    
z	 "I(*KU# 4)*	$%%&A 	
KK2C	NC
3y>E2A')IG'*uJ >-- 2 23-- 2 237#67g;M)3w+@AD:%t$  &|//<=> !%cBBKK	ADDN
 hhjGT3C,<!==>J
KK1	
AHHJ
 ""'	 # I KKEI
 79G  .{h&+6Ka!{:J;q>KKw<("0781a&&8
8,-. (w(?!1(?GI(*K+.w	+B '*xH'HH<G//215a8R'0Lh*!.*3w<.?29:Q((:$	
 		  KK>K@PQ5 L 9 )@ ;s   *	M4MM M%M*c                t    g } D ]R  }|j                   j                  dd      j                  d      }|j                  |dd D cg c]  }|s|	 c}       T d|rFt	        d |D              }t        |      D ]&  |d      t        fd|D              rdz   & n d fd	}	t        fd
|D        d      }
 |	d      }t        d|
dz         D ]5  } |	|      }t        fd|j                         D              }|}|dk\  s5 n |}g }|j                         D ]A  \  }}t        |      k  r|D ch c]  }|j                   }}|j                  |||f       C t        |D cg c]  }|d   	 c}|      }g }t        ||      D ]  \  \  }}}}t        d |D              }|r|j!                  d      d   d   nd}t#        |      }|j                  |dt        |      t%        |d      |d| |D cg c]  }|j                   c}|d        |S c c}w c c}w c c}w c c}w )zGroup nodes by directory when Leiden is unavailable or over-fragments.

    Strips the longest common directory prefix from all file paths, then
    adaptively picks a grouping depth that yields 10-200 communities.
    rT   rU   NrX   r   c              3  2   K   | ]  }t        |        y wr   rD   )r   rv   s     rQ   r   z%_detect_file_based.<locals>.<genexpr>c  s     5!s1v5s   c              3  .   K   | ]  }|   k(    y wr    )r   rv   r   segs     rQ   r   z%_detect_file_based.<locals>.<genexpr>f  s     611Q43;6s   r   c                V   t        t              }D ]  }|j                  j                  dd      j	                  d      }|d d D cg c]  }|s|	 }}|	d  }|rdj                  |d |        }n|r|d   j                  dd      d   nd}||   j                  |        |S c c}w )NrT   rU   rX   rY   r   r   root)r   r   r?   rZ   r[   joinr]   r\   )
depthgroupsr   r^   rv   	dir_parts	remainderkeyr   
prefix_lens
           rQ   _group_at_depthz+_detect_file_based.<locals>._group_at_depthk  s    -8-> 	"AKK''c288=E$)#2J4q!4I4!*+.Ihhy%015:eBi&&sA.q13Kq!	"  5s   B&B&c              3  :   K   | ]  }t        |      z
    y wr   r   )r   rv   r   s     rQ   r   z%_detect_file_based.<locals>.<genexpr>y  s     @QSVj(@s   defaultc              3  @   K   | ]  }t        |      k\  sd   yw)r   Nr   )r   vr   s     rQ   r   z%_detect_file_based.<locals>.<genexpr>}  s     Jqs1v7IJs   r   rV   c              3  N   K   | ]  }|j                   s|j                     y wr   r   r   s     rQ   r   z%_detect_file_based.<locals>.<genexpr>  r   r   r=   r   zDirectory-based community: r   )r   intreturnzdict[str, list[GraphNode]])r?   rZ   r[   r\   r   r   r!   r   sumvaluesitemsrD   r   r   r   r   rC   rR   r   )r   r   r   r   all_dir_partsr   r^   rv   shortestr   	max_depthbest_groupsr   r   
qualifyingby_dirr   dir_pathrG   rH   r   r   r   r   r   r   rB   r   r   r   s   ` `                        @@@rQ   r   r   O  sj    &(M ;##D#.44S9s9Aqa9:;
 J5}55x 	A"1%C666U
	 @-@!LI!!$Kq)a-(  'JFMMOJJ
 F <>G#\\^ 8'w<("0781a&&8
8':67	8 (w(?!1(?GI(*K58)5L 1'7JH'HH<G//215a8R'0Lh*!.8
C29:Q((:$	
 		  I :\ 9 )@ ;s   
H&
H&
H+6H0H5c                $   t         s| S t        d | D              }|dk(  r| S t        t        ||z        |      }g }t        d | D        d      dz   }| D ]  }	t	        |	j                  dg             }
t        |
      |k  r|j                  |	       ?|D cg c]  }|j                  |
v r| }}|D cg c]   }|j                  |
v r|j                  |
v r|" }}t        |      |k  r|j                  |	       t        |      D ci c]  \  }}|j                  | }}}g }g }|D ]  }|j                  |j                        }|j                  |j                        }|<|?||k7  sE|j                  ||f       |j                  t        j                  |j                  d              |s|j                  |	       s	 t        j                  t        |      |d	      }||j                   d
<   |j#                  dd
d      }i }t        |j$                        D ]3  \  }}|j'                  |g       j                  ||   j                         5 t        |      dk  r|j                  |	       &|	j                  dd      }|	j                  dd      }|j)                         D ]X  }||d| z   |	j                  dd      dz   ||t        |      d|	j                  d      d| d	}|j                  |       |dz  }Z t*        j-                  d|t        |
      t        |              |S c c}w c c}w c c}}w # t.        $ r= t*        j1                  d|	j                  dd      d       |j                  |	       Y 9w xY w)zRecursively split communities that exceed threshold_pct of total.

    Uses Leiden on the subgraph of oversized communities. If igraph is
    not available, returns communities unchanged.
    c           
   3  r   K   | ]/  }|j                  d t        |j                  dg                    1 yw)r   rG   N)r$   rD   r   cs     rQ   r   z#_split_oversized.<locals>.<genexpr>  s3       	
fc!%%	2./0s   57r   c              3  @   K   | ]  }|j                  d d        yw)idr   N)r$   r   s     rQ   r   z#_split_oversized.<locals>.<genexpr>  s     -AtQ-s   r   r   rG   r   F)r   r   r   r   r   )r   r   r   r   rB   r=   z-subr   r{   r   zSplit from )	r   rB   r   	parent_idrG   r   r   r   r   z3Split oversized community '%s' (%d members) into %dz-Failed to split community '%s', keeping as-isT)exc_info)IGRAPH_AVAILABLEr   r   r   r(   r$   rD   r\   r   r}   r~   r|   r   rA   r   r   r   r   
membership
setdefaultr   r   r   	Exceptionwarning)r   r   r   threshold_pctmin_split_sizer   	thresholdresultnext_idcommrG   r   member_nodesr   member_edgesr   r   ig_edges
ig_weightssitir   r   sub_communitiesr   cidr   	comm_namesub_memberssub_comms                                 rQ   _split_oversizedr    s      E zC-.?IF--q	
G  e dhhy"-.w<9$MM$ 
7* 
 

 
""g-&&'1 
 
 |~-MM$
 ",/
1 a
	 
 +-"$
 	Aq112Bq112B~".R2XR)!! $$QVVS1	 MM$:	 l#A
 (ADDN**#/  + I 57O%i&:&:; S**33:: %44
 ?#q(d#q)I,I.557 !%$wi(88!XXgq1A5!**, #)-+* &i[1 h'1#& KK'GO$oe N MA


L  	 NN $	   MM$	 s-   L95%L>
M	B0M	;B9M		ANNc                   | j                         }| j                  d      }t        |      }t        j	                  dt        |      t        |             t        r%t        j	                  d       t        ||||      }n$t        j	                  d       t        ||||      }t        |||      }|D ]  }d|v st        |d         |d<   |d=  |S )a  Detect communities in the code graph.

    Uses the Leiden algorithm via igraph if available, otherwise falls back to
    file-based grouping.

    Args:
        store: The GraphStore instance.
        min_size: Minimum number of nodes for a community to be included.

    Returns:
        List of community dicts with keys: name, level, size, cohesion,
        dominant_language, description, members, member_qns.
    T)exclude_filesz Loaded %d unique nodes, %d edgesz4Detecting communities with Leiden algorithm (igraph)r   z:igraph not available, using file-based community detectionr   )get_all_edgesget_all_nodesr   r   r   rD   r   r   r   r  r   )storer   r   unique_nodesr   r   r	  s          rQ   detect_communitiesr  5  s    " ##%I&&T&:L 9
%C
KK*L3y>
 JK y(LPQ$\9hCP yG  #4!%d<&8!9D\"#
 Nre   c                    |sy| j                   }dj                  dt        |      z        }|j                  d| d|      j	                         }|r|d   dk(  ryt        | |      }t        | |      S )a  Re-detect communities only if changed files affect existing communities.

    If no existing communities contain nodes from changed files, skips
    re-detection entirely (the common case for small changes). Otherwise
    re-runs full community detection.

    Args:
        store: The GraphStore instance.
        changed_files: List of file paths that have changed.
        min_size: Minimum number of nodes for a community to be included.

    Returns:
        Number of communities detected, or 0 if skipped.
    r   ,?z`SELECT COUNT(DISTINCT community_id) FROM nodes WHERE community_id IS NOT NULL AND file_path IN ())r   )_connr   rD   executefetchoner  store_communities)r  changed_filesr   connplaceholdersaffectedr   s          rQ   incremental_detect_communitiesr)  g  s    & ;;D 88C#m"445L||<<H>	L hj	  x{a' %UX>KUK00re   c                h   | j                   }|j                  d       	 |j                  d       |j                  d       d}|D ]  }|j                  d|d   |j                  dd      |j                  dd	      |d
   |j                  dd      |j                  dd      f      }|j                  }|j                  dg       }|r7dj	                  dt        |      z        }|j                  d| d|g|z          |dz  } |j                          |S # t        $ r |j                           w xY w)am  Store detected communities in the database.

    Clears existing communities and community_id assignments, then inserts
    the new communities and updates node community_id references.

    Args:
        store: The GraphStore instance.
        communities: List of community dicts from detect_communities().

    Returns:
        Number of communities stored.
    zBEGIN IMMEDIATEzDELETE FROM communitiesz$UPDATE nodes SET community_id = NULLr   zINSERT INTO communities
                       (name, level, cohesion, size, dominant_language, description)
                   VALUES (?, ?, ?, ?, ?, ?)rB   r   r   r{   r   r   r=   r   rG   r  r  z;UPDATE nodes SET community_id = ? WHERE qualified_name IN (r   r   )	r!  r"  r$   	lastrowidr   rD   commitBaseExceptionrollback)	r  r   r&  countr	  cursorcommunity_idr   r'  s	            rQ   r$  r$    sH   $ ;;D 	LL"#"./;< 	D\\0 LHHWa(HHZ-LHH0"5HH]B/	F "++L )R0J"xxc*o(=>QR^Q__`a!NZ/ QJE1	4 	 L  s   C5D D1c                   h d}||vrd}|dv rdnd}| j                   j                  d| d| |f      j                         }g }|D ]u  }| j                  |d         D cg c]  }t	        |       }	}|j                  |d   t	        |d	         |d
   |d   |d   |d   xs dt	        |d   xs d      |	d       w |S c c}w )a  Retrieve stored communities from the database.

    Args:
        store: The GraphStore instance.
        sort_by: Column to sort by ("size", "cohesion", "name").
        min_size: Minimum community size to include.

    Returns:
        List of community dicts.
    >   rB   r   r   r   )r   r   DESCASCz3SELECT * FROM communities WHERE size >= ? ORDER BY  r   rB   r   r   r   r=   r   )r   rB   r   r   r   r   r   rG   )r!  r"  fetchallget_community_member_qnsr   r\   )
r  sort_byr   valid_sortsorderrowsr   rowr   r   s
             rQ   get_communitiesr=    s    /Kk!!55F5E
 ;;
=gYawO	 hj 	
 )+K  44SY?
 2

 

 	d)"3v;/\JK!$%8!9!?R)#m*<*BC!	
 		$ !
s   !CzC(^test[-/]|[-/]test([:/]|$)|it:should|describe:|spec[-/]|[-/]spec$)c                >    t        t        j                  |             S )z?Return True if a community name indicates it is test-dominated.)bool_TEST_COMMUNITY_REsearch)rB   s    rQ   _is_test_communityrB    s    "))$/00re   c           
        t        |       }i }|D ]0  }|j                  dd      }|j                  dg       D ]  }|||<   	 2 | j                         }g }t               }|D ]  }	|	j                  dk(  r|j                  |	j
                        }
|j                  |	j                        }|
L|O|
|k7  sUt        |
|      t        |
|      f}||xx   dz  cc<   |j                  |
||	j                  t        |	j
                        t        |	j                        d        g }|D ci c]  }|j                  dd      |d    }}|j                         D ]j  \  \  }}}|dkD  s|j                  |d	|       }|j                  |d	|       }t        |      st        |      rP|j                  d
| d| d| d       l |||dS c c}w )a@  Generate an architecture overview based on community structure.

    Builds a node-to-community mapping, counts cross-community edges,
    and generates warnings for high coupling.

    Args:
        store: The GraphStore instance.

    Returns:
        Dict with keys: communities, cross_community_edges, warnings.
    r   r   rG   r   r   )source_communitytarget_community	edge_kindsourcetargetrB   r   z
community-zHigh coupling (z edges) between 'z' and '')r   cross_community_edgeswarnings)r=  r$   r  r   rA   r}   r~   r   r   r\   r   rC   rB  )r  r   node_to_communityr	  comm_idr   r   cross_edgescross_countsr   src_commtgt_commr   rK  r   comm_name_mapc1c2r/  name1name2s                        rQ   get_architecture_overviewrW    s    "%(K )+ ,((4#((9b) 	,B$+b!	,, ##%I(*K-4YL  66[ $((););<$((););< $H$(+S8-DED!#$,$,VV(););<(););<  . H8CD1QUU4^QvY.DMD'335 R%2:!%%bJrd*;<E!%%bJrd*;<E "%(,>u,EOO!% )7'%+ #!,  Es   *G)rG   list[GraphNode]r   str)rI   	list[str]r   rY  )rG   rX  r   rZ  )rB   rY  r   rZ  )ru   rY  r   rY  )r   zlist[set[str]]r   list[GraphEdge]r   zlist[float])r   r[  r   zdict[str, list[str]]r   )r   zset[str]r   r[  r   dict[str, list[str]] | Noner   float)
r   rX  r   r[  r   r   r   r\  r   list[dict[str, Any]])g      ?r   )r   
list[dict]r   rX  r   r[  r  r]  r  r   r   r_  )rV   )r  r
   r   r   r   r^  )r  r
   r%  rZ  r   r   r   r   )r  r
   r   r^  r   r   )r   r   )r  r
   r8  rY  r   r   r   r^  )rB   rY  r   r?  )r  r
   r   zdict[str, Any]).__doc__
__future__r   loggingrs   collectionsr   r   typingr   graphr   r	   r
   r   	getLogger__name__r   igraphr   r   ImportErrorr   __annotations__	frozensetrm   rR   r@   rF   rk   rE   r   r   r   r   r   r  r  r)  r$  r=  compile
IGNORECASEr@  rB  rW  r   re   rQ   <module>rn     s   #  	 ,  C C			8	$ "    $N*6 7A,(,, ,^ (,
?
?
? 
%
? 	
?, (,	eee e 
%	e
 eb (,	SSS S 
%	S
 S~  CCC C 	C
 C CX ()//!$//j %1%1%1 %1 		%1P::$8::| ?@.. #.8;..b  RZZJMM 1
D]  	Bs   D 	DD