
    Ki$^                    	   U d Z ddlmZ ddlZddlZddlZddlmZ ddlm	Z	 ddl
mZ ddlmZmZmZ  ej"                  e      Z ej(                  d	ej*                         ej(                  d
ej*                         ej(                  dej*                         ej(                  dej*                         ej(                  dej*                         ej(                  dej*                         ej(                  dej*                         ej(                  dej*                         ej(                  dej*                         ej(                  dej*                         ej(                  dej*                         ej(                  d       ej(                  dej*                         ej(                  dej*                         ej(                  dej*                         ej(                  dej*                         ej(                  dej*                         ej(                  dej*                         ej(                  d       ej(                  dej*                         ej(                  dej*                         ej(                  dej*                         ej(                  d       ej(                  d ej*                         ej(                  d!ej*                        gZd"ed#<    ej(                  d$       ej(                  d%       ej(                  d&       ej(                  d'       ej(                  d(       ej(                  d)       ej(                  d*       ej(                  d+       ej(                  d,       ej(                  d-       ej(                  d.       ej(                  d/       ej(                  d0       ej(                  d1       ej(                  d2       ej(                  d3       ej(                  d4       ej(                  d5       ej(                  d6       ej(                  d7       ej(                  d8       ej(                  d9      gZd"ed:<   dHd;ZdHd<Z ej(                  d=      ZdId>Z	 dJ	 	 	 	 	 dKd?Z	 dL	 	 	 	 	 	 	 dMd@Z	 	 dN	 	 	 	 	 	 	 dOdAZdPdBZ dQdCZ!	 dL	 	 	 	 	 	 	 dRdDZ"	 	 dS	 	 	 	 	 	 	 dTdEZ#dUdFZ$	 	 	 	 	 	 dVdGZ%y)Wam  Execution flow detection, tracing, and criticality scoring.

Detects entry points in the codebase (functions with no incoming CALLS edges,
framework-decorated handlers, and conventional name patterns), traces execution
paths via forward BFS through CALLS edges, scores each flow for criticality,
and persists results to the ``flows`` / ``flow_memberships`` tables.
    )annotationsN)deque)Optional   )SECURITY_KEYWORDS)	GraphNode
GraphStore_sanitize_namez9app\.(get|post|put|delete|patch|route|websocket|on_event)z)router\.(get|post|put|delete|patch|route)z/blueprint\.(route|before_request|after_request)z!(before|after)_(request|response)zclick\.(command|group)z\w+\.(command|group)\bz$(field|model)_(serializer|validator)z+(celery\.)?(task|shared_task|periodic_task)receiverapi_viewz
\baction\bzpytest\.(fixture|mark)z#(override_settings|modify_settings)z(event\.)?listens_forz1(Get|Post|Put|Delete|Patch|RequestMapping)Mappingz,(Scheduled|EventListener|Bean|Configuration)z3(Component|Injectable|Controller|Module|Guard|Pipe)z#(Subscribe|Mutation|Query|Resolver)z3(app|router)\.(get|post|put|delete|patch|use|all)\bz'@(Override|OnLifecycleEvent|Composable)z((HiltViewModel|AndroidEntryPoint|Inject)z7\w+\.(tool|tool_plain|system_prompt|result_validator)\bz^tool\bz2\w+\.(middleware|exception_handler|on_exception)\bz\w+\.route\bzlist[re.Pattern[str]]_FRAMEWORK_DECORATOR_PATTERNSz^main$z
^__main__$z^test_z
^Test[A-Z]z^on_z^handle_z	^handler$z^handle$z^lambda_handler$z	^upgrade$z^downgrade$z
^lifespan$z^get_db$z8^on(Create|Start|Resume|Pause|Stop|Destroy|Bind|Receive)z^do(Get|Post|Put|Delete)$z-^do_(GET|POST|PUT|DELETE|PATCH|HEAD|OPTIONS)$z^log_message$z^(middleware|errorHandler)$zl^ng(OnInit|OnChanges|OnDestroy|DoCheck|AfterContentInit|AfterContentChecked|AfterViewInit|AfterViewChecked)$zL^(transform|writeValue|registerOnChange|registerOnTouched|setDisabledState)$zG^(canActivate|canDeactivate|canActivateChild|canLoad|canMatch|resolve)$zZ^(componentDidMount|componentDidUpdate|componentWillUnmount|shouldComponentUpdate|render)$_ENTRY_NAME_PATTERNSc                    | j                   j                  d      }|syt        |t              r|g}|D ]!  }t        D ]  }|j                  |      s  y # y)zCReturn True if *node* has a decorator matching a framework pattern.
decoratorsFT)extraget
isinstancestrr   search)noder   decpats       c/home/jay/workspace/scripts/.codegraph-venv/lib/python3.12/site-packages/code_review_graph/flows.py_has_framework_decoratorr   v   s\    -J*c" \
 0 	Czz#	     c                T    t         D ]  }|j                  | j                        s y y)zHReturn True if *node*'s name matches a conventional entry-point pattern.TF)r   r   name)r   r   s     r   _matches_entry_namer      s)    # ::dii  r   zN([\\/]__tests__[\\/]|\.spec\.[jt]sx?$|\.test\.[jt]sx?$|[\\/]test_[^/\\]*\.py$)c                >    t        t        j                  |             S )z2Return True if *file_path* looks like a test file.)bool_TEST_FILE_REr   )	file_paths    r   _is_test_filer#      s    $$Y/00r   c                   | j                         }| j                  ddg      }g }t               }|D ]  }|s"|j                  st	        |j
                        r'd}|j                  |vrd}t        |      rd}t        |      rd}|sV|j                  |vse|j                  |       |j                  |j                          |S )a  Find functions that are entry points in the graph.

    An entry point is a Function/Test node that either:
    1. Has no incoming CALLS edges (true root), or
    2. Has a framework decorator (e.g. ``@app.get``), or
    3. Matches a conventional name pattern (``main``, ``test_*``, etc.).

    When *include_tests* is False (the default), Test nodes are excluded so
    that flow analysis focuses on production entry points.
    FunctionTestFT)get_all_call_targetsget_nodes_by_kindsetis_testr#   r"   qualified_namer   r   appendadd)storeinclude_testscalled_qnamescandidate_nodesentry_pointsseen_qnr   is_entrys           r   detect_entry_pointsr5      s     ..0M --z6.BCO$&LG -$,,-2O m3H $D)H t$H++7:%KK++,)-, r   c           
        g }g }t               }t               }|j                  |j                  df       |j	                  |j                         |j                  |j
                         |j                  |j                         d}|r|j                         \  }}	|	|kD  r|	}|	|k\  r"| j                  |      }
|
D ]  }|j                  dk7  r|j                  }||v r$| j                  |      }|8|j	                  |       |j                  |j
                         |j                  |       |j                  ||	dz   f        |rt        |      dk  ryt        |D ch c]!  }| j                  |      x}|j                  # c}      }t        |j                        |j                  |j
                  ||t        |      t        |      |dd	}t!        ||       |d<   |S c c}w )	zTrace a single execution flow from *ep* via forward BFS.

    Returns a flow dict (see :func:`trace_flows` for the schema) or ``None``
    if the flow is trivial (single-node, no outgoing CALLS that resolve).
    r   CALLSNr              )	r   entry_pointentry_point_idpathdepth
node_count
file_countfilescriticalityrA   )r)   r   r,   r+   r-   idpopleftget_edges_by_sourcekindtarget_qualifiedget_nodelenlistr"   r
   r   compute_criticality)r.   ep	max_depthpath_idspath_qnamesvisitedqueueactual_depth
current_qnr=   edgesedge	target_qntarget_nodeqnnr@   flows                     r   _trace_single_flowrZ      s    HKG$)GE 
LL"##Q'(KK!!"OOBEEr(()L
!MMO
E< LI ))*5 	1DyyG#--IG#..3K"KK	"OOKNN+y)LL)UQY/0	1 2 8}q##A0 	
 E rww'((%%(m%j
D .dE:DK%s   &&G)c                    t        | |      }g }|D ]#  }t        | ||      }||j                  |       % |j                  d d       |S )aR  Trace execution flows from every entry point via forward BFS.

    Returns a list of flow dicts, each containing:
      - name: human-readable flow name (entry point name)
      - entry_point: qualified name of the entry point
      - entry_point_id: node database id of the entry point
      - path: ordered list of node IDs in the flow
      - depth: maximum BFS depth reached
      - node_count: number of distinct nodes in the path
      - file_count: number of distinct files touched
      - files: list of distinct file paths
      - criticality: computed criticality score (0.0-1.0)
    )r/   c                    | d   S )NrA    fs    r   <lambda>ztrace_flows.<locals>.<lambda>.  s    Q}- r   Tkeyreverse)r5   rZ   r,   sort)r.   rL   r/   r2   flowsrK   rY   s          r   trace_flowsrf     s\    $ 'uMJLE !%Y7LL 
JJ-tJ<Lr   c                P   | j                  dg       }|syg }|D ]'  }|j                  |      }|s|j                  |       ) |syt        |D ch c]  }|j                   c}      }|dkD  rt        |dz
  dz  d      nd}d}|D ]U  }|j                  |j                        }	|	D ]3  }
|
j                  dk(  s|j                  |
j                        /|dz  }5 W t        |dz  d      }d}|D ]P  }|j                  j                         }|j                  j                         }t        D ]  }||v s||v s|dz  } P R t        |t        t        |      d      z  d      }d}|D ]:  }|j                  |j                        }|D ]  }|j                  d	k(  s|dz  } : < |t        t        |      d      z  }d|z
  }| j                  d
d      }t        |dz  d      }|dz  |dz  z   |dz  z   |dz  z   |dz  z   }t!        t        t        |d      d      d      S c c}w )zScore a flow from 0.0 to 1.0 based on multiple weighted factors.

    Weights:
      - File spread:         0.30
      - External calls:      0.20
      - Security sensitivity: 0.25
      - Test coverage gap:   0.15
      - Depth:               0.10
    r<   r9   r   g      @g      ?r   r7   g      @	TESTED_BYr=   g      $@g333333?g?g      ?g333333?g?   )r   get_node_by_idr,   rH   r"   minrD   r+   rE   rG   rF   r   lower_SECURITY_KEYWORDSmaxget_edges_by_targetround)rY   r.   node_idsnodesnidrX   r?   file_spreadexternal_countrS   eexternal_scoresecurity_hits
name_lowerqn_lowerkwsecurity_scoretested_counttested_edgestecoveragetest_gapr=   depth_scorerA   s                            r   rJ   rJ   7  s    ((62.H  E   %LLO
  51aakk12J6@1n#zA~,c2#K N $))!*:*:; 	$Avv U^^A4F4F%G%O!#	$$ #-s3N M VV\\^
##))+$ 	BZ2>"	 SZ);;SAN L 001A1AB 	Bww+%!	 c#e*a00HX~H HHWa EedlC(K 	d
4
	 
4
	  T/	 
		  Sc*C0!44e 2s   H#c                V   | j                   }|j                  d       	 |j                  d       |j                  d       d}|D ]  }t        j                  |j	                  dg             }|j                  d|d   |d   |d	   |d
   |d   |d   |f       |j                  d      j                         d   }|j	                  dg       }t        |      D ]  \  }}	|j                  d||	|f        |dz  } |j                          |S # t        $ r |j                           w xY w)zXClear existing flows and persist new ones.

    Returns the number of flows stored.
    zBEGIN IMMEDIATEzDELETE FROM flow_membershipszDELETE FROM flowsr   r<   zINSERT INTO flows
                   (name, entry_point_id, depth, node_count, file_count,
                    criticality, path_json)
                   VALUES (?, ?, ?, ?, ?, ?, ?)r   r;   r=   r>   r?   rA   SELECT last_insert_rowid()TINSERT OR IGNORE INTO flow_memberships (flow_id, node_id, position) VALUES (?, ?, ?)r   )
_connexecutejsondumpsr   fetchone	enumeratecommitBaseExceptionrollback)
r.   re   conncountrY   	path_jsonflow_idrq   positionnode_ids
             r   store_flowsr     sQ    ;;D 	LL"#%34() 	D

488FB#78ILL3
 L)*M&&' ll#?@IIKANG xx+H%.x%8 !''gx0 QJE7	: 	 L  s   C,D D(c                   |sy| j                   }t        |      }dj                  dt        |      z        }|j	                  d| d|      j                         }|D cg c]  }|d   	 }}t               }	|rSdj                  dt        |      z        }
|j	                  d|
 d|      j                         }|D ch c]  }|d   	 }	}|D ](  }|j	                  d|f       |j	                  d|f       * |j                          t        |       }|D cg c]   }|j                  |v s|j                  |	v r|" }}g }|D ]#  }t        | ||      }||j                  |       % d}|D ]  }t        j                  |j                  d	g             }|j	                  d
|d   |d   |d   |d   |d   |d   |f       |j	                  d      j                         d   }|j                  d	g       }t!        |      D ]  \  }}|j	                  d|||f        |dz  } |j                          |S c c}w c c}w c c}w )ar  Re-trace only flows that touch *changed_files*.  Much faster than full trace.

    1. Find flow IDs whose memberships reference nodes in *changed_files*.
    2. Collect the entry-point node IDs of those flows before deleting them.
    3. Delete only the affected flows and their memberships.
    4. Re-detect entry points, keeping those in *changed_files* **or** whose
       node ID was an entry point of a deleted flow.
    5. BFS-trace each relevant entry point via :func:`_trace_single_flow`.
    6. INSERT the new flows (without clearing unrelated flows).

    Returns the number of re-traced flows that were stored.
    r   ,?zlSELECT DISTINCT fm.flow_id FROM flow_memberships fm JOIN nodes n ON n.id = fm.node_id WHERE n.file_path IN ()z.SELECT entry_point_id FROM flows WHERE id IN (z.DELETE FROM flow_memberships WHERE flow_id = ?zDELETE FROM flows WHERE id = ?r<   zINSERT INTO flows
               (name, entry_point_id, depth, node_count, file_count,
                criticality, path_json)
               VALUES (?, ?, ?, ?, ?, ?, ?)r   r;   r=   r>   r?   rA   r   r   r   )r   r)   joinrH   r   fetchallr   r5   r"   rB   rZ   r,   r   r   r   r   r   )r.   changed_filesrL   r   changed_file_setplaceholdersaffected_rowsraffected_idsentry_point_idsep_placeholdersep_rowsfidr2   rK   relevant_eps	new_flowsrY   r   r   r   rq   r   r   s                           r   incremental_trace_flowsr     s   " ;;D=)
 88C#m"445LLL!!-a	1 		
 hj  #00QAaD0L0
 !$O((3\):#:;,,+,A/
 (*	 	
 *11A1Q411
  ?EvN5v>? 	KKM
 'u-L!<<++ruu/G 	L  I #!%Y7T"# E JJtxx34	/
 V%&W\"\"]#	
 ,,;<EEGJ88FB'!*8!4 	HgLL#'8,	 	
58 	KKMLU 1 2s    H6?H;%I c                L   h d}||vrd}|dv rdnd}| j                   j                  d| d| d|f      j                         }g }|D ]X  }|j                  |d	   t	        |d
         |d   |d   |d   |d   |d   t        j                  |d         |d   |d   d
       Z |S )zRetrieve stored flows from the database.

    Args:
        store: The graph store.
        sort_by: Column to sort by (``criticality``, ``depth``, ``node_count``).
        limit: Maximum number of flows to return.
    >   r   r=   r?   r>   rA   rA   )rA   r=   r>   r?   DESCASCzSELECT * FROM flows ORDER BY  z LIMIT ?rB   r   r;   r=   r>   r?   r   
created_at
updated_at)
rB   r   r;   r=   r>   r?   rA   r<   r   r   )r   r   r   r,   r
   r   loads)r.   sort_bylimitallowed_sortorderrowsresultsrows           r   	get_flowsr   3  s     PLl"!UUF[`E ;;
'y%A	 hj 	
 G d)"3v;/!"23\l+l+}-JJs;/0l+l+
 	 Nr   c                   | j                   j                  d|f      j                         }|yt        j                  |d         }g }|D ]  }| j                  |      }|s|j                  |j                  t        |j                        |j                  |j                  |j                  |j                  t        |j                        d        |d   t        |d         |d   |d   |d	   |d
   |d   |||d   |d   dS )zRetrieve a single flow with full path details.

    Returns a dict with the flow metadata plus a ``steps`` list containing
    each node's name, kind, file, and line info.
    z SELECT * FROM flows WHERE id = ?Nr   )r   r   rE   file
line_startline_endr+   rB   r   r;   r=   r>   r?   rA   r   r   )rB   r   r;   r=   r>   r?   rA   r<   stepsr   r   )r   r   r   r   r   rj   r,   rB   r
   r   rE   r"   r   r   r+   )r.   r   r   rM   r   rs   r   s          r   get_flow_by_idr   ]  s    ++

*WJhj  {**S%56H E ##C(LL77&tyy1		"oo MM"01D1D"E  $is6{+./W,','=),',' r   c                   |sg ddS | j                  |      }|sg ddS | j                  |      }|sg ddS g }|D ]"  }t        | |      }|s|j                  |       $ |j	                  d d       |t        |      dS )zFind flows that include nodes from the given changed files.

    Returns::

        {
            "affected_flows": [<flow dicts>],
            "total": <int>,
        }
    r   )affected_flowstotalc                &    | j                  dd      S )NrA   r   )r   r^   s    r   r`   z$get_affected_flows.<locals>.<lambda>  s    mQ 7 r   Tra   )get_node_ids_by_filesget_flow_ids_by_node_idsr   r,   rd   rH   )r.   r   rq   flow_idsaffectedr   rY   s          r   get_affected_flowsr     s     "$q11 **=9H"$q11 --h7H"$q11H "eS)OOD!" MM7MF #X r   )r   r   returnr    )r"   r   r   r    )F)r.   r	   r/   r    r   zlist[GraphNode])   )r.   r	   rK   r   rL   intr   Optional[dict])r   F)r.   r	   rL   r   r/   r    r   
list[dict])rY   dictr.   r	   r   float)r.   r	   re   r   r   r   )r.   r	   r   	list[str]rL   r   r   r   )rA   2   )r.   r	   r   r   r   r   r   r   )r.   r	   r   r   r   r   )r.   r	   r   r   r   r   )&__doc__
__future__r   r   loggingrecollectionsr   typingr   	constantsr   rm   graphr   r	   r
   	getLogger__name__loggercompile
IGNORECASEr   __annotations__r   r   r   r!   r#   r5   rZ   rf   rJ   r   r   r   r   r   r]   r   r   <module>r      s   #   	   > 8 8			8	$ BJJKR]][BJJ;R]]KBJJA2==QBJJ3R]]CBJJ("--8BJJ("--8BJJ6FBJJ=r}}MBJJ{BMM*BJJ{BMM*BJJ}bmm,BJJ()BJJ5r}}EBJJ'7BJJCR]]SBJJ>NBJJEr}}UBJJ5r}}EBJJEFBJJ92==IBJJ:BMMJBJJI2==YBJJzBJJDbmmTBJJ.Q)8 4 )Z BJJyBJJ}BJJyBJJ}BJJwBJJ{BJJ|BJJ{BJJ"#BJJ|BJJ~BJJ}BJJ{BJJJKBJJ+,BJJ?@BJJ BJJ-.BJJ	R
 BJJ^_BJJYZBJJ	+G'/ + '^ 

U
1  --- -p EEE E 	ET   	HK5f3r kkk k 		kj !''' ' 	'T*Z((( 
(r   