
    i/                        d Z ddlZddlZddlZddlZddlZddlZddlZddlZddl	m
Z
  ej                  e      Zh dZg dZdededefd	Zdededefd
ZdedededefdZdedefdZdedefdZd/dedededededefdZedk(  r9 ej4                  ej6                  d        ej8                  d      Zej=                  dd !       ej=                  d"d#       ej=                  d$ed%       ej=                  d&ed%       ej?                         Z  ee jB                        Z" ee jB                  e"e jF                  e jH                  e jJ                  '      Z& e' ejP                  e&d()             e&d*   d+k(  r ejR                  d       ye&d*   d,k(  r ejR                  d-       y ejR                  d.       yy)0uG  
impact_scanner.py — Reverse impact scanner.

Given a set of modified files, extracts symbols (functions/classes) from them,
then greps the entire project to find OTHER files that reference those symbols
but were NOT modified. Detects cases where a bot modifies file A but misses
file B that also references the same symbol.
    N)Path>   idkeydataitemlistnamepathtypeerroreventindexitemspropsstatevalueconfigresultoptionsrequestresponse)node_modulesz.git__pycache__distbuildz
.worktreesz.next	file_path
diff_linesreturnc                 f   	 t        |       j                  dd      }t        j                  ||       }t        |      }g }t        j                  |      D ]  }t        |t        j                  t        j                  t        j                  f      s=|rIt        t        |j                  t!        |d|j                        dz               }|j#                  |      s|j%                  |j&                          |S # t        t
        f$ r#}t        j                  d| |       g cY d}~S d}~ww xY w)	a  Extract function/class symbol names from a Python file using the ast module.

    If diff_lines is provided, only returns symbols whose definition lines
    overlap with the changed lines. If diff_lines is empty, returns all
    top-level function and class names.

    Args:
        file_path: Absolute or relative path to the Python source file.
        diff_lines: List of changed line numbers (1-based). Empty means all.

    Returns:
        List of symbol name strings. Empty list on parse error.
    utf-8replaceencodingerrors)filenamezFailed to parse %s: %sN
end_lineno   )r   	read_textastparseOSErrorSyntaxErrorloggerdebugsetwalk
isinstanceFunctionDefAsyncFunctionDefClassDefrangelinenogetattrintersectionappendr	   )	r   r   sourcetreeexcdiff_setsymbolsnode
node_liness	            -/home/jay/workspace/scripts/impact_scanner.pyextract_symbols_pythonrB       s    i**GI*Nyy)4
 :HG "$#2F2F UVU4;;lDKK0X[\0\]^J**84tyy!" N! [! -y#>	s   3C> >D0D+%D0+D0c                    t        j                  d      }	 t        |       j                  dd      j	                         }t        |      }g }t        |d      D ]@  \  }}|r||vr|j                  |      }	|	s!|j                  |	j                  d	             B |S # t
        $ r#}t        j                  d| |       g cY d}~S d}~ww xY w)
a!  Extract exported symbol names from a TypeScript/JavaScript file via regex.

    Matches patterns like:
        export function Foo, export class Bar, export const baz, etc.

    If diff_lines is provided, only returns symbols from matching lines.
    If diff_lines is empty, returns all exported symbols.

    Args:
        file_path: Absolute or relative path to the TS/JS source file.
        diff_lines: List of changed line numbers (1-based). Empty means all.

    Returns:
        List of symbol name strings. Empty list on read error.
    z;export\s+(function|class|type|interface|const|enum)\s+(\w+)r    r!   r"   zFailed to read %s: %sNr'   )start   )recompiler   r(   
splitlinesr+   r-   r.   r/   	enumeratesearchr9   group)
r   r   patternlinesr<   r=   r>   r6   linematchs
             rA   extract_symbols_typescriptrP   D   s      jjWXGY))79)MXXZ
 :HG!%q1 +h.t$NN5;;q>*+ N  ,i=	s   *B   	C)CCCsymbolproject_rootexclude_filesc                 f   g d}t         D ]	  }|d|gz  } |d| z   dz   |gz  }	 t        j                  |ddd      }t               }|D ]O  }t        |      }	|j                  t        |	j                                      |j                  t        |	             Q g }
|j                  j                         D ]  }|j                  d
d      }t        |      dk  r$|d   |d   |d   }}}	 t!        |      }t        t        |      j                               }||v s||v rj|
j%                  |||j'                         d        |
S # t        j                  $ r t        j                  d|        g cY S t        $ r#}t        j                  d| |       g cY d	}~S d	}~ww xY w# t"        $ r Y w xY w)a  Search the project for references to a symbol in Python/TS/JS files.

    Skips node_modules, .git, __pycache__, dist, build, .worktrees, .next
    directories and also skips the files listed in exclude_files.

    Args:
        symbol: The symbol name to search for.
        project_root: Root directory of the project to search.
        exclude_files: List of file paths (relative or absolute) to skip.

    Returns:
        List of dicts with keys "file", "line", "content".
        Returns empty list on timeout or error.
    )grepz-rnz--include=*.pyz--include=*.tsz--include=*.tsxz--include=*.jsz--include=*.jsxz--exclude-dirz\bT   )capture_outputtexttimeoutzgrep timed out for symbol '%s'zgrep failed for symbol '%s': %sN:rE   r   r'   )filerN   content)EXCLUDE_DIRS
subprocessrunTimeoutExpiredr-   warningr+   r/   r   addstrresolvestdoutrH   splitlenint
ValueErrorr9   strip)rQ   rR   rS   cmddprocr<   abs_excludesefpresultsraw_linepartsr   
lineno_strr\   r6   abs_files                     rA   grep_referencesrv   h   s   C  $##$ EFNU"L11C~~	
 5L !HQYY[)*Q !
 GKK**, XsA&u:>).q58U1Xw:		_F tI..01|#yL'@	6gmmoVWX" NA $$ 7@	 8&#F	(  		s5   E	 ,F$	+F!6F!>FF!F!$	F0/F0diff_outputc                 Z   t        j                  d      }t               }|j                  |       D ]n  }t	        |j                  d            }|j                  d      t	        |j                  d            nd}t        |||z         D ]  }|j                  |        p t        |      S )aB  Parse `git diff` output and return added/changed line numbers in the new file.

    Reads @@ hunk headers of the form @@ -a,b +c,d @@ and collects the range
    [c, c+d) as changed lines.

    Args:
        diff_output: Raw text output of a git diff command.

    Returns:
        Sorted list of 1-based line numbers.
    z&@@ -\d+(?:,\d+)? \+(\d+)(?:,(\d+))? @@r'   rE   )	rF   rG   r/   finditerrh   rK   r5   rb   sorted)rw   hunk_rerM   rO   rD   countis          rA   _parse_diff_linesr~      s     jjBCGEE!!+. EKKN#',{{1~'AEKKN#queem, 	AIIaL	
 %=    c                 T   g dg dfD ]x  }	 t        j                  || ddd      }|j                  dk(  rL|j                  j	                         D cg c]#  }|j                         s|j                         % }}|r|c S z g S c c}w # t         j                  t        f$ r Y w xY w)a]  Return a list of file paths modified relative to HEAD in the given repo.

    Tries `git diff --name-only HEAD` first; falls back to
    `git diff --name-only --cached` if that fails.

    Args:
        project_root: Root directory of the git repository.

    Returns:
        List of file path strings relative to project_root. Empty on error.
    )gitdiff--name-onlyHEAD)r   r   r   z--cachedT
   cwdrW   rX   rY   r   )r^   r_   
returncodere   rH   rj   r`   r+   )rR   rk   rm   ffiless        rA   get_modified_files_from_gitr      s     	/2 	>> #D !#,0KK,B,B,DRq	RR L$ I S ))73 		s*   ABB(B:BBB'&B'       modified_filestask_idmax_symbolsrY   c                    t        j                         |z   }g }g }g }|D ]S  }	t        |	      }
|
j                         st        |       |
z  }
|j	                  t        |
j                                      U |D ]  }t        j                         |k\  rt        j                  d       |d||dc S t        |      j                         st        t        |       |z        n|}t        |      j                  j                         }g }	 t        j                  dddd|g| ddd	
      }|j                  dk(  rt        |j                        }|dk(  rt%        ||      }n*|dv rt'        ||      }nt        j)                  d||       |D cg c]  }|t*        vs| }}|d| }|j-                  |       |D ]W  }t        j                         |k\  r t        j                  d       |d||dc c S t/        || |      }|j-                  |       Y  t1        |      }|dk(  rd}n
|d	k  rd}nd}||||dS # t        j                   t"        f$ r Y 	w xY wc c}w )u  Orchestrate symbol extraction and reference scanning for modified files.

    For each modified file:
      1. Determines the language from the extension.
      2. Obtains changed line numbers via `git diff HEAD -- <file>`.
      3. Extracts symbols, filters common names, limits to max_symbols.
      4. Greps the project for references in unmodified files.

    Gate results:
      - "PASS"  — 0 unmodified references found
      - "WARN"  — 1-5 unmodified references found
      - "BLOCK" — 6+ unmodified references found

    Args:
        project_root: Root directory of the project.
        modified_files: List of modified file paths (relative to project_root).
        task_id: Optional task identifier included in the output dict.
        max_symbols: Maximum number of symbols to check per file.
        timeout: Overall time budget in seconds.

    Returns:
        Dict with keys: task_id, gate_result, unmodified_references, symbols_checked.
    z(Overall timeout reached; stopping early.WARN)r   gate_resultunmodified_referencessymbols_checkedr   r   r   z--Tr   r   r   z.py)z.tsz.tsxz.jsz.jsxz,Unsupported extension '%s' for %s; skipping.Nz*Overall timeout reached during grep phase.PASSBLOCK)time	monotonicr   is_absoluter9   rc   rd   r-   ra   suffixlowerr^   r_   r   r~   re   r`   r+   rB   rP   r.   COMMON_FILTERextendrv   rg   )rR   r   r   r   rY   deadliner   r   abs_modifiedmfrp   rel_filer   r   r   rm   raw_symbolssfilteredselectedrQ   refs	ref_countgates                           rA   scanr      s   0 ~~')H"$OL .H}}\"Q&AC		,-	. # 4/>>x'NNEF"%)>#2	  ?C8n>X>X>ZC\*X56`h	h&&,,. 
	>>h7 #D !#.t{{;

 U?0JGK554Y
KKLLGQYZ  +E!a}.DAEEL[)x( 
	/F~~8+KL&#)-B'6	  #6<FD!((.
	/U4/l )*IA~	a !6*	 K ))73 		 Fs   	AH9II9II__main__z#%(levelname)s %(name)s: %(message)s)levelformatz)Impact Scanner - reverse dependency check)descriptionz--project-rootT)requiredz	--task-id)defaultz--max-symbols)r   r   z	--timeout)r   rY   F)ensure_asciir   r   r   r'   rE   )r   r   r   )*__doc__argparser)   jsonloggingrF   r^   sysr   pathlibr   	getLogger__name__r-   r   r]   rc   r   rB   rP   rv   r~   r   rh   dictr   basicConfigWARNINGArgumentParserparseradd_argument
parse_argsargsrR   r   r   r   rY   r   printdumpsexit r   rA   <module>r      s3    
   	  
  			8	$ _!c !t ! !H!# !4 !D !HCC Cs C4 CD CL3 4 ,c d Bfs fD f3 fRU fdg fqu fR zGgoo6[\$X$$1\]F
(48
R0
c1=
#r:D01B1BCN$$F 
*$**V%
01m&		&	(5 r   