
    Ki+                        d Z ddlmZ ddlZddlmZ ddlmZ ddlm	Z	 ddl
mZ  ej                  e      Zg d	Zdd
ZddZ	 d	 	 	 	 	 	 	 ddZy)zSToken reduction benchmark -- measures graph query efficiency vs naive file reading.    )annotationsN)Path)Any   )
GraphStore)hybrid_search)zhow does authentication workzwhat is the main entry pointz$how are database connections managedz%what error handling patterns are usedz&how do tests verify core functionalityc                2    t        dt        |       dz        S )z)Rough token estimate: ~4 chars per token.r      )maxlen)texts    m/home/jay/workspace/scripts/.codegraph-venv/lib/python3.12/site-packages/code_review_graph/token_benchmark.pyestimate_tokensr      s    q#d)q.!!    c                    d}d}|D ]:  }| j                  d|       D ]!  }	 |t        |j                  d            z  }# < |S # t        $ r Y 3w xY w)z+Count tokens in all parseable source files.r   )z.pyz.jsz.tsz.goz.rsz.javaz.cz.cppz.rbz.phpz.swiftz.kt*replace)errors)rglobr   	read_textOSError)	repo_roottotalextsextfs        r   compute_naive_tokensr      s|    ED  1SE+ 	AKKyK1 	 L  s   A	AAc           
        |t         }t        |      }g }|D ]  }t        | |d      }d}|D ]]  }|t        t	        |            z  }|j                  dd      }	| j                  |	      dd }
|
D ]  }|t        t	        |            z  } _ |dkD  r||z  }nd}|j                  |||t        |d      d        |rt        d	 |D              t        |      z  }nd}||t        |d      d
|dddS )zRun token reduction benchmark.

    Compares naive full-corpus token cost vs graph query token
    cost for a set of sample questions.
    N   )limitr   qualified_name r   )questionnaive_tokensgraph_tokensreduction_ratioc              3  &   K   | ]	  }|d      yw)r&   N ).0rs     r   	<genexpr>z&run_token_benchmark.<locals>.<genexpr>X   s     6#$6s   zGraph queries use ~z.0fz,x fewer tokens than reading all source files)naive_corpus_tokensper_questionaverage_reduction_ratiosummary)_SAMPLE_QUESTIONSr   r   r   strgetget_edges_by_sourceappendroundsumr   )storer   	questionsnaive_totalresultsqsearch_resultsr%   r*   qnedgeseratio	avg_ratios                 r   run_token_benchmarkrB   /   sB    %	&y1KG &uaq9 	8AOCF33L',B--b1"15E 8A 778	8 !,.EE'($UA	
 	!. 6g66'l 	
 	  +#(A#6!)C 1, - r   )r   r1   returnint)r   r   rC   rD   )N)r7   r   r   r   r8   zlist[str] | NonerC   zdict[str, Any])__doc__
__future__r   loggingpathlibr   typingr   graphr   searchr   	getLogger__name__loggerr0   r   r   rB   r(   r   r   <module>rO      sl    Y "     !			8	$ "
* #'777  7 	7r   