
    Ki                        U d Z ddlmZ ddlZddlZddlmZmZ  ej                  e	      Z
ddZdddZddZddZdd	Zddd
ZeeeeedZded<   dddZy)zJMeasures total tokens consumed by agent workflows against benchmark repos.    )annotationsN)AnyCallablec                P    t        t        j                  | t                    dz  S )zEstimate token count from JSON-serializable object.

    Uses character count / 4 as a rough approximation for English + code.
    )default   )lenjsondumpsstr)objs    r/home/jay/workspace/scripts/.codegraph-venv/lib/python3.12/site-packages/code_review_graph/eval/token_benchmark.pyestimate_tokensr      s    
 tzz#s+,11    c                    ddl m} ddlm} d}g } |d| |      }t	        |      }||z  }|j                  d|d        ||| d	
      }t	        |      }||z  }|j                  d|d       d|t        |      |dS )z=Simulate a review workflow and measure total tokens consumed.   get_minimal_contextdetect_changes_funcr   zreview changestask	repo_rootbaser   tooltokensminimalr   r   detail_leveldetect_changes_minimalreviewworkflowtotal_tokens
tool_callscallstools.contextr   tools.reviewr   r   appendr	   r   r   r   r   r%   r'   resultr   s           r   benchmark_review_workflowr.      s    32LE !&6)RVWFV$FFL	LL/6BC !diiXFV$FFL	LL2fEF $%j	 r   c                b   ddl m} ddlm} ddlm} d}g } |d|       }t        |      }||z  }|j                  d|d	        || d
      }t        |      }||z  }|j                  d|d	        || d
      }t        |      }||z  }|j                  d|d	       d|t        |      |dS )z.Simulate an architecture exploration workflow.r   )list_communities_funcr   )
list_flowsr   zmap architecturer   r   r   r   r   )r   r    list_communities_minimallist_flows_minimalarchitecturer#   )	tools.community_toolsr0   r)   r   tools.flows_toolsr1   r   r+   r	   )r   r0   r   r1   r%   r'   r-   r   s           r   benchmark_architecture_workflowr8   0   s    =3.LE &8INFV$FFL	LL/6BC"YYOFV$FFL	LL4GH))DFV$FFL	LL.&AB #$%j	 r   c                    ddl m} ddlm} d}g } |d|       }t	        |      }||z  }|j                  d|d        |d	| d
      }t	        |      }||z  }|j                  d|d       d|t        |      |dS )zSimulate a debug workflow.r   r   )semantic_search_nodesr   zdebug login bugr2   r   r   loginr   )queryr   r    semantic_search_minimaldebugr#   )r)   r   tools.queryr:   r   r+   r	   )r   r   r:   r%   r'   r-   r   s          r   benchmark_debug_workflowr@   P   s    33LE &79MFV$FFL	LL/6BC"F V$FFL	LL3vFG $%j	 r   c                    ddl m} ddlm} d}g } |d|       }t	        |      }||z  }|j                  d|d        || 	      }t	        |      }||z  }|j                  d
|d       d|t        |      |dS )z Simulate an onboarding workflow.r   r   )list_graph_statsr   zonboard developerr2   r   r   r   rB   onboardr#   )r)   r   r?   rB   r   r+   r	   )r   r   rB   r%   r'   r-   r   s          r   benchmark_onboard_workflowrE   l   s    3.LE &9YOFV$FFL	LL/6BC	2FV$FFL	LL,?@ $%j	 r   c                    ddl m} ddlm} d}g } |d| |      }t	        |      }||z  }|j                  d|d        ||| d	
      }t	        |      }||z  }|j                  d|d       d|t        |      |dS )z$Simulate a pre-merge check workflow.r   r   r   r   zpre-merge checkr   r   r   r   r   r!   	pre_merger#   r(   r,   s           r   benchmark_pre_merge_workflowrH      s    32LE &79SWXFV$FFL	LL/6BC diiXFV$FFL	LL2fEF  $%j	 r   )r"   r5   r>   rD   rG   zdict[str, Callable[..., dict]]ALL_WORKFLOWSc                R   g }t         j                         D ]C  \  }}	 d|j                  j                  v r || |      }n	 ||       }|j	                  |       E |S # t
        $ r>}t        j                  d||       |j	                  |t        |      d       Y d}~d}~ww xY w)z/Run all workflow benchmarks and return results.r   )r   r   rC   zBenchmark %s failed: %s)r$   errorN)	rI   items__code__co_varnamesr+   	Exceptionloggerwarningr   )r   r   resultsnamefnr-   es          r   run_all_benchmarksrV      s    G!'') 	@b	@000id;i0NN6"	@ N  	@NN4dA>NNs1v>??	@s   =A	B&(4B!!B&)r   r   returnint)zHEAD~1)r   r   r   r   rW   dict)r   r   rW   rY   )r   r   r   r   rW   z
list[dict])__doc__
__future__r   r
   loggingtypingr   r   	getLogger__name__rP   r   r.   r8   r@   rE   rH   rI   __annotations__rV    r   r   <module>rb      sj    P "    			8	$28@846 (3%)-1- r   