
    Ki;                    \    d Z ddlmZ ddlZddlmZ ddlmZ ddZddZ	ddZ
dd	Zdd
Zy)zMarkdown report generator for evaluation benchmark results.

Takes a list of benchmark result dicts and produces a formatted markdown table
suitable for inclusion in documentation or CI output.
    )annotationsN)Path)Anyc           
     6   | syg }|j                  d       |j                  d       g }t               }| D ]6  }|D ]/  }|dk7  s	||vs|j                  |       |j                  |       1 8 |j                  d       |j                  d       ddj                  |      z   dz   }d	dj                  d
 |D              z   dz   }|j                  |       |j                  |       | D ]f  }|j	                  dd      }|D cg c]  }t        |j	                  |d             }	}|j                  d| ddj                  |	      z   dz          h |j                  d       |j                  d       |j                  d       | D ]z  }|j	                  dd      }|j                  d|        |j                  d       |D ]+  }|j	                  |d      }
|j                  d| d|
        - |j                  d       | dj                  |      S c c}w )a  Generate a markdown report from benchmark results.

    Each result dict should contain at minimum a ``benchmark`` key identifying
    the benchmark name, plus any metric keys (e.g. ``ratio``,
    ``reduction_percent``, ``mrr``, ``precision``, ``recall``, ``f1``).

    Args:
        results: List of result dicts from benchmark runs.

    Returns:
        A markdown string containing a summary table and per-benchmark details.
    z5# Evaluation Report

No benchmark results to report.
# Evaluation Report 	benchmarkz
## Summaryz| Benchmark |  |  |z| --- | c              3      K   | ]  }d   ywz---N .0_s     k/home/jay/workspace/scripts/.codegraph-venv/lib/python3.12/site-packages/code_review_graph/eval/reporter.py	<genexpr>z+generate_markdown_report.<locals>.<genexpr>0   s     '@!'@   unknown-| z
## Detailsz### z- **z**: 
)appendsetaddjoingetstr)resultslinesall_keysseenrkheader	separatornamevaluesvs              r   generate_markdown_reportr*      s    IE	LL&'	LL HUD  	AKATM"	 
LL	LL

8 44t;FUZZ'@x'@@@4GI	LL	LL Auu[),.67#aeeAsm$77r$s^ejj&884?@A
 
LL 
LL	LL uu[),tD6]#R 	,AaALL4s$qc*+	, 	R 99U# 8s   "Hc                    g }t        | j                  d| d            D ]>  }t        |d      5 }t        j                  |      }|j                  |       ddd       @ |S # 1 sw Y   LxY w)z@Read all CSV files matching a prefix from the results directory.z*_z_*.csvr   )newlineN)sortedglobopencsv
DictReaderextend)results_dirprefixrowspfreaders         r   
_read_csvsr9   J   sv    !#DK$$r&%89:  !R  	 A^^A&FKK	  	   K	  	 s   'A$$A-	c                (   g }|j                  ddj                  |       z   dz          |j                  ddj                  d | D              z   dz          |D ](  }|j                  ddj                  |      z   dz          * dj                  |      S )z-Build a markdown table from headers and rows.r   r
   r   c              3      K   | ]  }d   ywr   r   r   s     r   r   z_md_table.<locals>.<genexpr>X   s     ":Q5":r   r   )r   r   )headersr5   r    rows       r   	_md_tabler>   T   s    E	LL

7++d23	LL

":'":::TAB 4TEJJsO+d23499U    c                p   t        |       } g }|j                  d       |j                  d       |j                  d       |j                  d       |j                  d       |j                  d       |j                  d       |j                  d       g d}|D ]  }t        | |      }|s|j                  dd	      j	                         }|j                  d
|        |j                  d       t        |d   j                               }|D cg c]"  }|D cg c]  }|j                  |d       c}$ }	}}|j                  t        ||	             |j                  d        t        |      dk  r"|j                  d       |j                  d       dj                  |      S c c}w c c}}w )ar  Generate a full markdown evaluation report from CSV result files.

    Reads all CSV files in *results_dir*, groups them by benchmark type,
    and produces a markdown report with methodology notes and per-benchmark
    result tables.

    Args:
        results_dir: Directory containing CSV result files.

    Returns:
        Markdown string with the full report.
    r   r   z## Methodologyz9Benchmarks are run against real open-source repositories.z=Token counts use a consistent `len(text) // 4` approximation.z1Impact accuracy uses graph edges as ground truth.)token_efficiencyimpact_accuracyflow_completenesssearch_qualitybuild_performancer    z## r   r      zNo benchmark results found.r   )r   r   r9   replacetitlelistkeysr   r>   lenr   )
r3   r    benchmark_typesbtyper5   rI   r<   r#   h
table_rowss
             r   generate_full_reportrQ   ^   sq    {#KE	LL&'	LL	LL!"	LL	LLLM	LLPQ	LLDE	LLO ! +u-c3'--/s5']#RtAw||~&@DE1g6quuQ}6E
EYw
34R 5zQ23R99U 7Es   	F2F-6F2-F2c                   t        |       } g }t        | d      }|r|j                  d       |j                  d       g d}g }|D ]  }|j                  |j                  dd      |j                  dd      |j                  dd      |j                  d	d      |j                  d
d      |j                  dd      |j                  dd      g        |j                  t	        ||             |j                  d       t        | d      }t        | d      }t        | d      }|s|s|r|j                  d       |j                  d       g d}i }	i }
|D ]7  }|j                  dd      |	j                  |j                  dd      i       d<   9 |D ]7  }|j                  dd      |	j                  |j                  dd      i       d<   9 |D ]a  }|j                  dd      }|	j                  |i        	 |
j                  |g       j                  t        |j                  dd                   c g }t        |	j                               D ]  \  }}|
j                  |g       }|r*t        t        t        |      t        |      z  d            nd}|j                  |t        |j                  dd            t        |j                  dd            |g        |j                  t	        ||             |j                  d       t        | d      }|r|j                  d       |j                  d       g d}g }|D ]h  }|j                  |j                  dd      |j                  dd      |j                  dd      |j                  dd      |j                  dd      g       j |j                  t	        ||             |j                  d       |syd j                  |      S # t        t        f$ r Y /w xY w)!a=  Generate concise README-ready tables from CSV result files.

    Produces three tables:
    - Table A: Token Efficiency
    - Table B: Accuracy & Quality
    - Table C: Performance

    Args:
        results_dir: Directory containing CSV result files.

    Returns:
        Markdown string with the three tables.
    rA   z### Token Efficiencyr   )RepoFileszNaive TokenszStandard TokenszGraph TokenszNaive/Graphz	Std/Graphrepor   changed_filesnaive_tokensstandard_tokensgraph_tokensnaive_to_graph_ratiostandard_to_graph_ratiorB   rC   rD   z### Accuracy & Quality)rS   z	Impact F1zFlow Recallz
Search MRRf1?recallreciprocal_rankr      rE   z### Performance)rS   rT   NodeszFlow Det. (s)zSearch (ms)
file_count
node_countflow_detection_secondssearch_avg_mszNo benchmark results found.
r   )r   r9   r   r   r>   
setdefaultfloat
ValueError	TypeErrorr-   itemsr   roundsumrL   r   )r3   r    te_rowsr<   rP   r#   ia_rowsfc_rowssq_rows	repo_data	mrr_accumrU   dmrr_valsmrrbp_rowss                   r   generate_readme_tablesrw      s    {#KE &89G+,R
 
 		Afc"os+nc*'-nc*,c2/5 		 	Yw
34R &78G&9:G&67G'W-.RD24	,.	 	RAABtSAQI  vs!3R8>	R 	ZAEFUU8UXEYI  vs!3R8B	Z 	A55%D  r*$$T2.55eAEEBSUV<W6XY		 
ioo/0 	GD! }}T2.H  E#h-#h-7;< 
 AEE$$%AEE(C()	 	 	Yw
34R &9:G&'RL
 	Afc"lC(lC(.4os+ 	 	Yw
34R.99US 	* s   6:O++O>=O>)r   zlist[dict[str, Any]]returnr   )r3   r   r4   r   rx   zlist[dict[str, str]])r<   z	list[str]r5   zlist[list[str]]rx   r   )r3   z
str | Pathrx   r   )__doc__
__future__r   r0   pathlibr   typingr   r*   r9   r>   rQ   rw   r   r?   r   <module>r}      s4    # 
  9x2jfr?   