
    i;3                       U d Z ddlmZ ddlZddlZddlZddlZddlZddlm	Z	m
Z
 ddlmZ ddlmZ g dg dg d	g d
g ddZded<   e	 G d d             Ze	 G d d             Zd$dZd%dZd&dZd&dZd&dZd&dZd'dZd(dZd)dZd*dZd+dZd*dZd,dZd-d Z d.d!Z!d/d0d"Z"e#d#k(  r ejH                   e"              yy)1a  LLM-as-judge skill quality evaluation framework.

Evaluates SKILL.md files against 5 standard sections using rule-based scoring.
Optionally supports LLM evaluation (stub only, requires --api-key).

Usage:
    python3 skill-judge.py /path/to/SKILL.md
    python3 skill-judge.py --dir ~/.claude/skills/
    python3 skill-judge.py --use-llm /path/to/SKILL.md
    python3 skill-judge.py --format json /path/to/SKILL.md
    python3 skill-judge.py --format summary --dir ~/.claude/skills/
    )annotationsN)	dataclassfield)Path)Any)descriptionoverviewu   개요u   설명)zwhen to usewhen_to_usetriggeru	   트리거u   사용 시점u   사용시점)instructionsinstructionrulesruleworkflowu   지침u   규칙)zoutput formatoutput_formatoutputu   출력 형식u   출력형식u	   결과물u   결과)examplesexampleu   예시u   예제)r   r
   r   r   r   zdict[str, list[str]]SECTION_ALIASESc                  X    e Zd ZU dZded<   ded<   ded<   ded<    ed	d
      Zded<   y)SectionScorez$Score for a single SKILL.md section.strnameboolfoundintscorelength F)defaultreprcontentN)__name__
__module____qualname____doc____annotations__r   r"        D/home/jay/workspace/.worktrees/task-2116-dev1/scripts/skill-judge.pyr   r   M   s*    .
IKJK%0GS0r)   r   c                  f    e Zd ZU dZded<   ded<   ded<   ded<   ded	<   ded
<   ded<   dZded<   y)SkillResultz-Evaluation result for a single SKILL.md file.r   
skill_pathr   total_scoregradezdict[str, SectionScore]sections	penaltiesbonusesmoder   messageN)r#   r$   r%   r&   r'   r4   r(   r)   r*   r,   r,   X   s4    7OJ%%NL
IGSr)   r,   c                >    | j                         j                         S )z6Lower-case and strip whitespace from a heading string.)striplower)texts    r*   _normalize_headingr9   k   s    ::<r)   c                v    t        |       }t        j                         D ]  \  }}|D ]  }||k(  s	|c c S   y)z8Return the canonical section key for a heading, or None.N)r9   r   items)heading
normalizedkeyaliasesaliass        r*   _match_section_keyrA   p   sI    #G,J'--/ W 	EU"
	 r)   c                
    d| v S )Nz```r(   r"   s    r*   _has_code_blockrD   z   s    Gr)   c                ^    t        t        j                  d| t        j                              S )Nz^\s*[-*+]\sr   research	MULTILINErC   s    r*   	_has_listrJ   ~       		.'2<<@AAr)   c                ^    t        t        j                  d| t        j                              S )Nz^\s*\d+\.\srF   rC   s    r*   _has_numbered_listrM      rK   r)   c                L    t        |       xs t        |       xs t        |       S )zFReturn True if content contains code blocks, lists, or numbered lists.)rD   rJ   rM   rC   s    r*   _has_specificityrO      s#    7#Xy'9X=OPW=XXr)   c                   t         D ci c]	  }|dddd }}| j                         s|S | j                  dd      j                  dd      }|j                         }g }t	        |      D ]^  \  }}t        j                  d|      }|s|j                  d	      j                         }	t        |	      }|L|j                  ||f       ` t	        |      D ]y  \  }
\  }}|d	z   }|
d	z   t        |      k  r||
d	z      d   n
t        |      }||| }dj                  |      j                         }d
||   d<   |||   d<   t        |      ||   d<   { |S c c}w )a  Parse a SKILL.md content string into a dict keyed by canonical section names.

    Returns a dict with keys: description, when_to_use, instructions,
    output_format, examples. Each value is a dict with:
        found (bool), length (int), content (str).
    Fr   r   )r   r   r"   z

z^##\s+(.+)$   Tr   r"   r   )r   r6   replace
splitlines	enumeraterG   matchgrouprA   appendlenjoin)r"   r>   result
normalisedlinessection_positionsidxlinemheading_textposline_idxstartendsection_linessection_contents                   r*   parse_skill_sectionsrj      s    gv(v_bWY.Z)Z(vF(v==? .66tTBJ !!#E 02u% 5	THH^T*771:++-L$\2C!((#s45 !**; < 5_h1/2Qw=N9O/Oa(+UXY^U_eC())M288:#sG!0sI #O 4sH5 MA )ws   Ec                    |st        | dddd      S d}t        |      }|dz  }|dk\  r|dz  }t        |      r|dz  }|dk\  r|dz  }t        | d|||      S )	a  Score a single section's content on a 0-20 scale.

    Scoring breakdown:
        +5  section exists (content non-empty)
        +5  content >= 50 chars
        +5  contains code block, list, or numbered list (specificity)
        +5  content >= 200 chars (completeness)
    Fr   r   )r   r   r   r   r"      2      T)r   rZ   rO   )r   r"   r   r   s       r*   score_sectionro      s~     U!ArRRE\F 
QJE |
  
 }
TU6SZ[[r)   c                4    | dk\  ry| dk\  ry| dk\  ry| dk\  ryy	)
z*Convert a numeric score to a letter grade.Z   AK   B<   C-   DFr(   r.   s    r*   compute_grader{      s1    b						r)   c           	        t        |       }|j                         st        d|        |j                  d      }t	        |      }t        |      }i }d}t        D ]*  }||   }t        ||d         }	|	||<   ||	j                  z  }, d}
d}|dk  rd}
|dk\  rd	}t        d||
z   |z         }||
z   |z   }t        |      }t        t        |j                               ||||
|d
      S )a#  Evaluate a single SKILL.md file using rule-based scoring.

    Args:
        skill_path: Absolute or relative path to a SKILL.md file.

    Returns:
        SkillResult with total_score, grade, per-section scores, etc.

    Raises:
        FileNotFoundError: If the file does not exist.
    zSKILL.md not found: zutf-8)encodingr   r"   i  ii'  rl   z
rule-basedr-   r.   r/   r0   r1   r2   r3   )r   existsFileNotFoundError	read_textrZ   rj   r   ro   r   maxr{   r,   r   resolve)r-   pathr"   	total_lenparsedr0   	raw_scorer>   section_datassr1   r2   r.   r/   s                 r*   evaluate_skillr      s    
D;;="6zl CDDnngn.GGI!'*F(*HI c{3Y 78RXX		 IG3	EaY.89K
 i''1K+&Et||~& r)   c                   t        |       }|j                         st        d|        |j                         st	        d|        g }t        |j                  d            D ]%  }|j                  t        t        |                   ' |S )aW  Evaluate all SKILL.md files found recursively under dir_path.

    Args:
        dir_path: Path to a directory containing skill subdirectories.

    Returns:
        List of SkillResult, one per SKILL.md found.

    Raises:
        FileNotFoundError: If dir_path does not exist.
        NotADirectoryError: If dir_path is not a directory.
    zDirectory not found: zNot a directory: zSKILL.md)
r   r   r   is_dirNotADirectoryErrorsortedrglobrY   r   r   )dir_pathdpathresultsskill_mds       r*   evaluate_directoryr   #  s     NE<<>"7z BCC<<> #4XJ!?@@!#G5;;z23 6~c(m456 Nr)   c           
     l    t        |       }t        t        |j                               ddi dddd      S )a  LLM evaluation stub.

    Returns a SkillResult with mode='llm-stub' and an explanatory message.
    Does NOT make any API calls (to avoid cost).

    Args:
        skill_path: Path to a SKILL.md file.

    Returns:
        SkillResult with mode='llm-stub'.
    r   ry   zllm-stubzSLLM evaluation requires --api-key. Pass --api-key <key> to enable real LLM scoring.)r-   r.   r/   r0   r1   r2   r3   r4   )r   r,   r   r   )r-   r   s     r*   llm_evaluater   =  s>     
Dt||~&e	 	r)   c                ^   i }| j                   j                         D ],  \  }}|j                  |j                  |j                  d||<   . | j
                  | j                  | j                  || j                  | j                  | j                  d}| j                  r| j                  |d<   |S )z2Convert a SkillResult to a JSON-serialisable dict.)r   r   r   r~   r4   )r0   r;   r   r   r   r-   r.   r/   r1   r2   r3   r4   )r\   sections_dictr>   r   ds        r*   result_to_dictr   V  s    /1M??((* 
RXXXXii
c
 ''))!%%>>A ~~~~)Hr)   c           	        t        dd        t        dddddddd       t        d	        | D ]v  }t        |j                        j                  j                  xs t        |j                        j                  }t        |dd|j
                  dd|j                  d       x t        d        t        d
t        |        d       | r.t        d | D              t        |       z  }t        d|d       yy)z4Print a human-readable summary for multiple results.rQ   z<============================================================Skill40 Scorez>6Gradez<------------------------------------------------------------zTotal: z skills evaluatedc              3  4   K   | ]  }|j                     y wNrz   ).0rs     r*   	<genexpr>z!_print_summary.<locals>.<genexpr>~  s     1A!--1s   zAverage score: z.1fN)	printr   r-   parentr   r.   r/   rZ   sum)r   r   r   avgs       r*   _print_summaryr   s  s    	Bvh-	WRL'"Qwrl
34	VH <ALL!((--Hall1C1H1Hb	1==,Aaggb\:;< 
VH	GCL>!2
34111CL@Cy)* r)   c                    t        d| j                          t        d| j                   d| j                   d| j                          | j
                  rt        d| j
                          t        d| j                   d| j                          t        d       | j                  j                         D ]C  \  }}|j                  rd	nd
}t        d|dd|dd|j                  dd|j                          E y)z;Print a detailed human-readable report for a single result.z
Skill: zScore: z	  Grade: z  Mode: z	Message: zPenalties: z  Bonuses: z

Sections:FOUNDMISSINGz  20z [7z] score=2z	  length=N)r   r-   r.   r/   r3   r4   r1   r2   r0   r;   r   r   r   )r\   r>   r   statuss       r*   _print_detailr     s    	If''(
)*	GF&&'yhv{{m
TU~~	&..)*+	K(()V^^4D
EF	-??((* QRHH)3r("VAJhrxxl)BII;OPQr)   c                6   t        j                  d      }|j                  ddd       |j                  dd	       |j                  d
ddd       |j                  ddd       |j                  dg ddd       |j                  |       }g }|j                  rt        |j                        }nW|j                  r:|j                  rt        |j                        g}n(t        |j                        g}n|j                          y|j                  dk(  rO|D cg c]  }t        |       }}t        |      dk(  r|d   n|}t        t        j                   |dd             y|j                  dk(  rt#        |       y|D ]  }t%        |        yc c}w )Nz3Evaluate SKILL.md quality using rule-based scoring.)r   r   ?zPath to a SKILL.md file)nargshelpz--dirz)Directory containing skill subdirectories)r   z	--use-llm
store_trueFz2Use LLM evaluation (stub only, requires --api-key))actionr    r   z	--api-keyr   z2API key for LLM evaluation (not used in stub mode))r    r   z--format)jsonsummarydetailr   zOutput format (default: detail))choicesr    r   rS   r   r      )ensure_asciiindentr   )argparseArgumentParseradd_argument
parse_argsdirr   r   use_llmr   r   
print_helpformatr   rZ   r   r   dumpsr   r   )argvparserargsr   r   dictsr   s          r*   mainr     s   $$1fgF
c0IJ
&QR
A	   A  
 -.	   T"D!#Gxx$TXX.	<<#DII./G%dii01G{{f,34q"44 Z1_q%djjeA>?  
		!w
   	A!	  5s   F__main__)r8   r   returnr   )r<   r   r   z
str | None)r"   r   r   r   )r"   r   r   zdict[str, dict[str, Any]])r   r   r"   r   r   r   )r.   r   r   r   )r-   r   r   r,   )r   r   r   list[SkillResult])r\   r,   r   zdict[str, Any])r   r   r   None)r\   r,   r   r   r   )r   zlist[str] | Noner   r   )%r&   
__future__r   r   r   osrG   sysdataclassesr   r   pathlibr   typingr   r   r'   r   r,   r9   rA   rD   rJ   rM   rO   rj   ro   r{   r   r   r   r   r   r   r   r#   exitr(   r)   r*   <module>r      s    #   	 	 
 (  C')% '^ 1 1 1 
 
 
$ 
BBY(V\B6r42:+
Q/d zCHHTV r)   