()
| 192 | |
| 193 | |
def main() -> None:
    """Command-line entry point: propose an improved skill description.

    Parses the CLI arguments, verifies that the skill directory contains a
    ``SKILL.md``, loads the eval results JSON (and, when ``--history`` is
    given, the JSON list of previous attempts), calls
    ``improve_description()`` and prints a JSON object to stdout with:

    * ``description`` -- the newly proposed description;
    * ``history`` -- the loaded history extended by one entry recording the
      description that was just evaluated together with its pass/fail
      counts and per-query results.

    With ``--verbose`` the current description, its score and the improved
    description are additionally written to stderr, keeping stdout reserved
    for the JSON payload.

    Exits with status 1 when ``SKILL.md`` is missing from the skill path.
    """
    parser = argparse.ArgumentParser(description="Improve a skill description based on eval results")
    parser.add_argument("--eval-results", required=True, help="Path to eval results JSON (from run_eval.py)")
    parser.add_argument("--skill-path", required=True, help="Path to skill directory")
    parser.add_argument("--history", default=None, help="Path to history JSON (previous attempts)")
    parser.add_argument("--model", required=True, help="Model for improvement")
    parser.add_argument("--verbose", action="store_true", help="Print thinking to stderr")
    args = parser.parse_args()

    skill_path = Path(args.skill_path)
    if not (skill_path / "SKILL.md").exists():
        print(f"Error: No SKILL.md found at {skill_path}", file=sys.stderr)
        sys.exit(1)

    # JSON text is UTF-8; decode explicitly rather than relying on the
    # platform's locale encoding, which breaks on non-ASCII content when the
    # locale is not UTF-8.
    eval_results = json.loads(Path(args.eval_results).read_text(encoding="utf-8"))
    history = []
    if args.history:
        history = json.loads(Path(args.history).read_text(encoding="utf-8"))

    # The middle element returned by parse_skill_md() is ignored: the
    # description being improved is the one recorded in the eval results.
    name, _, content = parse_skill_md(skill_path)
    current_description = eval_results["description"]

    if args.verbose:
        print(f"Current: {current_description}", file=sys.stderr)
        print(f"Score: {eval_results['summary']['passed']}/{eval_results['summary']['total']}", file=sys.stderr)

    new_description = improve_description(
        skill_name=name,
        skill_content=content,
        current_description=current_description,
        eval_results=eval_results,
        history=history,
        model=args.model,
    )

    if args.verbose:
        print(f"Improved: {new_description}", file=sys.stderr)

    # Output as JSON with both the new description and updated history.
    # The appended entry describes the attempt that was just evaluated (the
    # current description), not the new proposal, which has no results yet.
    summary = eval_results["summary"]
    attempt = {
        "description": current_description,
        "passed": summary["passed"],
        "failed": summary["failed"],
        "total": summary["total"],
        "results": eval_results["results"],
    }
    output = {
        "description": new_description,
        "history": history + [attempt],
    }
    print(json.dumps(output, indent=2))
| 244 | |
| 245 | |
| 246 | if __name__ == "__main__": |
no test coverage detected