Parse a skill directory into structured data.
Signature: parse_skill(skill_dir: Path) -> ParsedSkill
| 57 | |
| 58 | |
| 59 | def parse_skill(skill_dir: Path) -> ParsedSkill: |
| 60 | """Parse a skill directory into structured data.""" |
| 61 | skill_md = skill_dir / "SKILL.md" |
| 62 | if not skill_md.exists(): |
| 63 | raise FileNotFoundError(f"No SKILL.md found in {skill_dir}") |
| 64 | |
| 65 | content = skill_md.read_text(encoding="utf-8") |
| 66 | frontmatter, body = _split_frontmatter(content) |
| 67 | lines = body.strip().split("\n") |
| 68 | |
| 69 | h2_count = sum(1 for line in lines if re.match(r"^## ", line)) |
| 70 | h3_count = sum(1 for line in lines if re.match(r"^### ", line)) |
| 71 | |
| 72 | code_blocks = re.findall(r"```(\w*)", content) |
| 73 | code_block_languages = [lang for lang in code_blocks if lang] |
| 74 | |
| 75 | lower_body = body.lower() |
| 76 | has_examples = bool(re.search(r"(## example|### example|## usage)", lower_body)) |
| 77 | has_troubleshooting = bool(re.search(r"(## troubleshoot|## common issue|## faq)", lower_body)) |
| 78 | |
| 79 | refs_dir = skill_dir / "references" |
| 80 | assets_dir = skill_dir / "assets" |
| 81 | reference_files = ( |
| 82 | [f.name for f in refs_dir.iterdir() if f.is_file()] if refs_dir.exists() else [] |
| 83 | ) |
| 84 | asset_files = ( |
| 85 | [f.name for f in assets_dir.iterdir() if f.is_file()] if assets_dir.exists() else [] |
| 86 | ) |
| 87 | |
| 88 | total_lines = len(content.split("\n")) |
| 89 | for ref_file in reference_files: |
| 90 | ref_path = refs_dir / ref_file |
| 91 | total_lines += len(ref_path.read_text(encoding="utf-8").split("\n")) |
| 92 | |
| 93 | must_pattern = re.compile(r"\b(MUST|NEVER|ALWAYS)\b") |
| 94 | must_count = len(must_pattern.findall(content)) |
| 95 | |
| 96 | cross_refs = re.findall(r"(?:skill|skills)/([a-z0-9-]+)", body) |
| 97 | |
| 98 | return ParsedSkill( |
| 99 | path=skill_dir, |
| 100 | name=frontmatter.get("name", skill_dir.name), |
| 101 | description=frontmatter.get("description", ""), |
| 102 | line_count=len(content.split("\n")), |
| 103 | h2_count=h2_count, |
| 104 | h3_count=h3_count, |
| 105 | code_block_count=len(code_blocks), |
| 106 | code_block_languages=code_block_languages, |
| 107 | has_examples=has_examples, |
| 108 | has_troubleshooting=has_troubleshooting, |
| 109 | has_references=refs_dir.exists(), |
| 110 | has_assets=assets_dir.exists(), |
| 111 | reference_files=reference_files, |
| 112 | asset_files=asset_files, |
| 113 | total_content_lines=total_lines, |
| 114 | must_never_always_count=must_count, |
| 115 | cross_references=cross_refs, |
| 116 | raw_content=content, |