MCPcopy
hub / github.com/MinishLab/semble / _grep_file_units

Function _grep_file_units

benchmarks/token_efficiency.py:107–123  ·  view source on GitHub ↗

Return whole matched files in match-count order.

(
    pattern: str,
    repo_dir: Path,
)

Source from the content-addressed store, hash-verified

105
106
def _grep_file_units(
    pattern: str,
    repo_dir: Path,
) -> list[Chunk]:
    """Return every matched file, whole, ordered by how often it matched.

    The matches reported by ``_rg_matches`` are truncated to the first
    ``_RG_MAX_MATCHES`` entries before counting. Files are then ordered by
    descending match count, with ties broken by ascending path. Each file is
    read as UTF-8 (undecodable bytes are replaced) and wrapped in a single
    ``Chunk`` spanning the whole file; files that raise ``OSError`` on read
    are silently skipped.
    """
    hits = _rg_matches(pattern, repo_dir)
    if not hits:
        return []

    # Count matches per file, considering only the capped prefix of the hits.
    per_file = Counter(file_path for file_path, _ in hits[:_RG_MAX_MATCHES])
    # Most-matched files first; the path itself keeps ties deterministic.
    ordered_paths = sorted(per_file, key=lambda p: (-per_file[p], p))

    file_chunks: list[Chunk] = []
    for file_path in ordered_paths:
        try:
            body = Path(file_path).read_text(encoding="utf-8", errors="replace")
        except OSError:
            # Best effort: a file that cannot be read is simply left out.
            continue
        else:
            last_line = body.count("\n") + 1
            file_chunks.append(
                Chunk(content=body, file_path=file_path, start_line=1, end_line=last_line)
            )
    return file_chunks
124
125
126def _keywords(query: str) -> list[str]:

Callers 2

Calls 2

ChunkClass · 0.90
_rg_matchesFunction · 0.85

Tested by

no test coverage detected