MCPcopy Index your code
hub / github.com/idank/explainshell / classify

Method classify

explainshell/extraction/prefilter.py:133–187  ·  view source on GitHub ↗
(self, gz_path: str)

Source from the content-addressed store, hash-verified

131 self._filter_index = self.s.extractor_info_index()
132
def classify(self, gz_path: str) -> Decision:
    """Decide how the gzipped file at *gz_path* should be handled.

    The checks below run in a fixed order and the first one that applies
    determines the returned decision:

    1. Size gate (only when ``small_only`` or ``large_only`` is set):
       ``SizeSkip`` when the file size falls on the wrong side of
       ``size_threshold``.
    2. Symlink: ``Symlink`` when *gz_path* is a symbolic link whose
       resolved target maps to a different source than the link itself.
    3. Overwrite with filter specs: when the filter index has a row for
       this source, ``Work`` if the row matches the filter specs and
       ``FilterSkip`` otherwise.
    4. No overwrite: ``AlreadyStored`` when the store already has this
       source.
    5. Content dedup: ``ContentDup`` when the file's dedup key has
       already been recorded on this instance; otherwise the key is
       recorded and ``Work`` is returned.

    Args:
        gz_path: Filesystem path of the file to classify.

    Returns:
        One of the decision objects listed above.
    """
    source = config.source_from_path(gz_path)

    # Size gate: the file is only stat'ed when one of the size modes is on.
    if self.small_only or self.large_only:
        nbytes = os.path.getsize(gz_path)
        limit = self.size_threshold
        if self.small_only and nbytes > limit:
            return SizeSkip(gz_path, source, nbytes, limit, "small")
        if self.large_only and nbytes <= limit:
            return SizeSkip(gz_path, source, nbytes, limit, "large")

    # A link that resolves to the same source is treated like a plain file.
    if os.path.islink(gz_path):
        target = os.path.realpath(gz_path)
        target_source = config.source_from_path(target)
        if target_source != source:
            already_in_db = self.s.has_manpage_source(source)
            target_is_input = target in self.normalized_inputs
            return Symlink(
                gz_path=gz_path,
                short_path=source,
                canonical_source=target_source,
                stale_in_db=already_in_db,
                canonical_in_inputs=target_is_input,
            )

    if self.overwrite:
        if self.filter_specs:
            row = self._filter_index.get(source)
            if row is not None:
                extractor, meta = row
                # Both outcomes return before the dedup step on purpose:
                # _hash_to_canonical is left unseeded so that a sibling
                # with the same hash cannot silently alias onto this
                # row's stale parsed_manpages.
                if not _matches_filter(self.filter_specs, extractor, meta):
                    # The row does not match the filter: keep its data.
                    return FilterSkip(
                        gz_path=gz_path,
                        short_path=source,
                        stored_extractor=extractor,
                        stored_model=meta.model,
                    )
                # The row matches the filter: queue it for re-extraction.
                return Work(gz_path, source)
    elif self.s.has_manpage_source(source):
        return AlreadyStored(gz_path, source)

    # Content dedup: the first source seen for a key becomes the canonical
    # one; later files with the same key are reported as duplicates of it.
    dedup_key = _dedup_key(common.gz_sha256(gz_path), source)
    first_seen = self._hash_to_canonical.get(dedup_key)
    if first_seen is None:
        self._hash_to_canonical[dedup_key] = source
        return Work(gz_path, source)
    return ContentDup(gz_path, source, first_seen)
188
189
190# ---------------------------------------------------------------------------

Calls 9

SizeSkipClass · 0.85
SymlinkClass · 0.85
_matches_filterFunction · 0.85
WorkClass · 0.85
FilterSkipClass · 0.85
AlreadyStoredClass · 0.85
_dedup_keyFunction · 0.85
ContentDupClass · 0.85
has_manpage_sourceMethod · 0.45