MCPcopy Index your code
hub / github.com/AsyncFuncAI/deepwiki-open / should_process_file

Function should_process_file

api/data_pipeline.py:243–310  ·  view source on GitHub ↗

Determine if a file should be processed based on inclusion/exclusion rules. Args: file_path (str): the file path to check; use_inclusion (bool): whether to use inclusion mode; included_dirs (List[str]): list of directories to include; … (summary truncated; the full docstring appears in the source listing below)

(file_path: str, use_inclusion: bool, included_dirs: List[str], included_files: List[str],
                           excluded_dirs: List[str], excluded_files: List[str])

Source from the content-addressed store, hash-verified

241 logger.info(f"Reading documents from {path}")
242
243 def should_process_file(file_path: str, use_inclusion: bool, included_dirs: List[str], included_files: List[str],
244 excluded_dirs: List[str], excluded_files: List[str]) -> bool:
245 """
246 Determine if a file should be processed based on inclusion/exclusion rules.
247
248 Args:
249 file_path (str): The file path to check
250 use_inclusion (bool): Whether to use inclusion mode
251 included_dirs (List[str]): List of directories to include
252 included_files (List[str]): List of files to include
253 excluded_dirs (List[str]): List of directories to exclude
254 excluded_files (List[str]): List of files to exclude
255
256 Returns:
257 bool: True if the file should be processed, False otherwise
258 """
259 file_path_parts = os.path.normpath(file_path).split(os.sep)
260 file_name = os.path.basename(file_path)
261
262 if use_inclusion:
263 # Inclusion mode: file must be in included directories or match included files
264 is_included = False
265
266 # Check if file is in an included directory
267 if included_dirs:
268 for included in included_dirs:
269 clean_included = included.strip("./").rstrip("/")
270 if clean_included in file_path_parts:
271 is_included = True
272 break
273
274 # Check if file matches included file patterns
275 if not is_included and included_files:
276 for included_file in included_files:
277 if file_name == included_file or file_name.endswith(included_file):
278 is_included = True
279 break
280
281 # If no inclusion rules are specified for a category, allow all files from that category
282 if not included_dirs and not included_files:
283 is_included = True
284 elif not included_dirs and included_files:
285 # Only file patterns specified, allow all directories
286 pass # is_included is already set based on file patterns
287 elif included_dirs and not included_files:
288 # Only directory patterns specified, allow all files in included directories
289 pass # is_included is already set based on directory patterns
290
291 return is_included
292 else:
293 # Exclusion mode: file must not be in excluded directories or match excluded files
294 is_excluded = False
295
296 # Check if file is in an excluded directory
297 for excluded in excluded_dirs:
298 clean_excluded = excluded.strip("./").rstrip("/")
299 if clean_excluded in file_path_parts:
300 is_excluded = True

Callers 1

read_all_documents · Function · 0.85

Calls

no outgoing calls

Tested by

no test coverage detected