MCPcopy
hub / github.com/huggingface/datasets / test_get_data_files_patterns

Function test_get_data_files_patterns

tests/test_data_files.py:653–682  ·  view source on GitHub ↗
(base_path, data_file_per_split)

Source from the content-addressed store, hash-verified

651 ],
652)
def test_get_data_files_patterns(base_path, data_file_per_split):
    """Check the per-split patterns returned by `_get_data_files_patterns`.

    Every entry of `data_file_per_split` (a single path or a list of paths)
    is placed under `base_path` in a filesystem built with `mock_fs`. The
    patterns returned for each split must then resolve, in order, to exactly
    that split's files expressed relative to `base_path`, and the splits must
    come back in the same order as in `data_file_per_split`.
    """
    # Prefix that places a relative path under base_path; a "/" is inserted
    # only when base_path is non-empty and does not already end with one.
    needs_separator = bool(base_path) and base_path[-1] != "/"
    base_prefix = base_path + ("/" if needs_separator else "")

    # Normalize each split to a list of paths located under base_path.
    files_by_split = {}
    for split_name, entries in data_file_per_split.items():
        if not isinstance(entries, list):
            entries = [entries]
        files_by_split[split_name] = [base_prefix + entry for entry in entries]

    all_files = [path for paths in files_by_split.values() for path in paths]
    fs = mock_fs(all_files)()

    def relative_to_base(path):
        # Drop the (protocol-stripped) base path and any leading slashes.
        return path[len(fs._strip_protocol(base_path)) :].lstrip("/")

    def resolver(pattern):
        # Glob under base_path and keep only files, relative to base_path.
        found = []
        for candidate in fs.glob(base_prefix + pattern):
            if fs.isfile(candidate):
                found.append(relative_to_base(candidate))
        return found

    patterns_per_split = _get_data_files_patterns(resolver)
    # Compare as lists so that the order of the splits is checked too.
    assert list(patterns_per_split.keys()) == list(files_by_split)
    for split_name, patterns in patterns_per_split.items():
        matched = []
        for pattern in patterns:
            matched.extend(resolver(pattern))
        expected = [relative_to_base(fs._strip_protocol(path)) for path in files_by_split[split_name]]
        assert matched == expected
683
684
685def test_get_data_patterns_from_directory_with_the_word_data_twice(tmp_path):

Callers

nothing calls this directly

Calls 6

_get_data_files_patternsFunction · 0.90
mock_fsFunction · 0.85
resolverFunction · 0.85
itemsMethod · 0.80
DummyTestFSClass · 0.70
_strip_protocolMethod · 0.45

Tested by

no test coverage detected