(fpath)
| 11 | |
| 12 | |
def get_cache_mapping(fpath):
    """Parse the dataset table in a markdown file into a lookup dict.

    The table is located by its ``| Dataset ID |`` header cell. The header
    line and the line right after it are skipped; every following non-empty
    line is split on ``|`` and read as one table row.

    For each row the key is the text inside the first ``[...]`` of column 1,
    or of column 6 when column 1 is exactly ``-``. The value is the two
    cells in columns 3 and 4 (a list), or the tuple
    ``('huge dataset', '-')`` when column 3 is exactly ``-``.

    Lines that cannot be parsed as a row (too few cells, or no ``[...]``
    in the key cell -- e.g. prose following the table) are skipped rather
    than raising.

    Args:
        fpath: Path of the markdown file, read as UTF-8.

    Returns:
        dict mapping dataset_id -> (dataset_size, stat). Empty when the
        header is not present in the file.
    """
    with open(fpath, 'r', encoding='utf-8') as f:
        content = f.read()

    header_pos = content.find('| Dataset ID |')
    if header_pos == -1:
        # No table in this file: nothing to map.
        return {}

    # Drop the header line and the line that follows it.
    rows = content[header_pos:].split('\n')[2:]
    cache_mapping = {}  # dataset_id -> (dataset_size, stat)
    for row in rows:
        if not row:
            continue
        items = row.split('|')
        if len(items) < 5:
            # Not enough cells to hold the key and both stat columns.
            continue
        key_cell = items[1]
        if key_cell == '-':
            # Column 1 is a placeholder; fall back to column 6 for the id.
            if len(items) < 7:
                continue
            key_cell = items[6]
        match = re.search(r'\[(.+?)\]', key_cell)
        if match is None:
            # No bracketed id in the key cell: not a usable row.
            continue
        stat = items[3:5]
        if stat[0] == '-':
            stat = ('huge dataset', '-')
        cache_mapping[match.group(1)] = stat
    return cache_mapping
| 31 | |
| 32 | |
| 33 | def get_dataset_id(key): |
no test coverage detected