add cmdline flags for configuring datasets
()
| 226 | return [s/final_sum for s in splits] |
| 227 | |
def configure_data():
    """Add cmdline flags for configuring datasets.

    Returns:
        The object produced by ``DataConfig(defaults=...)``, seeded with the
        internal default options listed below.
    """
    # Options consumed by data_utils that are either deprecated or
    # deliberately hidden from the command-line user; specific scripts are
    # expected to set them in code instead.
    internal_defaults = dict(
        world_size=1,
        rank=-1,
        persist_state=0,
        lazy=False,
        transpose=False,
        data_set_type='supervised',
        seq_length=256,
        eval_seq_length=256,
        samples_per_shard=100,
    )

    return DataConfig(defaults=internal_defaults)
no test coverage detected