| 7 | import argparse |
| 8 | |
class FileSplitter(object):
    """Split a text file into sorted block files.

    The input file is read in chunks of lines; the lines of each chunk are
    sorted and written to a file named after ``BLOCK_FILENAME_FORMAT``
    (a relative name, so it lands in the current working directory).
    The names of all files written are recorded in ``block_filenames``.
    """

    # Template for block file names; ``{0}`` is the zero-based block number.
    BLOCK_FILENAME_FORMAT = 'block_{0}.dat'

    def __init__(self, filename):
        """Remember the input path; nothing is opened until ``split()``.

        Args:
            filename: Path of the text file to split.
        """
        self.filename = filename
        self.block_filenames = []

    def write_block(self, data, block_number):
        """Write one block to disk and record its file name.

        Args:
            data: Text content of the block.
            block_number: Number substituted into ``BLOCK_FILENAME_FORMAT``.
        """
        filename = self.BLOCK_FILENAME_FORMAT.format(block_number)
        with open(filename, 'w') as block_file:
            block_file.write(data)
        self.block_filenames.append(filename)

    def get_block_filenames(self):
        """Return the list of block file names written so far."""
        return self.block_filenames

    def split(self, block_size, sort_key=None):
        """Split the input file into sorted blocks.

        Args:
            block_size: Size hint passed to ``readlines()``; it bounds how
                much of the input is read for each block.
            sort_key: Optional key function applied to each line when
                sorting. ``None`` sorts the lines by their natural order.
        """
        block_number = 0
        with open(self.filename) as source:
            while True:
                lines = source.readlines(block_size)
                if not lines:
                    break

                # A final line without a trailing newline would be glued to
                # whichever line follows it after sorting; terminate it first.
                if not lines[-1].endswith('\n'):
                    lines[-1] += '\n'

                # key=None is equivalent to a plain sort().
                lines.sort(key=sort_key)

                self.write_block(''.join(lines), block_number)
                block_number += 1

    def cleanup(self):
        """Delete every block file recorded in ``block_filenames``.

        Raises:
            OSError: If a recorded file cannot be removed.
        """
        # Imported locally so this method does not rely on a module-level
        # ``import os`` being present.
        import os

        # An explicit loop is required: on Python 3 ``map()`` is lazy, so
        # using it only for its side effects would remove nothing.
        for filename in self.block_filenames:
            os.remove(filename)
| 44 | |
| 45 | |
| 46 | class NWayMerge(object): |