(arr, fmt_meta, chunk_sizes, path_fmt, vector_rows=False)
| 11 | |
| 12 | |
def _chunk_numpy_array(arr, fmt_meta, chunk_sizes, path_fmt, vector_rows=False):
    """Write consecutive row slices of ``arr`` to separate files.

    One file is written per entry of ``chunk_sizes``; entry ``i`` covers the
    ``chunk_sizes[i]`` rows that follow the rows consumed by earlier entries.

    Parameters
    ----------
    arr
        Sliceable array exposing ``.shape`` on its slices.
    fmt_meta
        Keyword arguments forwarded to ``array_readwriter.get_array_parser``
        to obtain the writer used for each chunk.
    chunk_sizes
        Iterable of row counts, one per output file.
        NOTE(review): presumably sums to ``len(arr)``; this is not checked here.
    path_fmt
        ``%``-style format string that receives the chunk index.
    vector_rows
        Forwarded to the writer only when it is a ``ParquetArrayParser`` and
        the chunk has more than one column.

    Returns
    -------
    list
        Absolute paths of the written files, in chunk order.
    """
    written = []
    start = 0

    for idx, size in enumerate(chunk_sizes):
        out_path = os.path.abspath(path_fmt % idx)
        stop = start + size
        piece = arr[start:stop]
        dims = piece.shape
        logging.info("Chunking %d-%d" % (start, stop))

        # A fresh writer is requested for every chunk, as in the original flow.
        writer = array_readwriter.get_array_parser(**fmt_meta)

        # Only a Parquet writer handed a multi-column chunk receives the
        # caller's ``vector_rows`` preference; every other case uses the
        # writer's plain two-argument call.
        multi_column_parquet = (
            isinstance(writer, ParquetArrayParser)
            and len(dims) > 1
            and dims[1] > 1
        )
        extra_kwargs = {"vector_rows": vector_rows} if multi_column_parquet else {}
        writer.write(out_path, piece, **extra_kwargs)

        start = stop
        written.append(out_path)

    return written
| 36 | |
| 37 | |
| 38 | def _initialize_num_chunks(g, num_chunks, kwargs=None): |
no test coverage detected