(self, uri: str, max_samples: Optional[int] = -1,
return_idx: bool = True)
| 41 | self._total_consumed = 0 |
| 42 | |
def read(self, uri: str, max_samples: Optional[int] = -1,
         return_idx: bool = True):
    """Lazily decode and yield the records stored at ``uri``.

    The handle returned by ``self.__get_filehandle(uri)`` is used as a
    context manager and iterated item by item; every item is passed to
    ``self._obj_decoder.decode``. Items that fail to decode are reported
    through ``self._print`` and skipped; they do not count toward
    ``max_samples``.

    Args:
        uri: Location of the data to read.
        max_samples: Stop after this many successfully decoded records.
            ``None`` or any value ``<= 0`` (the default is ``-1``) means
            no limit.
        return_idx: When true, yield ``(idx, record)`` tuples where
            ``idx`` is the position of the item in the handle (skipped
            items still consume an index); otherwise yield only the
            record.

    Yields:
        ``(idx, record)`` or ``record``, depending on ``return_idx``.

    Raises:
        S3ReadError: Propagated unchanged.
        LocalReadError: Propagated unchanged.
        UnknownReadError: Wraps any other exception raised while opening
            or iterating the handle; the original exception is chained
            as ``__cause__``.
    """
    n_samples = 0

    try:
        with self.__get_filehandle(uri) as fh:
            for idx, obj in enumerate(fh):
                # Guard only the decode call: an exception injected at the
                # yield point by the consumer must not be reported (and
                # swallowed) as a sample read error.
                try:
                    record = self._obj_decoder.decode(obj)
                except Exception as e:
                    self._print(f"__SAMPLE_READ_ERROR__ {uri}/{idx}: "
                                f"{e.__class__.__name__}: {e}")
                    continue

                yield (idx, record) if return_idx else record
                n_samples += 1

                # ``None`` and non-positive values both mean "unlimited".
                if max_samples is not None and 0 < max_samples <= n_samples:
                    break
    except (S3ReadError, LocalReadError):
        # Known read errors propagate untouched; a bare ``raise`` keeps
        # the original traceback and needs no bound exception name.
        raise
    except Exception as e:
        raise UnknownReadError(f"unknown __URI_READ_ERROR__ {uri}: "
                               f"{e.__class__.__name__}: {e}") from e
| 72 | |
| 73 | def __get_filehandle(self, uri: str): |
| 74 | uri = urlparse(uri) |
no test coverage detected