MCPcopy Index your code
hub / github.com/togethercomputer/RedPajama-Data / _dload_file

Method _dload_file

app/src/token_count.py:139–156  ·  view source on GitHub ↗
(uri: ParseResult, client)

Source from the content-addressed store, hash-verified

137
@staticmethod
def _dload_file(uri: ParseResult, client) -> Tuple[DlStatus, io.BytesIO]:
    """Read the object addressed by *uri* fully into an in-memory buffer.

    The bucket is taken from ``uri.netloc`` and the key from ``uri.path``
    with leading slashes stripped. Any exception raised while fetching or
    reading is caught and reported through the returned status instead of
    propagating to the caller.

    Args:
        uri: Parsed location of the object to read.
        client: Object exposing ``get_object(Bucket=..., Key=...)`` whose
            result has a readable ``"Body"`` entry (presumably a boto3 S3
            client -- confirm against the caller).

    Returns:
        A ``(status, buffer)`` pair. On success ``buffer`` is an
        ``io.BytesIO`` holding the object's bytes; on failure ``buffer``
        is ``None`` and ``status.msg`` names the caught exception.
    """
    try:
        streaming_body = client.get_object(
            Bucket=uri.netloc, Key=uri.path.lstrip("/")
        )["Body"]
        try:
            buffer = io.BytesIO(streaming_body.read())
        finally:
            # Release the underlying response stream even when the read
            # fails part-way, rather than leaving it to garbage collection.
            streaming_body.close()
    except Exception as e:
        # Deliberate catch-all: failures are reported through the status
        # object so one unreadable object does not abort the caller.
        msg = (
            f"__S3_URI_READ_ERROR__ failed reading {uri.path}: "
            f"caught exception {e.__class__.__name__}: {e}"
        )
        buffer = None
        is_success = False
    else:
        msg = f"__S3_URI_READ_SUCCESS__ success reading {uri.path}"
        is_success = True

    # NOTE(review): if `uri` is a urllib.parse.ParseResult, str(uri) yields
    # the namedtuple repr rather than the URL (uri.geturl() would give the
    # URL) -- confirm what consumers of DlStatus.uri expect before changing.
    read_status = DlStatus(is_success=is_success, msg=msg, uri=str(uri))
    return read_status, buffer
157
158 def __load_input_ids(
159 self, snapshot: str

Callers 1

_handle_documents · Method · 0.95

Calls 2

DlStatus · Class · 0.85
read · Method · 0.80

Tested by

no test coverage detected