MCPcopy
hub / github.com/mosaicml/composer / upload_file

Function upload_file

composer/checkpoint/upload.py:136–213  ·  view source on GitHub ↗

Standalone function for uploading a checkpoint file. This function does not actually upload the checkpoint itself; it initiates the RemoteUploader's upload of it. Args: source_path (str): The path to the file to upload. dest_dir (str): The directory/URI to upload the file to.

(
    dest_dir: str,
    source_path: Optional[str]=None,
    symlink_granularity: Optional[str]=None, # file, dir, or None
    symlink_name: Optional[str]='latest.symlink',
    async_upload: bool = True,
    state: Optional[State] = None,
    overwrite: bool = False,
)

Source from the content-addressed store, hash-verified

134
135
136def upload_file(
137 dest_dir: str,
138 source_path: Optional[str]=None,
139 symlink_granularity: Optional[str]=None, # file, dir, or None
140 symlink_name: Optional[str]='latest.symlink',
141 async_upload: bool = True,
142 state: Optional[State] = None,
143 overwrite: bool = False,
144):
145 """Standalone function for uploading a checkpoint file.
146
147 This function does not actually upload the checkpoint; it initiates the RemoteUploader's uploading of it
148 Args:
149 source_path (str): The path to the file to upload.
150 dest_dir (str): The directory/uri to upload the file to.
151 symlink_granularity (Optional[str]): The granularity to use for symlinking. One of 'file', 'dir', or None.
152 if None: no symlink uploaded
153 if 'file': command remoteuploader to wait until the file (specificied by source_path) is uploaded and then uploads a symlink pointing to the uploaded file
154 if 'dir': command remoteuploader to wait until all files across all ranks are uploaded to dest_dir and then uploads a symlink
155 pointing to the remote directory (prefix in object_store terminology).
156 symlink_name (Optional[str]): The name to use for the symlink. Defaults to 'latest.symlink'.
157 async_upload (bool): If True, the uploads will be done asynchronously via the RemoteUploader and this function will return immediately.
158 state (Optional[State]): If async_upload is True, then state must be specified so that the remote_uploader can be
159 either extracted from state.callbacks or initialized and added to state.callbacks.
160 overwrite (bool): If allow overwrite existing remote checkpoint files
161 """
162 remote_uploader = RemoteUploader(remote_folder=dest_dir)
163 dest_path = ''
164 if source_path is not None:
165 _, _, dest_path = parse_uri(dest_dir)
166 remote_file_name = os.path.join(dest_path, os.path.basename(source_path))
167 else:
168 remote_file_name = None
169 all_remote_file_names = dist.all_gather_object([remote_file_name] if remote_file_name is not None else [])
170 remote_file_names = []
171 for filenames in all_remote_file_names:
172 remote_file_names += filenames
173
174 if source_path is not None:
175 assert remote_uploader is not None
176 assert remote_file_name is not None
177 remote_uploader.upload_file_async(
178 remote_file_name=remote_file_name,
179 file_path=pathlib.Path(source_path),
180 overwrite=overwrite,
181 )
182 symlink_remote_file_name = None
183 if dist.get_global_rank() == 0:
184 if symlink_name is not None:
185 symlink_remote_file_name = os.path.join(dest_path, symlink_name)
186 if symlink_granularity == 'file':
187 create_symlink_file(dest_path, symlink_name)
188 elif symlink_granularity == 'dir':
189 create_symlink_file(str(pathlib.Path(dest_path).parent), symlink_name)
190 elif symlink_granularity is not None:
191 raise ValueError(f'Unrecognized symlink granularity: {symlink_granularity}')
192 else:
193 symlink_remote_file_name = None

Callers 1

test_upload_fileFunction · 0.90

Calls 7

upload_file_asyncMethod · 0.95
waitMethod · 0.95
RemoteUploaderClass · 0.90
parse_uriFunction · 0.90
create_symlink_fileFunction · 0.90

Tested by 1

test_upload_fileFunction · 0.72