MCPcopy
hub / github.com/deepspeedai/DeepSpeedExamples / get_checkpoint_iteration

Function get_checkpoint_iteration

Megatron-LM/utils.py:244–269  ·  view source on GitHub ↗
(args)

Source from the content-addressed store, hash-verified

242
243
def get_checkpoint_iteration(args):
    """Read the checkpoint tracker file and report which checkpoint to load.

    Args:
        args: Object whose ``load`` attribute is handed to
            ``get_checkpoint_tracker_filename`` to locate the tracker file.

    Returns:
        A tuple ``(iteration, release, success)``:

        * ``(0, False, False)`` when the tracker file does not exist.
        * ``(n, False, True)`` when the file holds the positive integer ``n``.
        * ``(0, True, True)`` when the file holds the string ``'release'``.

    Raises:
        SystemExit: The tracker file holds neither an integer nor
            ``'release'``.
        AssertionError: The tracker file holds an integer that is not
            positive.
    """
    # Function-scope import so this block does not depend on the module's
    # import header already providing ``sys``.
    import sys

    tracker_filename = get_checkpoint_tracker_filename(args.load)
    if not os.path.isfile(tracker_filename):
        print_rank_0('WARNING: could not find the metadata file {} '.format(
            tracker_filename))
        print_rank_0(' will not load any checkpoints and will start from '
                     'random')
        return 0, False, False

    with open(tracker_filename, 'r') as f:
        metastring = f.read().strip()

    iteration = 0
    release = False
    try:
        iteration = int(metastring)
    except ValueError:
        # Not an integer: the only other accepted content is 'release'.
        release = metastring == 'release'
        if not release:
            print_rank_0('ERROR: Invalid metadata file {}. Exiting'.format(
                tracker_filename))
            # sys.exit instead of the bare exit() helper, which is only
            # installed by the ``site`` module; the non-zero status lets the
            # launcher see that the run failed.
            sys.exit(1)

    # Raised explicitly rather than via ``assert`` so the check is not
    # stripped when Python runs with -O.
    if not (iteration > 0 or release):
        raise AssertionError('error parsing metadata file {}'.format(
            tracker_filename))

    return iteration, release, True
270
271def load_checkpoint(model, optimizer, lr_scheduler, args):
272 """Load a model checkpoint."""

Callers 1

load_checkpointFunction · 0.85

Calls 2

print_rank_0Function · 0.70

Tested by

no test coverage detected