hub / github.com/modelscope/ms-swift / ExpManager

Class ExpManager

scripts/benchmark/exp_utils.py:90–377 · view source on GitHub ↗

Source from the content-addressed store, hash-verified

88
89
90	class ExpManager:
91
92	RESULT_FILE = 'result.jsonl'
93
94	def __init__(self):
95	self.exps = []
96
97	def assert_gpu_not_overlap(self):
98	all_gpus = set()
99	for exp in self.exps:
100	gpus = exp.runtime['env']['CUDA_VISIBLE_DEVICES'].split(',')
101	if all_gpus & set(gpus):
102	raise ValueError(f'GPU overlap: {self.exps}!')
103	all_gpus.update(gpus)
104
def run(self, exp: Experiment):
    """Start ``exp`` as a background shell subprocess unless it is already done.

    If ``<exp.input_args.save_dir>/<exp.name>.json`` exists it is loaded first:

    * when the experiment has an eval dataset and the stored record holds no
      ``'eval_result'``, the saved state is loaded into ``exp`` and the
      experiment is switched to evaluation (skipped if ``exp.do_eval`` is
      already set);
    * otherwise the experiment counts as finished and nothing is launched.

    Args:
        exp: Experiment to start. On launch its ``runtime`` and ``handler``
            attributes are set (plus ``create_time`` for non-eval runs) and
            it is appended to ``self.exps``.

    Raises:
        ValueError: If a non-eval experiment with the same name is already
            tracked, or if ``assert_gpu_not_overlap`` rejects the new GPU
            assignment.
        AssertionError: If ``exp`` is an export task while another export
            task is already tracked.
    """
    result_path = os.path.join(exp.input_args.save_dir, exp.name + '.json')
    if os.path.exists(result_path):
        with open(result_path, 'r', encoding='utf-8') as f:
            _json = json.load(f)
        if exp.eval_dataset and 'eval_result' not in _json['record']:
            if not exp.do_eval:
                logger.info(f'Experiment {exp.name} need eval, load from file.')
                exp.load(_json)
                exp.do_eval = True
        else:
            logger.warning(f'Experiment {exp.name} already done, skip')
            return

    if exp.do_eval:
        self._launch(exp, self._build_eval_cmd(exp), 'eval')
        return

    if any(exp.name == e.name for e in self.exps):
        raise ValueError(f'Why exp name duplicate? {exp.name}')
    if exp.cmd == 'export' and any(e.cmd == 'export' for e in self.exps):
        raise AssertionError('Cannot run parallel export task.')

    exp.create_time = time.time()
    self._launch(exp, self._build_cmd(exp), exp.cmd)

def _launch(self, exp, runtime, log_tag):
    """Run ``runtime['running_cmd']`` in a shell and register ``exp`` as running.

    Output of the command is redirected to ``exp/<exp.name>.<log_tag>.log``.
    """
    exp.runtime = runtime
    exp_env = runtime.get('env', {})
    # Start from the inherited environment and let the experiment's own
    # variables win; the other way round, a CUDA_VISIBLE_DEVICES exported in
    # the parent shell would silently replace the GPUs assigned to this run.
    envs = dict(os.environ)
    envs.update(exp_env)
    logger.info(f'Running cmd: {runtime["running_cmd"]}, env: {exp_env}')
    os.makedirs('exp', exist_ok=True)
    log_file = os.path.join('exp', f'{exp.name}.{log_tag}.log')
    exp.handler = subprocess.Popen(runtime['running_cmd'] + f' > {log_file} 2>&1', env=envs, shell=True)
    self.exps.append(exp)
    # NOTE(review): the overlap check runs after the process has been started,
    # as in the original code; a failure here leaves that process running.
    self.assert_gpu_not_overlap()
147

Callers 1

llm_expFunction · 0.90

Calls

no outgoing calls

Tested by

no test coverage detected