MCPcopy
hub / github.com/modelscope/ms-swift / ExpManager

Class ExpManager

scripts/benchmark/exp_utils.py:90–377  ·  view source on GitHub ↗

Source from the content-addressed store, hash-verified

88
89
90class ExpManager:
91
92 RESULT_FILE = 'result.jsonl'
93
def __init__(self):
    """Create a manager that is not tracking any experiment yet."""
    # `run` appends every experiment it launches to this list, and
    # `assert_gpu_not_overlap` iterates over it.
    self.exps = list()
96
def assert_gpu_not_overlap(self):
    """Verify that no two tracked experiments are pinned to the same GPU.

    Each experiment in ``self.exps`` is expected to carry a comma-separated
    device list in ``exp.runtime['env']['CUDA_VISIBLE_DEVICES']``.

    Raises:
        ValueError: if a device id appears in more than one experiment.
        KeyError: if an experiment's runtime has no ``env`` entry or the
            env has no ``CUDA_VISIBLE_DEVICES`` entry.
    """
    claimed = set()
    for exp in self.exps:
        raw_devices = exp.runtime['env']['CUDA_VISIBLE_DEVICES']
        # Normalise the ids: '0, 1' must collide with '1', and an empty
        # value (no GPU at all) must not collide with another empty value.
        devices = {token.strip() for token in raw_devices.split(',')}
        devices.discard('')
        if claimed & devices:
            raise ValueError(f'GPU overlap: {self.exps}!')
        claimed |= devices
104
def run(self, exp: Experiment):
    """Start ``exp`` as a background shell process unless it is already done.

    If ``<save_dir>/<exp.name>.json`` exists, the stored record decides what
    happens: when the experiment has an eval dataset but the record holds no
    ``eval_result``, the experiment is loaded from the record and switched to
    evaluation; otherwise it is considered finished and skipped.

    Evaluation runs use the command from ``_build_eval_cmd``; all other runs
    use ``_build_cmd`` after checking for duplicate names and for a second
    concurrent ``export`` task.

    Args:
        exp: The experiment to launch. Its ``runtime``, ``handler`` and (for
            non-eval runs) ``create_time`` attributes are set here.

    Raises:
        ValueError: if an experiment with the same name is already tracked,
            or if the GPU assignment overlaps with a tracked experiment.
        AssertionError: if ``exp`` is an export task and another export task
            is already tracked.
    """
    record_path = os.path.join(exp.input_args.save_dir, exp.name + '.json')
    if os.path.exists(record_path):
        with open(record_path, 'r', encoding='utf-8') as f:
            _json = json.load(f)
        if exp.eval_dataset and 'eval_result' not in _json['record']:
            if not exp.do_eval:
                logger.info(f'Experiment {exp.name} need eval, load from file.')
                exp.load(_json)
                exp.do_eval = True
        else:
            logger.warning(f'Experiment {exp.name} already done, skip')
            return

    if exp.do_eval:
        self._spawn_with_log(exp, self._build_eval_cmd(exp), 'eval')
        return

    if any(exp.name == other.name for other in self.exps):
        raise ValueError(f'Why exp name duplicate? {exp.name}')
    if exp.cmd == 'export' and any(other.cmd == 'export' for other in self.exps):
        raise AssertionError('Cannot run parallel export task.')

    exp.create_time = time.time()
    self._spawn_with_log(exp, self._build_cmd(exp), exp.cmd)

def _spawn_with_log(self, exp, runtime, log_tag):
    """Track ``exp`` and start its command, logging to ``exp/<name>.<log_tag>.log``."""
    exp.runtime = runtime
    self.exps.append(exp)
    try:
        # Validate the GPU assignment before spawning, so that a conflict
        # never leaves an orphan process behind.
        self.assert_gpu_not_overlap()

        # Experiment-specific variables must win over inherited ones;
        # otherwise a CUDA_VISIBLE_DEVICES set in the parent shell would
        # silently replace the per-experiment GPU assignment.
        envs = dict(os.environ)
        envs.update(runtime.get('env', {}))

        logger.info(f'Running cmd: {runtime["running_cmd"]}, env: {runtime.get("env", {})}')
        os.makedirs('exp', exist_ok=True)
        log_file = os.path.join('exp', f'{exp.name}.{log_tag}.log')
        exp.handler = subprocess.Popen(runtime['running_cmd'] + f' > {log_file} 2>&1', env=envs, shell=True)
    except Exception:
        # Nothing was launched: do not keep the experiment registered.
        self.exps.pop()
        raise
147

Callers 1

llm_expFunction · 0.90

Calls

no outgoing calls

Tested by

no test coverage detected