(self, exp: Experiment)
| 103 | all_gpus.update(gpus) |
| 104 | |
def run(self, exp: Experiment):
    """Start ``exp`` as a background shell subprocess and track it in ``self.exps``.

    If ``<exp.input_args.save_dir>/<exp.name>.json`` already exists it is
    read first:

    * when ``exp.eval_dataset`` is truthy and the saved ``'record'`` has no
      ``'eval_result'`` entry, the experiment still needs evaluation; unless
      ``exp.do_eval`` is already set, ``exp.load`` is called with the parsed
      JSON and ``exp.do_eval`` is switched on;
    * otherwise the experiment is considered finished and the call returns
      without launching anything.

    Evaluation runs take their command from ``self._build_eval_cmd``; all
    other runs are checked for name clashes / parallel exports and take
    their command from ``self._build_cmd``. The child's stdout and stderr
    are redirected by the shell to ``exp/<exp.name>.<eval|exp.cmd>.log``
    (relative to the current working directory). The call does not wait for
    the child; the ``Popen`` object is stored on ``exp.handler``.

    Args:
        exp: Experiment to launch. Its ``runtime`` and ``handler``
            attributes are set here, as are ``do_eval`` (when a saved
            record is missing its eval result) and ``create_time`` (for
            non-eval runs).

    Raises:
        ValueError: A non-eval experiment with the same name is already in
            ``self.exps``.
        AssertionError: ``exp.cmd`` is ``'export'`` while another export
            experiment is already in ``self.exps``.
    """
    result_file = os.path.join(exp.input_args.save_dir, exp.name + '.json')
    if os.path.exists(result_file):
        with open(result_file, 'r', encoding='utf-8') as f:
            _json = json.load(f)
        # The file handle is no longer needed once the JSON is parsed.
        if exp.eval_dataset and 'eval_result' not in _json['record']:
            if not exp.do_eval:
                logger.info(f'Experiment {exp.name} need eval, load from file.')
                exp.load(_json)
                exp.do_eval = True
        else:
            # `Logger.warn` is a deprecated alias of `Logger.warning`.
            logger.warning(f'Experiment {exp.name} already done, skip')
            return

    if exp.do_eval:
        runtime = self._build_eval_cmd(exp)
        log_tag = 'eval'
    else:
        if any(exp.name == e.name for e in self.exps):
            raise ValueError(f'Why exp name duplicate? {exp.name}')
        if exp.cmd == 'export' and any(e.cmd == 'export' for e in self.exps):
            raise AssertionError('Cannot run parallel export task.')
        exp.create_time = time.time()
        runtime = self._build_cmd(exp)
        log_tag = exp.cmd

    exp.runtime = runtime
    # Inherit the launcher's environment, but let the per-experiment values
    # win. The previous order (copy runtime env, then update with os.environ)
    # let any variable already set in the parent silently replace the value
    # chosen for this experiment, so the `env` logged below was not
    # necessarily what the child received.
    envs = {**os.environ, **runtime.get('env', {})}
    logger.info(f'Running cmd: {runtime["running_cmd"]}, env: {runtime.get("env", {})}')
    os.makedirs('exp', exist_ok=True)
    log_file = os.path.join('exp', f'{exp.name}.{log_tag}.log')
    exp.handler = subprocess.Popen(runtime['running_cmd'] + f' > {log_file} 2>&1', env=envs, shell=True)
    self.exps.append(exp)
    # Presumably verifies that no two tracked experiments share a GPU; the
    # helper's body is only partially visible from here.
    self.assert_gpu_not_overlap()
| 147 | |
| 148 | def _build_eval_cmd(self, exp: Experiment): |
| 149 | gpu = exp.eval_requirements.get('gpu', None) |
no test coverage detected