(self, predictor_args: PredictorArgument)
| 1065 | self.seq_lens_handle = self.predictor.get_input_handle("seq_lens_this_time") |
| 1066 | |
def _create_predictor(self, predictor_args: PredictorArgument):
    """Build the Paddle inference predictor and store it on ``self.predictor``.

    The inference config is created from the ``.pdmodel`` / ``.pdiparams``
    pair located via ``get_infer_model_path``. IR optimization is switched
    off and GPU execution is enabled on the device named by the
    ``FLAGS_selected_gpus`` environment variable (``0`` when unset). When
    ``self.tensor_parallel_degree`` is greater than 1, a distributed config
    is attached before the predictor is created.

    Args:
        predictor_args: Supplies ``model_name_or_path`` and ``model_prefix``.

    Raises:
        ValueError: If the paddlenlp custom ops are not available.
    """
    ops_missing = not is_paddlenlp_ops_available()
    if ops_missing:
        install_hint = (
            "you should install the paddlenlp ops to run inference predictor, "
            "https://github.com/PaddlePaddle/PaddleNLP/blob/develop/csrc/README.md"
        )
        raise ValueError(install_hint)

    model_dir = predictor_args.model_name_or_path
    path_prefix = get_infer_model_path(model_dir, predictor_args.model_prefix)

    program_file = path_prefix + ".pdmodel"
    params_file = path_prefix + ".pdiparams"
    inference_config = paddle.inference.Config(program_file, params_file)

    inference_config.switch_ir_optim(False)
    # The environment value is a string when set; int() also accepts the
    # integer fallback used when the variable is absent.
    gpu_id = int(os.environ.get("FLAGS_selected_gpus", 0))
    # First argument is presumably the initial GPU memory pool size in MB
    # (per the Paddle Inference Config API) -- TODO confirm.
    inference_config.enable_use_gpu(100, gpu_id)
    # NOTE: disable_glog_info() and enable_memory_optim() are intentionally
    # left switched off here, as in the previous revision of this method.

    tp_degree = self.tensor_parallel_degree
    if tp_degree > 1:
        endpoints = fleet.worker_endpoints()
        tp_rank = self.tensor_parallel_rank
        # This worker's own endpoint is selected by its tensor-parallel rank.
        own_endpoint = endpoints[tp_rank]

        distributed = inference_config.dist_config()
        distributed.set_ranks(tp_degree, tp_rank)
        distributed.set_endpoints(endpoints, own_endpoint)
        distributed.enable_dist_model(True)

        # The rank mapping file is looked up inside the model directory.
        rank_mapping_file = os.path.join(model_dir, "rank_mapping.csv")
        distributed.set_comm_init_config(rank_mapping_file)
        inference_config.set_dist_config(distributed)

    self.predictor = paddle.inference.create_predictor(inference_config)
| 1097 | |
| 1098 | def _share_data(self): |
| 1099 | """ |
no test coverage detected