Run the engine enqueue with newly allocated output tensors. Intended for debugging only: it is a synchronous call and is slower than `run`.
(self,
inputs: Dict[str, "torch.Tensor"],
context=None)
| 298 | return ok |
| 299 | |
def _debug_run(self,
               inputs: Dict[str, "torch.Tensor"],
               context=None) -> Dict[str, "torch.Tensor"]:
    '''Run the engine on `inputs`, allocating the output tensors here.

    For debugging only: per the original note, this is a sync call and is
    slower than `run`.

    Args:
        inputs: Mapping from input tensor name to the tensor to feed.
        context: Forwarded unchanged to `self.run`.

    Returns:
        Mapping from output tensor name to the tensor allocated for it,
        after `self.run` has been invoked on those buffers.
    '''
    import torch

    # Describe every input (name, TRT dtype, shape) for shape inference.
    input_specs = []
    for tensor_name, tensor in inputs.items():
        trt_dtype = torch_dtype_to_trt(tensor.dtype)
        input_specs.append(TensorInfo(tensor_name, trt_dtype, tensor.shape))

    # Allocate one uninitialized CUDA buffer per inferred output.
    output_buffers = {}
    for spec in self.infer_shapes(input_specs):
        output_name = spec.name
        output_buffers[output_name] = torch.empty(
            tuple(spec.shape),
            dtype=trt_dtype_to_torch(spec.dtype),
            device='cuda',
        )

    # NOTE(review): `_scoped_stream` presumably synchronizes on exit, which
    # would make the returned buffers safe to read -- confirm at its definition.
    with _scoped_stream() as stream:
        self.run(inputs=inputs,
                 outputs=output_buffers,
                 stream=stream,
                 context=context)

    return output_buffers