Run a performance benchmark with the given ONNX model (or OpenVINO IR). By default this function runs in async mode.
(ir_or_onnx_file: str, samples: int = 500, jobs: int = 4)
| 8 | raise Exception('Please Install Openvino >= 2022.1.0') |
| 9 | |
def Benchmark(ir_or_onnx_file: str, samples: int = 500, jobs: int = 4) -> float:
    """Run a performance benchmark on an ONNX model (or OpenVINO IR).

    The model is read and compiled for the 'CPU' device, fed with random
    inputs shaped like its input tensors, and executed ``samples`` times
    through an ``ov.AsyncInferQueue`` (async mode).

    Args:
        ir_or_onnx_file: Path of the model file passed to ``core.read_model``.
        samples: Number of inference requests submitted in the timed loop.
        jobs: Value forwarded as ``jobs`` to ``ov.AsyncInferQueue``.

    Returns:
        Elapsed time in seconds (``tok - tick``, as measured by ``time()``)
        from just before the first submission until all requests have
        completed. This is the same value that is printed as "Time span".
    """
    # https://docs.openvino.ai/latest/api/ie_python_api/_autosummary/openvino.runtime.InferRequest.html
    core = ov.Core()
    # core.add_extension("path_to_extension_library.so")
    model = core.read_model(ir_or_onnx_file)
    compiled_model = core.compile_model(model, 'CPU')

    infer_request = compiled_model.create_infer_request()
    print(f'Openvino Model Loaded: {len(infer_request.input_tensors)} Input Tensors, {len(infer_request.output_tensors)} Output Tensors')

    # One random array per model input, matching its shape and element type.
    feed_dict = [
        np.random.random(size=tensor.shape).astype(tensor.element_type.to_dtype())
        for tensor in infer_request.input_tensors
    ]

    # Run one inference outside the timed region before measuring.
    # Start async inference on a single infer request
    infer_request.start_async()
    # Wait for 1 millisecond
    infer_request.wait_for(1)
    # Wait for inference completion
    infer_request.wait()
    infer_queue = ov.AsyncInferQueue(compiled_model, jobs=jobs)

    tick = time()
    for _ in tqdm(range(samples)):
        # Wait for at least one available infer request and start asynchronous inference
        infer_queue.start_async(feed_dict)
    # Wait for all requests to complete
    infer_queue.wait_all()
    tok = time()

    elapsed = tok - tick
    print(f'Time span: {elapsed : .4f} sec')
    # The elapsed time is end minus start; the original returned
    # ``tick - tok``, i.e. a negative duration.
    return elapsed
| 46 | |
| 47 | """ |
| 48 | infer_request.infer(feed_dict) |
| 49 | for record in infer_request.get_profiling_info(): |
| 50 | print(record.node_name, record.node_type, record.cpu_time.total_seconds(), record.real_time.total_seconds()) |
| 51 | """ |
no test coverage detected