(mod, inputs_np, target, legalize=False, cuda_graph=False)
| 50 | |
| 51 | |
def build_and_run(mod, inputs_np, target, legalize=False, cuda_graph=False):
    """Compile ``mod`` for ``target``, execute its ``main`` function, and return the output.

    Parameters
    ----------
    mod
        The module handed to ``tvm.compile``.
    inputs_np
        Iterable of host-side inputs; each one is copied to device 0 of
        ``target`` via ``tvm.runtime.tensor`` and passed positionally to ``main``.
    target
        Used both to select the device (``tvm.device(target, 0)``) and as the
        compilation target.
    legalize : bool
        Value for the ``relax.transform.apply_legalize_ops`` pass-context option.
    cuda_graph : bool
        Value for the ``relax.backend.use_cuda_graph`` pass-context option. When
        true, ``main`` is invoked twice and the second result is returned.

    Returns
    -------
    The result of calling ``.numpy()`` on the value returned by ``main``.
    """
    device = tvm.device(target, 0)
    pass_config = {
        "relax.backend.use_cuda_graph": cuda_graph,
        "relax.transform.apply_legalize_ops": legalize,
    }
    # Only compilation needs the pass configuration to be active.
    with tvm.transform.PassContext(config=pass_config):
        executable = tvm.compile(mod, target)

    main_fn = relax.VirtualMachine(executable, device)["main"]
    device_inputs = [tvm.runtime.tensor(host_array, device) for host_array in inputs_np]

    # With cuda graph enabled, issue one extra call first so that the call whose
    # result we return is the second run, checking that the cached graph can be
    # launched.
    if cuda_graph:
        main_fn(*device_inputs)

    output = main_fn(*device_inputs)
    return output.numpy()
| 71 | |
| 72 | |
| 73 | def get_result_with_relax_cublas_offload(mod, np_inputs, cuda_graph=False, bind_constants=False): |
no test coverage detected
searching dependent graphs…