(mod, inputs_np, target, legalize=True, cuda_graph=False)
| 90 | |
| 91 | |
def build_and_run(mod, inputs_np, target, legalize=True, cuda_graph=False):
    """Compile ``mod`` for ``target``, execute its ``main`` function, and return the output.

    Parameters
    ----------
    mod
        The module handed to ``tvm.compile``.
    inputs_np
        Iterable of host-side inputs; each one is copied to device 0 of
        ``target`` via ``tvm.runtime.tensor`` before the call.
    target
        Passed both to ``tvm.compile`` and to ``tvm.device`` (device index 0).
    legalize
        Value stored under the ``relax.transform.apply_legalize_ops`` pass
        config key.
    cuda_graph
        Value stored under the ``relax.backend.use_cuda_graph`` pass config
        key. When truthy, ``main`` is additionally invoked once before the
        call whose result is returned.

    Returns
    -------
    The result of calling ``.numpy()`` on the value returned by ``main``
    (presumably a NumPy array -- confirm against the runtime tensor type).
    """
    pass_config = {
        "relax.backend.use_cuda_graph": cuda_graph,
        "relax.transform.apply_legalize_ops": legalize,
    }
    with tvm.transform.PassContext(config=pass_config):
        executable = tvm.compile(mod, target)

    device = tvm.device(target, 0)
    main_fn = relax.VirtualMachine(executable, device)["main"]
    device_args = [tvm.runtime.tensor(array, device) for array in inputs_np]

    # With cuda graph enabled, the compiled function is run twice so that the
    # second (returned) run exercises launching the cached graph.
    if cuda_graph:
        main_fn(*device_args)

    return main_fn(*device_args).numpy()
| 112 | |
| 113 | |
| 114 | def build_cutlass(mod, assert_all_bindings_fused=True, num_final_bindings=1): |
no test coverage detected
searching dependent graphs…