(mod, inputs_np, target, legalize=False, cuda_graph=False)
| 111 | |
| 112 | |
def build_and_run(mod, inputs_np, target, legalize=False, cuda_graph=False):
    """Compile ``mod`` for ``target``, execute its ``"main"`` function, and return the output.

    Parameters
    ----------
    mod
        The module handed to ``tvm.compile``.
    inputs_np
        Iterable of host-side inputs; each one is copied onto the device with
        ``tvm.runtime.tensor`` and passed positionally to ``"main"``.
    target
        Target passed both to ``tvm.device`` (device index 0) and ``tvm.compile``.
    legalize
        Value stored under the ``"relax.transform.apply_legalize_ops"`` pass-config key.
    cuda_graph
        Value stored under the ``"relax.backend.use_cuda_graph"`` pass-config key.
        When truthy, ``"main"`` is additionally invoked once before the returned run.

    Returns
    -------
    The result of calling ``.numpy()`` on the value returned by ``"main"``.
    """
    device = tvm.device(target, 0)

    pass_config = {
        "relax.backend.use_cuda_graph": cuda_graph,
        "relax.transform.apply_legalize_ops": legalize,
    }
    # Only compilation happens under the pass context.
    with tvm.transform.PassContext(config=pass_config):
        executable = tvm.compile(mod, target)

    machine = relax.VirtualMachine(executable, device)
    main_fn = machine["main"]
    device_inputs = [tvm.runtime.tensor(host_array, device) for host_array in inputs_np]

    if cuda_graph:
        # With cuda graph enabled, call the function one extra time so that the
        # run whose result is returned can launch the graph cached by this call.
        main_fn(*device_inputs)

    output = main_fn(*device_inputs)
    return output.numpy()
| 132 | |
| 133 | |
| 134 | @pytest.mark.parametrize( |
no test coverage detected
searching dependent graphs…