MCPcopy
hub / github.com/apache/tvm / main

Function main

python/tvm/exec/gpu_memory_bandwidth.py:178–258  ·  view source on GitHub ↗

Entry point

()

Source from the content-addressed store, hash-verified

176
177
178def main(): # pylint: disable=too-many-locals
179 """Entry point"""
180 args = _parse_args()
181 # pylint: disable=invalid-name
182 target = tvm.target.Target(args.target)
183 if args.target_host is not None:
184 target = tvm.target.Target(args.target, host=args.target_host)
185 dtype = args.dtype
186 rpcConfig = None
187 if args.rpc_host is not None and args.rpc_port is not None and args.rpc_key is not None:
188 rpcConfig = RPCConfig(
189 tracker_host=args.rpc_host,
190 tracker_port=args.rpc_port,
191 tracker_key=args.rpc_key,
192 session_priority=1,
193 session_timeout_sec=600,
194 )
195
196 a = np.random.uniform(-1, 1, (args.xo, args.k, args.xi)).astype(dtype)
197 b = np.zeros((args.xo, args.xi), dtype=dtype)
198 num_bytes = a.size * a.itemsize + b.size * b.itemsize
199 print("###### Bandwidth Test ######")
200 print(
201 f"Workload [XO, K, XI] => [XO, XI]. "
202 f"[{args.xo}, {args.k}, {args.xi}] => [{args.xo}, {args.xi}]"
203 )
204 print(f"Input size: {num_bytes / 1048576} MB")
205 print(f"Target: {target}")
206
207 # pylint: enable=invalid-name
208 best_bandwidth = -1
209 for len_bx, len_tx, len_vec in itertools.product(
210 args.bx,
211 args.tx,
212 args.vec,
213 ):
214 func = _workload(
215 len_xo=args.xo,
216 len_k=args.k,
217 len_xi=args.xi,
218 dtype=dtype,
219 )
220 sch = tvm.s_tir.Schedule(func)
221 _schedule(sch, len_bx, len_tx, len_vec)
222
223 if rpcConfig is None:
224 _, profile_result = local_run(
225 tvm.compile(sch.mod, target=target),
226 target.kind.name,
227 [a, b],
228 evaluator_config=EvaluatorConfig(
229 number=10,
230 repeat=1,
231 min_repeat_ms=100,
232 enable_cpu_cache_flush=False,
233 ),
234 )
235 else:

Callers 1

Calls 13

RPCConfig (Class) · 0.90
local_run (Function) · 0.90
EvaluatorConfig (Class) · 0.90
rpc_run (Function) · 0.90
print (Function) · 0.85
_workload (Function) · 0.85
_schedule (Function) · 0.85
uniform (Method) · 0.80
product (Method) · 0.80
_parse_args (Function) · 0.70
astype (Method) · 0.45
zeros (Method) · 0.45

Tested by

no test coverage detected

Used in the wild — real call sites across dependent graphs

searching dependent graphs…