hub / github.com/NVIDIA/TensorRT-LLM / __init__

Method __init__

examples/models/contrib/dit/sample.py:32–104 · view source on GitHub ↗

(self,
                 config,
                 debug_mode=True,
                 stream: torch.cuda.Stream = None)

Source from the content-addressed store, hash-verified

30	class TllmDiT(object):
31
32	def __init__(self,
33	config,
34	debug_mode=True,
35	stream: torch.cuda.Stream = None):
36	self.dtype = config['pretrained_config']['dtype']
37
38	rank = tensorrt_llm.mpi_rank()
39	world_size = config['pretrained_config']['mapping']['world_size']
40	cp_size = config['pretrained_config']['mapping']['cp_size']
41	tp_size = config['pretrained_config']['mapping']['tp_size']
42	pp_size = config['pretrained_config']['mapping']['pp_size']
43	assert pp_size == 1
44	self.mapping = tensorrt_llm.Mapping(world_size=world_size,
45	rank=rank,
46	cp_size=cp_size,
47	tp_size=tp_size,
48	pp_size=1,
49	gpus_per_node=args.gpus_per_node)
50
51	local_rank = rank % self.mapping.gpus_per_node
52	self.device = torch.device(f'cuda:{local_rank}')
53	torch.cuda.set_device(self.device)
54	CUASSERT(cudart.cudaSetDevice(local_rank))
55
56	self.stream = stream
57	if self.stream is None:
58	self.stream = torch.cuda.Stream(self.device)
59	torch.cuda.set_stream(self.stream)
60
61	engine_file = os.path.join(args.tllm_model_dir, f"rank{rank}.engine")
62	logger.info(f'Loading engine from {engine_file}')
63	with open(engine_file, "rb") as f:
64	engine_buffer = f.read()
65
66	assert engine_buffer is not None
67
68	self.session = Session.from_serialized_engine(engine_buffer)
69
70	self.debug_mode = debug_mode
71
72	self.inputs = {}
73	self.outputs = {}
74	self.buffer_allocated = False
75
76	expected_tensor_names = ['latent', 'timestep', 'label', 'output']
77
78	if self.mapping.tp_size > 1:
79	self.buffer, self.all_reduce_workspace = CustomAllReduceHelper.allocate_workspace(
80	self.mapping,
81	CustomAllReduceHelper.max_workspace_size_auto(
82	self.mapping.tp_size))
83	self.inputs['all_reduce_workspace'] = self.all_reduce_workspace
84	expected_tensor_names += ['all_reduce_workspace']
85
86	found_tensor_names = [
87	self.session.engine.get_tensor_name(i)
88	for i in range(self.session.engine.num_io_tensors)
89	]

Callers

nothing calls this directly

Calls 7

from_serialized_engineMethod · 0.80

allocate_workspaceMethod · 0.80

max_workspace_size_autoMethod · 0.80

CUASSERTFunction · 0.70

deviceMethod · 0.45

infoMethod · 0.45

errorMethod · 0.45

Tested by

no test coverage detected

Method __init__

Source from the content-addressed store, hash-verified

Callers

Calls 7

Tested by

Method __init__