MCPcopy
hub / github.com/NVIDIA/TensorRT-LLM / __init__

Method __init__

examples/models/contrib/dit/sample.py:32–104  ·  view source on GitHub ↗
(self,
                 config,
                 debug_mode=True,
                 stream: torch.cuda.Stream = None)

Source from the content-addressed store, hash-verified

30class TllmDiT(object):
31
32 def __init__(self,
33 config,
34 debug_mode=True,
35 stream: torch.cuda.Stream = None):
36 self.dtype = config['pretrained_config']['dtype']
37
38 rank = tensorrt_llm.mpi_rank()
39 world_size = config['pretrained_config']['mapping']['world_size']
40 cp_size = config['pretrained_config']['mapping']['cp_size']
41 tp_size = config['pretrained_config']['mapping']['tp_size']
42 pp_size = config['pretrained_config']['mapping']['pp_size']
43 assert pp_size == 1
44 self.mapping = tensorrt_llm.Mapping(world_size=world_size,
45 rank=rank,
46 cp_size=cp_size,
47 tp_size=tp_size,
48 pp_size=1,
49 gpus_per_node=args.gpus_per_node)
50
51 local_rank = rank % self.mapping.gpus_per_node
52 self.device = torch.device(f'cuda:{local_rank}')
53 torch.cuda.set_device(self.device)
54 CUASSERT(cudart.cudaSetDevice(local_rank))
55
56 self.stream = stream
57 if self.stream is None:
58 self.stream = torch.cuda.Stream(self.device)
59 torch.cuda.set_stream(self.stream)
60
61 engine_file = os.path.join(args.tllm_model_dir, f"rank{rank}.engine")
62 logger.info(f'Loading engine from {engine_file}')
63 with open(engine_file, "rb") as f:
64 engine_buffer = f.read()
65
66 assert engine_buffer is not None
67
68 self.session = Session.from_serialized_engine(engine_buffer)
69
70 self.debug_mode = debug_mode
71
72 self.inputs = {}
73 self.outputs = {}
74 self.buffer_allocated = False
75
76 expected_tensor_names = ['latent', 'timestep', 'label', 'output']
77
78 if self.mapping.tp_size > 1:
79 self.buffer, self.all_reduce_workspace = CustomAllReduceHelper.allocate_workspace(
80 self.mapping,
81 CustomAllReduceHelper.max_workspace_size_auto(
82 self.mapping.tp_size))
83 self.inputs['all_reduce_workspace'] = self.all_reduce_workspace
84 expected_tensor_names += ['all_reduce_workspace']
85
86 found_tensor_names = [
87 self.session.engine.get_tensor_name(i)
88 for i in range(self.session.engine.num_io_tensors)
89 ]

Callers

nothing calls this directly

Calls 7

allocate_workspace — Method · 0.80
CUASSERT — Function · 0.70
device — Method · 0.45
info — Method · 0.45
error — Method · 0.45

Tested by

no test coverage detected