MCPcopy Index your code
hub / github.com/deepspeedai/DeepSpeedExamples / DecodeIds

Method DecodeIds

Megatron-LM/data_utils/tokenization.py:338–362  ·  view source on GitHub ↗

Convert Ids to tokens, accounting for command and type tokens; the tokens are joined with spaces and returned as a single string.

(self, Ids, type_token=False)

Source from the content-addressed store, hash-verified

336 return self.text_tokenizer.TokenToId(token)+self.num_command_tokens
337
def DecodeIds(self, Ids, type_token=False):
    """
    Convert Ids to tokens, accounting for command and type tokens; the
    tokens are joined with single spaces and returned as a string.

    Args:
        Ids: iterable of ids to decode. When `type_token` is False this may
            also be a `Tokenization`, in which case its `.tokenization`
            attribute is decoded. Elements may be `CommandToken` instances
            or values comparable with `self.num_command_tokens`.
        type_token: when True, every element is treated as a type token:
            `TypeToken` instances contribute their `.token`, anything else
            is looked up in `self.type_id_map`.

    Returns:
        The decoded pieces joined with ' '.
    """
    if type_token:
        return ' '.join(
            Id.token if isinstance(Id, TypeToken) else self.type_id_map[Id].token
            for Id in Ids
        )

    if isinstance(Ids, Tokenization):
        Ids = Ids.tokenization

    rtn_strs = []
    # Run of text-tokenizer ids collected since the last command token.
    current_str = []
    for Id in Ids:
        if isinstance(Id, CommandToken):
            # Use the loop variable itself; the previous code read an
            # undefined name `t` here and raised NameError.
            command = Id
        elif Id < self.num_command_tokens:
            command = self.command_id_map[Id]
        else:
            # Text ids are stored shifted up by the number of command
            # tokens; undo the shift before handing them to the text
            # tokenizer.
            current_str.append(Id - self.num_command_tokens)
            continue
        # A command token ends the current text run: decode the run (even
        # when it is empty, as before), then emit the command's token.
        rtn_strs.append(self.text_tokenizer.DecodeIds(current_str))
        current_str = []
        rtn_strs.append(command.token)

    if current_str:
        rtn_strs.append(self.text_tokenizer.DecodeIds(current_str))
    return ' '.join(rtn_strs)
363
364 def DecodeTokens(self, Tokens, type_token=False):
365 """

Callers 2

generate_samplesFunction · 0.45
DecodeIdsMethod · 0.45

Calls 1

appendMethod · 0.80

Tested by

no test coverage detected