MCPcopy Index your code
hub / github.com/THUDM/GLM / DecodeIds

Method DecodeIds

data_utils/tokenization.py:427–451  ·  view source on GitHub ↗

Convert Ids to tokens, accounting for command and type tokens; the tokens are joined and returned as a single string.

(self, Ids, type_token=False)

Source from the content-addressed store, hash-verified

425 return self.text_tokenizer.TokenToId(token) + self.num_command_tokens
426
def DecodeIds(self, Ids, type_token=False):
    """
    Convert Ids to tokens, accounting for command and type tokens, and
    return the tokens joined by single spaces as one string.

    Args:
        Ids: iterable of ids to decode. When `type_token` is False this may
            also be a `Tokenization`, in which case its `tokenization`
            attribute is decoded. Elements may be `CommandToken` /
            `TypeToken` objects instead of integer ids.
        type_token: when True, every element is treated as a type token:
            `TypeToken` instances contribute their own `token`, anything
            else is looked up in `self.type_id_map`.

    Returns:
        The decoded pieces joined with a single space.
    """
    if type_token:
        return ' '.join(
            Id.token if isinstance(Id, TypeToken) else self.type_id_map[Id].token
            for Id in Ids
        )
    if isinstance(Ids, Tokenization):
        Ids = Ids.tokenization
    rtn_strs = []
    # Text-tokenizer ids collected since the last command token.
    current_str = []
    for Id in Ids:
        if isinstance(Id, CommandToken):
            command = Id.token
        elif Id < self.num_command_tokens:
            command = self.command_id_map[Id].token
        else:
            # Ids at or above num_command_tokens belong to the text
            # tokenizer; shift them back into its own id space.
            current_str.append(Id - self.num_command_tokens)
            continue
        # Decode pending text only when there is some. Decoding an empty run
        # would add an extra piece to the join (presumably '' -- which shows
        # up as a leading or doubled space) whenever a command token starts
        # the sequence or directly follows another command token.
        if current_str:
            rtn_strs.append(self.text_tokenizer.DecodeIds(current_str))
            current_str = []
        rtn_strs.append(command)
    if current_str:
        rtn_strs.append(self.text_tokenizer.DecodeIds(current_str))
    return ' '.join(rtn_strs)
452
453 def DecodeTokens(self, Tokens, type_token=False):
454 """

Callers 15

make_block_dataMethod · 0.45
construct_blocksMethod · 0.45
finetuneFunction · 0.45
debug_finetune_dataFunction · 0.45
print_masked_textFunction · 0.45
sample_sequenceFunction · 0.45
read_contextFunction · 0.45
generate_samplesFunction · 0.45
get_verbalization_idsFunction · 0.45
create_examplesMethod · 0.45
create_examplesMethod · 0.45
create_examplesMethod · 0.45

Calls 1

appendMethod · 0.80

Tested by

no test coverage detected