MCPcopy
hub / github.com/continuedev/continue / decode

Method decode

core/llm/llamaTokenizer.js:387–408  ·  view source on GitHub ↗
(tokenIds, add_bos_token = true, add_preceding_space = true)

Source from the content-addressed store, hash-verified

385 }
386
387 decode(tokenIds, add_bos_token = true, add_preceding_space = true) {
388 const utf8byteVals = [];
389 const startIndex = add_bos_token ? 1 : 0;
390 for (let i = startIndex; i < tokenIds.length; i++) {
391 const tokenId = tokenIds[i];
392 const tokenString = this.vocabById[tokenId];
393 if (tokenString.startsWith("<0x") && tokenString.endsWith(">")) {
394 // Special case
395 const utf8byte = hexToUtf8Byte(tokenString);
396 utf8byteVals.push(utf8byte);
397 } else {
398 // Typical case
399 const utf8bytes = this.utf8Encoder.encode(tokenString);
400 utf8bytes.forEach((utf8Byte) => utf8byteVals.push(utf8Byte));
401 }
402 }
403 const uint8Array = new Uint8Array(utf8byteVals);
404 const decodedString = this.utf8Decoder.decode(uint8Array);
405 const spacesFixed = decodedString.replaceAll(this.vocabById[29871], " ");
406 // Note that preceding space must be removed here at string level, not earlier at token level, because multiple consecutive spaces are represented as single token.
407 return add_preceding_space ? spacesFixed.slice(1) : spacesFixed;
408 }
409
410 defaultTests(tokenizer) {
411 function isEqual(arr1, arr2) {

Callers

nothing calls this directly

Calls 4

hexToUtf8Byte · Function · 0.70
push · Method · 0.65
encode · Method · 0.65
decode · Method · 0.65

Tested by

no test coverage detected