MCPcopy
hub / github.com/continuedev/continue / encode

Method encode

core/llm/llamaTokenizer.js:243–385  ·  view source on GitHub ↗
(
    prompt,
    add_bos_token = true,
    add_preceding_space = true,
    log_performance = false,
  )

Source from the content-addressed store, hash-verified

241 }
242
243 encode(
244 prompt,
245 add_bos_token = true,
246 add_preceding_space = true,
247 log_performance = false,
248 ) {
249 let startTime = null;
250 if (log_performance) {
251 startTime = performance.now();
252 }
253
254 if (!this.vocabById || !this.vocabByString || !this.merges) {
255 console.log("Tokenizer not initialized properly!");
256 return;
257 }
258 if (prompt.length === 0) {
259 return [];
260 }
261 // Initially each character is transformed to a tokenId, later there will be merges of these.
262 const tokenIds = this.mapCharactersToTokenIds(
263 prompt,
264 add_bos_token,
265 add_preceding_space,
266 );
267
268 // Set up priority queue to efficiently iterate merge possibilities in priority order
269 const mergeQueue = new PriorityQueue((a, b) => {
270 return a.mergePrio < b.mergePrio;
271 });
272
273 const addToMergeQueue = (leftNode) => {
274 const mergeIdentifierString = this.getMergeIdentifierString(
275 leftNode.tokenId,
276 leftNode.next.tokenId,
277 );
278 // Merge priority is primarily determined by the location of the merge in the "merges" data,
279 // secondarily determined by the relative position of the node in the linked list
280 // (We want to perform equal merges from left to right)
281 const mergePrio =
282 this.merges.get(mergeIdentifierString) +
283 leftNode.origPos / prompt.length;
284 if (mergePrio) {
285 // If mergePrio not found in merges, that means this merge is not possible according to vocabulary.
286 leftNode.mergePrio = mergePrio;
287 leftNode.mergeToString = mergeIdentifierString.replace(" ", "");
288 mergeQueue.push(leftNode);
289 }
290 };
291
292 // Fill merge queue from initial merge possibilities and construct linked list
293 let firstTokenNode = {
294 origPos: 0,
295 tokenId: tokenIds[0],
296 prev: null,
297 next: null,
298 };
299 let prevTokenNode = firstTokenNode;
300 for (let i = 1; i < tokenIds.length; i++) {

Callers

nothing calls this directly

Calls 6

isEmptyMethod · 0.95
popMethod · 0.95
logMethod · 0.65
getMethod · 0.65
pushMethod · 0.65

Tested by

no test coverage detected