(text: string, dims = 384)
| 26 | } |
| 27 | |
/**
 * Builds a deterministic, hash-based pseudo-embedding for `text`.
 *
 * The text is lower-cased and split into words; words of two or fewer UTF-16
 * code units are ignored. Every character of every remaining word adds weight
 * to two buckets of the output vector, and the vector is then scaled to unit
 * L2 length. The same input always produces the same output.
 *
 * Word splitting is Unicode-aware: letters, combining marks, digits and `_`
 * are word characters in any script. For pure-ASCII input this is identical to
 * splitting on `\W+`.
 *
 * @param text - Input text to embed.
 * @param dims - Length of the returned vector; must be a non-negative integer.
 * @returns A `Float32Array` of length `dims`. It has unit L2 norm, or is all
 *   zeros when `text` contains no word longer than two code units.
 * @throws RangeError if `dims` is negative or not an integer.
 */
function deterministicEmbedding(text: string, dims = 384): Float32Array {
  // A fractional `dims` would yield fractional bucket indexes, which typed
  // arrays silently ignore on write; reject it instead of returning garbage.
  if (!Number.isInteger(dims) || dims < 0) {
    throw new RangeError(`dims must be a non-negative integer, got ${dims}`);
  }

  const arr = new Float32Array(dims);

  // `\W` only recognises ASCII word characters, so it would drop or split any
  // non-ASCII word. Unicode property escapes keep such words intact.
  const words = text
    .toLowerCase()
    .split(/[^\p{L}\p{M}\p{N}_]+/u)
    .filter(w => w.length > 2);

  for (const word of words) {
    for (let i = 0; i < word.length; i++) {
      const code = word.charCodeAt(i);
      // Primary bucket: depends on the character and its position.
      const idx = (code * 31 + i * 17) % dims;
      arr[idx] += 1;
      // Secondary bucket: additionally mixes in the word length, half weight.
      const idx2 = (code * 37 + i * 13 + word.length * 7) % dims;
      arr[idx2] += 0.5;
    }
  }

  // L2-normalise; an all-zero vector is left untouched to avoid dividing by 0.
  const norm = Math.sqrt(arr.reduce((s, v) => s + v * v, 0));
  if (norm > 0) for (let i = 0; i < dims; i++) arr[i] /= norm;
  return arr;
}
| 43 | |
| 44 | function estimateTokens(text: string): number { |
| 45 | return Math.ceil(text.length / 4); |
no outgoing calls
no test coverage detected