(e)
| 191 | /*!***************************!*\ |
| 192 | !*** ./src/tokenizers.js ***! |
| 193 | \***************************/(e,t,n)=>{n.r(t),n.d(t,{AlbertTokenizer:()=>xe,AutoTokenizer:()=>ht,BartTokenizer:()=>Be,BertTokenizer:()=>ve,BlenderbotSmallTokenizer:()=>lt,BlenderbotTokenizer:()=>ot,BloomTokenizer:()=>Ne,CLIPTokenizer:()=>rt,CamembertTokenizer:()=>Fe,CodeGenTokenizer:()=>nt,CodeLlamaTokenizer:()=>qe,CohereTokenizer:()=>pt,ConvBertTokenizer:()=>Se,DebertaTokenizer:()=>ke,DebertaV2Tokenizer:()=>$e,DistilBertTokenizer:()=>Ee,ElectraTokenizer:()=>Ie,EsmTokenizer:()=>Xe,FalconTokenizer:()=>We,GPT2Tokenizer:()=>Oe,GPTNeoXTokenizer:()=>He,GemmaTokenizer:()=>Qe,Grok1Tokenizer:()=>Ye,HerbertTokenizer:()=>Ce,LlamaTokenizer:()=>je,M2M100Tokenizer:()=>et,MBart50Tokenizer:()=>De,MBartTokenizer:()=>Le,MPNetTokenizer:()=>Ue,MarianTokenizer:()=>st,MobileBertTokenizer:()=>Me,NllbTokenizer:()=>Je,NougatTokenizer:()=>dt,PreTrainedTokenizer:()=>be,Qwen2Tokenizer:()=>Ke,RoFormerTokenizer:()=>Pe,RobertaTokenizer:()=>Re,SiglipTokenizer:()=>at,SpeechT5Tokenizer:()=>ut,SqueezeBertTokenizer:()=>Te,T5Tokenizer:()=>ze,TokenizerModel:()=>M,VitsTokenizer:()=>ct,Wav2Vec2CTCTokenizer:()=>it,WhisperTokenizer:()=>tt,XLMRobertaTokenizer:()=>Ge,XLMTokenizer:()=>Ae,is_chinese_char:()=>_});var r=n(/*! ./utils/generic.js */"./src/utils/generic.js"),a=n(/*! ./utils/core.js */"./src/utils/core.js"),s=n(/*! ./utils/hub.js */"./src/utils/hub.js"),i=n(/*! ./utils/maths.js */"./src/utils/maths.js"),o=n(/*! ./utils/tensor.js */"./src/utils/tensor.js"),l=n(/*! ./utils/data-structures.js */"./src/utils/data-structures.js"),u=n(/*! @huggingface/jinja */"./node_modules/@huggingface/jinja/dist/index.js"),d=n(/*! ./models/whisper/common_whisper.js */"./src/models/whisper/common_whisper.js");n(/*! ./utils/constants.js */"./src/utils/constants.js");async function c(e,t){const n=await Promise.all([(0,s.getModelJSON)(e,"tokenizer.json",!0,t),(0,s.getModelJSON)(e,"tokenizer_config.json",!0,t)]);return null!==t.legacy&&(n[1].legacy=t.legacy),n}function p(e,t=!0){if(void 0!==e.Regex){let t=e.Regex.replace(/\\([#&~])/g,"$1");for(const[e,n]of v)t=t.replaceAll(e,n);return new RegExp(t,"gu")}if(void 0!==e.String){const n=(0,a.escapeRegExp)(e.String);return new RegExp(t?n:`(${n})`,"gu")}return console.warn("Unknown pattern type:",e),null}function h(e){return new Map(Object.entries(e))}function m(e){const t=e.dims;switch(t.length){case 1:return e.tolist();case 2:if(1!==t[0])throw new Error("Unable to decode tensor with `batch size !== 1`. Use `tokenizer.batch_decode(...)` for batched inputs.");return e.tolist()[0];default:throw new Error(`Expected tensor to have 1-2 dimensions, got ${t.length}.`)}}function f(e){return e.replace(/ \./g,".").replace(/ \?/g,"?").replace(/ \!/g,"!").replace(/ ,/g,",").replace(/ \' /g,"'").replace(/ n\'t/g,"n't").replace(/ \'m/g,"'m").replace(/ \'s/g,"'s").replace(/ \'ve/g,"'ve").replace(/ \'re/g,"'re")}function g(e){return e.replace(/\p{M}/gu,"")}function _(e){return e>=19968&&e<=40959||e>=13312&&e<=19903||e>=131072&&e<=173791||e>=173824&&e<=177983||e>=177984&&e<=178207||e>=178208&&e<=183983||e>=63744&&e<=64255||e>=194560&&e<=195103}const w="\\p{P}\\u0021-\\u002F\\u003A-\\u0040\\u005B-\\u0060\\u007B-\\u007E",y=new RegExp(`^[${w}]+$`,"gu"),b=".,!?…。,、।۔،",v=new Map([["(?i:'s|'t|'re|'ve|'m|'ll|'d)","(?:'([sS]|[tT]|[rR][eE]|[vV][eE]|[mM]|[lL][lL]|[dD]))"],[` ?[^(\\s|[${b}])]+`,` ?[^\\s${b}]+`]]);class x{constructor(e){this.content=e.content,this.id=e.id,this.single_word=e.single_word??!1,this.lstrip=e.lstrip??!1,this.rstrip=e.rstrip??!1,this.special=e.special??!1,this.normalized=e.normalized??null}}class M extends r.Callable{constructor(e){super(),this.config=e,this.vocab=[],this.tokens_to_ids=new Map,this.unk_token_id=void 0,this.unk_token=void 0,this.end_of_word_suffix=void 0,this.fuse_unk=this.config.fuse_unk??!1}static fromConfig(e,...t){switch(e.type){case"WordPiece":return new T(e);case"Unigram":return new k(e,...t);case"BPE":return new S(e);default:if(e.vocab)return Array.isArray(e.vocab)?new k(e,...t):new P(e,...t);throw new Error(`Unknown TokenizerModel type: ${e.type}`)}}_call(e){return e=this.encode(e),this.fuse_unk&&(e=function(e,t,n){const r=[];let a=0;for(;a<e.length;)if(r.push(e[a]),(t.get(e[a])??n)===n)for(;++a<e.length&&(t.get(e[a])??n)===n;)t.get(r.at(-1))!==n&&(r[r.length-1]+=e[a]);else++a;return r}(e,this.tokens_to_ids,this.unk_token_id)),e}encode(e){throw Error("encode should be implemented in subclass.")}convert_tokens_to_ids(e){return e.map((e=>this.tokens_to_ids.get(e)??this.unk_token_id))}convert_ids_to_tokens(e){return e.map((e=>this.vocab[e]??this.unk_token))}}class T extends M{constructor(e){super(e),this.tokens_to_ids=h(e.vocab),this.unk_token_id=this.tokens_to_ids.get(e.unk_token),this.unk_token=e.unk_token,this.max_input_chars_per_word=e.max_input_chars_per_word??100,this.vocab=new Array(this.tokens_to_ids.size);for(const[e,t]of this.tokens_to_ids)this.vocab[t]=e}encode(e){const t=[];for(const n of e){const e=[...n];if(e.length>this.max_input_chars_per_word){t.push(this.unk_token);continue}let r=!1,a=0;const s=[];for(;a<e.length;){let t=e.length,n=null;for(;a<t;){let r=e.slice(a,t).join("");if(a>0&&(r=this.config.continuing_subword_prefix+r),this.tokens_to_ids.has(r)){n=r;break}--t}if(null===n){r=!0;break}s.push(n),a=t}r?t.push(this.unk_token):t.push(...s)}return t}}class k extends M{constructor(e,t){super(e);const n=e.vocab.length;this.vocab=new Array(n),this.scores=new Array(n);for(let t=0;t<n;++t){const n=e.vocab[t];this.vocab[t]=n[0],this.scores[t]=n[1]}this.unk_token_id=e.unk_id,this.unk_token=this.vocab[e.unk_id],this.tokens_to_ids=new Map(this.vocab.map(((e,t)=>[e,t]))),this.bos_token=" ",this.bos_token_id=this.tokens_to_ids.get(this.bos_token),this.eos_token=t.eos_token,this.eos_token_id=this.tokens_to_ids.get(this.eos_token),this.unk_token=this.vocab[this.unk_token_id],this.minScore=(0,i.min)(this.scores)[0],this.unk_score=this.minScore-10,this.scores[this.unk_token_id]=this.unk_score,this.trie=new l.CharTrie,this.trie.extend(this.vocab),this.fuse_unk=!0}populateNodes(e){const t=e.chars;let n=0;for(;n<t.length;){let r=!1;const s=[],i=t.slice(n).join(""),o=this.trie.commonPrefixSearch(i);for(const t of o){s.push(t);const i=this.tokens_to_ids.get(t),o=this.scores[i],l=(0,a.len)(t);e.insert(n,l,o,i),r||1!==l||(r=!0)}r||e.insert(n,1,this.unk_score,this.unk_token_id),n+=1}}tokenize(e){const t=new l.TokenLattice(e,this.bos_token_id,this.eos_token_id);return this.populateNodes(t),t.tokens()}encode(e){const t=[];for(const n of e){const e=this.tokenize(n);t.push(...e)}return t}}const $=(()=>{const e=[...Array.from({length:"~".charCodeAt(0)-"!".charCodeAt(0)+1},((e,t)=>t+"!".charCodeAt(0))),...Array.from({length:"¬".charCodeAt(0)-"¡".charCodeAt(0)+1},((e,t)=>t+"¡".charCodeAt(0))),...Array.from({length:"ÿ".charCodeAt(0)-"®".charCodeAt(0)+1},((e,t)=>t+"®".charCodeAt(0)))],t=e.slice();let n=0;for(let r=0;r<256;++r)e.includes(r)||(e.push(r),t.push(256+n),n+=1);const r=t.map((e=>String.fromCharCode(e)));return Object.fromEntries(e.map(((e,t)=>[e,r[t]])))})(),C=(0,a.reverseDictionary)($);class S extends M{constructor(e){super(e),this.tokens_to_ids=h(e.vocab),this.unk_token_id=this.tokens_to_ids.get(e.unk_token),this.unk_token=e.unk_token,this.vocab=new Array(this.tokens_to_ids.size);for(const[e,t]of this.tokens_to_ids)this.vocab[t]=e;const t=Array.isArray(e.merges[0]);this.merges=t?e.merges:e.merges.map((e=>e.split(" ",2))),this.bpe_ranks=new Map(this.merges.map(((e,t)=>[JSON.stringify(e),t]))),this.end_of_word_suffix=e.end_of_word_suffix,this.continuing_subword_suffix=e.continuing_subword_suffix??null,this.byte_fallback=this.config.byte_fallback??!1,this.byte_fallback&&(this.text_encoder=new TextEncoder),this.ignore_merges=this.config.ignore_merges??!1,this.cache=new Map}bpe(e){if(0===e.length)return[];const t=this.cache.get(e);if(void 0!==t)return t;const n=Array.from(e);this.end_of_word_suffix&&(n[n.length-1]+=this.end_of_word_suffix);let r=[];if(n.length>1){const e=new l.PriorityQueue(((e,t)=>e.score<t.score));let t={token:n[0],bias:0,prev:null,next:null},a=t;for(let t=1;t<n.length;++t){const r={bias:t/n.length,token:n[t],prev:a,next:null};a.next=r,this._add_node(e,a),a=r}for(;!e.isEmpty();){const n=e.pop();if(n.deleted||!n.next||n.next.deleted)continue;if(n.deleted=!0,n.next.deleted=!0,n.prev){const e={...n.prev};n.prev.deleted=!0,n.prev=e,e.prev?e.prev.next=e:t=e}const r={token:n.token+n.next.token,bias:n.bias,prev:n.prev,next:n.next.next};r.prev?(r.prev.next=r,this._add_node(e,r.prev)):t=r,r.next&&(r.next.prev=r,this._add_node(e,r))}for(let e=t;null!==e;e=e.next)r.push(e.token)}else r=n;if(this.continuing_subword_suffix)for(let e=0;e<r.length-1;++e)r[e]+=this.continuing_subword_suffix;return this.cache.set(e,r),r}_add_node(e,t){const n=this.bpe_ranks.get(JSON.stringify([t.token,t.next.token]));void 0!==n&&(t.score=n+t.bias,e.push(t))}encode(e){const t=[];for(const n of e){if(this.ignore_merges&&this.tokens_to_ids.has(n)){t.push(n);continue}const e=this.bpe(n);for(const n of e)if(this.tokens_to_ids.has(n))t.push(n);else if(this.byte_fallback){const e=Array.from(this.text_encoder.encode(n)).map((e=>`<0x${e.toString(16).toUpperCase().padStart(2,"0")}>`));e.every((e=>this.tokens_to_ids.has(e)))?t.push(...e):t.push(this.unk_token)}else t.push(this.unk_token)}return t}}class P extends M{constructor(e,t){super(e),this.tokens_to_ids=h(t.target_lang?e.vocab[t.target_lang]:e.vocab),this.bos_token=t.bos_token,this.bos_token_id=this.tokens_to_ids.get(this.bos_token),this.eos_token=t.eos_token,this.eos_token_id=this.tokens_to_ids.get(this.eos_token),this.pad_token=t.pad_token,this.pad_token_id=this.tokens_to_ids.get(this.pad_token),this.unk_token=t.unk_token,this.unk_token_id=this.tokens_to_ids.get(this.unk_token),this.vocab=new Array(this.tokens_to_ids.size);for(const[e,t]of this.tokens_to_ids)this.vocab[t]=e}encode(e){return e}}class E extends r.Callable{constructor(e){super(),this.config=e}static fromConfig(e){if(null===e)return null;switch(e.type){case"BertNormalizer":return new N(e);case"Precompiled":return new pe(e);case"Sequence":return new R(e);case"Replace":return new F(e);case"NFC":return new A(e);case"NFKC":return new I(e);case"NFKD":return new z(e);case"Strip":return new O(e);case"StripAccents":return new B(e);case"Lowercase":return new L(e);case"Prepend":return new D(e);default:throw new Error(`Unknown Normalizer type: ${e.type}`)}}normalize(e){throw Error("normalize should be implemented in subclass.")}_call(e){return this.normalize(e)}}class F extends E{normalize(e){const t=p(this.config.pattern);return null===t?e:e.replaceAll(t,this.config.content)}}class A extends E{normalize(e){return e=e.normalize("NFC")}}class I extends E{normalize(e){return e=e.normalize("NFKC")}}class z extends E{normalize(e){return e=e.normalize("NFKD")}}class O extends E{normalize(e){return this.config.strip_left&&this.config.strip_right?e=e.trim():(this.config.strip_left&&(e=e.trimStart()),this.config.strip_right&&(e=e.trimEnd())),e}}class B extends E{normalize(e){return e=g(e)}}class L extends E{normalize(e){return e=e.toLowerCase()}}class D extends E{normalize(e){return e=this.config.prepend+e}}class R extends E{constructor(e){super(e),this.normalizers=e.normalizers.map((e=>E.fromConfig(e)))}normalize(e){return this.normalizers.reduce(((e,t)=>t.normalize(e)),e)}}class N extends E{_tokenize_chinese_chars(e){const t=[];for(let n=0;n<e.length;++n){const r=e[n];_(r.charCodeAt(0))?(t.push(" "),t.push(r),t.push(" ")):t.push(r)}return t.join("")}stripAccents(e){return e.normalize("NFD").replace(/\p{Mn}/gu,"")}_is_control(e){switch(e){case"\t":case"\n":case"\r":return!1;default:return/^\p{Cc}|\p{Cf}|\p{Co}|\p{Cs}$/u.test(e)}}_clean_text(e){const t=[];for(const n of e){const e=n.charCodeAt(0);0===e||65533===e||this._is_control(n)||(/^\s$/.test(n)?t.push(" "):t.push(n))}return t.join("")}normalize(e){return this.config.clean_text&&(e=this._clean_text(e)),this.config.handle_chinese_chars&&(e=this._tokenize_chinese_chars(e)),this.config.lowercase?(e=e.toLowerCase(),!1!==this.config.strip_accents&&(e=this.stripAccents(e))):this.config.strip_accents&&(e=this.stripAccents(e)),e}}class V extends r.Callable{static fromConfig(e){if(null===e)return null;switch(e.type){case"BertPreTokenizer":return new j(e);case"Sequence":return new he(e);case"Whitespace":return new me(e);case"WhitespaceSplit":return new fe(e);case"Metaspace":return new de(e);case"ByteLevel":return new q(e);case"Split":return new G(e);case"Punctuation":return new U(e);case"Digits":return new W(e);case"Replace":return new ge(e);default:throw new Error(`Unknown PreTokenizer type: ${e.type}`)}}pre_tokenize_text(e,t){throw Error("pre_tokenize_text should be implemented in subclass.")}pre_tokenize(e,t){return(Array.isArray(e)?e.map((e=>this.pre_tokenize_text(e,t))):this.pre_tokenize_text(e,t)).flat()}_call(e,t){return this.pre_tokenize(e,t)}}class j extends V{constructor(e){super(),this.pattern=new RegExp(`[^\\s${w}]+|[${w}]`,"gu")}pre_tokenize_text(e,t){return e.trim().match(this.pattern)||[]}}class q extends V{constructor(e){super(),this.config=e,this.add_prefix_space=this.config.add_prefix_space,this.trim_offsets=this.config.trim_offsets,this.use_regex=this.config.use_regex??!0,this.pattern=/'s|'t|'re|'ve|'m|'ll|'d| ?\p{L}+| ?\p{N}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+/gu,this.byte_encoder=$,this.text_encoder=new TextEncoder}pre_tokenize_text(e,t){this.add_prefix_space&&!e.startsWith(" ")&&(e=" "+e);return(this.use_regex?e.match(this.pattern)||[]:[e]).map((e=>Array.from(this.text_encoder.encode(e),(e=>this.byte_encoder[e])).join("")))}}class G extends V{constructor(e){super(),this.config=e,this.pattern=p(this.config.pattern,this.config.invert)}pre_tokenize_text(e,t){return null===this.pattern?[]:this.config.invert?e.match(this.pattern)||[]:function(e,t){const n=[];let r=0;for(const a of e.matchAll(t)){const t=a[0];r<a.index&&n.push(e.slice(r,a.index)),t.length>0&&n.push(t),r=a.index+t.length}return r<e.length&&n.push(e.slice(r)),n}(e,this.pattern)}}class U extends V{constructor(e){super(),this.config=e,this.pattern=new RegExp(`[^${w}]+|[${w}]+`,"gu")}pre_tokenize_text(e,t){return e.match(this.pattern)||[]}}class W extends V{constructor(e){super(),this.config=e;const t="[^\\d]+|\\d"+(this.config.individual_digits?"":"+");this.pattern=new RegExp(t,"gu")}pre_tokenize_text(e,t){return e.match(this.pattern)||[]}}class H extends r.Callable{constructor(e){super(),this.config=e}static fromConfig(e){if(null===e)return null;switch(e.type){case"TemplateProcessing":return new Q(e);case"ByteLevel":return new Y(e);case"RobertaProcessing":return new K(e);case"BertProcessing":return new X(e);case"Sequence":return new Z(e);default:throw new Error(`Unknown PostProcessor type: ${e.type}`)}}post_process(e,...t){throw Error("post_process should be implemented in subclass.")}_call(e,...t){return this.post_process(e,...t)}}class X extends H{constructor(e){super(e),this.cls=e.cls[0],this.sep=e.sep[0]}post_process(e,t=null,{add_special_tokens:n=!0}={}){n&&(e=(0,a.mergeArrays)([this.cls],e,[this.sep]));let r=new Array(e.length).fill(0);if(null!==t){const s=n&&this instanceof K?[this.sep]:[],i=n?[this.sep]:[];e=(0,a.mergeArrays)(e,s,t,i),r=(0,a.mergeArrays)(r,new Array(t.length+s.length+i.length).fill(1))}return{tokens:e,token_type_ids:r}}}class K extends X{}class Q extends H{constructor(e){super(e),this.single=e.single,this.pair=e.pair}post_process(e,t=null,{add_special_tokens:n=!0}={}){const r=null===t?this.single:this.pair;let s=[],i=[];for(const o of r)"SpecialToken"in o?n&&(s.push(o.SpecialToken.id),i.push(o.SpecialToken.type_id)):"Sequence"in o&&("A"===o.Sequence.id?(s=(0,a.mergeArrays)(s,e),i=(0,a.mergeArrays)(i,new Array(e.length).fill(o.Sequence.type_id))):"B"===o.Sequence.id&&(s=(0,a.mergeArrays)(s,t),i=(0,a.mergeArrays)(i,new Array(t.length).fill(o.Sequence.type_id))));return{tokens:s,token_type_ids:i}}}class Y extends H{post_process(e,t=null){return t&&(e=(0,a.mergeArrays)(e,t)),{tokens:e}}}class Z extends H{constructor(e){super(e),this.processors=e.processors.map((e=>H.fromConfig(e)))}post_process(e,t=null,n={}){let r;for(const a of this.processors)if(a instanceof Y){if(e=a.post_process(e).tokens,t){t=a.post_process(t).tokens}}else{const s=a.post_process(e,t,n);e=s.tokens,r=s.token_type_ids}return{tokens:e,token_type_ids:r}}}class J extends r.Callable{constructor(e){super(),this.config=e,this.added_tokens=[],this.end_of_word_suffix=null,this.trim_offsets=e.trim_offsets}static fromConfig(e){if(null===e)return null;switch(e.type){case"WordPiece":return new ae(e);case"Metaspace":return new ce(e);case"ByteLevel":return new se(e);case"Replace":return new ee(e);case"ByteFallback":return new te(e);case"Fuse":return new ne(e);case"Strip":return new re(e);case"Sequence":return new oe(e);case"CTC":return new ie(e);case"BPEDecoder":return new le(e);default:throw new Error(`Unknown Decoder type: ${e.type}`)}}_call(e){return this.decode(e)}decode(e){return this.decode_chain(e).join("")}decode_chain(e){throw Error("`decode_chain` should be implemented in subclass.")}}class ee extends J{decode_chain(e){const t=p(this.config.pattern);return null===t?e:e.map((e=>e.replaceAll(t,this.config.content)))}}class te extends J{constructor(e){super(e),this.text_decoder=new TextDecoder}decode_chain(e){const t=[];let n=[];for(const r of e){let e=null;if(6===r.length&&r.startsWith("<0x")&&r.endsWith(">")){const t=parseInt(r.slice(3,5),16);isNaN(t)||(e=t)}if(null!==e)n.push(e);else{if(n.length>0){const e=this.text_decoder.decode(Uint8Array.from(n));t.push(e),n=[]}t.push(r)}}if(n.length>0){const e=this.text_decoder.decode(Uint8Array.from(n));t.push(e),n=[]}return t}}class ne extends J{decode_chain(e){return[e.join("")]}}class re extends J{constructor(e){super(e),this.content=this.config.content,this.start=this.config.start,this.stop=this.config.stop}decode_chain(e){return e.map((e=>{let t=0;for(let n=0;n<this.start&&e[n]===this.content;++n)t=n+1;let n=e.length;for(let t=0;t<this.stop;++t){const r=e.length-t-1;if(e[r]!==this.content)break;n=r}return e.slice(t,n)}))}}class ae extends J{constructor(e){super(e),this.cleanup=e.cleanup}decode_chain(e){return e.map(((e,t)=>(0!==t&&(e=e.startsWith(this.config.prefix)?e.replace(this.config.prefix,""):" "+e),this.cleanup&&(e=f(e)),e)))}}class se extends J{constructor(e){super(e),this.byte_decoder=C,this.text_decoder=new TextDecoder("utf-8",{fatal:!1,ignoreBOM:!0}),this.end_of_word_suffix=null}convert_tokens_to_string(e){const t=e.join(""),n=new Uint8Array([...t].map((e=>this.byte_decoder[e])));return this.text_decoder.decode(n)}decode_chain(e){const t=[];let n=[];for(const r of e)void 0!==this.added_tokens.find((e=>e.content===r))?(n.length>0&&(t.push(this.convert_tokens_to_string(n)),n=[]),t.push(r)):n.push(r);return n.length>0&&t.push(this.convert_tokens_to_string(n)),t}}class ie extends J{constructor(e){super(e),this.pad_token=this.config.pad_token,this.word_delimiter_token=this.config.word_delimiter_token,this.cleanup=this.config.cleanup}convert_tokens_to_string(e){if(0===e.length)return"";const t=[e[0]];for(let n=1;n<e.length;++n)e[n]!==t.at(-1)&&t.push(e[n]);let n=t.filter((e=>e!==this.pad_token)).join("");return this.cleanup&&(n=f(n).replaceAll(this.word_delimiter_token," ").trim()),n}decode_chain(e){return[this.convert_tokens_to_string(e)]}}class oe extends J{constructor(e){super(e),this.decoders=e.decoders.map((e=>J.fromConfig(e)))}decode_chain(e){return this.decoders.reduce(((e,t)=>t.decode_chain(e)),e)}}class le extends J{constructor(e){super(e),this.suffix=this.config.suffix}decode_chain(e){return e.map(((t,n)=>t.replaceAll(this.suffix,n===e.length-1?"":" ")))}}class ue extends J{decode_chain(e){let t="";for(let n=1;n<e.length;n+=2)t+=e[n];return[t]}}class de extends V{constructor(e){super(),this.addPrefixSpace=e.add_prefix_space,this.replacement=e.replacement,this.strRep=e.str_rep||this.replacement,this.prepend_scheme=e.prepend_scheme??"always"}pre_tokenize_text(e,{section_index:t}={}){let n=e.replaceAll(" ",this.strRep);return this.addPrefixSpace&&!n.startsWith(this.replacement)&&("always"===this.prepend_scheme||"first"===this.prepend_scheme&&0===t)&&(n=this.strRep+n),[n]}}class ce extends J{constructor(e){super(e),this.addPrefixSpace=e.add_prefix_space,this.replacement=e.replacement}decode_chain(e){const t=[];for(let n=0;n<e.length;++n){let r=e[n].replaceAll(this.replacement," ");this.addPrefixSpace&&0==n&&r.startsWith(" ")&&(r=r.substring(1)),t.push(r)}return t}}class pe extends E{constructor(e){super(e),this.charsmap=e.precompiled_charsmap}normalize(e){if((e=(e=e.replace(/[\u0001-\u0008\u000B\u000E-\u001F\u007F\u008F\u009F]/gm,"")).replace(/[\u0009\u000A\u000C\u000D\u00A0\u1680\u2000-\u200F\u2028\u2029\u202F\u205F\u2581\u3000\uFEFF\uFFFD]/gm," ")).includes("~")){const t=e.split("~");e=t.map((e=>e.normalize("NFKC"))).join("~")}else e=e.normalize("NFKC");return e}}class he extends V{constructor(e){super(),this.tokenizers=e.pretokenizers.map((e=>V.fromConfig(e)))}pre_tokenize_text(e,t){return this.tokenizers.reduce(((e,n)=>n.pre_tokenize(e,t)),[e])}}class me extends V{constructor(e){super()}pre_tokenize_text(e,t){return e.match(/\w+|[^\w\s]+/g)||[]}}class fe extends V{constructor(e){super()}pre_tokenize_text(e,t){return function(e){return e.match(/\S+/g)||[]}(e)}}class ge extends V{constructor(e){super(),this.config=e,this.pattern=p(this.config.pattern),this.content=this.config.content}pre_tokenize_text(e,t){return null===this.pattern?[e]:[e.replaceAll(this.pattern,this.config.content)]}}const _e=["bos_token","eos_token","unk_token","sep_token","pad_token","cls_token","mask_token"];function we(e,t,n,r){for(const s of Object.keys(e)){const i=t-e[s].length,o=n(s),l=new Array(i).fill(o);e[s]="right"===r?(0,a.mergeArrays)(e[s],l):(0,a.mergeArrays)(l,e[s])}}function ye(e,t){for(const n of Object.keys(e))e[n].length=t}class be extends r.Callable{return_token_type_ids=!1;padding_side="right";constructor(e,t){super(),this._tokenizer_config=t,this.normalizer=E.fromConfig(e.normalizer),this.pre_tokenizer=V.fromConfig(e.pre_tokenizer),this.model=M.fromConfig(e.model,t),this.post_processor=H.fromConfig(e.post_processor),this.decoder=J.fromConfig(e.decoder),this.special_tokens=[],this.all_special_ids=[],this.added_tokens=[];for(const t of e.added_tokens){const e=new x(t);this.added_tokens.push(e),this.model.tokens_to_ids.set(e.content,e.id),this.model.vocab[e.id]=e.content,e.special&&(this.special_tokens.push(e.content),this.all_special_ids.push(e.id))}if(this.additional_special_tokens=t.additional_special_tokens??[],this.special_tokens.push(...this.additional_special_tokens),this.special_tokens=[...new Set(this.special_tokens)],this.decoder&&(this.decoder.added_tokens=this.added_tokens,this.decoder.end_of_word_suffix=this.model.end_of_word_suffix),this.added_tokens_regex=this.added_tokens.length>0?new RegExp(this.added_tokens.slice().sort(((e,t)=>t.content.length-e.content.length)).map((e=>`${e.lstrip?"\\s*":""}(${(0,a.escapeRegExp)(e.content)})${e.rstrip?"\\s*":""}`)).join("|")):null,this.mask_token=this.getToken("mask_token"),this.mask_token_id=this.model.tokens_to_ids.get(this.mask_token),this.pad_token=this.getToken("pad_token","eos_token"),this.pad_token_id=this.model.tokens_to_ids.get(this.pad_token),this.sep_token=this.getToken("sep_token"),this.sep_token_id=this.model.tokens_to_ids.get(this.sep_token),this.unk_token=this.getToken("unk_token"),this.unk_token_id=this.model.tokens_to_ids.get(this.unk_token),this.model_max_length=t.model_max_length,this.remove_space=t.remove_space,this.clean_up_tokenization_spaces=t.clean_up_tokenization_spaces??!0,this.do_lowercase_and_remove_accent=t.do_lowercase_and_remove_accent??!1,t.padding_side&&(this.padding_side=t.padding_side),this.legacy=!1,this.chat_template=t.chat_template??null,Array.isArray(this.chat_template)){const e=Object.create(null);for(const{name:t,template:n}of this.chat_template){if("string"!=typeof t||"string"!=typeof n)throw new Error('Chat template must be a list of objects with "name" and "template" properties');e[t]=n}this.chat_template=e}this._compiled_template_cache=new Map}getToken(...e){for(const t of e){const e=this._tokenizer_config[t];if(e){if("object"==typeof e){if("AddedToken"===e.__type)return e.content;throw Error(`Unknown token: ${e}`)}return e}}return null}static async from_pretrained(e,{progress_callback:t=null,config:n=null,cache_dir:r=null,local_files_only:a=!1,revision:s="main",legacy:i=null}={}){return new this(...await c(e,{progress_callback:t,config:n,cache_dir:r,local_files_only:a,revision:s,legacy:i}))}_call(e,{text_pair:t=null,add_special_tokens:n=!0,padding:r=!1,truncation:a=null,max_length:s=null,return_tensor:l=!0,return_token_type_ids:u=null}={}){const d=Array.isArray(e);let c;if(d){if(0===e.length)throw Error("text array must be non-empty");if(null!==t){if(!Array.isArray(t))throw Error("text_pair must also be an array");if(e.length!==t.length)throw Error("text and text_pair must have the same length");c=e.map(((e,r)=>this._encode_plus(e,{text_pair:t[r],add_special_tokens:n,return_token_type_ids:u})))}else c=e.map((e=>this._encode_plus(e,{add_special_tokens:n,return_token_type_ids:u})))}else{if(null==e)throw Error("text may not be null or undefined");if(Array.isArray(t))throw Error("When specifying `text_pair`, since `text` is a string, `text_pair` must also be a string (i.e., not an array).");c=[this._encode_plus(e,{text_pair:t,add_special_tokens:n,return_token_type_ids:u})]}if(null===s?s="max_length"===r?this.model_max_length:(0,i.max)(c.map((e=>e.input_ids.length)))[0]:a||console.warn("Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=true` to explicitly truncate examples to max length."),s=Math.min(s,this.model_max_length??1/0),r||a)for(let e=0;e<c.length;++e)c[e].input_ids.length!==s&&(c[e].input_ids.length>s?a&&ye(c[e],s):r&&we(c[e],s,(e=>"input_ids"===e?this.pad_token_id:0),this.padding_side));const p={};if(l){if((!r||!a)&&c.some((e=>{for(const t of Object.keys(e))if(e[t].length!==c[0][t]?.length)return!0;return!1})))throw Error("Unable to create tensor, you should probably activate truncation and/or padding with 'padding=true' and 'truncation=true' to have batched tensors with the same length.");const e=[c.length,c[0].input_ids.length];for(const t of Object.keys(c[0]))p[t]=new o.Tensor("int64",BigInt64Array.from(c.flatMap((e=>e[t])).map(BigInt)),e)}else{for(const e of Object.keys(c[0]))p[e]=c.map((t=>t[e]));if(!d)for(const e of Object.keys(p))p[e]=p[e][0]}return p}_encode_text(e){if(null===e)return null;const t=(this.added_tokens_regex?e.split(this.added_tokens_regex).filter((e=>e)):[e]).map(((e,t)=>{if(void 0!==this.added_tokens.find((t=>t.content===e)))return e;{if(!0===this.remove_space&&(e=e.trim().split(/\s+/).join(" ")),this.do_lowercase_and_remove_accent&&(e=function(e){return g(e.toLowerCase())}(e)),null!==this.normalizer&&(e=this.normalizer(e)),0===e.length)return[];const n=null!==this.pre_tokenizer?this.pre_tokenizer(e,{section_index:t}):[e];return this.model(n)}})).flat();return t}_encode_plus(e,{text_pair:t=null,add_special_tokens:n=!0,return_token_type_ids:r=null}={}){const{tokens:a,token_type_ids:s}=this._tokenize_helper(e,{pair:t,add_special_tokens:n}),i=this.model.convert_tokens_to_ids(a),o={input_ids:i,attention_mask:new Array(i.length).fill(1)};return(r??this.return_token_type_ids)&&s&&(o.token_type_ids=s),o}_tokenize_helper(e,{pair:t=null,add_special_tokens:n=!1}={}){const r=this._encode_text(e),s=this._encode_text(t);return this.post_processor?this.post_processor(r,s,{add_special_tokens:n}):{tokens:(0,a.mergeArrays)(r??[],s??[])}}tokenize(e,{pair:t=null,add_special_tokens:n=!1}={}){return this._tokenize_helper(e,{pair:t,add_special_tokens:n}).tokens}encode(e,{text_pair:t=null,add_special_tokens:n=!0,return_token_type_ids:r=null}={}){return this._encode_plus(e,{text_pair:t,add_special_tokens:n,return_token_type_ids:r}).input_ids}batch_decode(e,t={}){return e instanceof o.Tensor&&(e=e.tolist()),e.map((e=>this.decode(e,t)))}decode(e,t={}){if(e instanceof o.Tensor&&(e=m(e)),!Array.isArray(e)||0===e.length||!(0,a.isIntegralNumber)(e[0]))throw Error("token_ids must be a non-empty array of integers.");return this.decode_single(e,t)}decode_single(e,{skip_special_tokens:t=!1,clean_up_tokenization_spaces:n=null}){let r=this.model.convert_ids_to_tokens(e);t&&(r=r.filter((e=>!this.special_tokens.includes(e))));let a=this.decoder?this.decoder(r):r.join(" ");return this.decoder&&this.decoder.end_of_word_suffix&&(a=a.replaceAll(this.decoder.end_of_word_suffix," "),t&&(a=a.trim())),(n??this.clean_up_tokenization_spaces)&&(a=f(a)),a}get_chat_template({chat_template:e=null,tools:t=null}={}){if(this.chat_template&&"object"==typeof this.chat_template){const n=this.chat_template;if(null!==e&&Object.hasOwn(n,e))e=n[e];else if(null===e)if(null!==t&&"tool_use"in n)e=n.tool_use;else{if(!("default"in n))throw Error(`This model has multiple chat templates with no default specified! Please either pass a chat template or the name of the template you wish to use to the 'chat_template' argument. Available template names are ${Object.keys(n).sort()}.`);e=n.default}}else if(null===e){if(!this.chat_template)throw Error("Cannot use apply_chat_template() because tokenizer.chat_template is not set and no template argument was passed! For information about writing templates and setting the tokenizer.chat_template attribute, please see the documentation at https://huggingface.co/docs/transformers/main/en/chat_templating");e=this.chat_template}return e}apply_chat_template(e,{tools:t=null,documents:n=null,chat_template:r=null,add_generation_prompt:a=!1,tokenize:s=!0,padding:i=!1,truncation:o=!1,max_length:l=null,return_tensor:d=!0,return_dict:c=!1,tokenizer_kwargs:p={},...h}={}){if("string"!=typeof(r=this.get_chat_template({chat_template:r,tools:t})))throw Error("chat_template must be a string, but got "+typeof r);let m=this._compiled_template_cache.get(r);void 0===m&&(m=new u.Template(r),this._compiled_template_cache.set(r,m));const f=Object.create(null);for(const e of _e){const t=this.getToken(e);t&&(f[e]=t)}const g=m.render({messages:e,add_generation_prompt:a,tools:t,documents:n,...f,...h});if(s){const e=this._call(g,{add_special_tokens:!1,padding:i,truncation:o,max_length:l,return_tensor:d,...p});return c?e:e.input_ids}return g}}class ve extends be{return_token_type_ids=!0}class xe extends be{return_token_type_ids=!0}class Me extends be{return_token_type_ids=!0}class Te extends be{return_token_type_ids=!0}class ke extends be{return_token_type_ids=!0}class $e extends be{return_token_type_ids=!0}class Ce extends be{return_token_type_ids=!0}class Se extends be{return_token_type_ids=!0}class Pe extends be{return_token_type_ids=!0}class Ee extends be{}class Fe extends be{}class Ae extends be{return_token_type_ids=!0;constructor(e,t){super(e,t),console.warn('WARNING: `XLMTokenizer` is not yet supported by Hugging Face\'s "fast" tokenizers library. Therefore, you may experience slightly inaccurate results.')}}class Ie extends be{return_token_type_ids=!0}class ze extends be{}class Oe extends be{}class Be extends be{}class Le extends be{constructor(e,t){super(e,t),this.languageRegex=/^[a-z]{2}_[A-Z]{2}$/,this.language_codes=this.special_tokens.filter((e=>this.languageRegex.test(e))),this.lang_to_token=e=>e}_build_translation_inputs(e,t,n){return Ze(this,e,t,n)}}class De extends Le{}class Re extends be{}class Ne extends be{}const Ve="▁";class je extends be{padding_side="left";constructor(e,t){super(e,t),this.legacy=t.legacy??!0,this.legacy||(this.normalizer=null,this.pre_tokenizer=new de({replacement:Ve,add_prefix_space:!0,prepend_scheme:"first"}))}_encode_text(e){if(null===e)return null;if(this.legacy||0===e.length)return super._encode_text(e);let t=super._encode_text(Ve+e.replaceAll(Ve," "));return t.length>1&&t[0]===Ve&&this.special_tokens.includes(t[1])&&(t=t.slice(1)),t}}class qe extends be{}class Ge extends be{}class Ue extends be{}class We extends be{}class He extends be{}class Xe extends be{}class Ke extends be{}class Qe extends be{}class Ye extends be{}function Ze(e,t,n,r){if(!("language_codes"in e)||!Array.isArray(e.language_codes))throw new Error("Tokenizer must have `language_codes` attribute set and it should be an array of language ids.");if(!("languageRegex"in e&&e.languageRegex instanceof RegExp))throw new Error("Tokenizer must have `languageRegex` attribute set and it should be a regular expression.");if(!("lang_to_token"in e)||"function"!=typeof e.lang_to_token)throw new Error("Tokenizer must have `lang_to_token` attribute set and it should be a function.");const a=r.src_lang,s=r.tgt_lang;if(!e.language_codes.includes(s))throw new Error(`Target language code "${s}" is not valid. Must be one of: {${e.language_codes.join(", ")}}`);if(void 0!==a){if(!e.language_codes.includes(a))throw new Error(`Source language code "${a}" is not valid. Must be one of: {${e.language_codes.join(", ")}}`);for(const t of e.post_processor.config.single)if("SpecialToken"in t&&e.languageRegex.test(t.SpecialToken.id)){t.SpecialToken.id=e.lang_to_token(a);break}}return r.forced_bos_token_id=e.model.convert_tokens_to_ids([e.lang_to_token(s)])[0],e._call(t,n)}class Je extends be{constructor(e,t){super(e,t),this.languageRegex=/^[a-z]{3}_[A-Z][a-z]{3}$/,this.language_codes=this.special_tokens.filter((e=>this.languageRegex.test(e))),this.lang_to_token=e=>e}_build_translation_inputs(e,t,n){return Ze(this,e,t,n)}}class et extends be{constructor(e,t){super(e,t),this.languageRegex=/^__[a-z]{2,3}__$/,this.language_codes=this.special_tokens.filter((e=>this.languageRegex.test(e))).map((e=>e.slice(2,-2))),this.lang_to_token=e=>`__${e}__`}_build_translation_inputs(e,t,n){return Ze(this,e,t,n)}}class tt extends be{get timestamp_begin(){return this.model.convert_tokens_to_ids(["<|notimestamps|>"])[0]+1}_decode_asr(e,{return_timestamps:t=!1,return_language:n=!1,time_precision:r=null,force_full_sequences:a=!0}={}){if(null===r)throw Error("Must specify time_precision");let s=null;const o="word"===t;function l(){return{language:s,timestamp:[null,null],text:""}}const u=[];let c=l(),p=0;const h=this.timestamp_begin;let m=[],f=[],g=!1,_=null;const w=new Set(this.all_special_ids);for(const n of e){const e=n.tokens,a=o?n.token_timestamps:null;let b=null,v=h;if("stride"in n){const[t,a,s]=n.stride;if(p-=a,_=t-s,a&&(v=a/r+h),s)for(let t=e.length-1;t>=0;--t){const n=Number(e[t]);if(n>=h){if(null!==b&&(n-h)*r<_)break;b=n}}}let x=[],M=[];for(let n=0;n<e.length;++n){const _=Number(e[n]);if(w.has(_)){const e=this.decode([_]),n=d.WHISPER_LANGUAGE_MAPPING.get(e.slice(2,-2));if(void 0!==n){if(null!==s&&n!==s&&!t){m.push(x);const e=this.findLongestCommonSequence(m)[0],t=this.decode(e);c.text=t,u.push(c),m=[],x=[],c=l()}s=c.language=n}}else if(_>=h){const e=(_-h)*r+p,t=(0,i.round)(e,2);if(null!==b&&_>=b)g=!0;else if(g||m.length>0&&_<v)g=!1;else if(null===c.timestamp[0])c.timestamp[0]=t;else if(t===c.timestamp[0]);else{c.timestamp[1]=t,m.push(x),o&&f.push(M);const[e,n]=this.findLongestCommonSequence(m,f),r=this.decode(e);c.text=r,o&&(c.words=this.collateWordTimestamps(e,n,s)),u.push(c),m=[],x=[],f=[],M=[],c=l()}}else if(x.push(_),o){let e,t=(0,i.round)(a[n]+p,2);if(n+1<a.length){e=(0,i.round)(a[n+1]+p,2);const s=this.decode([_]);y.test(s)&&(e=(0,i.round)(Math.min(t+r,e),2))}else e=null;M.push([t,e])}}if("stride"in n){const[e,t,r]=n.stride;p+=e-r}x.length>0?(m.push(x),o&&f.push(M)):m.every((e=>0===e.length))&&(c=l(),m=[],x=[],f=[],M=[])}if(m.length>0){if(a&&t)throw new Error("Whisper did not predict an ending timestamp, which can happen if audio is cut off in the middle of a word. Also make sure WhisperTimeStampLogitsProcessor was used during generation.");const[e,n]=this.findLongestCommonSequence(m,f),r=this.decode(e);c.text=r,o&&(c.words=this.collateWordTimestamps(e,n,s)),u.push(c)}let b=Object.create(null);const v=u.map((e=>e.text)).join("");if(t||n){for(let e=0;e<u.length;++e){const r=u[e];t||delete r.timestamp,n||delete r.language}if(o){const e=[];for(const t of u)for(const n of t.words)e.push(n);b={chunks:e}}else b={chunks:u}}return[v,b]}findLongestCommonSequence(e,t=null){let n=e[0],r=n.length,a=[];const s=Array.isArray(t)&&t.length>0;let i=s?[]:null,o=s?t[0]:null;for(let l=1;l<e.length;++l){const u=e[l];let d=0,c=[r,r,0,0];const p=u.length;for(let e=1;e<r+p;++e){const a=Math.max(0,r-e),i=Math.min(r,r+p-e),h=n.slice(a,i),m=Math.max(0,e-r),f=Math.min(p,e),g=u.slice(m,f);if(h.length!==g.length)throw new Error("There is a bug within whisper `decode_asr` function, please report it. Dropping to prevent bad inference.");let _;_=s?h.filter(((e,n)=>e===g[n]&&o[a+n]<=t[l][m+n])).length:h.filter(((e,t)=>e===g[t])).length;const w=_/e+e/1e4;_>1&&w>d&&(d=w,c=[a,i,m,f])}const[h,m,f,g]=c,_=Math.floor((m+h)/2),w=Math.floor((g+f)/2);a.push(...n.slice(0,_)),n=u.slice(w),r=n.length,s&&(i.push(...o.slice(0,_)),o=t[l].slice(w))}return a.push(...n),s?(i.push(...o),[a,i]):[a,[]]}collateWordTimestamps(e,t,n){const[r,a,s]=this.combineTokensIntoWords(e,n),i=[];for(let e=0;e<r.length;++e){const n=s[e];i.push({text:r[e],timestamp:[t[n.at(0)][0],t[n.at(-1)][1]]})}return i}combineTokensIntoWords(e,t,n="\"'“¡¿([{-",r="\"'.。,,!!??::”)]}、"){let a,s,i;return["chinese","japanese","thai","lao","myanmar"].includes(t=t??"english")?[a,s,i]=this.splitTokensOnUnicode(e):[a,s,i]=this.splitTokensOnSpaces(e),this.mergePunctuations(a,s,i,n,r)}decode(e,t){let n;return t?.decode_with_timestamps?(e instanceof o.Tensor&&(e=m(e)),n=this.decodeWithTimestamps(e,t)):n=super.decode(e,t),n}decodeWithTimestamps(e,t){const n=t?.time_precision??.02,r=Array.from(this.all_special_ids).at(-1)+1;let a=[[]];for(let t of e)if(t=Number(t),t>=r){const e=((t-r)*n).toFixed(2);a.push(`<|${e}|>`),a.push([])}else a[a.length-1].push(t);return a=a.map((e=>"string"==typeof e?e:super.decode(e,t))),a.join("")}splitTokensOnUnicode(e){const t=this.decode(e,{decode_with_timestamps:!0}),n=[],r=[],a=[];let s=[],i=[],o=0;for(let l=0;l<e.length;++l){const u=e[l];s.push(u),i.push(l);const d=this.decode(s,{decode_with_timestamps:!0});d.includes("�")&&"�"!==t[o+d.indexOf("�")]||(n.push(d),r.push(s),a.push(i),s=[],i=[],o+=d.length)}return[n,r,a]}splitTokensOnSpaces(e){const[t,n,r]=this.splitTokensOnUnicode(e),a=[],s=[],i=[],o=new RegExp(`^[${w}]$`,"gu");for(let e=0;e<t.length;++e){const l=t[e],u=n[e],d=r[e],c=u[0]>=this.model.tokens_to_ids.get("<|endoftext|>"),p=l.startsWith(" "),h=l.trim(),m=o.test(h);if(c||p||m||0===a.length)a.push(l),s.push(u),i.push(d);else{const e=a.length-1;a[e]+=l,s[e].push(...u),i[e].push(...d)}}return[a,s,i]}mergePunctuations(e,t,n,r,s){const i=structuredClone(e),o=structuredClone(t),l=structuredClone(n);let u=i.length-2,d=i.length-1;for(;u>=0;)i[u].startsWith(" ")&&r.includes(i[u].trim())?(i[d]=i[u]+i[d],o[d]=(0,a.mergeArrays)(o[u],o[d]),l[d]=(0,a.mergeArrays)(l[u],l[d]),i[u]="",o[u]=[],l[u]=[]):d=u,--u;for(u=0,d=1;d<i.length;)!i[u].endsWith(" ")&&s.includes(i[d])?(i[u]+=i[d],o[u]=(0,a.mergeArrays)(o[u],o[d]),l[u]=(0,a.mergeArrays)(l[u],l[d]),i[d]="",o[d]=[],l[d]=[]):u=d,++d;return[i.filter((e=>e)),o.filter((e=>e.length>0)),l.filter((e=>e.length>0))]}}class nt extends be{}class rt extends be{}class at extends be{}class st extends be{constructor(e,t){super(e,t),this.languageRegex=/^(>>\w+<<)\s*/g,this.supported_language_codes=this.model.vocab.filter((e=>this.languageRegex.test(e))),console.warn('WARNING: `MarianTokenizer` is not yet supported by Hugging Face\'s "fast" tokenizers library. Therefore, you may experience slightly inaccurate results.')}_encode_text(e){if(null===e)return null;const[t,...n]=e.trim().split(this.languageRegex);if(0===n.length)return super._encode_text(t);if(2===n.length){const[e,t]=n;return this.supported_language_codes.includes(e)||console.warn(`Unsupported language code "${e}" detected, which may lead to unexpected behavior. Should be one of: ${JSON.stringify(this.supported_language_codes)}`),(0,a.mergeArrays)([e],super._encode_text(t))}}}class it extends be{}class ot extends be{}class lt extends be{}class ut extends be{}class dt extends be{}class ct extends be{constructor(e,t){super(e,t),this.decoder=new ue({})}}class pt extends be{}class ht{static TOKENIZER_CLASS_MAPPING={T5Tokenizer:ze,DistilBertTokenizer:Ee,CamembertTokenizer:Fe,DebertaTokenizer:ke,DebertaV2Tokenizer:$e,BertTokenizer:ve,HerbertTokenizer:Ce,ConvBertTokenizer:Se,RoFormerTokenizer:Pe,XLMTokenizer:Ae,ElectraTokenizer:Ie,MobileBertTokenizer:Me,SqueezeBertTokenizer:Te,AlbertTokenizer:xe,GPT2Tokenizer:Oe,BartTokenizer:Be,MBartTokenizer:Le,MBart50Tokenizer:De,RobertaTokenizer:Re,WhisperTokenizer:tt,CodeGenTokenizer:nt,CLIPTokenizer:rt,SiglipTokenizer:at,MarianTokenizer:st,BloomTokenizer:Ne,NllbTokenizer:Je,M2M100Tokenizer:et,LlamaTokenizer:je,CodeLlamaTokenizer:qe,XLMRobertaTokenizer:Ge,MPNetTokenizer:Ue,FalconTokenizer:We,GPTNeoXTokenizer:He,EsmTokenizer:Xe,Wav2Vec2CTCTokenizer:it,BlenderbotTokenizer:ot,BlenderbotSmallTokenizer:lt,SpeechT5Tokenizer:ut,NougatTokenizer:dt,VitsTokenizer:ct,Qwen2Tokenizer:Ke,GemmaTokenizer:Qe,Grok1Tokenizer:Ye,CohereTokenizer:pt,PreTrainedTokenizer:be};static async from_pretrained(e,{progress_callback:t=null,config:n=null,cache_dir:r=null,local_files_only:a=!1,revision:s="main",legacy:i=null}={}){const[o,l]=await c(e,{progress_callback:t,config:n,cache_dir:r,local_files_only:a,revision:s,legacy:i}),u=l.tokenizer_class?.replace(/Fast$/,"")??"PreTrainedTokenizer";let d=this.TOKENIZER_CLASS_MAPPING[u];return d||(console.warn(`Unknown tokenizer class "${u}", attempting to construct from base class.`),d=be),new d(o,l)}}},"./src/utils/audio.js": |
| 194 | /*!****************************!*\ |
| 195 | !*** ./src/utils/audio.js ***! |
| 196 | \****************************/(e,t,n)=>{n.r(t),n.d(t,{hamming:()=>d,hanning:()=>u,mel_filter_bank:()=>f,read_audio:()=>o,spectrogram:()=>_,window_function:()=>w});var r=n(/*! ./hub.js */"./src/utils/hub.js"),a=n(/*! ./maths.js */"./src/utils/maths.js"),s=n(/*! ./core.js */"./src/utils/core.js"),i=n(/*! ./tensor.js */"./src/utils/tensor.js");async function o(e,t){if("undefined"==typeof AudioContext)throw Error("Unable to load audio from path/URL since `AudioContext` is not available in your environment. Instead, audio data should be passed directly to the pipeline/processor. For more information and some example code, see https://huggingface.co/docs/transformers.js/guides/node-audio-processing.");const n=await(await(0,r.getFile)(e)).arrayBuffer(),a=new AudioContext({sampleRate:t});void 0===t&&console.warn(`No sampling rate provided, using default of ${a.sampleRate}Hz.`);const s=await a.decodeAudioData(n);let i;if(2===s.numberOfChannels){const e=Math.sqrt(2),t=s.getChannelData(0),n=s.getChannelData(1);i=new Float32Array(t.length);for(let r=0;r<s.length;++r)i[r]=e*(t[r]+n[r])/2}else i=s.getChannelData(0);return i}function l(e,t){if(e<1)return new Float64Array;if(1===e)return new Float64Array([1]);const n=1-t,r=2*Math.PI/(e-1),a=new Float64Array(e);for(let s=0;s<e;++s)a[s]=t-n*Math.cos(s*r);return a}function u(e){return l(e,.5)}function d(e){return l(e,.54)}const c={htk:e=>2595*Math.log10(1+e/700),kaldi:e=>1127*Math.log(1+e/700),slaney:(e,t=1e3,n=15,r=27/Math.log(6.4))=>e>=t?n+Math.log(e/t)*r:3*e/200};function p(e,t="htk"){const n=c[t];if(!n)throw new Error('mel_scale should be one of "htk", "slaney" or "kaldi".');return"number"==typeof e?n(e):e.map((e=>n(e)))}const h={htk:e=>700*(10**(e/2595)-1),kaldi:e=>700*(Math.exp(e/1127)-1),slaney:(e,t=1e3,n=15,r=Math.log(6.4)/27)=>e>=n?t*Math.exp(r*(e-n)):200*e/3};function m(e,t,n){const r=(t-e)/(n-1);return Float64Array.from({length:n},((t,n)=>e+r*n))}function f(e,t,n,r,a,s=null,i="htk",o=!1){if(null!==s&&"slaney"!==s)throw new Error('norm must be one of null or "slaney"');const l=m(p(n,i),p(r,i),t+2);let u,d=function(e,t="htk"){const n=h[t];if(!n)throw new Error('mel_scale should be one of "htk", "slaney" or "kaldi".');return"number"==typeof e?n(e):e.map((e=>n(e)))}(l,i);if(o){const t=a/(2*e);u=p(Float64Array.from({length:e},((e,n)=>n*t)),i),d=l}else u=m(0,Math.floor(a/2),e);const c=function(e,t){const n=Float64Array.from({length:t.length-1},((e,n)=>t[n+1]-t[n])),r=Array.from({length:e.length},(()=>new Array(t.length)));for(let n=0;n<e.length;++n){const a=r[n];for(let r=0;r<t.length;++r)a[r]=t[r]-e[n]}const a=t.length-2,s=Array.from({length:a},(()=>new Array(e.length)));for(let t=0;t<e.length;++t){const e=r[t];for(let r=0;r<a;++r){const a=-e[r]/n[r],i=e[r+2]/n[r+1];s[r][t]=Math.max(0,Math.min(a,i))}}return s}(u,d);if(null!==s&&"slaney"===s)for(let n=0;n<t;++n){const t=c[n],r=2/(d[n+2]-d[n]);for(let n=0;n<e;++n)t[n]*=r}return c}function g(e,t,n,r,s){if(n<=0)throw new Error("reference must be greater than zero");if(r<=0)throw new Error("min_value must be greater than zero");n=Math.max(r,n);const i=Math.log10(n);for(let n=0;n<e.length;++n)e[n]=t*Math.log10(Math.max(r,e[n])-i);if(null!==s){if(s<=0)throw new Error("db_range must be greater than zero");const t=(0,a.max)(e)[0]-s;for(let n=0;n<e.length;++n)e[n]=Math.max(e[n],t)}return e}async function _(e,t,n,r,{fft_length:o=null,power:l=1,center:u=!0,pad_mode:d="reflect",onesided:c=!0,preemphasis:p=null,mel_filters:h=null,mel_floor:m=1e-10,log_mel:f=null,reference:_=1,min_value:w=1e-10,db_range:y=null,remove_dc_offset:b=null,min_num_frames:v=null,max_num_frames:x=null,do_pad:M=!0,transpose:T=!1}={}){const k=t.length;if(null===o&&(o=n),n>o)throw Error(`frame_length (${n}) may not be larger than fft_length (${o})`);if(k!==n)throw new Error(`Length of the window (${k}) must equal frame_length (${n})`);if(r<=0)throw new Error("hop_length must be greater than zero");if(null===l&&null!==h)throw new Error("You have provided `mel_filters` but `power` is `None`. Mel spectrogram computation is not yet supported for complex-valued spectrogram. Specify `power` to fix this issue.");if(u){if("reflect"!==d)throw new Error(`pad_mode="${d}" not implemented yet.`);const t=Math.floor((o-1)/2)+1;e=function(e,t,n){const r=new e.constructor(e.length+t+n),a=e.length-1;for(let n=0;n<e.length;++n)r[t+n]=e[n];for(let n=1;n<=t;++n)r[t-n]=e[(0,s.calculateReflectOffset)(n,a)];for(let i=1;i<=n;++i)r[a+t+i]=e[(0,s.calculateReflectOffset)(a-i,a)];return r}(e,t,t)}let $=Math.floor(1+Math.floor((e.length-n)/r));null!==v&&$<v&&($=v);const C=c?Math.floor(o/2)+1:o;let S=$,P=$;null!==x&&(x>$?M&&(P=x):P=S=x);const E=new a.FFT(o),F=new Float64Array(o),A=new Float64Array(E.outputBufferSize),I=new Float32Array(C*P);for(let a=0;a<S;++a){const s=a*r,i=Math.min(e.length-s,n);i!==n&&F.fill(0,0,n);for(let t=0;t<i;++t)F[t]=e[s+t];if(b){let e=0;for(let t=0;t<i;++t)e+=F[t];const t=e/i;for(let e=0;e<i;++e)F[e]-=t}if(null!==p){for(let e=i-1;e>=1;--e)F[e]-=p*F[e-1];F[0]*=1-p}for(let e=0;e<t.length;++e)F[e]*=t[e];E.realTransform(A,F);for(let e=0;e<C;++e){const t=e<<1;I[e*P+a]=A[t]**2+A[t+1]**2}}if(null!==l&&2!==l){const e=2/l;for(let t=0;t<I.length;++t)I[t]**=e}const z=h.length;let O=await(0,i.matmul)(new i.Tensor("float32",h.flat(),[z,C]),new i.Tensor("float32",I,[C,P]));T&&(O=O.transpose(1,0));const B=O.data;for(let e=0;e<B.length;++e)B[e]=Math.max(m,B[e]);if(null!==l&&null!==f){const e=Math.min(B.length,S*z);switch(f){case"log":for(let t=0;t<e;++t)B[t]=Math.log(B[t]);break;case"log10":for(let t=0;t<e;++t)B[t]=Math.log10(B[t]);break;case"dB":if(1===l)!function(e,t=1,n=1e-5,r=null){g(e,20,t,n,r)}(B,_,w,y);else{if(2!==l)throw new Error(`Cannot use log_mel option '${f}' with power ${l}`);!function(e,t=1,n=1e-10,r=null){g(e,10,t,n,r)}(B,_,w,y)}break;default:throw new Error(`log_mel must be one of null, 'log', 'log10' or 'dB'. Got '${f}'`)}}return O}function w(e,t,{periodic:n=!0,frame_length:r=null,center:a=!0}={}){const s=n?e+1:e;let i;switch(t){case"boxcar":i=new Float64Array(s).fill(1);break;case"hann":case"hann_window":i=u(s);break;case"hamming":i=d(s);break;case"povey":i=u(s).map((e=>Math.pow(e,.85)));break;default:throw new Error(`Unknown window type ${t}.`)}if(n&&(i=i.subarray(0,e)),null===r)return i;if(e>r)throw new Error(`Length of the window (${e}) may not be larger than frame_length (${r})`);return i}},"./src/utils/constants.js": |
no outgoing calls