MCPcopy Index your code
hub / github.com/huggingface/diffusers / tokenize

Method tokenize

src/diffusers/pipelines/kolors/tokenizer.py:48–61  ·  view source on GitHub ↗
(self, s: str, encode_special_tokens=False)

Source from the content-addressed store, hash-verified

46 self.role_special_token_expression = "|".join([re.escape(token) for token in role_special_tokens])
47
def tokenize(self, s: str, encode_special_tokens=False):
    """Split ``s`` into a list of SentencePiece pieces.

    Args:
        s: Text to tokenize.
        encode_special_tokens: When true, every substring matching
            ``self.role_special_token_expression`` is emitted verbatim as
            a single token, and only the text between those matches is
            passed to ``self.sp_model.EncodeAsPieces``. When false, the
            whole string is encoded in one call.

    Returns:
        The pieces in their original left-to-right order.
    """
    if not encode_special_tokens:
        return self.sp_model.EncodeAsPieces(s)

    pieces = []
    cursor = 0  # index just past the last special token consumed so far
    for hit in re.finditer(self.role_special_token_expression, s):
        begin, end = hit.span()
        if begin > cursor:
            # Plain text sitting between the previous match and this one.
            pieces.extend(self.sp_model.EncodeAsPieces(s[cursor:begin]))
        # The matched special token is kept whole, not split into pieces.
        pieces.append(hit.group())
        cursor = end
    if cursor < len(s):
        # Trailing text after the final special token (or the whole string
        # when nothing matched).
        pieces.extend(self.sp_model.EncodeAsPieces(s[cursor:]))
    return pieces
62
63 def encode(self, s: str, bos: bool = False, eos: bool = False) -> list[int]:
64 assert isinstance(s, str)

Callers 2

_tokenizeMethod · 0.45
_maybe_convert_promptMethod · 0.45

Calls 1

startMethod · 0.45

Tested by

no test coverage detected