MCPcopy
hub / github.com/kohya-ss/sd-scripts / tokenize

Method tokenize

library/strategy_anima.py:55–71  ·  view source on GitHub ↗
(self, text: Union[str, List[str]])

Source from the content-addressed store, hash-verified

53 self.t5_max_length = t5_max_length
54
def tokenize(self, text: Union[str, List[str]]) -> List[torch.Tensor]:
    """Tokenize prompt text with both the Qwen3 and the T5 tokenizer.

    A single string is wrapped into a one-element list; any other input is
    handed to the tokenizers unchanged. Both tokenizers are called with
    ``return_tensors="pt"``, truncation enabled and ``padding="max_length"``;
    each uses its own configured maximum length (``self.qwen3_max_length``
    and ``self.t5_max_length`` respectively).

    Args:
        text: One prompt string or a list of prompt strings.

    Returns:
        A four-element list in this order:
        ``[qwen3_input_ids, qwen3_attn_mask, t5_input_ids, t5_attn_mask]``.
    """
    prompts = text if not isinstance(text, str) else [text]

    # Keyword arguments common to both tokenizer calls; only max_length differs.
    shared_kwargs = dict(return_tensors="pt", truncation=True, padding="max_length")

    # Qwen3 tokens
    qwen3_out = self.qwen3_tokenizer(prompts, max_length=self.qwen3_max_length, **shared_kwargs)
    results = [qwen3_out["input_ids"], qwen3_out["attention_mask"]]

    # T5 tokens (for LLM Adapter target tokens)
    t5_out = self.t5_tokenizer(prompts, max_length=self.t5_max_length, **shared_kwargs)
    results += [t5_out["input_ids"], t5_out["attention_mask"]]

    return results
72
73
74class AnimaTextEncodingStrategy(TextEncodingStrategy):

Callers 5

trainFunction · 0.95
trainFunction · 0.95
test_text_encoder_cacheFunction · 0.95
cache_batch_outputsMethod · 0.45

Calls

no outgoing calls

Tested by 2

test_text_encoder_cacheFunction · 0.76