Method tokenize

library/strategy_anima.py:55–71 · view source on GitHub ↗

(self, text: Union[str, List[str]])

Source from the content-addressed store, hash-verified

53	self.t5_max_length = t5_max_length
54
def tokenize(self, text: Union[str, List[str]]) -> List[torch.Tensor]:
    """Tokenize ``text`` with both the Qwen3 and the T5 tokenizer.

    A single string is treated as a batch of one. Both tokenizers are
    called with truncation enabled and padding to their respective
    maximum length (``self.qwen3_max_length`` / ``self.t5_max_length``),
    requesting PyTorch tensors (``return_tensors="pt"``).

    Args:
        text: One prompt, or a list of prompts.

    Returns:
        A four-element list, in this order:
        ``[qwen3_input_ids, qwen3_attention_mask,
        t5_input_ids, t5_attention_mask]``.
    """
    # Normalise the input so the tokenizers always receive a list.
    prompts = text
    if isinstance(prompts, str):
        prompts = [prompts]

    # Settings that are identical for the two tokenizers.
    shared_options = {
        "return_tensors": "pt",
        "truncation": True,
        "padding": "max_length",
    }

    # Qwen3 pass.
    qwen3_batch = self.qwen3_tokenizer(
        prompts, max_length=self.qwen3_max_length, **shared_options
    )
    tokens = [qwen3_batch["input_ids"], qwen3_batch["attention_mask"]]

    # T5 pass (for LLM Adapter target tokens).
    t5_batch = self.t5_tokenizer(
        prompts, max_length=self.t5_max_length, **shared_options
    )
    tokens += [t5_batch["input_ids"], t5_batch["attention_mask"]]

    return tokens
72
73
74	class AnimaTextEncodingStrategy(TextEncodingStrategy):

trainFunction · 0.95

test_text_encoder_cacheFunction · 0.95

test_full_batch_simulationFunction · 0.95

cache_batch_outputsMethod · 0.45

no outgoing calls

test_text_encoder_cacheFunction · 0.76

test_full_batch_simulationFunction · 0.76