(self, text: Union[str, List[str]])
| 53 | self.t5_max_length = t5_max_length |
| 54 | |
def tokenize(self, text: Union[str, List[str]]) -> List[torch.Tensor]:
    """Tokenize the prompt(s) with both the Qwen3 and the T5 tokenizer.

    A single string is wrapped into a one-element list; any other input is
    forwarded to the tokenizers unchanged. Both tokenizers are invoked with
    ``return_tensors="pt"``, truncation enabled and ``padding="max_length"``,
    using ``self.qwen3_max_length`` and ``self.t5_max_length`` respectively.

    Args:
        text: One prompt or a list of prompts.

    Returns:
        ``[qwen3_input_ids, qwen3_attn_mask, t5_input_ids, t5_attn_mask]``,
        in that order.
    """
    prompts = text if not isinstance(text, str) else [text]

    # Options that are identical for both tokenizer calls.
    shared_options = {"return_tensors": "pt", "truncation": True, "padding": "max_length"}

    # Qwen3 pass
    qwen3_batch = self.qwen3_tokenizer(prompts, max_length=self.qwen3_max_length, **shared_options)
    qwen3_ids, qwen3_mask = qwen3_batch["input_ids"], qwen3_batch["attention_mask"]

    # T5 pass (target tokens for the LLM Adapter)
    t5_batch = self.t5_tokenizer(prompts, max_length=self.t5_max_length, **shared_options)
    t5_ids, t5_mask = t5_batch["input_ids"], t5_batch["attention_mask"]

    return [qwen3_ids, qwen3_mask, t5_ids, t5_mask]
| 72 | |
| 73 | |
| 74 | class AnimaTextEncodingStrategy(TextEncodingStrategy): |
no outgoing calls