Prefix is the *actual* input to the model.
(self, numbers: List[int], target: int)
| 47 | return item |
| 48 | |
def encode_prefix(self, numbers: List[int], target: int):
    """Build and tokenize the chat prompt for one (numbers, target) problem.

    The user turn is produced by filling ``USER_TEMPLATE`` with ``numbers``
    and ``target``. It is sent, after a system turn carrying
    ``SYSTEM_MESSAGE``, to the tokenizer's
    ``encode_chat_with_response_prompt`` together with ``RESPONSE_PROMPT``.
    As the original author notes, the resulting prefix is the *actual*
    input to the model.

    Args:
        numbers: Values substituted into the ``numbers`` field of
            ``USER_TEMPLATE``.
        target: Value substituted into the ``target`` field of
            ``USER_TEMPLATE``.

    Returns:
        A dict with three keys:
        ``"prefix"`` - the value returned by
        ``encode_chat_with_response_prompt``;
        ``"prefix_tokens"`` - the ``tokens`` attribute of the tokenizer's
        ``tokenize`` result for that prefix;
        ``"prefix_token_ids"`` - the ``ids`` attribute of the same result.
    """
    # Render the user turn first, exactly as the original did, so that any
    # formatting error surfaces before the tokenizer is touched.
    user_turn = USER_TEMPLATE.format(numbers=numbers, target=target)
    conversation = [
        {"role": "system", "content": SYSTEM_MESSAGE},
        {"role": "user", "content": user_turn},
    ]

    tokenizer = self.tokenizer
    prompt = tokenizer.encode_chat_with_response_prompt(conversation, RESPONSE_PROMPT)
    encoded = tokenizer.tokenize(prompt)

    result = {"prefix": prompt}
    result["prefix_tokens"] = encoded.tokens
    result["prefix_token_ids"] = encoded.ids
    return result
| 65 | |
| 66 | @staticmethod |
| 67 | def collate_fn(batch: List[Dict[str, Any]]) -> MiniBatch: |
no test coverage detected