Encode an input example using this pattern-verbalizer pair. :param example: the input example to encode :param priming: whether to use this example for priming :param labeled: if ``priming=True``, whether the label should be appended to this example :return: A tuple, consisting of a list of input ids and a list of token type ids
(self, example: InputExample, priming: bool = False, labeled: bool = False)
| 164 | return new_parts_a, new_parts_b |
| 165 | |
| 166 | def encode(self, example: InputExample, priming: bool = False, labeled: bool = False): |
| 167 | """ |
| 168 | Encode an input example using this pattern-verbalizer pair. |
| 169 | |
| 170 | :param example: the input example to encode |
| 171 | :param priming: whether to use this example for priming |
| 172 | :param labeled: if ``priming=True``, whether the label should be appended to this example |
| 173 | :return: A tuple, consisting of a list of input ids and a list of token type ids |
| 174 | """ |
| 175 | |
| 176 | if not priming: |
| 177 | assert not labeled, "'labeled' can only be set to true if 'priming' is also set to true" |
| 178 | |
| 179 | tokenizer = self.tokenizer |
| 180 | raw_parts_a, raw_parts_b = self.get_parts(example) |
| 181 | |
| 182 | raw_parts_a = [x if isinstance(x, tuple) else (x, False) for x in raw_parts_a] |
| 183 | prompt_id = tokenizer.num_tokens |
| 184 | |
| 185 | def encode_input(raw_parts): |
| 186 | parts = [] |
| 187 | for x, s in raw_parts: |
| 188 | if isinstance(x, str): |
| 189 | x = tokenizer.EncodeAsIds(x) |
| 190 | elif isinstance(x, int): |
| 191 | x = [prompt_id] * x |
| 192 | else: |
| 193 | pass |
| 194 | parts.append((x, s)) |
| 195 | return parts |
| 196 | |
| 197 | parts_a = encode_input(raw_parts_a) |
| 198 | if self.prefix_prompt > 0: |
| 199 | parts_a = [([prompt_id] * self.prefix_prompt, False)] + parts_a |
| 200 | |
| 201 | parts_b = None |
| 202 | if raw_parts_b: |
| 203 | raw_parts_b = [x if isinstance(x, tuple) else (x, False) for x in raw_parts_b] |
| 204 | parts_b = encode_input(raw_parts_b) |
| 205 | |
| 206 | if self.is_multi_token: |
| 207 | answers = self.get_answers(example) |
| 208 | if example.label is not None: |
| 209 | label = self.label_list.index(example.label) |
| 210 | else: |
| 211 | label = 0 |
| 212 | |
| 213 | if not self.fast_decode: |
| 214 | ids_list, positions_list, sep_list, mask_list, target_list, prompt_list = [], [], [], [], [], [] |
| 215 | segment_id_list = [] |
| 216 | if priming: |
| 217 | answer = answers[label] |
| 218 | answer_ids = get_verbalization_ids(answer, tokenizer, force_single_token=False) |
| 219 | self.num_truncated += self.truncate(parts_a, parts_b, answer_ids, max_length=self.max_seq_length) |
| 220 | tokens_a = [token_id for part, _ in parts_a for token_id in part] |
| 221 | tokens_b = [token_id for part, _ in parts_b for token_id in part] if parts_b else None |
| 222 | input_ids = tokens_a |
| 223 | if tokens_b: |
no test coverage detected