(self, i)
| 218 | return len(self.raw_data) |
| 219 | |
def __getitem__(self, i) -> Dict[str, torch.Tensor]:
    """Return the preprocessed example stored under index ``i``.

    On the first request for ``i`` the raw record's ``"conversations"``
    entry is passed to ``preprocess`` together with ``self.tokenizer``.
    The ``input_ids``, ``labels`` and ``attention_mask`` fields of the
    result are unwrapped and memoized in ``self.cached_data_dict``.
    Later requests for the same ``i`` return the memoized dict without
    calling ``preprocess`` again.

    Args:
        i: Key into ``self.raw_data`` (and into the cache).

    Returns:
        A dict with the keys ``input_ids``, ``labels`` and
        ``attention_mask``.
    """
    cache = self.cached_data_dict

    if i not in cache:
        conversation = self.raw_data[i]["conversations"]
        # preprocess is handed a one-element list, so each field of its
        # result is indexed with [0] to recover the single example.
        batch = preprocess([conversation], self.tokenizer)
        cache[i] = {
            field: batch[field][0]
            for field in ("input_ids", "labels", "attention_mask")
        }

    return cache[i]
| 233 | |
| 234 | |
| 235 | def make_supervised_data_module( |
nothing calls this directly
no test coverage detected