Process and tokenize the string, then return the text, its label, and the text length.
(self, index)
| 275 | return len(self.X) |
| 276 | |
def __getitem__(self, index):
    """Return the sample at ``index`` as a dict of text, length and label.

    The text is taken from ``self.X[index]`` and the label from
    ``self.Y[index]``. The text is always run through the configured
    transformation; the label is transformed only when it is a ``str``.

    Returns:
        A dict with keys ``'text'`` (the transformed text), ``'length'``
        (``len()`` of the transformed text) and ``'label'``.
    """
    def _transform(raw):
        # The tokenizer takes priority and is handed preprocess_fn itself;
        # preprocess_fn is called directly only when no tokenizer is set.
        if self.tokenizer is not None:
            return self.tokenizer.EncodeAsIds(raw, self.preprocess_fn)
        if self.preprocess_fn is not None:
            return self.preprocess_fn(raw)
        return raw

    text = _transform(self.X[index])
    label = self.Y[index]
    # Non-string labels are passed through untouched.
    if isinstance(label, str):
        label = _transform(label)
    return {'text': text, 'length': len(text), 'label': label}
| 291 | |
| 292 | def write(self, writer_gen=None, path=None, skip_header=False): |
| 293 | """ |
nothing calls this directly
no test coverage detected