Concatenate captions and OCR tokens.
(batch, size)
| 15 | |
@batchify
def preprocess(batch, size):
    """Yield a text/image record for every caption-image pair that has text.

    Entries under ``batch['caption_images']`` are flattened one level. For
    each pair in an entry's ``'cil_pairs'``, the entry caption, the pair's
    sub-caption and the pair's space-joined OCR tokens are combined into a
    single space-separated string, leaving out any empty part. Pairs whose
    combined text is empty are skipped.

    ``size`` is forwarded to ``expand`` (called with ``do_trim=True``) and
    the result is handed to ``convert`` with ``"png"``.
    """
    for entry in chain.from_iterable(batch["caption_images"]):
        main_caption = entry["caption"]
        for pair in entry["cil_pairs"]:
            pieces = (
                main_caption,
                pair["sub_caption"],
                " ".join(pair["image_ocr"]),
            )
            text = " ".join(piece for piece in pieces if piece)
            if not text:
                continue
            # The image is only processed once the pair is known to carry text.
            picture = convert(expand(pair["image"], size, do_trim=True), "png")
            yield {"text": text, "image": picture}
| 29 | |
| 30 | def parse_args(): |
| 31 | argument_parser = ArgumentParser( |