(args)
| 50 | |
| 51 | |
def main(args):
    """Load the document(s) at ``args.path`` and print an answer per question.

    Reads these attributes from ``args``: ``path``, ``ocr``,
    ``use_embedded_text``, ``checkpoint``, ``classify``,
    ``classify_checkpoint`` and ``questions``.

    When ``args.path`` is a directory it is walked recursively and every
    file found is treated as a candidate document; otherwise ``args.path``
    itself is the only candidate. Candidates that raise
    ``UnsupportedDocument`` while loading are logged and skipped. If nothing
    could be loaded the function returns before any pipeline is created.

    Any exception raised while answering a question is logged together with
    the file and the question, then re-raised.
    """
    # --- Gather candidate paths -------------------------------------------
    paths = []
    if not pathlib.Path(args.path).is_dir():
        paths.append(args.path)
    else:
        for dirpath, _subdirs, filenames in os.walk(args.path):
            for filename in filenames:
                candidate = pathlib.Path(dirpath) / filename
                if not candidate.is_dir():
                    paths.append(candidate)

    # --- Load every candidate, skipping unsupported ones --------------------
    docs = []
    for path in paths:
        log.info(f"Loading {path}")
        try:
            document = load_document(
                str(path),
                ocr_reader=args.ocr,
                use_embedded_text=args.use_embedded_text,
            )
        except UnsupportedDocument as err:
            log.warning(f"Cannot load {path}: {err}. Skipping...")
        else:
            docs.append((path, document))

    log.info(f"Done loading {len(docs)} file(s).")
    if not docs:
        return

    # --- Build the pipelines --------------------------------------------------
    log.info("Loading pipelines.")

    nlp = pipeline("document-question-answering", model=args.checkpoint)
    # The classifier is only built (and only used below) when requested.
    classify = None
    if args.classify:
        classify = pipeline("document-classification", model=args.classify_checkpoint)

    log.info("Ready to start evaluating!")

    # --- Evaluate and print -------------------------------------------------
    # Column widths so that file names and questions line up in the output.
    max_fname_len = max(len(str(path)) for path, _ in docs)
    max_question_len = max((len(question) for question in args.questions), default=0)

    for index, (path, document) in enumerate(docs):
        label = str(path)

        # Blank separator line between documents when several questions are asked.
        if index > 0 and len(args.questions) > 1:
            print("")

        if args.classify:
            prediction = classify(**document.context)[0]
            print(f"{label:<{max_fname_len}} Document Type: {prediction['label']}")

        for question in args.questions:
            try:
                response = nlp(question=question, **document.context)
                # The pipeline result may be a list; keep only its first entry.
                if isinstance(response, list):
                    response = response[0] if response else None
            except Exception:
                log.error(f"Failed while processing {label} on question: '{question}'")
                raise

            if response is None:
                answer = "NULL"
            else:
                answer = response["answer"]
            print(f"{label:<{max_fname_len}} {question:<{max_question_len}}: {answer}")
nothing calls this directly
no test coverage detected