New document summarization method built on slim-summary-tool.
summarize_document_fc(self, fp, fn, topic="key points", query=None, text_only=True, max_batch_cap=15,
summary_model="slim-summary-tool", real_time_update=True)
| 990 | return response |
| 991 | |
def summarize_document_fc(self, fp, fn, topic="key points", query=None, text_only=True, max_batch_cap=15,
                          summary_model="slim-summary-tool", real_time_update=True):

    """ New document summarization method built on slim-summary-tool.

    Loads `summary_model`, registers the document as source material, keeps at most `max_batch_cap`
    source batches, prompts the model with `topic` over every remaining batch, and returns the
    distinct key points found in the responses.

    Parameters:
        fp, fn -- forwarded to add_source_document to locate the document
                  (presumably folder path and file name -- confirm against add_source_document)
        topic -- prompt text handed to prompt_with_source
        query -- optional; when truthy it is forwarded to add_source_document as `query=`
        text_only -- accepted for interface compatibility; not used by this method
        max_batch_cap -- maximum number of entries of self.source_materials that are kept
        summary_model -- model name handed to load_model
        real_time_update -- when True, progress messages are written with logger.info

    Returns:
        list of key points in first-seen order, without exact duplicates, blank entries, or
        entries whose stripped text starts with "Not Found".

    Side effects: sets self.llm_max_output_len to 150 and may truncate self.source_materials;
    both changes remain on the instance after the call returns.
    """

    if real_time_update:
        logger.info(f"update: Prompt - summarize_document_fc - document - {fn}")

    # note: when loading model, context window is automatically set based on model
    self.load_model(summary_model, temperature=0.0, sample=False)

    self.llm_max_output_len = 150

    # forward `query` only when one was supplied, so add_source_document applies its own default
    if query:
        self.add_source_document(fp, fn, query=query)
    else:
        self.add_source_document(fp, fn)

    # cap the number of source batches that will be sent to the model
    if len(self.source_materials) > max_batch_cap:
        self.source_materials = self.source_materials[:max_batch_cap]

    if real_time_update:
        logger.info(f"update: Prompt - summarize_document_fc - number of source batches - "
                    f"{len(self.source_materials)}")

    responses = self.prompt_with_source(topic, first_source_only=False, verbose=True)

    key_points = []

    for resp in responses:

        points = resp["llm_response"]

        # a response that is one plain string would otherwise be iterated character by
        # character, adding single letters as "key points" - treat it as one candidate point
        if isinstance(points, str):
            points = [points]

        for point in points:

            # de-duplicate on the exact text returned by the model
            if point in key_points:
                continue

            stripped = point.strip()

            # drop blank entries and the model's "Not Found" placeholder
            if stripped and not stripped.startswith("Not Found"):
                key_points.append(point)

    return key_points
| 1031 | |
| 1032 | def summarize_document_from_library(self, library, doc_id=None, filename=None, query=None, |
| 1033 | text_only=True,max_batch_cap=10): |