Summarize the text using the AI model. Args: text (str): The text to summarize. min_length (int, optional): The minimum length of the summary. Defaults to 64. Returns: str: The summarized text.
(self, text: str, min_length: int = 64)
| 201 | return "cpu" |
| 202 | |
def summarize(self, text: str, min_length: int = 64) -> str:
    """
    Produce a shorter version of ``text`` with the loaded summarization model.

    The text is returned unchanged when no tokenizer/model is available, or
    when it is shorter than 1.5 times ``min_length`` characters.

    Args:
        text (str): The text to summarize
        min_length (int, optional): Lower bound forwarded to the model's
            ``generate`` call; also drives the "too short to summarize"
            threshold. Defaults to 64.
    Returns:
        str: The summary, or the original text when summarization is skipped
    """
    ready = self.tokenizer is not None and self.model is not None
    if not ready:
        self.logger.warning("No tokenizer or model to perform summarization.")
        return text

    text_len = len(text)
    # Texts this short are not worth compressing.
    if text_len < min_length * 1.5:
        return text

    # Upper bound for generation: half of the character count, but never
    # less than twice the requested minimum.
    # NOTE(review): the budget is a character count; confirm generate() is
    # meant to receive it in these units.
    doubled_min = min_length * 2
    if text_len > doubled_min:
        length_budget = text_len // 2
    else:
        length_budget = doubled_min

    prompt = "summarize: " + text
    encoded = self.tokenizer(
        prompt,
        return_tensors="pt",
        max_length=512,
        truncation=True,
    )
    generation_options = {
        "max_length": length_budget,
        "min_length": min_length,
        "length_penalty": 1.0,
        "num_beams": 4,
        "early_stopping": True,
    }
    # NOTE(review): input_ids are passed exactly as the tokenizer returned
    # them; presumably the model lives on the same device - confirm.
    generated = self.model.generate(encoded['input_ids'], **generation_options)

    # Only the first generated sequence is decoded; a 'summary:' marker the
    # model may echo back is removed.
    summary = self.tokenizer.decode(
        generated[0], skip_special_tokens=True
    ).replace('summary:', '')

    self.logger.info(f"Memory summarized from len {text_len} to {len(summary)}.")
    self.logger.info(f"Summarized text:\n{summary}")
    return summary
| 233 | |
| 234 | #@timer_decorator |
| 235 | def compress(self) -> str: |
no test coverage detected