Flexible intake method accepts multiple forms of input text: --if string, then packages as a dictionary, and adds to the work_queue --if dictionary, then checks the keys and adds to the work_queue --if list, then unpacks and iterates, adding each entry as a dictionary onto the work queue
(self, text, text_key="text")
| 213 | return len(self.report) |
| 214 | |
| 215 | def load_work(self, text, text_key="text"): |
| 216 | |
| 217 | """ Flexible intake method accepts multiple forms of input text: |
| 218 | --if string, then packages as a dictionary, and adds to the work_queue |
| 219 | --if dictionary, then checks the keys and adds to the work_queue |
| 220 | --if list, then unpacks and iterates, adding each entry as a dictionary onto the work queue """ |
| 221 | |
| 222 | new_entries_created = 0 |
| 223 | |
| 224 | if isinstance(text, str): |
| 225 | new_entry = {"text": text, "file_source": "NA", "page_num": "NA"} |
| 226 | self.work_queue.append(new_entry) |
| 227 | new_entries_created += 1 |
| 228 | self._expand_report() |
| 229 | |
| 230 | if isinstance(text, dict): |
| 231 | if text_key in text and "file_source" in text and "page_num" in text: |
| 232 | self.work_queue.append(text) |
| 233 | new_entries_created += 1 |
| 234 | self._expand_report() |
| 235 | else: |
| 236 | if text_key not in text: |
| 237 | logging.warning("could not identify dictionary type.") |
| 238 | return -1 |
| 239 | else: |
| 240 | if "file_source" not in text: |
| 241 | text.update({"file_source": "NA"}) |
| 242 | if "page_num" not in text: |
| 243 | text.update({"page_num": "NA"}) |
| 244 | self.work_queue.append(text) |
| 245 | new_entries_created += 1 |
| 246 | self._expand_report() |
| 247 | |
| 248 | if isinstance(text, list): |
| 249 | # need to check the type of the entries in the list |
| 250 | for i, elements in enumerate(text): |
| 251 | |
| 252 | if isinstance(elements, str): |
| 253 | new_entry = {"text": elements, "file_source": "NA", "page_num": "NA"} |
| 254 | self.work_queue.append(new_entry) |
| 255 | new_entries_created += 1 |
| 256 | self._expand_report() |
| 257 | |
| 258 | if isinstance(elements, dict): |
| 259 | if text_key in elements and "file_source" in elements and "page_num" in elements: |
| 260 | self.work_queue.append(elements) |
| 261 | new_entries_created += 1 |
| 262 | self._expand_report() |
| 263 | else: |
| 264 | if text_key not in elements: |
| 265 | logging.warning("update: load - skipping - could not identify " |
| 266 | "dictionary type - %s", elements) |
| 267 | else: |
| 268 | if "file_source" not in elements: |
| 269 | elements.update({"file_source": "NA"}) |
| 270 | if "page_num" not in elements: |
| 271 | elements.update({"page_num": "NA"}) |
| 272 | self.work_queue.append(elements) |