(knowledge_id, user_id)
| 42 | |
| 43 | |
def get_sync_handler(knowledge_id, user_id):
    """Build a callback that stores a fetched web page as a knowledge document.

    The knowledge record is looked up once, when the handler is built, and is
    shared by every invocation of the returned callback.

    Args:
        knowledge_id: Primary key of the ``Knowledge`` row that documents are
            attached to.
        user_id: Passed through to ``DocumentSerializers.Create`` as the
            ``user_id`` of newly created documents.

    Returns:
        A ``handler(child_link, response)`` callable. It never raises for
        errors that occur while processing a page: they are logged through
        ``maxkb_logger`` and swallowed.
    """
    # Kept as a function-local import, as in the original
    # (presumably to avoid a circular import -- TODO confirm).
    from knowledge.serializers.document import DocumentSerializers

    knowledge = QuerySet(Knowledge).filter(id=knowledge_id).first()

    def handler(child_link: ChildLink, response: Fork.Response):
        """Sync the document for ``child_link`` if it exists, otherwise create it.

        Responses whose status is not 200 are ignored.
        """
        if response.status != 200:
            return
        try:
            source_url = child_link.url.strip()
            existing = QuerySet(Document).filter(meta__source_url=source_url, knowledge=knowledge).first()
            if existing is not None:
                # A document for this URL already exists: run a document sync
                # instead of inserting a duplicate. The response body is not
                # parsed here because the sync path does not use it.
                DocumentSerializers.Sync(data={"document_id": existing.id}).sync()
                return

            # Use the link text as the document name when it is non-blank;
            # otherwise fall back to the (unstripped) URL. Guard against a
            # missing tag or a ``None`` text so the fallback is always reached.
            tag = child_link.tag
            link_text = tag.text if tag is not None else None
            document_name = link_text if link_text and link_text.strip() else child_link.url

            # Only split the page into paragraphs when a new document is
            # actually going to be inserted.
            paragraphs = get_split_model("web.md").parse(response.content)
            DocumentSerializers.Create(data={"knowledge_id": knowledge.id, "user_id": user_id}).save(
                {
                    "name": document_name,
                    "paragraphs": paragraphs,
                    "meta": {"source_url": source_url, "selector": knowledge.meta.get("selector")},
                    "type": KnowledgeType.WEB,
                },
                with_valid=True,
            )
        except Exception as e:
            # Best-effort by design: one failing page must not abort the
            # caller, so log the failure with its traceback and continue.
            maxkb_logger.error(f"{str(e)}:{traceback.format_exc()}")

    return handler
| 77 | |
| 78 | |
| 79 | def get_sync_web_document_handler(knowledge_id, user_id): |
no outgoing calls
no test coverage detected