()
| 457 | print("you may observe an even more significant speed advantage for Crawl4AI.") |
| 458 | |
async def generate_knowledge_graph():
    """Crawl an essay and save an LLM-extracted knowledge graph as JSON.

    Builds an ``LLMExtractionStrategy`` whose output schema is the JSON
    schema of the local ``KnowledgeGraph`` model, runs it against
    https://paulgraham.com/love.html with the cache bypassed, and writes
    the extracted content to ``kb.json`` inside ``__location__``.

    If the crawl yields no extracted content, a message is printed and
    ``kb.json`` is left untouched instead of being truncated.
    """

    # Pydantic models that only exist to produce the JSON schema handed to
    # the extraction strategy.
    class Entity(BaseModel):
        name: str
        description: str

    class Relationship(BaseModel):
        entity1: Entity
        entity2: Entity
        description: str
        relation_type: str

    class KnowledgeGraph(BaseModel):
        entities: List[Entity]
        relationships: List[Relationship]

    extraction_strategy = LLMExtractionStrategy(
        # Or any other provider, including Ollama and open source models
        provider='openai/gpt-4o-mini',
        # In case of Ollama just pass "no-token"
        api_token=os.getenv('OPENAI_API_KEY'),
        schema=KnowledgeGraph.model_json_schema(),
        extraction_type="schema",
        instruction="""Extract entities and relationships from the given text.""",
    )

    async with AsyncWebCrawler() as crawler:
        url = "https://paulgraham.com/love.html"
        result = await crawler.arun(
            url=url,
            bypass_cache=True,
            extraction_strategy=extraction_strategy,
        )

        extracted = result.extracted_content
        # NOTE(review): presumably extracted_content is None when the crawl
        # or the LLM extraction fails -- confirm against CrawlResult. Check
        # before opening so an existing kb.json is not truncated for nothing.
        if extracted is None:
            print(f"No content was extracted from {url}; kb.json was not written.")
            return

        # Explicit UTF-8: scraped text may contain characters that the
        # platform's default locale encoding cannot represent.
        output_path = os.path.join(__location__, "kb.json")
        with open(output_path, "w", encoding="utf-8") as f:
            f.write(extracted)
| 492 | |
| 493 | async def fit_markdown_remove_overlay(): |
| 494 | async with AsyncWebCrawler(headless = False) as crawler: |
nothing calls this directly
no test coverage detected
searching dependent graphs…