MCPcopy
hub / github.com/unclecode/crawl4ai / generate_knowledge_graph

Function generate_knowledge_graph

docs/examples/quickstart_async.py:459–491  ·  view source on GitHub ↗
()

Source from the content-addressed store, hash-verified

457 print("you may observe an even more significant speed advantage for Crawl4AI.")
458
async def generate_knowledge_graph():
    """Extract a knowledge graph from a web page and save it as ``kb.json``.

    Declares local Pydantic models describing entities and the relationships
    between them, passes their JSON schema to an ``LLMExtractionStrategy``,
    crawls https://paulgraham.com/love.html with ``AsyncWebCrawler`` and
    writes the extracted content to ``kb.json`` inside ``__location__``.

    If the crawl produces no extracted content, a message is printed and
    nothing is written, so a ``kb.json`` from an earlier run is left intact.
    """

    class Entity(BaseModel):
        name: str
        description: str

    class Relationship(BaseModel):
        entity1: Entity
        entity2: Entity
        description: str
        relation_type: str

    class KnowledgeGraph(BaseModel):
        entities: List[Entity]
        relationships: List[Relationship]

    extraction_strategy = LLMExtractionStrategy(
        provider='openai/gpt-4o-mini',  # Or any other provider, including Ollama and open source models
        api_token=os.getenv('OPENAI_API_KEY'),  # In case of Ollama just pass "no-token"
        schema=KnowledgeGraph.model_json_schema(),
        extraction_type="schema",
        instruction="""Extract entities and relationships from the given text."""
    )
    async with AsyncWebCrawler() as crawler:
        url = "https://paulgraham.com/love.html"
        result = await crawler.arun(
            url=url,
            bypass_cache=True,
            extraction_strategy=extraction_strategy,
            # magic=True
        )
        # print(result.extracted_content)
        extracted = result.extracted_content
        if extracted is None:
            # Check before opening: mode "w" truncates the file immediately,
            # and writing None would raise TypeError after the damage is done.
            print(f"No content was extracted from {url}; kb.json was not written.")
            return
        # Explicit UTF-8: the extracted text may contain non-ASCII characters
        # and the platform default encoding is not guaranteed to handle them.
        with open(os.path.join(__location__, "kb.json"), "w", encoding="utf-8") as f:
            f.write(extracted)
492
493async def fit_markdown_remove_overlay():
494 async with AsyncWebCrawler(headless = False) as crawler:

Callers

nothing calls this directly

Calls 3

AsyncWebCrawler (Class) · 0.90
arun (Method) · 0.80

Tested by

no test coverage detected

Used in the wild — real call sites across dependent graphs

searching dependent graphs…