web-crawling / main.py
pvanand's picture
Update main.py
600b195 verified
raw
history blame
516 Bytes
import asyncio
import nest_asyncio
from crawl4ai import AsyncWebCrawler
from crawl4ai.extraction_strategy import JsonCssExtractionStrategy, LLMExtractionStrategy
import json
import time
from pydantic import BaseModel, Field
# Patch asyncio so that an event loop can be re-entered (nested run calls).
# NOTE(review): presumably here because the snippet originated in a notebook,
# where a loop is already running — confirm it is still needed for a script.
nest_asyncio.apply()
async def simple_crawl(url: str = "https://www.nbcnews.com/business"):
    """Crawl a single page and return the crawler's result object.

    Opens an ``AsyncWebCrawler`` (verbose mode), fetches ``url`` with
    ``arun``, prints the length of the result's ``markdown`` attribute and
    returns the result unchanged.

    Args:
        url: Address of the page to crawl. Defaults to the NBC News business
            page, which was the original hard-coded target.

    Returns:
        Whatever ``AsyncWebCrawler.arun`` returns for ``url``; callers read
        its ``markdown`` attribute.
    """
    # The context manager owns the crawler's lifetime, so it is released
    # even if ``arun`` raises.
    async with AsyncWebCrawler(verbose=True) as crawler:
        result = await crawler.arun(url=url)
        print(len(result.markdown))
        return result
# A bare top-level ``await`` is only accepted by notebook/IPython cells; in a
# regular module it is a SyntaxError. Drive the coroutine through asyncio.run
# so the file also works when executed as ``python main.py``.
result = asyncio.run(simple_crawl())
print(result.markdown)