pvanand committed on
Commit
2c451af
·
verified ·
1 Parent(s): e724620

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +11 -7
main.py CHANGED
@@ -1,13 +1,11 @@
1
  import os
 
2
  from fastapi import FastAPI, HTTPException
3
  from pydantic import BaseModel, Field
4
- from typing import List
5
  from crawl4ai import AsyncWebCrawler
6
- from crawl4ai.extraction_strategy import LLMExtractionStrategy
7
  import json
8
- from dotenv import load_dotenv
9
-
10
- load_dotenv() # Load environment variables from .env file
11
 
12
  app = FastAPI()
13
 
@@ -30,6 +28,12 @@ async def crawl(input: CrawlerInput):
30
  if len(input.columns) != len(input.descriptions):
31
  raise HTTPException(status_code=400, detail="Number of columns must match number of descriptions")
32
 
 
 
 
 
 
 
33
  async with AsyncWebCrawler(verbose=True) as crawler:
34
  result = await crawler.arun(
35
  url=input.url,
@@ -38,7 +42,7 @@ async def crawl(input: CrawlerInput):
38
  api_token=os.getenv('OPENAI_API_KEY'),
39
  extraction_type="schema",
40
  verbose=True,
41
- instruction=f"Extract the following information: {', '.join(input.columns)}. Descriptions: {', '.join(input.descriptions)}"
42
  )
43
  )
44
 
@@ -52,4 +56,4 @@ async def test():
52
 
53
  if __name__ == "__main__":
54
  import uvicorn
55
- uvicorn.run(app, host="0.0.0.0", port=8000, loop="asyncio")
 
1
  import os
2
+ import asyncio
3
  from fastapi import FastAPI, HTTPException
4
  from pydantic import BaseModel, Field
5
+ from typing import List, Optional
6
  from crawl4ai import AsyncWebCrawler
7
+ from crawl4ai.extraction_strategy import JsonCssExtractionStrategy, LLMExtractionStrategy
8
  import json
 
 
 
9
 
10
  app = FastAPI()
11
 
 
28
  if len(input.columns) != len(input.descriptions):
29
  raise HTTPException(status_code=400, detail="Number of columns must match number of descriptions")
30
 
31
+ # Create a dictionary with columns as keys and descriptions as values
32
+ extraction_info = {col: desc for col, desc in zip(input.columns, input.descriptions)}
33
+
34
+ # Convert the dictionary to a JSON string
35
+ instruction = f"Extract the following information: {json.dumps(extraction_info)}"
36
+
37
  async with AsyncWebCrawler(verbose=True) as crawler:
38
  result = await crawler.arun(
39
  url=input.url,
 
42
  api_token=os.getenv('OPENAI_API_KEY'),
43
  extraction_type="schema",
44
  verbose=True,
45
+ instruction=instruction
46
  )
47
  )
48
 
 
56
 
57
  if __name__ == "__main__":
58
  import uvicorn
59
+ uvicorn.run(app, host="0.0.0.0", port=8000)