ak0601 committed on
Commit
b13ea04
1 Parent(s): 2b3f736

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +147 -147
app.py CHANGED
@@ -1,147 +1,147 @@
1
- from fastapi import FastAPI, UploadFile, Form, HTTPException
2
- from pydantic import BaseModel
3
- import uvicorn
4
- from fastapi.responses import JSONResponse
5
- from typing import Dict
6
- import hashlib
7
- from pypdf import PdfReader
8
- from dotenv import load_dotenv, dotenv_values
9
- import aiofiles
10
- from pathlib import Path
11
- from langchain_community.document_loaders import WebBaseLoader
12
- import google.generativeai as genai
13
- import os
14
- import re
15
- from fastapi.middleware.cors import CORSMiddleware
16
-
17
- # Initialize Gemini LLM
18
- load_dotenv()
19
- Google_key = os.getenv("GOOGLE_API_KEY")
20
- model = genai.GenerativeModel("gemini-1.5-flash")
21
- print(str(Google_key))
22
- genai.configure(api_key="AIzaSyC1-QUzA45IlCosX__sKlzNAgVZGEaHc0c")
23
-
24
- app = FastAPI()
25
-
26
- app.add_middleware(
27
- CORSMiddleware,
28
- allow_origins=["*"],
29
- allow_credentials=True,
30
- allow_methods=["*"],
31
- allow_headers=["*"],
32
- )
33
-
34
- def generate_content(url):
35
- loader = WebBaseLoader(str(url))
36
- data = loader.load()
37
- formatted_text = data[0].page_content.strip().replace("\n\n", "\n")
38
- cleaned_text = re.sub(r"\s+", " ", formatted_text)
39
- cleaned_text = re.sub(r"\n+", "\n\n", cleaned_text)
40
- return cleaned_text
41
-
42
-
43
- # Dictionary to store processed content
44
- processed_data: Dict[str, str] = {}
45
-
46
-
47
-
48
-
49
- def generate_chat_id(content: str) -> str:
50
- return hashlib.md5(content.encode()).hexdigest()
51
-
52
-
53
-
54
- class ProcessURLRequest(BaseModel):
55
- url: str
56
-
57
-
58
- @app.post("/process_url")
59
- async def process_url(request: ProcessURLRequest):
60
- # Simulated web scraping for demonstration
61
- scraped_content = f"Scraped content from URL: {generate_content(request.url)}"
62
- chat_id = generate_chat_id(scraped_content)
63
-
64
- processed_data[chat_id] = scraped_content
65
-
66
- return JSONResponse(
67
- content={"chat_id": chat_id, "message": "URL content processed and stored successfully."}
68
- )
69
-
70
-
71
- # API Endpoint 2: Process PDF Document
72
- @app.post("/process_pdf")
73
- async def process_pdf(file: UploadFile):
74
- if not file.filename.endswith(".pdf"):
75
- raise HTTPException(status_code=400, detail="Only PDF files are supported.")
76
-
77
- # Save uploaded file temporarily
78
- temp_file = Path(f"temp_{file.filename}")
79
- async with aiofiles.open(temp_file, "wb") as out_file:
80
- content = await file.read()
81
- await out_file.write(content)
82
-
83
- reader = PdfReader(temp_file)
84
- pg_l = len(reader.pages)
85
- text = ""
86
- for i in range(pg_l
87
-
88
- ):
89
- page = reader.pages[i]
90
- text += page.extract_text()
91
-
92
- extracted_text = text
93
-
94
-
95
- chat_id = generate_chat_id(extracted_text)
96
-
97
- processed_data[chat_id] = extracted_text
98
-
99
- temp_file.unlink()
100
-
101
- return JSONResponse(
102
- content={"chat_id": chat_id, "message": "PDF content processed and stored successfully."}
103
- )
104
-
105
-
106
- # API Endpoint 3: Chat API
107
- class ChatRequest(BaseModel):
108
- chat_id: str
109
- question: str
110
-
111
-
112
- @app.post("/chat")
113
- async def chat(request: ChatRequest):
114
- chat_id = request.chat_id
115
- question = request.question
116
-
117
- # Retrieve the stored content
118
- if chat_id not in processed_data:
119
- raise HTTPException(status_code=404, detail="Chat ID not found.")
120
-
121
- stored_content = processed_data[chat_id]
122
- response = model.generate_content(f"""You are a highly accurate and context-driven LLM tasked with generating precise responses based solely on the provided context. Your goal is to synthesize information exclusively from the given context to respond directly and comprehensively to the question.
123
-
124
- Inputs:
125
-
126
- Question: {question}
127
- Context: {stored_content}
128
- Task:
129
-
130
- Carefully analyze the provided context.
131
- Construct an accurate and relevant response to the question.
132
- Ensure that the response strictly adheres to the given context, without introducing external information, assumptions, or unsupported content.
133
- Evaluation Criteria:
134
-
135
- Responses must demonstrate strict adherence to the provided context.
136
- Focus on clarity, precision, and relevance.
137
- Avoid any content not explicitly supported by the context..""")
138
-
139
-
140
-
141
- response_text = f"{response.text}"
142
-
143
- return JSONResponse(content={"response": response_text})
144
-
145
-
146
- if __name__ == "__main__":
147
- uvicorn.run(app, host="0.0.0.0", port=8000)
 
1
+ from fastapi import FastAPI, UploadFile, Form, HTTPException
2
+ from pydantic import BaseModel
3
+ import uvicorn
4
+ from fastapi.responses import JSONResponse
5
+ from typing import Dict
6
+ import hashlib
7
+ from pypdf import PdfReader
8
+ from dotenv import load_dotenv, dotenv_values
9
+ import aiofiles
10
+ from pathlib import Path
11
+ from langchain_community.document_loaders import WebBaseLoader
12
+ import google.generativeai as genai
13
+ import os
14
+ import re
15
+ from fastapi.middleware.cors import CORSMiddleware
16
+
17
# Initialize Gemini LLM
load_dotenv()
Google_key = os.getenv("GOOGLE_API_KEY")

# SECURITY: the API key must come from the environment only — the previous
# version hard-coded a live key in source and printed it to stdout; both are
# credential leaks and have been removed.
# Configure the SDK *before* constructing the model so the key is in place
# when the first request is made.
genai.configure(api_key=Google_key)
model = genai.GenerativeModel("gemini-1.5-flash")

app = FastAPI()

# Wide-open CORS for browser clients.
# NOTE(review): allow_credentials=True combined with allow_origins=["*"] is
# rejected by the CORS spec in browsers — confirm whether credentialed
# requests are actually needed; if so, list explicit origins instead.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
33
+
34
def generate_content(url):
    """Fetch *url* with WebBaseLoader and return its page text, whitespace-normalized.

    Returns a single string in which every run of whitespace (spaces, tabs,
    newlines) is collapsed to one space.
    """
    loader = WebBaseLoader(str(url))
    data = loader.load()
    text = data[0].page_content.strip()
    # Collapse all whitespace runs (including newlines) to a single space.
    # The original also ran .replace("\n\n", "\n") before this and
    # re.sub(r"\n+", "\n\n", ...) after it; both were no-ops because \s+
    # already consumes every newline, so they have been dropped — the
    # returned value is byte-identical.
    cleaned_text = re.sub(r"\s+", " ", text)
    return cleaned_text
41
+
42
+
43
# In-memory store mapping chat_id -> processed text (URL scrape or PDF text).
# NOTE(review): process-local and unbounded — contents are lost on restart and
# grow without eviction; consider a TTL or external store for production.
processed_data: Dict[str, str] = {}
45
+
46
+
47
+
48
+
49
def generate_chat_id(content: str) -> str:
    """Derive a deterministic chat id: the hex MD5 digest of *content* (UTF-8)."""
    digest = hashlib.md5(content.encode())
    return digest.hexdigest()
51
+
52
+
53
+
54
class ProcessURLRequest(BaseModel):
    """Request body for POST /process_url."""

    # Target page URL; forwarded to WebBaseLoader for scraping.
    url: str
56
+
57
+
58
@app.post("/process_url")
async def process_url(request: ProcessURLRequest):
    """Scrape the requested URL, cache its cleaned text, and return a chat id."""
    scraped_content = f"Scraped content from URL: {generate_content(request.url)}"
    chat_id = generate_chat_id(scraped_content)
    processed_data[chat_id] = scraped_content

    payload = {"chat_id": chat_id, "message": "URL content processed and stored successfully."}
    return JSONResponse(content=payload)
69
+
70
+
71
# API Endpoint 2: Process PDF Document
@app.post("/process_pdf")
async def process_pdf(file: UploadFile):
    """Accept a PDF upload, extract its text, cache it, and return a chat id.

    Raises:
        HTTPException(400): when the uploaded filename does not end in ".pdf".
    """
    if not file.filename.endswith(".pdf"):
        raise HTTPException(status_code=400, detail="Only PDF files are supported.")

    # Persist the upload to a temp file so PdfReader can open it from disk.
    temp_file = Path(f"temp_{file.filename}")
    async with aiofiles.open(temp_file, "wb") as out_file:
        content = await file.read()
        await out_file.write(content)

    try:
        reader = PdfReader(temp_file)
        # extract_text() can return None for image-only pages; coalesce to "".
        extracted_text = "".join(
            (page.extract_text() or "") for page in reader.pages
        )
    finally:
        # Always remove the temp file, even when PDF parsing fails — the
        # original leaked it on any exception after the write.
        temp_file.unlink(missing_ok=True)

    chat_id = generate_chat_id(extracted_text)
    processed_data[chat_id] = extracted_text

    return JSONResponse(
        content={"chat_id": chat_id, "message": "PDF content processed and stored successfully."}
    )
104
+
105
+
106
# API Endpoint 3: Chat API
class ChatRequest(BaseModel):
    """Request body for POST /chat."""

    # Id returned by /process_url or /process_pdf; keys into processed_data.
    chat_id: str
    # The user's question, answered from the stored content only.
    question: str
110
+
111
+
112
@app.post("/chat")
async def chat(request: ChatRequest):
    """Answer *question* using only the content previously stored under *chat_id*.

    Raises:
        HTTPException(404): when chat_id has no stored content.
    """
    chat_id = request.chat_id
    question = request.question

    # Retrieve the stored content
    if chat_id not in processed_data:
        raise HTTPException(status_code=404, detail="Chat ID not found.")

    stored_content = processed_data[chat_id]

    # Build the grounding prompt, then ask Gemini.
    prompt = f"""You are a highly accurate and context-driven LLM tasked with generating precise responses based solely on the provided context. Your goal is to synthesize information exclusively from the given context to respond directly and comprehensively to the question.

Inputs:

Question: {question}
Context: {stored_content}
Task:

Carefully analyze the provided context.
Construct an accurate and relevant response to the question.
Ensure that the response strictly adheres to the given context, without introducing external information, assumptions, or unsupported content.
Evaluation Criteria:

Responses must demonstrate strict adherence to the provided context.
Focus on clarity, precision, and relevance.
Avoid any content not explicitly supported by the context.."""
    response = model.generate_content(prompt)

    return JSONResponse(content={"response": f"{response.text}"})
144
+
145
+
146
if __name__ == "__main__":
    # Port 7860 is the port Hugging Face Spaces expects the app to listen on.
    uvicorn.run(app, host="0.0.0.0", port=7860)