Update app.py

app.py CHANGED
@@ -1,9 +1,12 @@
 from fastapi import FastAPI, HTTPException
 from fastapi.responses import JSONResponse
 from webscout import WEBS, transcriber
-
 from typing import Optional
 from fastapi.encoders import jsonable_encoder
+from bs4 import BeautifulSoup
+import requests
+from functools import lru_cache
+import urllib.parse
 
 app = FastAPI()
 
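The new imports back an HTML-to-text helper and two fetch-and-extract routes added in the next hunk. A minimal sketch (not part of the commit; the HTML snippet is made up) of what that BeautifulSoup extraction step yields:

# Illustration only: drop script/style/nav-type tags, then collect visible text.
from bs4 import BeautifulSoup

html = "<html><head><script>var x = 1;</script></head><body><nav>menu</nav><p>Hello, world.</p></body></html>"
soup = BeautifulSoup(html, "html.parser")
for tag in soup(["script", "style", "header", "footer", "nav"]):
    tag.extract()
print(soup.get_text(strip=True))  # prints: Hello, world.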
@@ -122,6 +125,73 @@ async def chat(
         raise HTTPException(status_code=500, detail=f"Error getting chat results: {e}")
 
 
+@lru_cache(maxsize=128)
+def extract_text_from_webpage(html_content):
+    """Extracts visible text from HTML content using BeautifulSoup."""
+    soup = BeautifulSoup(html_content, "html.parser")
+    # Remove unwanted tags
+    for tag in soup(["script", "style", "header", "footer", "nav"]):
+        tag.extract()
+    # Get the remaining visible text
+    visible_text = soup.get_text(strip=True)
+    return visible_text
+
+@app.get("/api/web_extract")
+async def web_extract(
+    url: str,
+    max_chars: int = 12000,  # Adjust based on token limit
+):
+    """Extracts text from a given URL."""
+    try:
+        response = requests.get(url, headers={"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/111.0"})
+        response.raise_for_status()
+        visible_text = extract_text_from_webpage(response.text)
+        if len(visible_text) > max_chars:
+            visible_text = visible_text[:max_chars] + "..."
+        return {"url": url, "text": visible_text}
+    except requests.exceptions.RequestException as e:
+        raise HTTPException(status_code=500, detail=f"Error fetching or processing URL: {e}")
+
+@app.get("/api/search-and-extract")
+async def web_search_and_extract(
+    q: str,
+    max_results: int = 3,
+    timelimit: Optional[str] = None,
+    safesearch: str = "moderate",
+    region: str = "wt-wt",
+    backend: str = "api",
+    max_chars: int = 6000  # Adjust based on token limit
+):
+    """
+    Searches using WEBS, extracts text from the top results, and returns both.
+    """
+    try:
+        with WEBS() as webs:
+            # Perform WEBS search
+            search_results = webs.text(keywords=q, region=region, safesearch=safesearch,
+                                       timelimit=timelimit, backend=backend, max_results=max_results)
+
+            # Extract text from each result's link
+            extracted_results = []
+            for result in search_results:
+                if 'href' in result:
+                    link = result['href']
+                    try:
+                        response = requests.get(link, headers={"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/111.0"})
+                        response.raise_for_status()
+                        visible_text = extract_text_from_webpage(response.text)
+                        if len(visible_text) > max_chars:
+                            visible_text = visible_text[:max_chars] + "..."
+                        extracted_results.append({"link": link, "text": visible_text})
+                    except requests.exceptions.RequestException as e:
+                        print(f"Error fetching or processing {link}: {e}")
+                        extracted_results.append({"link": link, "text": None})
+                else:
+                    extracted_results.append({"link": None, "text": None})
+
+            return JSONResponse(content=jsonable_encoder({"search_results": search_results, "extracted_results": extracted_results}))
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Error during search and extraction: {e}")
 
 @app.get("/api/maps")
 async def maps(
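A usage sketch for the two new routes, assuming the server is running locally on port 8080 as in the __main__ block below; the URL and query string here are placeholders, not values from the commit.

# Usage sketch (assumptions: local server on port 8080, placeholder URL and query).
import requests

base = "http://localhost:8080"

# Pull the visible text of a single page.
r = requests.get(f"{base}/api/web_extract",
                 params={"url": "https://example.com", "max_chars": 2000})
print(r.json()["text"][:200])

# Search, then extract text from the top results.
r = requests.get(f"{base}/api/search-and-extract",
                 params={"q": "fastapi streaming responses", "max_results": 2})
data = r.json()
print(len(data["extracted_results"]), "pages extracted")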
@@ -195,4 +265,4 @@ def get_ascii_weather(location: str):
 # Run the API server if this script is executed
 if __name__ == "__main__":
     import uvicorn
-    uvicorn.run(app, host="0.0.0.0", port=8080)
+    uvicorn.run(app, host="0.0.0.0", port=8080)
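One design note on the added handlers: both are declared async def but call requests.get directly, so the event loop blocks while each page downloads. Below is a minimal sketch of a non-blocking variant using only the standard library (asyncio.to_thread, Python 3.9+); the fetch_page and HEADERS names are hypothetical and this is an illustration of the trade-off, not part of the commit.

# Sketch only: run the blocking requests call in a worker thread so other
# requests keep being served while a page downloads.
import asyncio
import requests

HEADERS = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/111.0"}

async def fetch_page(url: str) -> str:
    response = await asyncio.to_thread(requests.get, url, headers=HEADERS)
    response.raise_for_status()
    return response.text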