Spaces:
Sleeping
Sleeping
DEBUG: updating getLinks
Browse files
- app.py +5 -4
- functions.py +2 -1
app.py
CHANGED
@@ -10,6 +10,7 @@ from fastapi.middleware.cors import CORSMiddleware
|
|
10 |
from langchain_community.document_loaders import UnstructuredURLLoader
|
11 |
from src.api.speech_api import speech_translator_router
|
12 |
from functions import client as supabase
|
|
|
13 |
|
14 |
app = FastAPI(title="ConversAI", root_path="/api/v1")
|
15 |
|
@@ -224,11 +225,11 @@ async def addText(addQaPair: AddQAPair):
|
|
224 |
|
225 |
@app.post("/addWebsite")
|
226 |
async def addWebsite(vectorstore: str, websiteUrls: list[str]):
|
227 |
-
|
228 |
-
loader = UnstructuredURLLoader(urls=urls)
|
229 |
docs = loader.load()
|
230 |
text = "\n\n".join(
|
231 |
-
[f"
|
|
|
232 |
username, chatbotname = vectorstore.split("$")[1], vectorstore.split("$")[2]
|
233 |
df = pd.DataFrame(client.table("ConversAI_ChatbotInfo").select("*").execute().data)
|
234 |
currentCount = df[(df["user_id"] == username) & (df["chatbotname"] == chatbotname)]["charactercount"].iloc[0]
|
@@ -238,7 +239,7 @@ async def addWebsite(vectorstore: str, websiteUrls: list[str]):
|
|
238 |
if newCount < int(limit):
|
239 |
client.table("ConversAI_ChatbotInfo").update({"charactercount": str(newCount)}).eq("user_id", username).eq(
|
240 |
"chatbotname", chatbotname).execute()
|
241 |
-
return addDocuments(text=text, source=
|
242 |
else:
|
243 |
return {
|
244 |
"output": "WEBSITE EXCEEDING LIMITS, PLEASE TRY WITH A SMALLER DOCUMENT."
|
|
|
10 |
from langchain_community.document_loaders import UnstructuredURLLoader
|
11 |
from src.api.speech_api import speech_translator_router
|
12 |
from functions import client as supabase
|
13 |
+
from urllib.parse import urlparse
|
14 |
|
15 |
app = FastAPI(title="ConversAI", root_path="/api/v1")
|
16 |
|
|
|
225 |
|
226 |
@app.post("/addWebsite")
|
227 |
async def addWebsite(vectorstore: str, websiteUrls: list[str]):
|
228 |
+
loader = UnstructuredURLLoader(urls=websiteUrls)
|
|
|
229 |
docs = loader.load()
|
230 |
text = "\n\n".join(
|
231 |
+
[f"{docs[doc].page_content}" for doc in range(len(docs))]
|
232 |
+
)
|
233 |
username, chatbotname = vectorstore.split("$")[1], vectorstore.split("$")[2]
|
234 |
df = pd.DataFrame(client.table("ConversAI_ChatbotInfo").select("*").execute().data)
|
235 |
currentCount = df[(df["user_id"] == username) & (df["chatbotname"] == chatbotname)]["charactercount"].iloc[0]
|
|
|
239 |
if newCount < int(limit):
|
240 |
client.table("ConversAI_ChatbotInfo").update({"charactercount": str(newCount)}).eq("user_id", username).eq(
|
241 |
"chatbotname", chatbotname).execute()
|
242 |
+
return addDocuments(text=text, source=urlparse(websiteUrls[0]).netloc, vectorstore=vectorstore)
|
243 |
else:
|
244 |
return {
|
245 |
"output": "WEBSITE EXCEEDING LIMITS, PLEASE TRY WITH A SMALLER DOCUMENT."
|
functions.py
CHANGED
@@ -154,6 +154,7 @@ def addDocuments(text: str, source: str, vectorstore: str):
|
|
154 |
def format_docs(docs: str):
|
155 |
context = ""
|
156 |
for doc in docs:
|
|
|
157 |
context += f"CONTENT: {doc.page_content}\nSOURCE: {doc.metadata} \n\n\n"
|
158 |
if context == "":
|
159 |
context = "No context found"
|
@@ -255,7 +256,7 @@ def listTables(username: str):
|
|
255 |
|
256 |
def getLinks(url: str, timeout=30):
|
257 |
start = time.time()
|
258 |
-
|
259 |
def getLinksFromPage(url: str) -> list:
|
260 |
response = requests.get(url)
|
261 |
soup = BeautifulSoup(response.content, "lxml")
|
|
|
154 |
def format_docs(docs: str):
|
155 |
context = ""
|
156 |
for doc in docs:
|
157 |
+
print("METADATA ::: ", type(doc.metadata))
|
158 |
context += f"CONTENT: {doc.page_content}\nSOURCE: {doc.metadata} \n\n\n"
|
159 |
if context == "":
|
160 |
context = "No context found"
|
|
|
256 |
|
257 |
def getLinks(url: str, timeout=30):
|
258 |
start = time.time()
|
259 |
+
|
260 |
def getLinksFromPage(url: str) -> list:
|
261 |
response = requests.get(url)
|
262 |
soup = BeautifulSoup(response.content, "lxml")
|