Rauhan committed on
Commit
17050fe
·
1 Parent(s): 4a2e5ad

DEBUG: updating getLinks

Browse files
Files changed (2) hide show
  1. app.py +5 -4
  2. functions.py +2 -1
app.py CHANGED
@@ -10,6 +10,7 @@ from fastapi.middleware.cors import CORSMiddleware
10
  from langchain_community.document_loaders import UnstructuredURLLoader
11
  from src.api.speech_api import speech_translator_router
12
  from functions import client as supabase
 
13
 
14
  app = FastAPI(title="ConversAI", root_path="/api/v1")
15
 
@@ -224,11 +225,11 @@ async def addText(addQaPair: AddQAPair):
224
 
225
  @app.post("/addWebsite")
226
  async def addWebsite(vectorstore: str, websiteUrls: list[str]):
227
- urls = websiteUrls
228
- loader = UnstructuredURLLoader(urls=urls)
229
  docs = loader.load()
230
  text = "\n\n".join(
231
- [f"Metadata:\n{docs[doc].metadata} \nPage Content:\n {docs[doc].page_content}" for doc in range(len(docs))])
 
232
  username, chatbotname = vectorstore.split("$")[1], vectorstore.split("$")[2]
233
  df = pd.DataFrame(client.table("ConversAI_ChatbotInfo").select("*").execute().data)
234
  currentCount = df[(df["user_id"] == username) & (df["chatbotname"] == chatbotname)]["charactercount"].iloc[0]
@@ -238,7 +239,7 @@ async def addWebsite(vectorstore: str, websiteUrls: list[str]):
238
  if newCount < int(limit):
239
  client.table("ConversAI_ChatbotInfo").update({"charactercount": str(newCount)}).eq("user_id", username).eq(
240
  "chatbotname", chatbotname).execute()
241
- return addDocuments(text=text, source="website", vectorstore=vectorstore)
242
  else:
243
  return {
244
  "output": "WEBSITE EXCEEDING LIMITS, PLEASE TRY WITH A SMALLER DOCUMENT."
 
10
  from langchain_community.document_loaders import UnstructuredURLLoader
11
  from src.api.speech_api import speech_translator_router
12
  from functions import client as supabase
13
+ from urllib.parse import urlparse
14
 
15
  app = FastAPI(title="ConversAI", root_path="/api/v1")
16
 
 
225
 
226
  @app.post("/addWebsite")
227
  async def addWebsite(vectorstore: str, websiteUrls: list[str]):
228
+ loader = UnstructuredURLLoader(urls=websiteUrls)
 
229
  docs = loader.load()
230
  text = "\n\n".join(
231
+ [f"{docs[doc].page_content}" for doc in range(len(docs))]
232
+ )
233
  username, chatbotname = vectorstore.split("$")[1], vectorstore.split("$")[2]
234
  df = pd.DataFrame(client.table("ConversAI_ChatbotInfo").select("*").execute().data)
235
  currentCount = df[(df["user_id"] == username) & (df["chatbotname"] == chatbotname)]["charactercount"].iloc[0]
 
239
  if newCount < int(limit):
240
  client.table("ConversAI_ChatbotInfo").update({"charactercount": str(newCount)}).eq("user_id", username).eq(
241
  "chatbotname", chatbotname).execute()
242
+ return addDocuments(text=text, source=urlparse(websiteUrls[0]).netloc, vectorstore=vectorstore)
243
  else:
244
  return {
245
  "output": "WEBSITE EXCEEDING LIMITS, PLEASE TRY WITH A SMALLER DOCUMENT."
functions.py CHANGED
@@ -154,6 +154,7 @@ def addDocuments(text: str, source: str, vectorstore: str):
154
  def format_docs(docs: str):
155
  context = ""
156
  for doc in docs:
 
157
  context += f"CONTENT: {doc.page_content}\nSOURCE: {doc.metadata} \n\n\n"
158
  if context == "":
159
  context = "No context found"
@@ -255,7 +256,7 @@ def listTables(username: str):
255
 
256
  def getLinks(url: str, timeout=30):
257
  start = time.time()
258
-
259
  def getLinksFromPage(url: str) -> list:
260
  response = requests.get(url)
261
  soup = BeautifulSoup(response.content, "lxml")
 
154
  def format_docs(docs: str):
155
  context = ""
156
  for doc in docs:
157
+ print("METADATA ::: ", type(doc.metadata))
158
  context += f"CONTENT: {doc.page_content}\nSOURCE: {doc.metadata} \n\n\n"
159
  if context == "":
160
  context = "No context found"
 
256
 
257
  def getLinks(url: str, timeout=30):
258
  start = time.time()
259
+
260
  def getLinksFromPage(url: str) -> list:
261
  response = requests.get(url)
262
  soup = BeautifulSoup(response.content, "lxml")