danicafisher committed on
Commit
952909f
·
verified ·
1 Parent(s): e5beda5

Update helper_functions.py

Browse files
Files changed (1) hide show
  1. helper_functions.py +11 -7
helper_functions.py CHANGED
@@ -1,16 +1,21 @@
1
- from langchain_community.document_loaders import PyMuPDFLoader, TextLoader
2
  from langchain_community.vectorstores import Qdrant
3
  import os
4
 
5
- def process_file(uploaded_file):
 
 
 
 
 
6
  # save the file temporarily
7
- temp_file = "./temp/"+uploaded_file.path
8
  with open(temp_file, "wb") as file:
9
- file.write(uploaded_file.content)
10
- file_name = uploaded_file.name
11
 
12
  documents = []
13
- if uploaded_file.path.endswith(".pdf"):
14
  loader = PyMuPDFLoader(temp_file)
15
  docs = loader.load()
16
  documents.extend(docs)
@@ -22,7 +27,6 @@ def process_file(uploaded_file):
22
 
23
 
24
  def add_to_qdrant(documents, embeddings, qdrant_client, collection_name):
25
-
26
  Qdrant.from_documents(
27
  documents,
28
  embeddings,
 
1
+ from langchain_community.document_loaders import PyMuPDFLoader, TextLoader, WebBaseLoader
2
  from langchain_community.vectorstores import Qdrant
3
  import os
4
 
5
+ def process_file(file_or_url):
6
+ if isinstance(file_or_url, str) and file_or_url.startswith(('http://', 'https://')):
7
+ # Handle URL
8
+ loader = WebBaseLoader(file_or_url)
9
+ docs = loader.load()
10
+ documents.extend(docs)
11
  # save the file temporarily
12
+ temp_file = "./temp/"+file_or_url.path
13
  with open(temp_file, "wb") as file:
14
+ file.write(file_or_url.content)
15
+ file_name = file_or_url.name
16
 
17
  documents = []
18
+ if file_or_url.path.endswith(".pdf"):
19
  loader = PyMuPDFLoader(temp_file)
20
  docs = loader.load()
21
  documents.extend(docs)
 
27
 
28
 
29
  def add_to_qdrant(documents, embeddings, qdrant_client, collection_name):
 
30
  Qdrant.from_documents(
31
  documents,
32
  embeddings,