angry-meow committed on
Commit
be3813f
·
2 Parent(s): d523035 9a71317

Merge branch 'main' of https://huggingface.co/spaces/CoExperiences/aie4-final into main

Browse files
Files changed (2) hide show
  1. app.py +4 -6
  2. helper_functions.py +11 -4
app.py CHANGED
@@ -51,7 +51,7 @@ async def on_chat_start():
51
  files = await cl.AskFileMessage(
52
  content="Please upload a Text or PDF File file to begin!",
53
  accept=["text/plain", "application/pdf"],
54
- max_size_mb=2,
55
  ).send()
56
 
57
  file = files[0]
@@ -63,14 +63,12 @@ async def on_chat_start():
63
 
64
  # load the file
65
  docs = process_file(file)
66
- for i, doc in enumerate(docs):
67
- doc.metadata["source"] = f"source_{i}" # TO DO: Add metadata
68
- add_to_qdrant(doc, te3_small, qdrant_client, collection_name)
69
  print(f"Processing {len(docs)} text chunks")
70
 
71
  # Add to the qdrant_store
72
- splits = text_splitter.split_documents(docs)
73
-
74
  qdrant_store.add_documents(
75
  documents=splits
76
  )
 
51
  files = await cl.AskFileMessage(
52
  content="Please upload a Text or PDF File file to begin!",
53
  accept=["text/plain", "application/pdf"],
54
+ max_size_mb=12,
55
  ).send()
56
 
57
  file = files[0]
 
63
 
64
  # load the file
65
  docs = process_file(file)
66
+ splits = text_splitter.split_documents(docs)
67
+ for i, doc in enumerate(splits):
68
+ doc.metadata["source"] = f"source_{i}"
69
  print(f"Processing {len(docs)} text chunks")
70
 
71
  # Add to the qdrant_store
 
 
72
  qdrant_store.add_documents(
73
  documents=splits
74
  )
helper_functions.py CHANGED
@@ -1,14 +1,21 @@
1
  from langchain_community.document_loaders import PyMuPDFLoader, TextLoader
2
  from langchain_community.vectorstores import Qdrant
 
 
 
 
 
 
 
 
3
 
4
- def process_file(file):
5
  documents = []
6
- if file.endswith(".pdf"):
7
- loader = PyMuPDFLoader(file)
8
  docs = loader.load()
9
  documents.extend(docs)
10
  else:
11
- loader = TextLoader(file)
12
  docs = loader.load()
13
  documents.extend(docs)
14
  return documents
 
1
  from langchain_community.document_loaders import PyMuPDFLoader, TextLoader
2
  from langchain_community.vectorstores import Qdrant
3
+ import os
4
+
5
def process_file(uploaded_file):
    """Load an uploaded text or PDF file into a list of documents.

    The upload's raw bytes are first written to a scratch file on disk,
    because both loaders used here are constructed from a file path.

    Args:
        uploaded_file: Upload object. This function reads its ``content``
            attribute (the bytes written to disk) and its ``path``
            attribute (used only to check for a ``.pdf`` extension).

    Returns:
        list: The documents returned by the selected loader's ``load()``.
    """
    # Save the upload temporarily so the loaders can read it from a path.
    # NOTE(review): the scratch path is fixed, so concurrent uploads would
    # overwrite each other -- confirm whether that matters for this app.
    temp_file = "./temp.pdf"
    with open(temp_file, "wb") as file:
        file.write(uploaded_file.content)

    if uploaded_file.path.endswith(".pdf"):
        loader = PyMuPDFLoader(temp_file)
    else:
        # Bug fix: this branch previously passed the undefined name
        # ``tmp_location``, raising NameError for every non-PDF upload.
        loader = TextLoader(temp_file)

    documents = []
    documents.extend(loader.load())
    return documents