Koshti10 committed on
Commit
56dc125
1 Parent(s): 3715033

Upload 2 files

Browse files
Files changed (2) hide show
  1. app_drive.py +4 -7
  2. dnd_database.py +26 -0
app_drive.py CHANGED
@@ -40,7 +40,10 @@ def create_data_from_drive(drive_link):
40
  def check_pdfs(pdf_files):
41
  global db
42
  db = create_dnd_database(pdf_files)
43
- return "Processing Completed - You can start the chat now!"
 
 
 
44
 
45
  ############################# Chatbot Specific functions #############################
46
  def user(user_message, history):
@@ -183,12 +186,6 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="emerald", neutral_hue="slate"))
183
  queue = True
184
  )
185
 
186
- # Save Chat whenever there is some change in the chatbot
187
- # chatbot.change(
188
- # fn = save_feedback,
189
- # inputs=[feedback_radio],
190
- # queue = True)
191
-
192
  # Change whenever some feedback is given (Numeric or Text)
193
  feedback_radio.change(
194
  fn=save_feedback,
 
40
  def check_pdfs(pdf_files):
41
  global db
42
  db = create_dnd_database(pdf_files)
43
+ if not db:
44
+ return "There was a discrepancy. Please upload a PDF file again or submit a drive link containing only PDFs."
45
+ else:
46
+ return "Processing Completed - You can start the chat now!"
47
 
48
  ############################# Chatbot Specific functions #############################
49
  def user(user_message, history):
 
186
  queue = True
187
  )
188
 
 
 
 
 
 
 
189
  # Change whenever some feedback is given (Numeric or Text)
190
  feedback_radio.change(
191
  fn=save_feedback,
dnd_database.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from driveapi.drive import process_pdf
2
+
3
+ from langchain.embeddings.openai import OpenAIEmbeddings
4
+ from langchain.text_splitter import CharacterTextSplitter
5
+ from langchain.vectorstores import FAISS
6
+
7
def create_dnd_database(file_list):
    """Build a FAISS vector store from the text of the given PDF files.

    Every entry of ``file_list`` is handed to ``process_pdf``; the returned
    text is concatenated, split on newlines into overlapping chunks, and the
    chunks are embedded with ``OpenAIEmbeddings`` into a FAISS index.

    Args:
        file_list: Iterable of PDF inputs accepted by ``process_pdf``,
            or ``None``.

    Returns:
        The FAISS store built from the chunks, or ``None`` when ``file_list``
        is ``None``/empty or when no text chunks could be produced. Callers
        can therefore treat a falsy result as "nothing was indexed".
    """
    # Nothing to index: signal "no database" instead of failing further down.
    if not file_list:
        return None

    # Assumes process_pdf returns a str for each input (the original relied
    # on the same assumption when concatenating with ``+=``).
    raw_text = ''.join(process_pdf(pdf) for pdf in file_list)

    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )
    texts = text_splitter.split_text(raw_text)
    print('Length of text: ' + str(len(raw_text)))

    # With zero chunks there is nothing to embed; FAISS.from_texts is expected
    # to fail on an empty list, so return None and let the caller report it.
    if not texts:
        return None

    # Create the embedding client only once there is something to embed.
    embedding = OpenAIEmbeddings()
    return FAISS.from_texts(texts, embedding)