Spaces:

Koshti10
/

Chat_literature

Sleeping

Koshti10 committed on Mar 13, 2024

Commit

56dc125

verified ·

1 Parent(s): 3715033

Upload 2 files

Files changed (2) hide show

app_drive.py CHANGED Viewed

@@ -40,7 +40,10 @@ def create_data_from_drive(drive_link):
 def check_pdfs(pdf_files):
     global db
     db = create_dnd_database(pdf_files)
-    return "Processing Completed - You can start the chat now!"
 ############################# Chatbot Specific functions #############################
 def user(user_message, history):
@@ -183,12 +186,6 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="emerald", neutral_hue="slate"))
         queue = True
         )
-    # Save Chat whenever there is some change in the chatbot
-    # chatbot.change(
-    #     fn = save_feedback,
-    #     inputs=[feedback_radio],
-    #     queue = True)
     # Change whenever some feedback is given (Numeric or Text)
     feedback_radio.change(
         fn=save_feedback,

 def check_pdfs(pdf_files):
     """Rebuild the module-level ``db`` from ``pdf_files`` and report the outcome.

     The result of ``create_dnd_database(pdf_files)`` is stored in the global
     ``db``. A truthy result yields the "completed" status message; a falsy
     one (for example ``None``) yields the message asking the user to upload
     again.

     Args:
         pdf_files: Passed through unchanged to ``create_dnd_database``.

     Returns:
         A status string for the user.
     """
     global db
     db = create_dnd_database(pdf_files)
     # Success path first; anything falsy falls through to the retry prompt.
     if db:
         return "Processing Completed - You can start the chat now!"
     return "There was a discrepancy. Please upload a PDF file again or submit a drive link containing only PDFs."
 ############################# Chatbot Specific functions #############################
 def user(user_message, history):
         queue = True
         )
     # Change whenever some feedback is given (Numeric or Text)
     feedback_radio.change(
         fn=save_feedback,

dnd_database.py ADDED Viewed

+from driveapi.drive import process_pdf
+from langchain.embeddings.openai import OpenAIEmbeddings
+from langchain.text_splitter import CharacterTextSplitter
+from langchain.vectorstores import FAISS
def create_dnd_database(file_list):
    """Build a FAISS vector store from the text of the given PDF files.

    The text returned by ``process_pdf`` for each entry is concatenated,
    split with a ``CharacterTextSplitter`` (separator ``"\n"``,
    ``chunk_size=1000``, ``chunk_overlap=200``), and the resulting chunks are
    indexed with ``FAISS.from_texts`` using ``OpenAIEmbeddings``.

    Args:
        file_list: Collection of PDF inputs accepted by ``process_pdf``, or
            ``None``.

    Returns:
        The FAISS store, or ``None`` when there is nothing to index: the
        input is ``None`` or empty, or splitting produced no chunks.
    """
    # No files at all: report "no database" instead of going on to build an
    # index from empty text.
    if not file_list:
        return None

    raw_text = ''.join(process_pdf(pdf) for pdf in file_list)
    print('Length of text: ' + str(len(raw_text)))

    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )
    texts = text_splitter.split_text(raw_text)
    # PDFs with no extractable text leave nothing to embed; return None so
    # the caller can ask the user for different files.
    if not texts:
        return None

    # Create the embedding client only once there is something to embed.
    return FAISS.from_texts(texts, OpenAIEmbeddings())