Spaces:

Samarth991
/

LLM-Chatbot

Sleeping

Samarth991 committed on Sep 12, 2023

Commit

31f4dd5

•

1 Parent(s): 160ee8a

adding chatbot

Files changed (1) hide show

app.py CHANGED Viewed

@@ -26,7 +26,7 @@ def get_openai_chat_model(API_key):
     llm = OpenAI()
     return llm
-def process_documents(documents,data_chunk=1000,chunk_overlap=50):
     text_splitter = CharacterTextSplitter(chunk_size=data_chunk, chunk_overlap=chunk_overlap,separator='\n')
     texts = text_splitter.split_documents(documents)
     return texts
@@ -58,19 +58,21 @@ def document_loader(file_path,api_key,doc_type='pdf',llm='Huggingface'):
         document = process_csv_document(document_file=file_path)
     elif doc_type == 'word':
         document = process_word_document(document_file=file_path)
-    if document:
-        print(document)
-        embedding_model = SentenceTransformerEmbeddings(model_name='thenlper/gte-base',model_kwargs={"device": DEVICE})
         texts = process_documents(documents=document)
         vector_db = FAISS.from_documents(documents=texts, embedding= embedding_model)
-        global qa
         qa = RetrievalQA.from_chain_type(llm=chat_application(llm_service=llm,key=api_key),
-                                     chain_type='stuff',
-                                     retriever=vector_db.as_retriever(),
-                                    #  chain_type_kwargs=chain_type_kwargs,
-                                    return_source_documents=True
-                                    )
-    else:
         return "Error in loading Documents "
     return "Document Processing completed ..."

     llm = OpenAI()
     return llm
+def process_documents(documents,data_chunk=2000,chunk_overlap=50):
     text_splitter = CharacterTextSplitter(chunk_size=data_chunk, chunk_overlap=chunk_overlap,separator='\n')
     texts = text_splitter.split_documents(documents)
     return texts
         document = process_csv_document(document_file=file_path)
     elif doc_type == 'word':
         document = process_word_document(document_file=file_path)
+    print("Document :",document)
+    embedding_model = SentenceTransformerEmbeddings(model_name='thenlper/gte-base',model_kwargs={"device": DEVICE})
+    global qa
+    try:
         texts = process_documents(documents=document)
         vector_db = FAISS.from_documents(documents=texts, embedding= embedding_model)
         qa = RetrievalQA.from_chain_type(llm=chat_application(llm_service=llm,key=api_key),
+                                    chain_type='stuff',
+                                    retriever=vector_db.as_retriever(),
+                                #  chain_type_kwargs=chain_type_kwargs,
+                                return_source_documents=True
+                                )
+    except:
         return "Error in loading Documents "
     return "Document Processing completed ..."