Spaces:
Sleeping
Sleeping
Samarth991
committed on
Commit
•
31f4dd5
1
Parent(s):
160ee8a
adding chatbot
Browse files
app.py
CHANGED
@@ -26,7 +26,7 @@ def get_openai_chat_model(API_key):
|
|
26 |
llm = OpenAI()
|
27 |
return llm
|
28 |
|
29 |
-
def process_documents(documents,data_chunk=
|
30 |
text_splitter = CharacterTextSplitter(chunk_size=data_chunk, chunk_overlap=chunk_overlap,separator='\n')
|
31 |
texts = text_splitter.split_documents(documents)
|
32 |
return texts
|
@@ -58,19 +58,21 @@ def document_loader(file_path,api_key,doc_type='pdf',llm='Huggingface'):
|
|
58 |
document = process_csv_document(document_file=file_path)
|
59 |
elif doc_type == 'word':
|
60 |
document = process_word_document(document_file=file_path)
|
61 |
-
|
62 |
-
|
63 |
-
|
|
|
|
|
64 |
texts = process_documents(documents=document)
|
65 |
vector_db = FAISS.from_documents(documents=texts, embedding= embedding_model)
|
66 |
-
|
67 |
qa = RetrievalQA.from_chain_type(llm=chat_application(llm_service=llm,key=api_key),
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
return "Error in loading Documents "
|
75 |
|
76 |
return "Document Processing completed ..."
|
|
|
26 |
llm = OpenAI()
|
27 |
return llm
|
28 |
|
29 |
+
def process_documents(documents,data_chunk=2000,chunk_overlap=50):
|
30 |
text_splitter = CharacterTextSplitter(chunk_size=data_chunk, chunk_overlap=chunk_overlap,separator='\n')
|
31 |
texts = text_splitter.split_documents(documents)
|
32 |
return texts
|
|
|
58 |
document = process_csv_document(document_file=file_path)
|
59 |
elif doc_type == 'word':
|
60 |
document = process_word_document(document_file=file_path)
|
61 |
+
|
62 |
+
print("Document :",document)
|
63 |
+
embedding_model = SentenceTransformerEmbeddings(model_name='thenlper/gte-base',model_kwargs={"device": DEVICE})
|
64 |
+
global qa
|
65 |
+
try:
|
66 |
texts = process_documents(documents=document)
|
67 |
vector_db = FAISS.from_documents(documents=texts, embedding= embedding_model)
|
68 |
+
|
69 |
qa = RetrievalQA.from_chain_type(llm=chat_application(llm_service=llm,key=api_key),
|
70 |
+
chain_type='stuff',
|
71 |
+
retriever=vector_db.as_retriever(),
|
72 |
+
# chain_type_kwargs=chain_type_kwargs,
|
73 |
+
return_source_documents=True
|
74 |
+
)
|
75 |
+
except:
|
76 |
return "Error in loading Documents "
|
77 |
|
78 |
return "Document Processing completed ..."
|