blaze999 committed on
Commit
cd3725f
·
1 Parent(s): 4b77bf2

Upload 9 files

Browse files
.gitattributes CHANGED
@@ -34,3 +34,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  local_models/llama-2-7b-chat.Q4_K_M.gguf filter=lfs diff=lfs merge=lfs -text
 
 
 
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  local_models/llama-2-7b-chat.Q4_K_M.gguf filter=lfs diff=lfs merge=lfs -text
37
+ PMS_pdfs/DBS[[:space:]]Sustainability[[:space:]]Report[[:space:]]2019[[:space:]]20200306.pdf filter=lfs diff=lfs merge=lfs -text
38
+ PMS_vector_db/PMS_index/index.faiss filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ local_models/
PMS_pdfs/DBS Sustainability Report 2019 20200306.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3cac737e5f4acb7a922b68a251d21eace4743dd8f5cc6a733e68cbf6f625a65b
3
+ size 7719722
PMS_pdfs/First_Steps_to_Investing_A_Beginners_Guide_Prithvi_Haldea.pdf ADDED
Binary file (161 kB). View file
 
PMS_pdfs/deposit-policy.pdf ADDED
Binary file (273 kB). View file
 
PMS_pdfs/understanding_mutualfunds.pdf ADDED
Binary file (766 kB). View file
 
PMS_vector_db/PMS_index/index.faiss ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9799a3e7f37c066b197cb0f6d075c583ab7a307c7266d42625e512000bda56d4
3
+ size 6246445
PMS_vector_db/PMS_index/index.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c8d03b619b6ec988fc0ca73c5e97fbf40b93fffb365f1d9c56b88ae4263c11f2
3
+ size 569560
app.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain.llms import CTransformers
2
+ from langchain.document_loaders import PyPDFLoader
3
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
4
+ from langchain.embeddings import HuggingFaceEmbeddings
5
+ from langchain.vectorstores import FAISS
6
+ from langchain.chains import RetrievalQA
7
+ import time
8
+ import gradio as gr
9
+ import os
10
+
11
# Local Llama-2 chat model (GGUF file) served through the CTransformers wrapper.
llm = CTransformers(model="local_models/llama-2-7b-chat.Q4_K_M.gguf")

# Embedding model shared by index construction (FAISS.from_documents) and
# index loading (FAISS.load_local); both must use the same model.
# NOTE(review): presumably needs the sentence-transformers package at runtime - confirm.
embeddings = HuggingFaceEmbeddings(model_name='local_models/embeddings-bge-large/')
13
+
14
+
15
+
16
def load_data(dir_path):
    """Load every PDF located directly inside ``dir_path`` as page documents.

    Args:
        dir_path: Directory that holds the PDF files. A trailing path
            separator is optional.

    Returns:
        A flat list containing, for each PDF in file-name order, the
        documents returned by ``PyPDFLoader.load_and_split``. The list is
        empty when the directory contains no PDF files.

    Raises:
        OSError: If ``dir_path`` cannot be listed (propagated from
            ``os.listdir``).
    """
    data = []
    # os.listdir returns entries in arbitrary order; sort so the resulting
    # document order is reproducible between runs.
    for file in sorted(os.listdir(dir_path)):
        # Only hand real PDFs to the PDF loader; skip stray files and
        # sub-directories that happen to live next to them.
        if not file.lower().endswith(".pdf"):
            continue
        print(file)
        # os.path.join works whether or not dir_path ends with a separator.
        loader = PyPDFLoader(os.path.join(dir_path, file))
        data.extend(loader.load_and_split())
    return data
25
+
26
def build_vector_db(data, db_path='PMS_vector_db/PMS_index',
                    chunk_size=300, chunk_overlap=30):
    """Split documents into chunks, embed them into a FAISS index and save it.

    Args:
        data: List of documents to index (as returned by a document loader).
        db_path: Directory the index is written to with ``save_local``.
        chunk_size: Maximum length of one chunk, measured with ``len``.
        chunk_overlap: Number of characters shared by neighbouring chunks.

    Returns:
        The FAISS vector store built from the chunks.

    Raises:
        ValueError: If ``data`` is empty or splitting yields no chunks, so
            there is nothing to embed.
    """
    # Fail early with a clear message instead of an obscure error from
    # deep inside the vector store when there is nothing to index.
    if not data:
        raise ValueError("No documents were provided to build the vector DB.")

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
    )
    text_chunks = text_splitter.split_documents(data)
    print(len(text_chunks))
    if not text_chunks:
        raise ValueError("The provided documents produced no text chunks to index.")

    # `embeddings` is the module-level embedding model.
    docsearch = FAISS.from_documents(text_chunks, embeddings)
    docsearch.save_local(db_path)
    return docsearch
38
+
39
def get_vector_db(db_path):
    """Return the FAISS vector store, loading it from disk when available.

    Args:
        db_path: Directory expected to contain a previously saved index
            (``index.faiss`` alongside ``index.pkl``).

    Returns:
        The vector store loaded from ``db_path`` if an index file is present
        there; otherwise a new store built from the PDFs in ``PMS_pdfs/``.
    """
    # The directory alone is not proof of a usable index (it may be empty
    # or left over from an interrupted build), so look for the index file.
    index_file = os.path.join(db_path, 'index.faiss')
    if os.path.isfile(index_file):
        print('loading from the existing vectorDB')
        # NOTE(security): the saved index ships with a pickle file
        # (index.pkl); only load indexes that come from a trusted source.
        return FAISS.load_local(db_path, embeddings)

    # No saved index: rebuild from the source PDFs. build_vector_db chooses
    # where the new index is saved; db_path is only used for the lookup above.
    data = load_data("PMS_pdfs/")
    return build_vector_db(data)
47
+
48
def _get_qa_chain():
    """Return the RetrievalQA chain, building it only on the first call."""
    chain = getattr(_get_qa_chain, "_chain", None)
    if chain is None:
        vector_db = get_vector_db('PMS_vector_db/PMS_index/')
        chain = RetrievalQA.from_chain_type(llm=llm, chain_type='stuff',
                                            retriever=vector_db.as_retriever(),
                                            return_source_documents=True)
        # Cache on the function object so later chat messages do not reload
        # the index from disk or rebuild the chain.
        _get_qa_chain._chain = chain
    return chain


def predict(prompt, history):
    """Answer ``prompt`` with the retrieval QA chain, streaming the text.

    Args:
        prompt: The user's latest chat message, used as the chain's query.
        history: Previous chat turns supplied by the chat UI; not used.

    Yields:
        Progressively longer prefixes of the answer, one more character
        every 0.05 seconds, so the UI shows a typing effect.
    """
    response = _get_qa_chain()({'query': prompt})['result']
    for end in range(1, len(response) + 1):
        time.sleep(0.05)
        yield response[:end]
58
+
59
+
60
# Chat UI wired to `predict`, with request queuing enabled.
demo = gr.ChatInterface(predict).queue()

# Start the web server only when this file is executed as a script, so that
# importing the module does not launch it as a side effect.
if __name__ == "__main__":
    demo.launch()
61
+
62
+
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ langchain
2
+ pypdf
3
+ ctransformers
4
+ faiss-cpu
5
+ gradio