Asaad Almutareb committed
Commit 33b41aa · Parent(s): e9acb87

added FAISS and S3 loading
Files changed:
- .gitignore (+4 -1)
- qa.py (+15 -11)
.gitignore CHANGED

@@ -166,4 +166,7 @@ cython_debug/
 chroma_db/*
 bin
 obj
-.langchain.sqlite
+.langchain.sqlite
+*.zip
+*.faiss
+*.pkl
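For reference, the new ignore patterns line up with the artifacts this commit starts producing locally: the vectorstore archive pulled from S3 is a .zip, and a FAISS index saved through LangChain's wrapper is written out as a .faiss file plus a .pkl docstore. A minimal sketch of how such an index gets created (the sample text is a placeholder; the model and folder name mirror the ones in the qa.py diff):

# Sketch: saving a LangChain FAISS index yields the index.faiss / index.pkl
# files that the new .gitignore patterns keep out of version control.
from langchain.embeddings import HuggingFaceHubEmbeddings
from langchain.vectorstores import FAISS

embeddings = HuggingFaceHubEmbeddings(repo_id="sentence-transformers/multi-qa-mpnet-base-dot-v1")
db = FAISS.from_texts(["example document"], embeddings)   # placeholder content
db.save_local("./vectorstore/lc-faiss-multi-qa-mpnet")    # writes index.faiss and index.pkl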
qa.py CHANGED

@@ -18,16 +18,17 @@ from langchain.llms import HuggingFaceHub
 from langchain.embeddings import HuggingFaceHubEmbeddings
 # vectorestore
 from langchain.vectorstores import Chroma
+from langchain.vectorstores import FAISS
+import zipfile
 
 # retrieval chain
 from langchain.chains import RetrievalQAWithSourcesChain
 # prompt template
 from langchain.prompts import PromptTemplate
 from langchain.memory import ConversationBufferMemory
-
-# reorder retrived documents
+
 # github issues
-from langchain.document_loaders import GitHubIssuesLoader
+#from langchain.document_loaders import GitHubIssuesLoader
 # debugging
 from langchain.globals import set_verbose
 # caching
@@ -78,7 +79,7 @@ llm = HuggingFaceHub(repo_id=llm_model_name, model_kwargs={
 })
 
 # initialize Embedding config
-embedding_model_name = "sentence-transformers/
+embedding_model_name = "sentence-transformers/multi-qa-mpnet-base-dot-v1"
 embeddings = HuggingFaceHubEmbeddings(repo_id=embedding_model_name)
 
 set_llm_cache(SQLiteCache(database_path=".langchain.sqlite"))
@@ -86,16 +87,19 @@ set_llm_cache(SQLiteCache(database_path=".langchain.sqlite"))
 # retrieve vectorsrore
 s3 = boto3.client('s3', config=Config(signature_version=UNSIGNED))
 
-##
+## download vectorstore from S3
 s3.download_file(AWS_S3_LOCATION, AWS_S3_FILE, VS_DESTINATION)
-
-
-db.get()
+with zipfile.ZipFile(VS_DESTINATION, 'r') as zip_ref:
+    zip_ref.extractall('./vectorstore/')
 
-
+FAISS_INDEX_PATH='./vectorstore/lc-faiss-multi-qa-mpnet'
+db = FAISS.load_local(FAISS_INDEX_PATH, embeddings)
+
+# use the cached embeddings instead of embeddings to speed up re-retrival
+# db = Chroma(persist_directory="./vectorstore", embedding_function=embeddings)
+# db.get()
 
-
-# asks LLM to extract relevant parts from retrieved documents
+retriever = db.as_retriever(search_type="mmr", search_kwargs={'k': 3, 'lambda_mult': 0.25})
 
 prompt = PromptTemplate(
     input_variables=["history", "context", "question"],
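Taken together, the qa.py changes swap the Chroma-backed vectorstore for a FAISS index that is fetched from S3 at startup. Below is a self-contained sketch of that flow, assuming the AWS_S3_LOCATION, AWS_S3_FILE and VS_DESTINATION settings that qa.py defines elsewhere (placeholder defaults are used here) and a HUGGINGFACEHUB_API_TOKEN in the environment:

# Hedged sketch of the FAISS-from-S3 startup flow added in this commit.
# Bucket, key and paths are placeholders; qa.py reads its own values elsewhere.
import os
import zipfile

import boto3
from botocore import UNSIGNED
from botocore.client import Config
from langchain.embeddings import HuggingFaceHubEmbeddings
from langchain.vectorstores import FAISS

AWS_S3_LOCATION = os.getenv("AWS_S3_LOCATION", "my-bucket")        # placeholder
AWS_S3_FILE = os.getenv("AWS_S3_FILE", "vectorstore.zip")          # placeholder
VS_DESTINATION = os.getenv("VS_DESTINATION", "./vectorstore.zip")  # placeholder
FAISS_INDEX_PATH = "./vectorstore/lc-faiss-multi-qa-mpnet"

# anonymous (unsigned) S3 client, matching the diff
s3 = boto3.client("s3", config=Config(signature_version=UNSIGNED))
s3.download_file(AWS_S3_LOCATION, AWS_S3_FILE, VS_DESTINATION)

# unpack the archive and load the FAISS index with the same embedding model
with zipfile.ZipFile(VS_DESTINATION, "r") as zip_ref:
    zip_ref.extractall("./vectorstore/")

embeddings = HuggingFaceHubEmbeddings(
    repo_id="sentence-transformers/multi-qa-mpnet-base-dot-v1"
)
db = FAISS.load_local(FAISS_INDEX_PATH, embeddings)

# MMR retrieval: k=3 results, low lambda_mult (0.25) weights diversity over similarity
retriever = db.as_retriever(
    search_type="mmr", search_kwargs={"k": 3, "lambda_mult": 0.25}
)

Two details worth noting: the embeddings passed to FAISS.load_local should be the same model that built the index (multi-qa-mpnet-base-dot-v1 here), and the resulting retriever is presumably handed to the imported RetrievalQAWithSourcesChain in the same place the Chroma-backed retriever was used before, though that wiring is outside the hunks shown in this diff.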