Upload 4 files
- .env +3 -0
- app.py +123 -0
- constants.py +142 -0
- requirements.txt +17 -0
.env
ADDED
@@ -0,0 +1,3 @@
+HUGGINGFACE_API_TOKEN='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'
+REPLICATE_API_TOKEN='r8_f0yg1vSn32AAGDnqV6qErGJZeCcFFl30CJ46E'  # --> Org gmail account
+# REPLICATE_API_TOKEN='r8_L3BQN0zjnB1KwwkPjZD0RSLVrj9umPv0oRjFY'  # trial token, not working
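How these values are consumed: app.py calls load_dotenv() at import time, which copies the KEY=VALUE pairs above into os.environ, and langchain's Replicate wrapper then picks up REPLICATE_API_TOKEN from the environment, so the token is never passed explicitly. A minimal sketch of that pattern (the RuntimeError guard is an illustrative addition, not code from this commit):

    import os
    from dotenv import load_dotenv

    load_dotenv()  # copy .env entries into the process environment
    if os.environ.get("REPLICATE_API_TOKEN") is None:
        raise RuntimeError("REPLICATE_API_TOKEN is missing; check .env")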
app.py
ADDED
@@ -0,0 +1,123 @@
+import streamlit as st
+from streamlit_chat import message
+from langchain.chains import ConversationalRetrievalChain
+from langchain.embeddings import HuggingFaceEmbeddings
+from langchain.llms import CTransformers
+from langchain.llms import Replicate
+from langchain.text_splitter import CharacterTextSplitter
+from langchain.vectorstores import FAISS
+from langchain.memory import ConversationBufferMemory
+from langchain.document_loaders import PyPDFLoader, UnstructuredFileLoader
+from langchain.document_loaders import TextLoader
+from langchain.document_loaders import Docx2txtLoader
+from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
+from langchain.text_splitter import Language, RecursiveCharacterTextSplitter
+import os
+from dotenv import load_dotenv
+import tempfile
+from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor, as_completed
+from constants import (
+    CHROMA_SETTINGS,
+    DOCUMENT_MAP,
+    EMBEDDING_MODEL_NAME,
+    INGEST_THREADS,
+    PERSIST_DIRECTORY,
+    SOURCE_DIRECTORY,
+)
+from langchain.docstore.document import Document
+load_dotenv()
+
+
+def initialize_session_state():
+    if 'history' not in st.session_state:
+        st.session_state['history'] = []
+
+    if 'generated' not in st.session_state:
+        st.session_state['generated'] = ["Hello! Ask me anything about 🤗"]
+
+    if 'past' not in st.session_state:
+        st.session_state['past'] = ["Hey! 👋"]
+
+def conversation_chat(query, chain, history):
+    result = chain({"question": query, "chat_history": history})
+    history.append((query, result["answer"]))
+    return result["answer"]
+
+def display_chat_history(chain):
+    reply_container = st.container()
+    container = st.container()
+
+    with container:
+        with st.form(key='my_form', clear_on_submit=True):
+            user_input = st.text_input("Question:", placeholder="Ask about your Documents", key='input')
+            submit_button = st.form_submit_button(label='Send')
+
+        if submit_button and user_input:
+            with st.spinner('Generating response...'):
+                output = conversation_chat(user_input, chain, st.session_state['history'])
+
+            st.session_state['past'].append(user_input)
+            st.session_state['generated'].append(output)
+
+    if st.session_state['generated']:
+        with reply_container:
+            for i in range(len(st.session_state['generated'])):
+                message(st.session_state["past"][i], is_user=True, key=str(i) + '_user', avatar_style="thumbs")
+                message(st.session_state["generated"][i], key=str(i), avatar_style="fun-emoji")
+
+
+def create_conversational_chain(vector_store):
+    load_dotenv()
+    llm = Replicate(
+        streaming=True,
+        # model="replicate/llama-2-70b-chat:58d078176e02c219e11eb4da5a02a7830a283b14cf8f94537af893ccff5ee781",
+        model="meta/llama-2-7b-chat:8e6975e5ed6174911a6ff3d60540dfd4844201974602551e10e9e87ab143d81e",
+        callbacks=[StreamingStdOutCallbackHandler()],
+        input={"temperature": 0.01, "max_length": 500, "top_p": 1})
+    memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
+
+    chain = ConversationalRetrievalChain.from_llm(llm=llm, chain_type='stuff',
+                                                  retriever=vector_store.as_retriever(search_kwargs={"k": 2}),
+                                                  memory=memory)
+    return chain
+
+file_paths = [
+    './SOURCE_DOCUMENTS/Freedom of Information and Protection of Privacy Act, R.S.O. 1990, c. F.31[462] - Copy.pdf',
+    './SOURCE_DOCUMENTS/Highway Traffic Act, R.S.O. 1990, c. H.8[465] - Copy.pdf',
+    './SOURCE_DOCUMENTS/Narcotics Safety and Awareness Act, 2010, S.O. 2010, c. 22[463].pdf',
+    './SOURCE_DOCUMENTS/Nutrient Management Act, 2002, S.O. 2002, c. 4[464].pdf',
+    # Add more file paths as needed
+]
+
+def main():
+    # load_dotenv()
+    os.environ.get("REPLICATE_API_TOKEN")  # no-op lookup; Replicate reads the token from the environment populated by load_dotenv()
+    # Initialize session state
+    initialize_session_state()
+    st.title("Multi-Docs ChatBot using llama-2-7b :books:")
+    # loader = UnstructuredFileLoader('./SOURCE_DOCUMENTS/Freedom of Information and Protection of Privacy Act, R.S.O. 1990, c. F.31[462] - Copy.pdf')
+    # documents = loader.load()
+    documents = []
+    for file_path in file_paths:
+        loader = UnstructuredFileLoader(file_path)
+        loaded_doc = loader.load()  # load() returns a list of Documents
+        documents.extend(loaded_doc)
+
+    text_splitter = CharacterTextSplitter(separator='\n',
+                                          chunk_size=1500,
+                                          chunk_overlap=300)
+    text_chunks = text_splitter.split_documents(documents)
+
+    embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2', model_kwargs={'device': 'cpu'})
+
+    vector_store = FAISS.from_documents(text_chunks, embeddings)
+
+    # Create the chain object
+    chain = create_conversational_chain(vector_store)
+
+    # Display chat history
+    display_chat_history(chain)
+
+
+if __name__ == "__main__":
+    main()
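A caveat on main() as written: Streamlit re-runs the whole script on every widget interaction, so the four PDFs are re-loaded, re-split, and re-embedded after every chat message. One possible refactor (not part of this commit; assumes Streamlit 1.18+ for st.cache_resource) builds the FAISS index once per process:

    @st.cache_resource
    def build_vector_store(paths: tuple):
        # ingest once: load, split, embed, index
        documents = []
        for file_path in paths:
            documents.extend(UnstructuredFileLoader(file_path).load())
        chunks = CharacterTextSplitter(separator='\n', chunk_size=1500,
                                       chunk_overlap=300).split_documents(documents)
        embeddings = HuggingFaceEmbeddings(
            model_name='sentence-transformers/all-MiniLM-L6-v2',
            model_kwargs={'device': 'cpu'})
        return FAISS.from_documents(chunks, embeddings)

    # in main(): vector_store = build_vector_store(tuple(file_paths))

The tuple argument keeps the cache key hashable across reruns.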
constants.py
ADDED
@@ -0,0 +1,142 @@
+import os
+
+# from dotenv import load_dotenv
+from chromadb.config import Settings
+
+# https://python.langchain.com/en/latest/modules/indexes/document_loaders/examples/excel.html?highlight=xlsx#microsoft-excel
+from langchain.document_loaders import CSVLoader, PDFMinerLoader, TextLoader, UnstructuredExcelLoader, Docx2txtLoader
+
+# load_dotenv()
+ROOT_DIRECTORY = os.path.dirname(os.path.realpath(__file__))
+
+# Define the folder for storing database
+SOURCE_DIRECTORY = f"{ROOT_DIRECTORY}/SOURCE_DOCUMENTS"
+
+PERSIST_DIRECTORY = f"{ROOT_DIRECTORY}/DB"
+
+# Can be changed to a specific number
+INGEST_THREADS = os.cpu_count() or 8
+
+# Define the Chroma settings
+CHROMA_SETTINGS = Settings(
+    anonymized_telemetry=False,
+    is_persistent=True,
+)
+
+
+# https://python.langchain.com/en/latest/_modules/langchain/document_loaders/excel.html#UnstructuredExcelLoader
+DOCUMENT_MAP = {
+    ".txt": TextLoader,
+    ".md": TextLoader,
+    ".py": TextLoader,
+    ".pdf": PDFMinerLoader,
+    ".csv": CSVLoader,
+    ".xls": UnstructuredExcelLoader,
+    ".xlsx": UnstructuredExcelLoader,
+    ".docx": Docx2txtLoader,
+    ".doc": Docx2txtLoader,
+}
+
+# Default Instructor Model
+EMBEDDING_MODEL_NAME = "hkunlp/instructor-large"  # Uses 1.5 GB of VRAM (High Accuracy with lower VRAM usage)
+
+####
+#### OTHER EMBEDDING MODEL OPTIONS
+####
+
+# EMBEDDING_MODEL_NAME = "hkunlp/instructor-xl"  # Uses 5 GB of VRAM (Most Accurate of all models)
+# EMBEDDING_MODEL_NAME = "intfloat/e5-large-v2"  # Uses 1.5 GB of VRAM (A little less accurate than instructor-large)
+# EMBEDDING_MODEL_NAME = "intfloat/e5-base-v2"  # Uses 0.5 GB of VRAM (A good model for lower VRAM GPUs)
+# EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"  # Uses 0.2 GB of VRAM (Less accurate but fastest - only requires 150 MB of VRAM)
+
+####
+#### MULTILINGUAL EMBEDDING MODELS
+####
+
+# EMBEDDING_MODEL_NAME = "intfloat/multilingual-e5-large"  # Uses 2.5 GB of VRAM
+# EMBEDDING_MODEL_NAME = "intfloat/multilingual-e5-base"  # Uses 1.2 GB of VRAM
+
+
+#### SELECT AN OPEN SOURCE LLM (LARGE LANGUAGE MODEL)
+# Select the Model ID and model_basename
+# load the LLM for generating Natural Language responses
+
+#### GPU VRAM Memory required for LLM Models (ONLY) by Billion Parameter value (B Model)
+#### Does not include VRAM used by Embedding Models - which use an additional 2GB-7GB of VRAM depending on the model.
+####
+####  (B Model)   (float32)   (float16)   (GPTQ 8bit)       (GPTQ 4bit)
+####     7b        28 GB       14 GB      7 GB - 9 GB       3.5 GB - 5 GB
+####    13b        52 GB       26 GB      13 GB - 15 GB     6.5 GB - 8 GB
+####    32b       130 GB       65 GB      32.5 GB - 35 GB   16.25 GB - 19 GB
+####    65b       260.8 GB    130.4 GB    65.2 GB - 67 GB   32.6 GB - 35 GB
+
+MODEL_ID = "TheBloke/Llama-2-7B-Chat-GGML"
+MODEL_BASENAME = "llama-2-7b-chat.ggmlv3.q4_0.bin"
+
+####
+#### (FOR HF MODELS)
+####
+
+# MODEL_ID = "TheBloke/vicuna-7B-1.1-HF"
+# MODEL_BASENAME = None
+# MODEL_ID = "TheBloke/Wizard-Vicuna-7B-Uncensored-HF"
+# MODEL_ID = "TheBloke/guanaco-7B-HF"
+# MODEL_ID = 'NousResearch/Nous-Hermes-13b'  # Requires ~ 23GB VRAM. Using STransformers
+#                                            # alongside will 100% create OOM on 24GB cards.
+# llm = load_model(device_type, model_id=model_id)
+
+####
+#### (FOR GPTQ QUANTIZED) Select a llm model based on your GPU and VRAM GB. Does not include Embedding Models VRAM usage.
+####
+
+##### 48GB VRAM Graphics Cards (RTX 6000, RTX A6000 and other 48GB VRAM GPUs) #####
+
+### 65b GPTQ LLM Models for 48GB GPUs (*** With best embedding model: hkunlp/instructor-xl ***)
+# model_id = "TheBloke/guanaco-65B-GPTQ"
+# model_basename = "model.safetensors"
+# model_id = "TheBloke/Airoboros-65B-GPT4-2.0-GPTQ"
+# model_basename = "model.safetensors"
+# model_id = "TheBloke/gpt4-alpaca-lora_mlp-65B-GPTQ"
+# model_basename = "model.safetensors"
+# model_id = "TheBloke/Upstage-Llama1-65B-Instruct-GPTQ"
+# model_basename = "model.safetensors"
+
+##### 24GB VRAM Graphics Cards (RTX 3090 - RTX 4090 (35% Faster) - RTX A5000 - RTX A5500) #####
+
+### 13b GPTQ Models for 24GB GPUs (*** With best embedding model: hkunlp/instructor-xl ***)
+# model_id = "TheBloke/Wizard-Vicuna-13B-Uncensored-GPTQ"
+# model_basename = "Wizard-Vicuna-13B-Uncensored-GPTQ-4bit-128g.compat.no-act-order.safetensors"
+# model_id = "TheBloke/vicuna-13B-v1.5-GPTQ"
+# model_basename = "model.safetensors"
+# model_id = "TheBloke/Nous-Hermes-13B-GPTQ"
+# model_basename = "nous-hermes-13b-GPTQ-4bit-128g.no-act.order"
+# model_id = "TheBloke/WizardLM-13B-V1.2-GPTQ"
+# model_basename = "gptq_model-4bit-128g.safetensors"
+
+### 30b GPTQ Models for 24GB GPUs (*** Requires using intfloat/e5-base-v2 instead of hkunlp/instructor-large as embedding model ***)
+# model_id = "TheBloke/Wizard-Vicuna-30B-Uncensored-GPTQ"
+# model_basename = "Wizard-Vicuna-30B-Uncensored-GPTQ-4bit--1g.act.order.safetensors"
+# model_id = "TheBloke/WizardLM-30B-Uncensored-GPTQ"
+# model_basename = "WizardLM-30B-Uncensored-GPTQ-4bit.act-order.safetensors"
+
+##### 8-10GB VRAM Graphics Cards (RTX 3080 - RTX 3080 Ti - RTX 3070 Ti - 3060 Ti - RTX 2000 Series, Quadro RTX 4000, 5000, 6000) #####
+### (*** Requires using intfloat/e5-small-v2 instead of hkunlp/instructor-large as embedding model ***)
+
+### 7b GPTQ Models for 8GB GPUs
+# model_id = "TheBloke/Wizard-Vicuna-7B-Uncensored-GPTQ"
+# model_basename = "Wizard-Vicuna-7B-Uncensored-GPTQ-4bit-128g.no-act.order.safetensors"
+# model_id = "TheBloke/WizardLM-7B-uncensored-GPTQ"
+# model_basename = "WizardLM-7B-uncensored-GPTQ-4bit-128g.compat.no-act-order.safetensors"
+# model_id = "TheBloke/wizardLM-7B-GPTQ"
+# model_basename = "wizardLM-7B-GPTQ-4bit.compat.no-act-order.safetensors"
+
+####
+#### (FOR GGML) (Quantized cpu+gpu+mps) models - check if they support llama.cpp
+####
+
+# MODEL_ID = "TheBloke/wizard-vicuna-13B-GGML"
+# MODEL_BASENAME = "wizard-vicuna-13B.ggmlv3.q4_0.bin"
+# MODEL_BASENAME = "wizard-vicuna-13B.ggmlv3.q6_K.bin"
+# MODEL_BASENAME = "wizard-vicuna-13B.ggmlv3.q2_K.bin"
+# MODEL_ID = "TheBloke/orca_mini_3B-GGML"
+# MODEL_BASENAME = "orca-mini-3b.ggmlv3.q4_0.bin"
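Note that app.py never imports DOCUMENT_MAP; it routes every file through UnstructuredFileLoader instead. The map is laid out for extension-based loader dispatch, which would look roughly like the sketch below (load_single_document is a hypothetical helper name, not defined anywhere in this commit):

    import os
    from constants import DOCUMENT_MAP

    def load_single_document(file_path: str):
        # pick the loader class registered for this file extension
        ext = os.path.splitext(file_path)[1].lower()
        loader_class = DOCUMENT_MAP.get(ext)
        if loader_class is None:
            raise ValueError(f"No loader registered for '{ext}' files")
        return loader_class(file_path).load()  # a list of Documents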
requirements.txt
ADDED
@@ -0,0 +1,17 @@
+langchain
+torch
+accelerate
+sentence_transformers
+streamlit_chat
+streamlit
+faiss-cpu
+tiktoken
+ctransformers
+huggingface-hub
+pypdf
+pypdf2
+python-dotenv
+replicate
+docx2txt
+chromadb       # imported by constants.py (chromadb.config.Settings)
+unstructured   # backs UnstructuredFileLoader in app.py
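To run the app locally, install the dependencies with "pip install -r requirements.txt" and start it with "streamlit run app.py"; on a Hugging Face Space using the Streamlit SDK, this requirements file is installed automatically at build time.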