Spaces: Running

Clement Vachet committed · Commit 1daf9a1 · 1 parent: e5b579b

Update list of LLM models and simplify LLM use

app.py CHANGED
@@ -10,7 +10,6 @@ from langchain.chains import ConversationChain
 from langchain.memory import ConversationBufferMemory
 from langchain_huggingface import HuggingFaceEndpoint
 
-
 from pathlib import Path
 import chromadb
 from unidecode import unidecode
@@ -29,15 +28,14 @@ from dotenv import load_dotenv
 _ = load_dotenv()
 huggingfacehub_api_token = os.environ.get("HUGGINGFACE_API_KEY")
 
+
 # default_persist_directory = './chroma_HF/'
-
-
-
-
-
-
-# ]
-list_llm = ["mistralai/Mistral-7B-Instruct-v0.2"]
+list_llm = ["mistralai/Mistral-7B-Instruct-v0.3", "microsoft/Phi-3.5-mini-instruct", \
+    "meta-llama/Llama-3.2-3B-Instruct", "meta-llama/Llama-3.2-1B-Instruct", "meta-llama/Meta-Llama-3-8B-Instruct", \
+    "HuggingFaceH4/zephyr-7b-beta", "HuggingFaceH4/zephyr-7b-gemma-v0.1", \
+    "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "google/gemma-2-2b-it", "google/gemma-2-9b-it", \
+    "Qwen/Qwen2.5-1.5B-Instruct", "Qwen/Qwen2.5-3B-Instruct", "Qwen/Qwen2.5-7B-Instruct",
+]
 list_llm_simple = [os.path.basename(llm) for llm in list_llm]
 
 
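The UI labels are derived from the repo ids by taking everything after the last slash. A quick, self-contained check of what list_llm_simple ends up holding (model list abridged):

import os

# Abridged copy of the updated list of Hub repo ids
list_llm = ["mistralai/Mistral-7B-Instruct-v0.3", "Qwen/Qwen2.5-7B-Instruct"]

# os.path.basename() treats each repo id like a path, keeping the
# segment after the final "/" as a short display name for the dropdown.
list_llm_simple = [os.path.basename(llm) for llm in list_llm]
print(list_llm_simple)  # ['Mistral-7B-Instruct-v0.3', 'Qwen2.5-7B-Instruct']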
@@ -61,9 +59,10 @@ def create_db(splits, collection_name):
     """Create embeddings and vector database"""
 
     embedding = HuggingFaceEmbeddings(
-        model_name="sentence-transformers/paraphrase-multilingual-mpnet-base-v2",
-
-
+        # model_name="sentence-transformers/paraphrase-multilingual-mpnet-base-v2",
+        # model_name="sentence-transformers/all-MiniLM-L6-v2",
+        # model_kwargs={'device': 'cpu'},
+        # encode_kwargs={'normalize_embeddings': False}
     )
     new_client = chromadb.EphemeralClient()
     vectordb = Chroma.from_documents(
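With every model_name override now commented out, HuggingFaceEmbeddings() falls back to its built-in default sentence-transformers model. A minimal sketch of how create_db plausibly wires this into Chroma; the Chroma.from_documents keyword names follow the usual langchain pattern and are assumptions here, since the diff truncates that call:

import chromadb
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma

def create_db(splits, collection_name):
    """Create embeddings and an in-memory vector database (sketch)."""
    # No model_name passed: the wrapper uses its default
    # sentence-transformers embedding model.
    embedding = HuggingFaceEmbeddings()
    # EphemeralClient keeps the collection in memory; nothing is persisted.
    new_client = chromadb.EphemeralClient()
    vectordb = Chroma.from_documents(
        documents=splits,            # document chunks from the app's splitter
        embedding=embedding,
        client=new_client,
        collection_name=collection_name,
    )
    return vectordb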
@@ -88,76 +87,15 @@ def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db, pr
     # Warning: langchain issue
     # URL: https://github.com/langchain-ai/langchain/issues/6080
 
-
-    # if llm_model == "mistralai/Mixtral-8x7B-Instruct-v0.1":
-    #     llm = HuggingFaceEndpoint(
-    #         repo_id=llm_model,
-    #         # model_kwargs={"temperature": temperature, "max_new_tokens": max_tokens, "top_k": top_k, "load_in_8bit": True}
-    #         temperature = temperature,
-    #         max_new_tokens = max_tokens,
-    #         top_k = top_k,
-    #         load_in_8bit = True,
-    #         huggingfacehub_api_token=huggingfacehub_api_token,
-    #     )
-    # elif llm_model in ["HuggingFaceH4/zephyr-7b-gemma-v0.1","mosaicml/mpt-7b-instruct"]:
-    #     raise gr.Error("LLM model is too large to be loaded automatically on free inference endpoint")
-    #     llm = HuggingFaceEndpoint(
-    #         repo_id=llm_model,
-    #         temperature = temperature,
-    #         max_new_tokens = max_tokens,
-    #         top_k = top_k,
-    #         huggingfacehub_api_token=huggingfacehub_api_token,
-    #     )
-    # elif llm_model == "microsoft/phi-2":
-    #     # raise gr.Error("phi-2 model requires 'trust_remote_code=True', currently not supported by langchain HuggingFaceHub...")
-    #     llm = HuggingFaceEndpoint(
-    #         repo_id=llm_model,
-    #         # model_kwargs={"temperature": temperature, "max_new_tokens": max_tokens, "top_k": top_k, "trust_remote_code": True, "torch_dtype": "auto"}
-    #         temperature = temperature,
-    #         max_new_tokens = max_tokens,
-    #         top_k = top_k,
-    #         trust_remote_code = True,
-    #         torch_dtype = "auto",
-    #         huggingfacehub_api_token=huggingfacehub_api_token,
-    #     )
-    # elif llm_model == "TinyLlama/TinyLlama-1.1B-Chat-v1.0":
-    #     llm = HuggingFaceEndpoint(
-    #         repo_id=llm_model,
-    #         # model_kwargs={"temperature": temperature, "max_new_tokens": 250, "top_k": top_k}
-    #         temperature = temperature,
-    #         max_new_tokens = 250,
-    #         top_k = top_k,
-    #         huggingfacehub_api_token=huggingfacehub_api_token,
-    #     )
-    # elif llm_model == "meta-llama/Llama-2-7b-chat-hf":
-    #     raise gr.Error("Llama-2-7b-chat-hf model requires a Pro subscription...")
-    #     llm = HuggingFaceEndpoint(
-    #         repo_id=llm_model,
-    #         # model_kwargs={"temperature": temperature, "max_new_tokens": max_tokens, "top_k": top_k}
-    #         temperature = temperature,
-    #         max_new_tokens = max_tokens,
-    #         top_k = top_k,
-    #         huggingfacehub_api_token=huggingfacehub_api_token,
-    #     )
-    # else:
-    #     llm = HuggingFaceEndpoint(
-    #         repo_id=llm_model,
-    #         # model_kwargs={"temperature": temperature, "max_new_tokens": max_tokens, "top_k": top_k, "trust_remote_code": True, "torch_dtype": "auto"}
-    #         # model_kwargs={"temperature": temperature, "max_new_tokens": max_tokens, "top_k": top_k}
-    #         temperature = temperature,
-    #         max_new_tokens = max_tokens,
-    #         top_k = top_k,
-    #         huggingfacehub_api_token=huggingfacehub_api_token,
-    #     )
+
     llm = HuggingFaceEndpoint(
-
-
-
-
-
-
-
-    )
+        repo_id=llm_model,
+        task = "text-generation",
+        temperature = temperature,
+        max_new_tokens = max_tokens,
+        top_k = top_k,
+        huggingfacehub_api_token=huggingfacehub_api_token,
+    )
 
     progress(0.75, desc="Defining buffer memory...")
     memory = ConversationBufferMemory(
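The commit's main simplification: the per-model if/elif ladder is gone, and every entry of list_llm goes through the same serverless endpoint call, with the task set explicitly. A minimal standalone sketch; the parameter values below are illustrative (in app.py they come from the Gradio sliders):

import os
from langchain_huggingface import HuggingFaceEndpoint

llm_model = "mistralai/Mistral-7B-Instruct-v0.3"   # any entry of list_llm
temperature, max_tokens, top_k = 0.7, 1024, 3      # illustrative values

llm = HuggingFaceEndpoint(
    repo_id=llm_model,
    task="text-generation",      # set explicitly rather than auto-detected
    temperature=temperature,
    max_new_tokens=max_tokens,
    top_k=top_k,
    huggingfacehub_api_token=os.environ.get("HUGGINGFACE_API_KEY"),
)
print(llm.invoke("Summarize retrieval-augmented generation in one sentence."))

Because the Inference API routes on repo_id alone, model-specific kwargs such as load_in_8bit or trust_remote_code from the deleted branches are no longer needed.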
@@ -173,7 +111,7 @@ def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db, pr
         retriever=retriever,
         chain_type="stuff",
         memory=memory,
-        # combine_docs_chain_kwargs={"prompt":
+        # combine_docs_chain_kwargs={"prompt": rag_prompt},
         return_source_documents=True,
         #return_generated_question=False,
         verbose=False,
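The kwargs in this last hunk belong to the chain construction, presumably a ConversationalRetrievalChain.from_llm call. A sketch of the surrounding code, assuming llm and vector_db are the endpoint and Chroma store from the snippets above; the memory_key/output_key values and the rag_prompt name are assumptions not shown in this diff:

from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory

memory = ConversationBufferMemory(
    memory_key="chat_history",   # assumed key names
    output_key="answer",
    return_messages=True,
)
qa_chain = ConversationalRetrievalChain.from_llm(
    llm,
    retriever=vector_db.as_retriever(),
    chain_type="stuff",          # concatenate retrieved chunks into one prompt
    memory=memory,
    # combine_docs_chain_kwargs={"prompt": rag_prompt},  # rag_prompt: hypothetical custom PromptTemplate
    return_source_documents=True,
    verbose=False,
)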
|