Clement Vachet committed
Commit 1daf9a1 · 1 Parent(s): e5b579b

Update list of LLM models and simplify LLM use

Files changed (1): app.py (+20 -82)
app.py CHANGED
@@ -10,7 +10,6 @@ from langchain.chains import ConversationChain
 from langchain.memory import ConversationBufferMemory
 from langchain_huggingface import HuggingFaceEndpoint
 
-
 from pathlib import Path
 import chromadb
 from unidecode import unidecode
@@ -29,15 +28,14 @@ from dotenv import load_dotenv
 _ = load_dotenv()
 huggingfacehub_api_token = os.environ.get("HUGGINGFACE_API_KEY")
 
+
 # default_persist_directory = './chroma_HF/'
-# list_llm = ["mistralai/Mistral-7B-Instruct-v0.2", "mistralai/Mixtral-8x7B-Instruct-v0.1", "mistralai/Mistral-7B-Instruct-v0.1", \
-#     "google/gemma-7b-it", "google/gemma-2b-it", \
-#     "HuggingFaceH4/zephyr-7b-beta", "HuggingFaceH4/zephyr-7b-gemma-v0.1", \
-#     "meta-llama/Llama-2-7b-chat-hf", "microsoft/phi-2", \
-#     "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "mosaicml/mpt-7b-instruct", "tiiuae/falcon-7b-instruct", \
-#     "google/flan-t5-xxl"
-# ]
-list_llm = ["mistralai/Mistral-7B-Instruct-v0.2"]
+list_llm = ["mistralai/Mistral-7B-Instruct-v0.3", "microsoft/Phi-3.5-mini-instruct", \
+    "meta-llama/Llama-3.2-3B-Instruct", "meta-llama/Llama-3.2-1B-Instruct", "meta-llama/Meta-Llama-3-8B-Instruct", \
+    "HuggingFaceH4/zephyr-7b-beta", "HuggingFaceH4/zephyr-7b-gemma-v0.1", \
+    "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "google/gemma-2-2b-it", "google/gemma-2-9b-it", \
+    "Qwen/Qwen2.5-1.5B-Instruct", "Qwen/Qwen2.5-3B-Instruct", "Qwen/Qwen2.5-7B-Instruct",
+]
 list_llm_simple = [os.path.basename(llm) for llm in list_llm]
 
 
@@ -61,9 +59,10 @@ def create_db(splits, collection_name):
     """Create embeddings and vector database"""
 
     embedding = HuggingFaceEmbeddings(
-        model_name="sentence-transformers/paraphrase-multilingual-mpnet-base-v2",
-        model_kwargs={'device': 'cpu'},
-        encode_kwargs={'normalize_embeddings': False}
+        # model_name="sentence-transformers/paraphrase-multilingual-mpnet-base-v2",
+        # model_name="sentence-transformers/all-MiniLM-L6-v2",
+        # model_kwargs={'device': 'cpu'},
+        # encode_kwargs={'normalize_embeddings': False}
     )
     new_client = chromadb.EphemeralClient()
     vectordb = Chroma.from_documents(
@@ -88,76 +87,15 @@ def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db, pr
     # Warning: langchain issue
     # URL: https://github.com/langchain-ai/langchain/issues/6080
 
-    # WARNING - simplify LLM use
-    # if llm_model == "mistralai/Mixtral-8x7B-Instruct-v0.1":
-    #     llm = HuggingFaceEndpoint(
-    #         repo_id=llm_model,
-    #         # model_kwargs={"temperature": temperature, "max_new_tokens": max_tokens, "top_k": top_k, "load_in_8bit": True}
-    #         temperature=temperature,
-    #         max_new_tokens=max_tokens,
-    #         top_k=top_k,
-    #         load_in_8bit=True,
-    #         huggingfacehub_api_token=huggingfacehub_api_token,
-    #     )
-    # elif llm_model in ["HuggingFaceH4/zephyr-7b-gemma-v0.1", "mosaicml/mpt-7b-instruct"]:
-    #     raise gr.Error("LLM model is too large to be loaded automatically on free inference endpoint")
-    #     llm = HuggingFaceEndpoint(
-    #         repo_id=llm_model,
-    #         temperature=temperature,
-    #         max_new_tokens=max_tokens,
-    #         top_k=top_k,
-    #         huggingfacehub_api_token=huggingfacehub_api_token,
-    #     )
-    # elif llm_model == "microsoft/phi-2":
-    #     # raise gr.Error("phi-2 model requires 'trust_remote_code=True', currently not supported by langchain HuggingFaceHub...")
-    #     llm = HuggingFaceEndpoint(
-    #         repo_id=llm_model,
-    #         # model_kwargs={"temperature": temperature, "max_new_tokens": max_tokens, "top_k": top_k, "trust_remote_code": True, "torch_dtype": "auto"}
-    #         temperature=temperature,
-    #         max_new_tokens=max_tokens,
-    #         top_k=top_k,
-    #         trust_remote_code=True,
-    #         torch_dtype="auto",
-    #         huggingfacehub_api_token=huggingfacehub_api_token,
-    #     )
-    # elif llm_model == "TinyLlama/TinyLlama-1.1B-Chat-v1.0":
-    #     llm = HuggingFaceEndpoint(
-    #         repo_id=llm_model,
-    #         # model_kwargs={"temperature": temperature, "max_new_tokens": 250, "top_k": top_k}
-    #         temperature=temperature,
-    #         max_new_tokens=250,
-    #         top_k=top_k,
-    #         huggingfacehub_api_token=huggingfacehub_api_token,
-    #     )
-    # elif llm_model == "meta-llama/Llama-2-7b-chat-hf":
-    #     raise gr.Error("Llama-2-7b-chat-hf model requires a Pro subscription...")
-    #     llm = HuggingFaceEndpoint(
-    #         repo_id=llm_model,
-    #         # model_kwargs={"temperature": temperature, "max_new_tokens": max_tokens, "top_k": top_k}
-    #         temperature=temperature,
-    #         max_new_tokens=max_tokens,
-    #         top_k=top_k,
-    #         huggingfacehub_api_token=huggingfacehub_api_token,
-    #     )
-    # else:
-    #     llm = HuggingFaceEndpoint(
-    #         repo_id=llm_model,
-    #         # model_kwargs={"temperature": temperature, "max_new_tokens": max_tokens, "top_k": top_k, "trust_remote_code": True, "torch_dtype": "auto"}
-    #         # model_kwargs={"temperature": temperature, "max_new_tokens": max_tokens, "top_k": top_k}
-    #         temperature=temperature,
-    #         max_new_tokens=max_tokens,
-    #         top_k=top_k,
-    #         huggingfacehub_api_token=huggingfacehub_api_token,
-    #     )
+
     llm = HuggingFaceEndpoint(
-        repo_id=llm_model,
-        # model_kwargs={"temperature": temperature, "max_new_tokens": max_tokens, "top_k": top_k, "trust_remote_code": True, "torch_dtype": "auto"}
-        # model_kwargs={"temperature": temperature, "max_new_tokens": max_tokens, "top_k": top_k}
-        temperature=temperature,
-        max_new_tokens=max_tokens,
-        top_k=top_k,
-        huggingfacehub_api_token=huggingfacehub_api_token,
-    )
+        repo_id=llm_model,
+        task="text-generation",
+        temperature=temperature,
+        max_new_tokens=max_tokens,
+        top_k=top_k,
+        huggingfacehub_api_token=huggingfacehub_api_token,
+    )
 
     progress(0.75, desc="Defining buffer memory...")
     memory = ConversationBufferMemory(
@@ -173,7 +111,7 @@ def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db, pr
         retriever=retriever,
        chain_type="stuff",
         memory=memory,
-        # combine_docs_chain_kwargs={"prompt": your_prompt})
+        # combine_docs_chain_kwargs={"prompt": rag_prompt},
         return_source_documents=True,
         #return_generated_question=False,
         verbose=False,
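
Note: below is a minimal sketch of the simplified, model-agnostic endpoint call this commit converges on, runnable outside the Gradio app. The model choice and sampling values are illustrative rather than taken from the commit; HUGGINGFACE_API_KEY follows the app's own environment-variable convention, and gated checkpoints such as the Llama models additionally require access approval on the Hub.

import os
from langchain_huggingface import HuggingFaceEndpoint

# Any entry from the updated list_llm can be passed as repo_id;
# the single code path replaces the old per-model if/elif branches.
llm = HuggingFaceEndpoint(
    repo_id="mistralai/Mistral-7B-Instruct-v0.3",  # illustrative choice
    task="text-generation",
    temperature=0.7,      # illustrative sampling values
    max_new_tokens=256,
    top_k=3,
    huggingfacehub_api_token=os.environ.get("HUGGINGFACE_API_KEY"),
)
print(llm.invoke("Summarize what a conversational retrieval chain does."))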