Sébastien De Greef committed • 95888be
Parent(s): 40802c2

Add new LLM models and update main.py to support model selection

Files changed:
- llama38b.Modelfile +17 -0
- main.py +39 -7
- mistral7b.Modelfile +7 -0
- models.txt +6 -0
- start_server.sh +6 -2
llama38b.Modelfile ADDED
@@ -0,0 +1,17 @@
+FROM llama3:8b
+
+TEMPLATE """{{ if .System }}<|start_header_id|>system<|end_header_id|>
+
+{{ .System }}<|eot_id|>{{ end }}{{ if .Prompt }}<|start_header_id|>user<|end_header_id|>
+
+{{ .Prompt }}<|eot_id|>{{ end }}<|start_header_id|>assistant<|end_header_id|>
+
+{{ .Response }}<|eot_id|>"""
+
+
+PARAMETER num_ctx 8192
+
+PARAMETER stop "<|start_header_id|>"
+PARAMETER stop "<|end_header_id|>"
+PARAMETER stop "<|eot_id|>"
+PARAMETER stop "<|reserved_special_token"
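Once start_server.sh (further down in this commit) has registered the Modelfile above as llama38k:8b, the model can be driven through the same langchain_community Ollama wrapper that main.py uses. A minimal sketch, assuming a local "ollama serve" is running and the create step has completed:

from langchain_community.llms import Ollama

# Assumes `ollama serve` is reachable on its default port and that
# `llama38k:8b` was created from the Modelfile above.
llm = Ollama(model="llama38k:8b")
print(llm.invoke("Give one fun fact about llamas."))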
main.py CHANGED
@@ -2,9 +2,25 @@ from langchain.schema import AIMessage, HumanMessage
 import gradio as gr
 from langchain_community.llms import Ollama
 
-
+def parse_model_names(path):
+    """Parses the model file to extract value-label pairs for the dropdown."""
+    choices = []
+    with open(path, 'r') as file:
+        lines = file.readlines()
+        for line in lines:
+            if '#' in line:
+                value, description = line.split('#', 1)
+                value = value.strip()
+                description = description.strip()
+                choices.append((description, value))
+    return choices
 
-def predict(message, history):
+models = parse_model_names("models.txt")
+
+
+def predict(message, history, model):
+    print("Predicting", message, history, models[model][1]),
+    llm = Ollama(model=models[model][1], timeout=1000)  # Instantiate Ollama with the selected model
     history_langchain_format = []
     for human, ai in history:
         history_langchain_format.append(HumanMessage(content=human))
@@ -12,15 +28,31 @@ def predict(message, history):
     history_langchain_format.append(HumanMessage(content=message))
     try:
         chat_response = llm.invoke(history_langchain_format)
-    except
+    except Exception as e:  # Use a general exception handler here
         chat_response = "Error: " + str(e)
 
     return chat_response
 
-def run():
-    demo = gr.ChatInterface(predict)
-    demo.launch(server_name="0.0.0.0", server_port=7860)
 
 
+with gr.Blocks(fill_height=True) as demo:
+    with gr.Row():
+        model_dropdown = gr.Dropdown(label="Select LLM Model", choices=models, info="Select the model you want to chat with", type="index")
+
+    # We use a state variable to track the current model
+    model_state = gr.State(value=model_dropdown.value)
+
+    def update_model(selected_model):
+        print("Model selected", selected_model)
+        model_state.value = selected_model
+        return selected_model
+
+
+    chat = gr.ChatInterface(predict,
+                            additional_inputs=[model_dropdown],
+
+    )
+
+
 if __name__ == "__main__":
-
+    demo.launch(server_name="0.0.0.0", server_port=7860)
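As a quick smoke test, the updated predict function can also be exercised outside Gradio. A hedged sketch, assuming models.txt (added below) sits next to main.py and an Ollama server with the listed models is reachable; importing main builds the Blocks UI but does not launch it:

from main import models, predict

print(models)                    # [(description, value), ...] parsed from models.txt
print(predict("Hello!", [], 0))  # 0 = dropdown index of the first entry in models.txt

Because the dropdown is declared with type="index", predict receives the position of the selected choice and looks up the actual model tag via models[model][1].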
mistral7b.Modelfile ADDED
@@ -0,0 +1,7 @@
+FROM mistral:7b
+Wh
+TEMPLATE """[INST] {{ .System }} {{ .Prompt }} [/INST]"""
+PARAMETER stop "[INST]"
+PARAMETER stop "[/INST]"
+
+PARAMETER num_ctx 4096
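For illustration, a rough sketch of the prompt string the TEMPLATE above produces for one system/user turn; the example values are made up:

# Illustrative rendering of the Mistral instruct template defined above.
system = "You are a concise assistant."        # stands in for {{ .System }}
prompt = "Summarize this commit in one line."  # stands in for {{ .Prompt }}
print(f"[INST] {system} {prompt} [/INST]")
# -> [INST] You are a concise assistant. Summarize this commit in one line. [/INST]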
models.txt ADDED
@@ -0,0 +1,6 @@
+gemma:2b # Gemma 2b
+gemma:7b # Gemma 7b
+mistral:7b # Mistral 7b
+mistral4k:7b # Mistral 7b (4096 context)
+llama3:8b # Llama3 8b
+llama38k:8b # Llama3 8b (8192 context)
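To show how these entries reach the UI, a hedged sketch of what parse_model_names from main.py should return for this file; the pairs are (description, value) and are used directly as the dropdown choices:

from main import parse_model_names  # helper added in this commit

choices = parse_model_names("models.txt")
print(choices)
# Expected, assuming the file is read exactly as committed:
# [('Gemma 2b', 'gemma:2b'),
#  ('Gemma 7b', 'gemma:7b'),
#  ('Mistral 7b', 'mistral:7b'),
#  ('Mistral 7b (4096 context)', 'mistral4k:7b'),
#  ('Llama3 8b', 'llama3:8b'),
#  ('Llama3 8b (8192 context)', 'llama38k:8b')]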
start_server.sh CHANGED
@@ -1,9 +1,13 @@
 #!/bin/bash
-# Start the background task
 ollama serve &
 
 ollama pull mistral:7b > /dev/null 2>&1
+ollama create mistral4k:7b --file .\mistral7b.Modelfile > /dev/null 2>&1
+
 ollama pull llama3:8b > /dev/null 2>&1
+ollama create llama38k:8b --file .\llama38b.Modelfile > /dev/null 2>&1
+
+ollama pull gemma:2b > /dev/null 2>&1
+ollama pull gemma:7b > /dev/null 2>&1
 
-# Start the Gradio app
 python main.py
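After the script finishes pulling and creating, a hedged Python sketch for checking that the six tags listed in models.txt are actually registered with the local Ollama server (assumes the default port 11434; the endpoint follows Ollama's public REST API):

import json
import urllib.request

# GET /api/tags lists the models known to the local Ollama server.
with urllib.request.urlopen("http://localhost:11434/api/tags") as resp:
    installed = {m["name"] for m in json.load(resp)["models"]}

expected = {"gemma:2b", "gemma:7b", "mistral:7b", "mistral4k:7b",
            "llama3:8b", "llama38k:8b"}
print("missing models:", expected - installed or "none")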