Sébastien De Greef committed on
Commit 95888be
1 Parent(s): 40802c2

Add new LLM models and update main.py to support model selection

Files changed (5)
  1. llama38b.Modelfile +17 -0
  2. main.py +39 -7
  3. mistral7b.Modelfile +6 -0
  4. models.txt +6 -0
  5. start_server.sh +6 -2
llama38b.Modelfile ADDED
@@ -0,0 +1,17 @@
+FROM llama3:8b
+
+TEMPLATE """{{ if .System }}<|start_header_id|>system<|end_header_id|>
+
+{{ .System }}<|eot_id|>{{ end }}{{ if .Prompt }}<|start_header_id|>user<|end_header_id|>
+
+{{ .Prompt }}<|eot_id|>{{ end }}<|start_header_id|>assistant<|end_header_id|>
+
+{{ .Response }}<|eot_id|>"""
+
+
+PARAMETER num_ctx 8192
+
+PARAMETER stop "<|start_header_id|>"
+PARAMETER stop "<|end_header_id|>"
+PARAMETER stop "<|eot_id|>"
+PARAMETER stop "<|reserved_special_token"
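As a quick sanity check of the custom build, here is a minimal sketch of invoking it through the same LangChain wrapper main.py uses. It assumes an Ollama server is already running and that the model has been registered as llama38k:8b via ollama create (as done in start_server.sh); the prompt text is purely illustrative.

from langchain_community.llms import Ollama

# Assumes `ollama serve` is running and `llama38k:8b` was created from this Modelfile.
llm = Ollama(model="llama38k:8b", timeout=1000)
print(llm.invoke("Reply with a single word: ping"))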
main.py CHANGED
@@ -2,9 +2,25 @@ from langchain.schema import AIMessage, HumanMessage
 import gradio as gr
 from langchain_community.llms import Ollama
 
-llm = Ollama(model="llama3:8b", timeout=1000)
+def parse_model_names(path):
+    """Parses the model file to extract value-label pairs for the dropdown."""
+    choices = []
+    with open(path, 'r') as file:
+        lines = file.readlines()
+        for line in lines:
+            if '#' in line:
+                value, description = line.split('#', 1)
+                value = value.strip()
+                description = description.strip()
+                choices.append((description, value))
+    return choices
 
-def predict(message, history):
+models = parse_model_names("models.txt")
+
+
+def predict(message, history, model):
+    print("Predicting", message, history, models[model][1])
+    llm = Ollama(model=models[model][1], timeout=1000)  # Instantiate Ollama with the selected model
     history_langchain_format = []
     for human, ai in history:
         history_langchain_format.append(HumanMessage(content=human))
@@ -12,15 +28,31 @@ def predict(message, history):
     history_langchain_format.append(HumanMessage(content=message))
     try:
         chat_response = llm.invoke(history_langchain_format)
-    except chat_response as e:
+    except Exception as e:  # Use a general exception handler here
         chat_response = "Error: " + str(e)
 
     return chat_response
 
-def run():
-    demo = gr.ChatInterface(predict)
-    demo.launch(server_name="0.0.0.0", server_port=7860)
 
 
+with gr.Blocks(fill_height=True) as demo:
+    with gr.Row():
+        model_dropdown = gr.Dropdown(label="Select LLM Model", choices=models, info="Select the model you want to chat with", type="index")
+
+    # We use a state variable to track the current model
+    model_state = gr.State(value=model_dropdown.value)
+
+    def update_model(selected_model):
+        print("Model selected", selected_model)
+        model_state.value = selected_model
+        return selected_model
+
+
+    chat = gr.ChatInterface(predict,
+        additional_inputs=[model_dropdown],
+
+    )
+
+
 if __name__ == "__main__":
-    run()
+    demo.launch(server_name="0.0.0.0", server_port=7860)
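For clarity, a small worked example of the lookup predict() performs: with type="index" the Gradio dropdown passes the selected row's position, and models[index][1] resolves it to the Ollama tag. The entries below mirror a few lines from models.txt; the selected index is hypothetical.

# parse_model_names("models.txt") returns (description, value) pairs, e.g.:
models = [
    ("Gemma 2b", "gemma:2b"),
    ("Mistral 7b (4096 context)", "mistral4k:7b"),
    ("Llama3 8b (8192 context)", "llama38k:8b"),
]

# With type="index", the dropdown hands predict() an integer position,
# so selecting the third entry yields the tag used to instantiate Ollama.
selected = 2
print(models[selected][1])  # -> llama38k:8b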
mistral7b.Modelfile ADDED
@@ -0,0 +1,6 @@
+FROM mistral:7b
+TEMPLATE """[INST] {{ .System }} {{ .Prompt }} [/INST]"""
+PARAMETER stop "[INST]"
+PARAMETER stop "[/INST]"
+
+PARAMETER num_ctx 4096
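For reference, a short sketch of what the TEMPLATE above expands to once Ollama substitutes the placeholders; the system and user strings are made up for illustration.

# Illustrates the rendered prompt; Ollama performs this substitution itself.
system = "You are a concise assistant."
prompt = "What is the capital of France?"
print(f"[INST] {system} {prompt} [/INST]")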
models.txt ADDED
@@ -0,0 +1,6 @@
+gemma:2b # Gemma 2b
+gemma:7b # Gemma 7b
+mistral:7b # Mistral 7b
+mistral4k:7b # Mistral 7b (4096 context)
+llama3:8b # Llama3 8b
+llama38k:8b # Llama3 8b (8192 context)
start_server.sh CHANGED
@@ -1,9 +1,13 @@
 #!/bin/bash
-# Start the background task
 ollama serve &
 
 ollama pull mistral:7b > /dev/null 2>&1
+ollama create mistral4k:7b --file ./mistral7b.Modelfile > /dev/null 2>&1
+
 ollama pull llama3:8b > /dev/null 2>&1
+ollama create llama38k:8b --file ./llama38b.Modelfile > /dev/null 2>&1
+
+ollama pull gemma:2b > /dev/null 2>&1
+ollama pull gemma:7b > /dev/null 2>&1
 
-# Start the Gradio app
 python main.py
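Since the pull and create steps above are silenced, a small check can confirm the models are actually available before the Gradio app starts. This is a sketch only: it assumes Ollama's default REST endpoint on localhost:11434 and its /api/tags listing, neither of which is part of this commit.

import json
import urllib.request

# Query the locally running Ollama server for the models it knows about.
with urllib.request.urlopen("http://localhost:11434/api/tags") as resp:
    available = {m["name"] for m in json.load(resp).get("models", [])}

for tag in ["mistral:7b", "mistral4k:7b", "llama3:8b", "llama38k:8b", "gemma:2b", "gemma:7b"]:
    status = "ok" if any(name.startswith(tag) for name in available) else "missing"
    print(tag, status)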