Sébastien De Greef committed • 95888be
Parent(s): 40802c2

Add new LLM models and update main.py to support model selection

Files changed:
- llama38b.Modelfile +17 -0
- main.py +39 -7
- mistral7b.Modelfile +7 -0
- models.txt +6 -0
- start_server.sh +6 -2
llama38b.Modelfile ADDED
@@ -0,0 +1,17 @@
+FROM llama3:8b
+
+TEMPLATE """{{ if .System }}<|start_header_id|>system<|end_header_id|>
+
+{{ .System }}<|eot_id|>{{ end }}{{ if .Prompt }}<|start_header_id|>user<|end_header_id|>
+
+{{ .Prompt }}<|eot_id|>{{ end }}<|start_header_id|>assistant<|end_header_id|>
+
+{{ .Response }}<|eot_id|>"""
+
+
+PARAMETER num_ctx 8192
+
+PARAMETER stop "<|start_header_id|>"
+PARAMETER stop "<|end_header_id|>"
+PARAMETER stop "<|eot_id|>"
+PARAMETER stop "<|reserved_special_token"
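Once start_server.sh (further down in this commit) has registered the Modelfile above as llama38k:8b, the model can be driven through the same langchain_community Ollama wrapper that main.py uses. A minimal sketch, assuming a local "ollama serve" is running and the create step has completed:

from langchain_community.llms import Ollama

# Assumes `ollama serve` is reachable on its default port and that
# `llama38k:8b` was created from the Modelfile above.
llm = Ollama(model="llama38k:8b")
print(llm.invoke("Give one fun fact about llamas."))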
main.py CHANGED
@@ -2,9 +2,25 @@ from langchain.schema import AIMessage, HumanMessage
 import gradio as gr
 from langchain_community.llms import Ollama
 
-
+def parse_model_names(path):
+    """Parses the model file to extract value-label pairs for the dropdown."""
+    choices = []
+    with open(path, 'r') as file:
+        lines = file.readlines()
+        for line in lines:
+            if '#' in line:
+                value, description = line.split('#', 1)
+                value = value.strip()
+                description = description.strip()
+                choices.append((description, value))
+    return choices
 
-def predict(message, history):
+models = parse_model_names("models.txt")
+
+
+def predict(message, history, model):
+    print("Predicting", message, history, models[model][1]),
+    llm = Ollama(model=models[model][1], timeout=1000)  # Instantiate Ollama with the selected model
     history_langchain_format = []
     for human, ai in history:
         history_langchain_format.append(HumanMessage(content=human))
@@ -12,15 +28,31 @@ def predict(message, history):
     history_langchain_format.append(HumanMessage(content=message))
     try:
         chat_response = llm.invoke(history_langchain_format)
-    except
+    except Exception as e:  # Use a general exception handler here
         chat_response = "Error: " + str(e)
 
     return chat_response
 
-def run():
-    demo = gr.ChatInterface(predict)
-    demo.launch(server_name="0.0.0.0", server_port=7860)
 
 
+with gr.Blocks(fill_height=True) as demo:
+    with gr.Row():
+        model_dropdown = gr.Dropdown(label="Select LLM Model", choices=models, info="Select the model you want to chat with", type="index")
+
+    # We use a state variable to track the current model
+    model_state = gr.State(value=model_dropdown.value)
+
+    def update_model(selected_model):
+        print("Model selected", selected_model)
+        model_state.value = selected_model
+        return selected_model
+
+
+    chat = gr.ChatInterface(predict,
+                            additional_inputs=[model_dropdown],
+
+    )
+
+
 if __name__ == "__main__":
-
+    demo.launch(server_name="0.0.0.0", server_port=7860)
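As a quick smoke test, the updated predict function can also be exercised outside Gradio. A hedged sketch, assuming models.txt (added below) sits next to main.py and an Ollama server with the listed models is reachable; importing main builds the Blocks UI but does not launch it:

from main import models, predict

print(models)                    # [(description, value), ...] parsed from models.txt
print(predict("Hello!", [], 0))  # 0 = dropdown index of the first entry in models.txt

Because the dropdown is declared with type="index", predict receives the position of the selected choice and looks up the actual model tag via models[model][1].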
mistral7b.Modelfile ADDED
@@ -0,0 +1,7 @@
+FROM mistral:7b
+Wh
+TEMPLATE """[INST] {{ .System }} {{ .Prompt }} [/INST]"""
+PARAMETER stop "[INST]"
+PARAMETER stop "[/INST]"
+
+PARAMETER num_ctx 4096
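For illustration, a rough sketch of the prompt string the TEMPLATE above produces for one system/user turn; the example values are made up:

# Illustrative rendering of the Mistral instruct template defined above.
system = "You are a concise assistant."        # stands in for {{ .System }}
prompt = "Summarize this commit in one line."  # stands in for {{ .Prompt }}
print(f"[INST] {system} {prompt} [/INST]")
# -> [INST] You are a concise assistant. Summarize this commit in one line. [/INST]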
models.txt ADDED
@@ -0,0 +1,6 @@
+gemma:2b # Gemma 2b
+gemma:7b # Gemma 7b
+mistral:7b # Mistral 7b
+mistral4k:7b # Mistral 7b (4096 context)
+llama3:8b # Llama3 8b
+llama38k:8b # Llama3 8b (8192 context)
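To show how these entries reach the UI, a hedged sketch of what parse_model_names from main.py should return for this file; the pairs are (description, value) and are used directly as the dropdown choices:

from main import parse_model_names  # helper added in this commit

choices = parse_model_names("models.txt")
print(choices)
# Expected, assuming the file is read exactly as committed:
# [('Gemma 2b', 'gemma:2b'),
#  ('Gemma 7b', 'gemma:7b'),
#  ('Mistral 7b', 'mistral:7b'),
#  ('Mistral 7b (4096 context)', 'mistral4k:7b'),
#  ('Llama3 8b', 'llama3:8b'),
#  ('Llama3 8b (8192 context)', 'llama38k:8b')]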
start_server.sh CHANGED
@@ -1,9 +1,13 @@
 #!/bin/bash
-# Start the background task
 ollama serve &
 
 ollama pull mistral:7b > /dev/null 2>&1
+ollama create mistral4k:7b --file .\mistral7b.Modelfile > /dev/null 2>&1
+
 ollama pull llama3:8b > /dev/null 2>&1
+ollama create llama38k:8b --file .\llama38b.Modelfile > /dev/null 2>&1
+
+ollama pull gemma:2b > /dev/null 2>&1
+ollama pull gemma:7b > /dev/null 2>&1
 
-# Start the Gradio app
 python main.py
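After the script finishes pulling and creating, a hedged Python sketch for checking that the six tags listed in models.txt are actually registered with the local Ollama server (assumes the default port 11434; the endpoint follows Ollama's public REST API):

import json
import urllib.request

# GET /api/tags lists the models known to the local Ollama server.
with urllib.request.urlopen("http://localhost:11434/api/tags") as resp:
    installed = {m["name"] for m in json.load(resp)["models"]}

expected = {"gemma:2b", "gemma:7b", "mistral:7b", "mistral4k:7b",
            "llama3:8b", "llama38k:8b"}
print("missing models:", expected - installed or "none")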