lightmate committed on
Commit c6b2b77
Parent: 210ec4a

Update app.py

Files changed (1):
  app.py  +49 -21
app.py CHANGED
@@ -1,7 +1,6 @@
 import os
 import torch
 import gradio as gr
-import ipywidgets as widgets
 from pathlib import Path
 from transformers import AutoConfig, AutoTokenizer
 from optimum.intel.openvino import OVModelForCausalLM
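Dropping `ipywidgets` is the right call: those widgets only render inside Jupyter, while this Space runs app.py as a standalone script, so every control must be a Gradio component. For comparison, a minimal sketch of the same dropdown in both toolkits (the language options here are illustrative):

import gradio as gr
# import ipywidgets as widgets  # notebook-only; renders nothing in a plain script
# widgets.Dropdown(options=langs, value=langs[0], description="Model Language:")  # removed style

langs = ["English", "Chinese"]
dropdown = gr.Dropdown(choices=langs, value=langs[0], label="Model Language")  # Gradio equivalent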
@@ -18,7 +17,6 @@ import requests
 
 # Define the model loading function (same as in your notebook)
 def convert_to_int4(model_id, model_configuration, enable_awq=False):
-    # Model conversion logic here (same as in notebook)
     compression_configs = {
         "qwen2.5-0.5b-instruct": {"sym": True, "group_size": 128, "ratio": 1.0},
         "default": {"sym": False, "group_size": 128, "ratio": 0.8},
@@ -45,10 +43,8 @@ def convert_to_int4(model_id, model_configuration, enable_awq=False):
     os.system(export_command)
     return int4_model_dir
 
-
 # Model and tokenizer loading
 def load_model(model_dir, device):
-    # Load model using OpenVINO
     ov_config = {hints.performance_mode(): hints.PerformanceMode.LATENCY, streams.num(): "1", props.cache_dir(): ""}
     core = ov.Core()
     model_name = model_configuration["model_id"]
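Two notes on this hunk. First, `model_name = model_configuration["model_id"]` reads a global `model_configuration` rather than a parameter, which will fail if no global has been set before `load_model` is called. Second, `ov`, `hints`, `streams`, and `props` are imported in the elided top of the file; for reference, the conventional OpenVINO runtime imports behind those names are:

import openvino as ov
import openvino.properties as props
import openvino.properties.hint as hints
import openvino.properties.streams as streams

# ov_config then reads: LATENCY performance hint, a single inference stream,
# and an empty cache_dir (which disables on-disk model caching)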
@@ -64,7 +60,7 @@ def load_model(model_dir, device):
 
     return ov_model, tok
 
-# Define the bot function that interacts with Gradio UI
+# Gradio Interface for Bot interaction
 def bot(history, temperature, top_p, top_k, repetition_penalty, conversation_id):
     input_ids = convert_history_to_token(history)
     if input_ids.shape[1] > 2000:
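Between this hunk and the next, the diff elides the generation loop that fills `partial_text` (the guard above truncates overly long histories before generating). The usual pattern in the OpenVINO chatbot demos runs `generate` on a worker thread and drains a `TextIteratorStreamer`; a minimal sketch, assuming `ov_model` and `tok` are the globals loaded by `load_model`, with an illustrative 256-token cap:

from threading import Thread
from transformers import TextIteratorStreamer

def stream_answer(ov_model, tok, input_ids, temperature, top_p, top_k, repetition_penalty):
    # Yield the partial answer as tokens arrive instead of waiting for the full reply.
    streamer = TextIteratorStreamer(tok, skip_prompt=True, skip_special_tokens=True)
    generate_kwargs = dict(
        input_ids=input_ids,
        max_new_tokens=256,
        temperature=temperature,
        do_sample=temperature > 0.0,
        top_p=top_p,
        top_k=top_k,
        repetition_penalty=repetition_penalty,
        streamer=streamer,
    )
    Thread(target=ov_model.generate, kwargs=generate_kwargs).start()
    partial_text = ""
    for new_text in streamer:
        partial_text += new_text
        yield partial_text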
@@ -99,23 +95,56 @@ def bot(history, temperature, top_p, top_k, repetition_penalty, conversation_id)
     history[-1][1] = partial_text
     yield history
 
-# Gradio interface setup
+# Define a Gradio interface for user interaction
 def create_gradio_interface():
-    model_language = SUPPORTED_LLM_MODELS.keys()  # List of model languages
-    model_id = widgets.Dropdown(options=model_language, value=model_language[0], description="Model Language:")
-
-    # Choose model based on the selected language
-    model_configuration = SUPPORTED_LLM_MODELS[model_language[0]][model_id.value]
-
-    # Prepare model (convert to INT4, etc.)
-    int4_model_dir = convert_to_int4(model_id.value, model_configuration)
-
-    # Load model and tokenizer
-    device = device_widget("CPU")
-    ov_model, tok = load_model(int4_model_dir, device)
-
-    # Create the Gradio app
-    demo = make_demo(run_fn=bot, stop_fn=request_cancel, title=f"OpenVINO Chatbot", language=model_language[0])
+    # Dropdown for selecting model language and model ID
+    model_language = list(SUPPORTED_LLM_MODELS.keys())  # List of model languages
+    model_id = gr.Dropdown(choices=model_language, value=model_language[0], label="Model Language")
+
+    # Once model language is selected, show the respective model IDs
+    def update_model_ids(model_language):
+        model_ids = list(SUPPORTED_LLM_MODELS[model_language].keys())
+        return gr.Dropdown.update(choices=model_ids, value=model_ids[0])
+
+    model_id_selector = gr.Dropdown(choices=model_language, value=model_language[0], label="Model ID")
+    model_id_selector.change(update_model_ids, inputs=model_language, outputs=model_id_selector)
+
+    # Set up a checkbox for enabling AWQ compression
+    enable_awq = gr.Checkbox(value=False, label="Enable AWQ for Compression")
+
+    # Initialize model selection based on language and ID
+    def load_model_on_select(model_language, model_id, enable_awq):
+        model_configuration = SUPPORTED_LLM_MODELS[model_language][model_id]
+        int4_model_dir = convert_to_int4(model_id, model_configuration, enable_awq)
+
+        # Load the model and tokenizer
+        device = device_widget("CPU")  # or any device you want to use
+        ov_model, tok = load_model(int4_model_dir, device)
+
+        # Return the loaded model and tokenizer
+        return ov_model, tok
+
+    # Connect model selection UI to load model dynamically
+    load_button = gr.Button("Load Model")
+    load_button.click(load_model_on_select, inputs=[model_language, model_id, enable_awq], outputs=[gr.Textbox(label="Model Status")])
+
+    # Create the Gradio chatbot interface
+    chatbot = gr.Chatbot()
+
+    # Parameters for bot generation
+    temperature = gr.Slider(minimum=0, maximum=1, step=0.1, label="Temperature", value=0.7)
+    top_p = gr.Slider(minimum=0, maximum=1, step=0.1, label="Top-p", value=0.9)
+    top_k = gr.Slider(minimum=0, maximum=50, step=1, label="Top-k", value=50)
+    repetition_penalty = gr.Slider(minimum=0, maximum=2, step=0.1, label="Repetition Penalty", value=1.0)
+
+    # Run the Gradio interface
+    demo = gr.Interface(
+        fn=bot,
+        inputs=[chatbot, temperature, top_p, top_k, repetition_penalty],
+        outputs=[chatbot],
+        title="OpenVINO Chatbot",
+        live=True
+    )
 
     return demo
 
@@ -123,4 +152,3 @@ def create_gradio_interface():
 if __name__ == "__main__":
     app = create_gradio_interface()
     app.launch(debug=True, share=True)  # share=True for public access
-
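The new `create_gradio_interface` still has wiring problems worth flagging: the components are created outside any `gr.Blocks`/`gr.Interface` context, so `.change()` and `.click()` have no app to attach to; `inputs=model_language` passes a plain Python list where a component is expected, and the `.change()` listener hangs off the Model ID dropdown instead of the language dropdown; `gr.Dropdown.update` is removed in Gradio 4 (return a new `gr.Dropdown(...)` instead); and `gr.Interface(fn=bot, ..., live=True)` is not how a streaming chatbot is driven. A hedged sketch of the same UI restructured with `gr.Blocks` follows; `msg`, `status`, `user`, and the `gr.State` conversation id are illustrative names, not part of this commit, and `bot`'s signature is taken from the hunk above:

def create_gradio_interface():
    languages = list(SUPPORTED_LLM_MODELS.keys())
    initial_ids = list(SUPPORTED_LLM_MODELS[languages[0]].keys())

    with gr.Blocks(title="OpenVINO Chatbot") as demo:  # events must be wired inside a Blocks context
        lang = gr.Dropdown(choices=languages, value=languages[0], label="Model Language")
        model_id = gr.Dropdown(choices=initial_ids, value=initial_ids[0], label="Model ID")
        enable_awq = gr.Checkbox(value=False, label="Enable AWQ for Compression")
        load_button = gr.Button("Load Model")
        status = gr.Textbox(label="Model Status", interactive=False)

        chatbot = gr.Chatbot()
        msg = gr.Textbox(label="Message")
        conversation_id = gr.State("")  # bot() expects a conversation id as its last input
        temperature = gr.Slider(0, 1, value=0.7, step=0.1, label="Temperature")
        top_p = gr.Slider(0, 1, value=0.9, step=0.1, label="Top-p")
        top_k = gr.Slider(0, 50, value=50, step=1, label="Top-k")
        repetition_penalty = gr.Slider(0, 2, value=1.0, step=0.1, label="Repetition Penalty")

        def update_model_ids(language):
            ids = list(SUPPORTED_LLM_MODELS[language].keys())
            return gr.Dropdown(choices=ids, value=ids[0])  # Gradio 4: return a component, not .update()

        lang.change(update_model_ids, inputs=lang, outputs=model_id)  # listen on the *language* dropdown

        def load_selected_model(language, mid, awq):
            global ov_model, tok  # bot() reads these when generating
            cfg = SUPPORTED_LLM_MODELS[language][mid]
            ov_model, tok = load_model(convert_to_int4(mid, cfg, awq), "CPU")
            return f"Loaded {mid} on CPU"

        load_button.click(load_selected_model, inputs=[lang, model_id, enable_awq], outputs=status)

        def user(message, history):
            # Append the user turn; bot() then streams the assistant reply into it
            return "", history + [[message, None]]

        msg.submit(user, [msg, chatbot], [msg, chatbot]).then(
            bot, [chatbot, temperature, top_p, top_k, repetition_penalty, conversation_id], chatbot
        )

    return demo

Compared with `gr.Interface(live=True)`, chaining `.then(bot, ...)` onto the submit event is what lets the generator `bot` stream partial history updates back into the Chatbot component.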