lightmate committed on
Commit c6b2b77
Parent: 210ec4a

Update app.py

Files changed (1):
  app.py  +49 -21
app.py CHANGED
@@ -1,7 +1,6 @@
 import os
 import torch
 import gradio as gr
-import ipywidgets as widgets
 from pathlib import Path
 from transformers import AutoConfig, AutoTokenizer
 from optimum.intel.openvino import OVModelForCausalLM
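Dropping `ipywidgets` is the right call: those widgets only render inside Jupyter, while this Space runs app.py as a standalone script, so every control must be a Gradio component. For comparison, a minimal sketch of the same dropdown in both toolkits (the language options here are illustrative):

import gradio as gr
# import ipywidgets as widgets  # notebook-only; renders nothing in a plain script
# widgets.Dropdown(options=langs, value=langs[0], description="Model Language:")  # removed style

langs = ["English", "Chinese"]
dropdown = gr.Dropdown(choices=langs, value=langs[0], label="Model Language")  # Gradio equivalent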
@@ -18,7 +17,6 @@ import requests
 
 # Define the model loading function (same as in your notebook)
 def convert_to_int4(model_id, model_configuration, enable_awq=False):
-    # Model conversion logic here (same as in notebook)
     compression_configs = {
         "qwen2.5-0.5b-instruct": {"sym": True, "group_size": 128, "ratio": 1.0},
         "default": {"sym": False, "group_size": 128, "ratio": 0.8},
@@ -45,10 +43,8 @@ def convert_to_int4(model_id, model_configuration, enable_awq=False):
     os.system(export_command)
     return int4_model_dir
 
-
 # Model and tokenizer loading
 def load_model(model_dir, device):
-    # Load model using OpenVINO
     ov_config = {hints.performance_mode(): hints.PerformanceMode.LATENCY, streams.num(): "1", props.cache_dir(): ""}
     core = ov.Core()
     model_name = model_configuration["model_id"]
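Two notes on this hunk. First, `model_name = model_configuration["model_id"]` reads a global `model_configuration` rather than a parameter, which will fail if no global has been set before `load_model` is called. Second, `ov`, `hints`, `streams`, and `props` are imported in the elided top of the file; for reference, the conventional OpenVINO runtime imports behind those names are:

import openvino as ov
import openvino.properties as props
import openvino.properties.hint as hints
import openvino.properties.streams as streams

# ov_config then reads: LATENCY performance hint, a single inference stream,
# and an empty cache_dir (which disables on-disk model caching)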
@@ -64,7 +60,7 @@ def load_model(model_dir, device):
 
     return ov_model, tok
 
-# Define the bot function that interacts with Gradio UI
+# Gradio Interface for Bot interaction
 def bot(history, temperature, top_p, top_k, repetition_penalty, conversation_id):
     input_ids = convert_history_to_token(history)
     if input_ids.shape[1] > 2000:
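Between this hunk and the next, the diff elides the generation loop that fills `partial_text` (the guard above truncates overly long histories before generating). The usual pattern in the OpenVINO chatbot demos runs `generate` on a worker thread and drains a `TextIteratorStreamer`; a minimal sketch, assuming `ov_model` and `tok` are the globals loaded by `load_model`, with an illustrative 256-token cap:

from threading import Thread
from transformers import TextIteratorStreamer

def stream_answer(ov_model, tok, input_ids, temperature, top_p, top_k, repetition_penalty):
    # Yield the partial answer as tokens arrive instead of waiting for the full reply.
    streamer = TextIteratorStreamer(tok, skip_prompt=True, skip_special_tokens=True)
    generate_kwargs = dict(
        input_ids=input_ids,
        max_new_tokens=256,
        temperature=temperature,
        do_sample=temperature > 0.0,
        top_p=top_p,
        top_k=top_k,
        repetition_penalty=repetition_penalty,
        streamer=streamer,
    )
    Thread(target=ov_model.generate, kwargs=generate_kwargs).start()
    partial_text = ""
    for new_text in streamer:
        partial_text += new_text
        yield partial_text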
@@ -99,23 +95,56 @@ def bot(history, temperature, top_p, top_k, repetition_penalty, conversation_id)
     history[-1][1] = partial_text
     yield history
 
-# Gradio interface setup
+# Define a Gradio interface for user interaction
 def create_gradio_interface():
-    model_language = SUPPORTED_LLM_MODELS.keys()  # List of model languages
-    model_id = widgets.Dropdown(options=model_language, value=model_language[0], description="Model Language:")
-
-    # Choose model based on the selected language
-    model_configuration = SUPPORTED_LLM_MODELS[model_language[0]][model_id.value]
-
-    # Prepare model (convert to INT4, etc.)
-    int4_model_dir = convert_to_int4(model_id.value, model_configuration)
-
-    # Load model and tokenizer
-    device = device_widget("CPU")
-    ov_model, tok = load_model(int4_model_dir, device)
-
-    # Create the Gradio app
-    demo = make_demo(run_fn=bot, stop_fn=request_cancel, title=f"OpenVINO Chatbot", language=model_language[0])
+    # Dropdown for selecting model language and model ID
+    model_language = list(SUPPORTED_LLM_MODELS.keys())  # List of model languages
+    model_id = gr.Dropdown(choices=model_language, value=model_language[0], label="Model Language")
+
+    # Once model language is selected, show the respective model IDs
+    def update_model_ids(model_language):
+        model_ids = list(SUPPORTED_LLM_MODELS[model_language].keys())
+        return gr.Dropdown.update(choices=model_ids, value=model_ids[0])
+
+    model_id_selector = gr.Dropdown(choices=model_language, value=model_language[0], label="Model ID")
+    model_id_selector.change(update_model_ids, inputs=model_language, outputs=model_id_selector)
+
+    # Set up a checkbox for enabling AWQ compression
+    enable_awq = gr.Checkbox(value=False, label="Enable AWQ for Compression")
+
+    # Initialize model selection based on language and ID
+    def load_model_on_select(model_language, model_id, enable_awq):
+        model_configuration = SUPPORTED_LLM_MODELS[model_language][model_id]
+        int4_model_dir = convert_to_int4(model_id, model_configuration, enable_awq)
+
+        # Load the model and tokenizer
+        device = device_widget("CPU")  # or any device you want to use
+        ov_model, tok = load_model(int4_model_dir, device)
+
+        # Return the loaded model and tokenizer
+        return ov_model, tok
+
+    # Connect model selection UI to load model dynamically
+    load_button = gr.Button("Load Model")
+    load_button.click(load_model_on_select, inputs=[model_language, model_id, enable_awq], outputs=[gr.Textbox(label="Model Status")])
+
+    # Create the Gradio chatbot interface
+    chatbot = gr.Chatbot()
+
+    # Parameters for bot generation
+    temperature = gr.Slider(minimum=0, maximum=1, step=0.1, label="Temperature", value=0.7)
+    top_p = gr.Slider(minimum=0, maximum=1, step=0.1, label="Top-p", value=0.9)
+    top_k = gr.Slider(minimum=0, maximum=50, step=1, label="Top-k", value=50)
+    repetition_penalty = gr.Slider(minimum=0, maximum=2, step=0.1, label="Repetition Penalty", value=1.0)
+
+    # Run the Gradio interface
+    demo = gr.Interface(
+        fn=bot,
+        inputs=[chatbot, temperature, top_p, top_k, repetition_penalty],
+        outputs=[chatbot],
+        title="OpenVINO Chatbot",
+        live=True
+    )
 
     return demo
 
@@ -123,4 +152,3 @@ def create_gradio_interface():
 if __name__ == "__main__":
     app = create_gradio_interface()
     app.launch(debug=True, share=True)  # share=True for public access
-
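The new `create_gradio_interface` still has wiring problems worth flagging: the components are created outside any `gr.Blocks`/`gr.Interface` context, so `.change()` and `.click()` have no app to attach to; `inputs=model_language` passes a plain Python list where a component is expected, and the `.change()` listener hangs off the Model ID dropdown instead of the language dropdown; `gr.Dropdown.update` is removed in Gradio 4 (return a new `gr.Dropdown(...)` instead); and `gr.Interface(fn=bot, ..., live=True)` is not how a streaming chatbot is driven. A hedged sketch of the same UI restructured with `gr.Blocks` follows; `msg`, `status`, `user`, and the `gr.State` conversation id are illustrative names, not part of this commit, and `bot`'s signature is taken from the hunk above:

def create_gradio_interface():
    languages = list(SUPPORTED_LLM_MODELS.keys())
    initial_ids = list(SUPPORTED_LLM_MODELS[languages[0]].keys())

    with gr.Blocks(title="OpenVINO Chatbot") as demo:  # events must be wired inside a Blocks context
        lang = gr.Dropdown(choices=languages, value=languages[0], label="Model Language")
        model_id = gr.Dropdown(choices=initial_ids, value=initial_ids[0], label="Model ID")
        enable_awq = gr.Checkbox(value=False, label="Enable AWQ for Compression")
        load_button = gr.Button("Load Model")
        status = gr.Textbox(label="Model Status", interactive=False)

        chatbot = gr.Chatbot()
        msg = gr.Textbox(label="Message")
        conversation_id = gr.State("")  # bot() expects a conversation id as its last input
        temperature = gr.Slider(0, 1, value=0.7, step=0.1, label="Temperature")
        top_p = gr.Slider(0, 1, value=0.9, step=0.1, label="Top-p")
        top_k = gr.Slider(0, 50, value=50, step=1, label="Top-k")
        repetition_penalty = gr.Slider(0, 2, value=1.0, step=0.1, label="Repetition Penalty")

        def update_model_ids(language):
            ids = list(SUPPORTED_LLM_MODELS[language].keys())
            return gr.Dropdown(choices=ids, value=ids[0])  # Gradio 4: return a component, not .update()

        lang.change(update_model_ids, inputs=lang, outputs=model_id)  # listen on the *language* dropdown

        def load_selected_model(language, mid, awq):
            global ov_model, tok  # bot() reads these when generating
            cfg = SUPPORTED_LLM_MODELS[language][mid]
            ov_model, tok = load_model(convert_to_int4(mid, cfg, awq), "CPU")
            return f"Loaded {mid} on CPU"

        load_button.click(load_selected_model, inputs=[lang, model_id, enable_awq], outputs=status)

        def user(message, history):
            # Append the user turn; bot() then streams the assistant reply into it
            return "", history + [[message, None]]

        msg.submit(user, [msg, chatbot], [msg, chatbot]).then(
            bot, [chatbot, temperature, top_p, top_k, repetition_penalty, conversation_id], chatbot
        )

    return demo

Compared with `gr.Interface(live=True)`, chaining `.then(bot, ...)` onto the submit event is what lets the generator `bot` stream partial history updates back into the Chatbot component.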