sagar007 committed on
Commit
5fd8357
·
verified ·
1 Parent(s): 80e2071

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +82 -53
app.py CHANGED
@@ -6,7 +6,7 @@ from threading import Thread
6
  import numpy as np
7
  from PIL import Image
8
  import subprocess
9
- import spaces
10
 
11
  # Install flash-attention
12
  subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
@@ -106,63 +106,92 @@ def process_vision_query(image, text_input):
106
  response = vision_processor.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
107
  return response
108
 
109
- # Modified combined chat function
110
- def combined_chat(message, image, history, system_prompt, temperature, max_new_tokens, top_p, top_k):
111
- if image is not None:
112
- # Process image query
113
- response = process_vision_query(image, message)
114
- history.append((message, response))
115
- return history, None
116
- else:
117
- # Process text query
118
- return stream_text_chat(message, history, system_prompt, temperature, max_new_tokens, top_p, top_k), None
119
-
120
- # Function to toggle between text and image input
121
- def toggle_input(choice):
122
- if choice == "Text":
123
- return gr.update(visible=True), gr.update(visible=False)
124
- else:
125
- return gr.update(visible=False), gr.update(visible=True)
126
-
127
  # Custom CSS
128
  custom_css = """
129
- body { background-color: #343541; color: #ececf1; font-family: 'Arial', sans-serif; }
130
- .gradio-container { max-width: 800px !important; margin: auto; }
131
- #chatbot { height: 400px; overflow-y: auto; }
132
- #input-container { display: flex; align-items: center; }
133
- #msg, #image-input { flex-grow: 1; margin-right: 10px; }
134
- #submit-btn { min-width: 60px; }
135
- footer { text-align: center; margin-top: 2rem; color: #acacbe; }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
136
  """
137
 
138
  # Gradio interface
139
- with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
140
- chatbot = gr.Chatbot(elem_id="chatbot")
141
-
142
- with gr.Row(elem_id="input-container"):
143
- input_type = gr.Radio(["Text", "Image"], value="Text", label="Input Type")
144
- with gr.Column(visible=True) as text_input:
145
- msg = gr.Textbox(
146
- show_label=False,
147
- placeholder="Send a message...",
148
- elem_id="msg"
149
- )
150
- with gr.Column(visible=False) as image_input:
151
- image = gr.Image(type="pil", elem_id="image-input")
152
-
153
- submit_btn = gr.Button("Send", elem_id="submit-btn")
154
- clear_btn = gr.Button("Clear Chat", variant="secondary")
155
-
156
- with gr.Accordion("Advanced Options", open=False):
157
- system_prompt = gr.Textbox(value="You are a helpful assistant", label="System Prompt")
158
- temperature = gr.Slider(minimum=0, maximum=1, step=0.1, value=0.8, label="Temperature")
159
- max_new_tokens = gr.Slider(minimum=128, maximum=8192, step=1, value=1024, label="Max new tokens")
160
- top_p = gr.Slider(minimum=0.0, maximum=1.0, step=0.1, value=1.0, label="top_p")
161
- top_k = gr.Slider(minimum=1, maximum=20, step=1, value=20, label="top_k")
162
-
163
- input_type.change(toggle_input, input_type, [text_input, image_input])
164
- submit_btn.click(combined_chat, [msg, image, chatbot, system_prompt, temperature, max_new_tokens, top_p, top_k], [chatbot, image])
165
- clear_btn.click(lambda: ([], None), None, [chatbot, image], queue=False)
 
 
 
 
 
 
 
 
 
 
 
166
 
167
  gr.HTML("<footer>Powered by Phi 3.5 Multimodal AI</footer>")
168
 
 
6
  import numpy as np
7
  from PIL import Image
8
  import subprocess
9
+ import spaces # Add this import
10
 
11
  # Install flash-attention
12
  subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
 
106
  response = vision_processor.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
107
  return response
108
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
109
  # Custom CSS
110
  custom_css = """
111
+ body { background-color: #0b0f19; color: #e2e8f0; font-family: 'Arial', sans-serif;}
112
+ #custom-header { text-align: center; padding: 20px 0; background-color: #1a202c; margin-bottom: 20px; border-radius: 10px;}
113
+ #custom-header h1 { font-size: 2.5rem; margin-bottom: 0.5rem;}
114
+ #custom-header h1 .blue { color: #60a5fa;}
115
+ #custom-header h1 .pink { color: #f472b6;}
116
+ #custom-header h2 { font-size: 1.5rem; color: #94a3b8;}
117
+ .suggestions { display: flex; justify-content: center; flex-wrap: wrap; gap: 1rem; margin: 20px 0;}
118
+ .suggestion { background-color: #1e293b; border-radius: 0.5rem; padding: 1rem; display: flex; align-items: center; transition: transform 0.3s ease; width: 200px;}
119
+ .suggestion:hover { transform: translateY(-5px);}
120
+ .suggestion-icon { font-size: 1.5rem; margin-right: 1rem; background-color: #2d3748; padding: 0.5rem; border-radius: 50%;}
121
+ .gradio-container { max-width: 100% !important;}
122
+ #component-0, #component-1, #component-2 { max-width: 100% !important;}
123
+ footer { text-align: center; margin-top: 2rem; color: #64748b;}
124
+ """
125
+
126
+ # Custom HTML for the header
127
+ custom_header = """
128
+ <div id="custom-header">
129
+ <h1><span class="blue">Phi 3.5</span> <span class="pink">Multimodal Assistant</span></h1>
130
+ <h2>Text and Vision AI at Your Service</h2>
131
+ </div>
132
+ """
133
+
134
+ # Custom HTML for suggestions
135
+ custom_suggestions = """
136
+ <div class="suggestions">
137
+ <div class="suggestion">
138
+ <span class="suggestion-icon">💬</span>
139
+ <p>Chat with the Text Model</p>
140
+ </div>
141
+ <div class="suggestion">
142
+ <span class="suggestion-icon">🖼️</span>
143
+ <p>Analyze Images with Vision Model</p>
144
+ </div>
145
+ <div class="suggestion">
146
+ <span class="suggestion-icon">🤖</span>
147
+ <p>Get AI-generated responses</p>
148
+ </div>
149
+ <div class="suggestion">
150
+ <span class="suggestion-icon">🔍</span>
151
+ <p>Explore advanced options</p>
152
+ </div>
153
+ </div>
154
  """
155
 
156
  # Gradio interface
157
+ with gr.Blocks(css=custom_css, theme=gr.themes.Base().set(
158
+ body_background_fill="#0b0f19",
159
+ body_text_color="#e2e8f0",
160
+ button_primary_background_fill="#3b82f6",
161
+ button_primary_background_fill_hover="#2563eb",
162
+ button_primary_text_color="white",
163
+ block_title_text_color="#94a3b8",
164
+ block_label_text_color="#94a3b8",
165
+ )) as demo:
166
+ gr.HTML(custom_header)
167
+ gr.HTML(custom_suggestions)
168
+
169
+ with gr.Tab("Text Model (Phi-3.5-mini)"):
170
+ chatbot = gr.Chatbot(height=400)
171
+ msg = gr.Textbox(label="Message", placeholder="Type your message here...")
172
+ with gr.Accordion("Advanced Options", open=False):
173
+ system_prompt = gr.Textbox(value="You are a helpful assistant", label="System Prompt")
174
+ temperature = gr.Slider(minimum=0, maximum=1, step=0.1, value=0.8, label="Temperature")
175
+ max_new_tokens = gr.Slider(minimum=128, maximum=8192, step=1, value=1024, label="Max new tokens")
176
+ top_p = gr.Slider(minimum=0.0, maximum=1.0, step=0.1, value=1.0, label="top_p")
177
+ top_k = gr.Slider(minimum=1, maximum=20, step=1, value=20, label="top_k")
178
+
179
+ submit_btn = gr.Button("Submit", variant="primary")
180
+ clear_btn = gr.Button("Clear Chat", variant="secondary")
181
+
182
+ submit_btn.click(stream_text_chat, [msg, chatbot, system_prompt, temperature, max_new_tokens, top_p, top_k], [chatbot])
183
+ clear_btn.click(lambda: None, None, chatbot, queue=False)
184
+
185
+ with gr.Tab("Vision Model (Phi-3.5-vision)"):
186
+ with gr.Row():
187
+ with gr.Column(scale=1):
188
+ vision_input_img = gr.Image(label="Upload an Image", type="pil")
189
+ vision_text_input = gr.Textbox(label="Ask a question about the image", placeholder="What do you see in this image?")
190
+ vision_submit_btn = gr.Button("Analyze Image", variant="primary")
191
+ with gr.Column(scale=1):
192
+ vision_output_text = gr.Textbox(label="AI Analysis", lines=10)
193
+
194
+ vision_submit_btn.click(process_vision_query, [vision_input_img, vision_text_input], [vision_output_text])
195
 
196
  gr.HTML("<footer>Powered by Phi 3.5 Multimodal AI</footer>")
197