Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -6,7 +6,7 @@ from threading import Thread
|
|
6 |
import numpy as np
|
7 |
from PIL import Image
|
8 |
import subprocess
|
9 |
-
import spaces
|
10 |
|
11 |
# Install flash-attention
|
12 |
subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
|
@@ -106,63 +106,92 @@ def process_vision_query(image, text_input):
|
|
106 |
response = vision_processor.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
|
107 |
return response
|
108 |
|
109 |
-
# Modified combined chat function
|
110 |
-
def combined_chat(message, image, history, system_prompt, temperature, max_new_tokens, top_p, top_k):
|
111 |
-
if image is not None:
|
112 |
-
# Process image query
|
113 |
-
response = process_vision_query(image, message)
|
114 |
-
history.append((message, response))
|
115 |
-
return history, None
|
116 |
-
else:
|
117 |
-
# Process text query
|
118 |
-
return stream_text_chat(message, history, system_prompt, temperature, max_new_tokens, top_p, top_k), None
|
119 |
-
|
120 |
-
# Function to toggle between text and image input
|
121 |
-
def toggle_input(choice):
|
122 |
-
if choice == "Text":
|
123 |
-
return gr.update(visible=True), gr.update(visible=False)
|
124 |
-
else:
|
125 |
-
return gr.update(visible=False), gr.update(visible=True)
|
126 |
-
|
127 |
# Custom CSS
|
128 |
custom_css = """
|
129 |
-
body { background-color: #
|
130 |
-
|
131 |
-
#
|
132 |
-
#
|
133 |
-
#
|
134 |
-
#
|
135 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
136 |
"""
|
137 |
|
138 |
# Gradio interface
|
139 |
-
with gr.Blocks(css=custom_css, theme=gr.themes.
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
|
148 |
-
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
-
|
165 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
166 |
|
167 |
gr.HTML("<footer>Powered by Phi 3.5 Multimodal AI</footer>")
|
168 |
|
|
|
6 |
import numpy as np
|
7 |
from PIL import Image
|
8 |
import subprocess
|
9 |
+
import spaces # Add this import
|
10 |
|
11 |
# Install flash-attention
|
12 |
subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
|
|
|
106 |
response = vision_processor.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
|
107 |
return response
|
108 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
109 |
# Custom CSS
|
110 |
custom_css = """
|
111 |
+
body { background-color: #0b0f19; color: #e2e8f0; font-family: 'Arial', sans-serif;}
|
112 |
+
#custom-header { text-align: center; padding: 20px 0; background-color: #1a202c; margin-bottom: 20px; border-radius: 10px;}
|
113 |
+
#custom-header h1 { font-size: 2.5rem; margin-bottom: 0.5rem;}
|
114 |
+
#custom-header h1 .blue { color: #60a5fa;}
|
115 |
+
#custom-header h1 .pink { color: #f472b6;}
|
116 |
+
#custom-header h2 { font-size: 1.5rem; color: #94a3b8;}
|
117 |
+
.suggestions { display: flex; justify-content: center; flex-wrap: wrap; gap: 1rem; margin: 20px 0;}
|
118 |
+
.suggestion { background-color: #1e293b; border-radius: 0.5rem; padding: 1rem; display: flex; align-items: center; transition: transform 0.3s ease; width: 200px;}
|
119 |
+
.suggestion:hover { transform: translateY(-5px);}
|
120 |
+
.suggestion-icon { font-size: 1.5rem; margin-right: 1rem; background-color: #2d3748; padding: 0.5rem; border-radius: 50%;}
|
121 |
+
.gradio-container { max-width: 100% !important;}
|
122 |
+
#component-0, #component-1, #component-2 { max-width: 100% !important;}
|
123 |
+
footer { text-align: center; margin-top: 2rem; color: #64748b;}
|
124 |
+
"""
|
125 |
+
|
126 |
+
# Custom HTML for the header
|
127 |
+
custom_header = """
|
128 |
+
<div id="custom-header">
|
129 |
+
<h1><span class="blue">Phi 3.5</span> <span class="pink">Multimodal Assistant</span></h1>
|
130 |
+
<h2>Text and Vision AI at Your Service</h2>
|
131 |
+
</div>
|
132 |
+
"""
|
133 |
+
|
134 |
+
# Custom HTML for suggestions
|
135 |
+
custom_suggestions = """
|
136 |
+
<div class="suggestions">
|
137 |
+
<div class="suggestion">
|
138 |
+
<span class="suggestion-icon">💬</span>
|
139 |
+
<p>Chat with the Text Model</p>
|
140 |
+
</div>
|
141 |
+
<div class="suggestion">
|
142 |
+
<span class="suggestion-icon">🖼️</span>
|
143 |
+
<p>Analyze Images with Vision Model</p>
|
144 |
+
</div>
|
145 |
+
<div class="suggestion">
|
146 |
+
<span class="suggestion-icon">🤖</span>
|
147 |
+
<p>Get AI-generated responses</p>
|
148 |
+
</div>
|
149 |
+
<div class="suggestion">
|
150 |
+
<span class="suggestion-icon">🔍</span>
|
151 |
+
<p>Explore advanced options</p>
|
152 |
+
</div>
|
153 |
+
</div>
|
154 |
"""
|
155 |
|
156 |
# Gradio interface
|
157 |
+
with gr.Blocks(css=custom_css, theme=gr.themes.Base().set(
|
158 |
+
body_background_fill="#0b0f19",
|
159 |
+
body_text_color="#e2e8f0",
|
160 |
+
button_primary_background_fill="#3b82f6",
|
161 |
+
button_primary_background_fill_hover="#2563eb",
|
162 |
+
button_primary_text_color="white",
|
163 |
+
block_title_text_color="#94a3b8",
|
164 |
+
block_label_text_color="#94a3b8",
|
165 |
+
)) as demo:
|
166 |
+
gr.HTML(custom_header)
|
167 |
+
gr.HTML(custom_suggestions)
|
168 |
+
|
169 |
+
with gr.Tab("Text Model (Phi-3.5-mini)"):
|
170 |
+
chatbot = gr.Chatbot(height=400)
|
171 |
+
msg = gr.Textbox(label="Message", placeholder="Type your message here...")
|
172 |
+
with gr.Accordion("Advanced Options", open=False):
|
173 |
+
system_prompt = gr.Textbox(value="You are a helpful assistant", label="System Prompt")
|
174 |
+
temperature = gr.Slider(minimum=0, maximum=1, step=0.1, value=0.8, label="Temperature")
|
175 |
+
max_new_tokens = gr.Slider(minimum=128, maximum=8192, step=1, value=1024, label="Max new tokens")
|
176 |
+
top_p = gr.Slider(minimum=0.0, maximum=1.0, step=0.1, value=1.0, label="top_p")
|
177 |
+
top_k = gr.Slider(minimum=1, maximum=20, step=1, value=20, label="top_k")
|
178 |
+
|
179 |
+
submit_btn = gr.Button("Submit", variant="primary")
|
180 |
+
clear_btn = gr.Button("Clear Chat", variant="secondary")
|
181 |
+
|
182 |
+
submit_btn.click(stream_text_chat, [msg, chatbot, system_prompt, temperature, max_new_tokens, top_p, top_k], [chatbot])
|
183 |
+
clear_btn.click(lambda: None, None, chatbot, queue=False)
|
184 |
+
|
185 |
+
with gr.Tab("Vision Model (Phi-3.5-vision)"):
|
186 |
+
with gr.Row():
|
187 |
+
with gr.Column(scale=1):
|
188 |
+
vision_input_img = gr.Image(label="Upload an Image", type="pil")
|
189 |
+
vision_text_input = gr.Textbox(label="Ask a question about the image", placeholder="What do you see in this image?")
|
190 |
+
vision_submit_btn = gr.Button("Analyze Image", variant="primary")
|
191 |
+
with gr.Column(scale=1):
|
192 |
+
vision_output_text = gr.Textbox(label="AI Analysis", lines=10)
|
193 |
+
|
194 |
+
vision_submit_btn.click(process_vision_query, [vision_input_img, vision_text_input], [vision_output_text])
|
195 |
|
196 |
gr.HTML("<footer>Powered by Phi 3.5 Multimodal AI</footer>")
|
197 |
|