Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -224,26 +224,27 @@ with gr.Blocks(css=custom_css, theme=gr.themes.Base().set(
|
|
224 |
clear_btn.click(lambda: (None, None), None, [chatbot, audio_output], queue=False)
|
225 |
|
226 |
with gr.Tab("Vision Model with TTS (Phi-3.5-vision)"):
|
227 |
-
|
228 |
with gr.Row():
|
229 |
-
|
230 |
-
|
231 |
-
|
232 |
-
|
233 |
-
|
234 |
-
|
235 |
-
|
236 |
-
|
237 |
-
|
|
|
|
|
238 |
inputs=[vision_input_img, vision_text_input],
|
239 |
outputs=[vision_output_text, vision_output_audio])
|
240 |
-
|
241 |
-
|
242 |
|
243 |
|
244 |
|
245 |
with gr.Tab("Text-to-Speech (Parler-TTS)"):
|
246 |
with gr.Row():
|
|
|
247 |
with gr.Column(scale=1):
|
248 |
tts_prompt = gr.Textbox(label="Text to Speak", placeholder="Enter the text you want to convert to speech...")
|
249 |
tts_description = gr.Textbox(label="Voice Description", value="A female speaker delivers a slightly expressive and animated speech with a moderate speed and pitch. The recording is of very high quality, with the speaker's voice sounding clear and very close up.", lines=3)
|
|
|
224 |
clear_btn.click(lambda: (None, None), None, [chatbot, audio_output], queue=False)
|
225 |
|
226 |
with gr.Tab("Vision Model with TTS (Phi-3.5-vision)"):
|
|
|
227 |
with gr.Row():
|
228 |
+
|
229 |
+
with gr.Column(scale=1):
|
230 |
+
|
231 |
+
vision_input_img = gr.Image(label="Upload an Image", type="pil")
|
232 |
+
vision_text_input = gr.Textbox(label="Ask a question about the image", placeholder="What do you see in this image?")
|
233 |
+
vision_submit_btn = gr.Button("Analyze Image and Generate Speech", variant="primary")
|
234 |
+
with gr.Column(scale=1):
|
235 |
+
vision_output_text = gr.Textbox(label="AI Analysis", lines=10)
|
236 |
+
vision_output_audio = gr.Audio(label="Generated Speech")
|
237 |
+
|
238 |
+
vision_submit_btn.click(process_vision_query,
|
239 |
inputs=[vision_input_img, vision_text_input],
|
240 |
outputs=[vision_output_text, vision_output_audio])
|
241 |
+
|
|
|
242 |
|
243 |
|
244 |
|
245 |
with gr.Tab("Text-to-Speech (Parler-TTS)"):
|
246 |
with gr.Row():
|
247 |
+
|
248 |
with gr.Column(scale=1):
|
249 |
tts_prompt = gr.Textbox(label="Text to Speak", placeholder="Enter the text you want to convert to speech...")
|
250 |
tts_description = gr.Textbox(label="Voice Description", value="A female speaker delivers a slightly expressive and animated speech with a moderate speed and pitch. The recording is of very high quality, with the speaker's voice sounding clear and very close up.", lines=3)
|