sagar007 committed on
Commit
b634609
·
verified ·
1 Parent(s): dceec72

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -12
app.py CHANGED
@@ -224,26 +224,27 @@ with gr.Blocks(css=custom_css, theme=gr.themes.Base().set(
224
  clear_btn.click(lambda: (None, None), None, [chatbot, audio_output], queue=False)
225
 
226
  with gr.Tab("Vision Model with TTS (Phi-3.5-vision)"):
227
-
228
  with gr.Row():
229
- with gr.Column(scale=1):
230
- vision_input_img = gr.Image(label="Upload an Image", type="pil")
231
- vision_text_input = gr.Textbox(label="Ask a question about the image", placeholder="What do you see in this image?")
232
- vision_submit_btn = gr.Button("Analyze Image and Generate Speech", variant="primary")
233
- with gr.Column(scale=1):
234
- vision_output_text = gr.Textbox(label="AI Analysis", lines=10)
235
- vision_output_audio = gr.Audio(label="Generated Speech")
236
-
237
- vision_submit_btn.click(process_vision_query,
 
 
238
  inputs=[vision_input_img, vision_text_input],
239
  outputs=[vision_output_text, vision_output_audio])
240
-
241
-
242
 
243
 
244
 
245
  with gr.Tab("Text-to-Speech (Parler-TTS)"):
246
  with gr.Row():
 
247
  with gr.Column(scale=1):
248
  tts_prompt = gr.Textbox(label="Text to Speak", placeholder="Enter the text you want to convert to speech...")
249
  tts_description = gr.Textbox(label="Voice Description", value="A female speaker delivers a slightly expressive and animated speech with a moderate speed and pitch. The recording is of very high quality, with the speaker's voice sounding clear and very close up.", lines=3)
 
224
  clear_btn.click(lambda: (None, None), None, [chatbot, audio_output], queue=False)
225
 
226
  with gr.Tab("Vision Model with TTS (Phi-3.5-vision)"):
 
227
  with gr.Row():
228
+
229
+ with gr.Column(scale=1):
230
+
231
+ vision_input_img = gr.Image(label="Upload an Image", type="pil")
232
+ vision_text_input = gr.Textbox(label="Ask a question about the image", placeholder="What do you see in this image?")
233
+ vision_submit_btn = gr.Button("Analyze Image and Generate Speech", variant="primary")
234
+ with gr.Column(scale=1):
235
+ vision_output_text = gr.Textbox(label="AI Analysis", lines=10)
236
+ vision_output_audio = gr.Audio(label="Generated Speech")
237
+
238
+ vision_submit_btn.click(process_vision_query,
239
  inputs=[vision_input_img, vision_text_input],
240
  outputs=[vision_output_text, vision_output_audio])
241
+
 
242
 
243
 
244
 
245
  with gr.Tab("Text-to-Speech (Parler-TTS)"):
246
  with gr.Row():
247
+
248
  with gr.Column(scale=1):
249
  tts_prompt = gr.Textbox(label="Text to Speak", placeholder="Enter the text you want to convert to speech...")
250
  tts_description = gr.Textbox(label="Voice Description", value="A female speaker delivers a slightly expressive and animated speech with a moderate speed and pitch. The recording is of very high quality, with the speaker's voice sounding clear and very close up.", lines=3)