Chethu committed on
Commit
63116e6
1 Parent(s): 4e87e84

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -37
app.py CHANGED
@@ -1,37 +1,31 @@
1
- import os
2
- from PIL import Image, ImageDraw, ImageFont
3
- import gradio as gr
4
- from helper import load_image_from_url, render_results_in_image
5
- from helper import summarize_predictions_natural_language
6
- from transformers import pipeline
7
- from tokenizers import Tokenizer, Encoding
8
- from tokenizers import decoders
9
- from tokenizers import models
10
- from tokenizers import normalizers
11
- from tokenizers import pre_tokenizers
12
- from tokenizers import processors
13
- import matplotlib.pyplot as plt
14
- import requests
15
- import inflect
16
- from predictions import get_predictions
17
- from helper import ignore_warnings
18
- ignore_warnings()
19
- from transformers.utils import logging
20
- logging.set_verbosity_error()
21
-
22
- od_pipe = pipeline("object-detection", "facebook/detr-resnet-50")
23
- tts_pipe = pipeline("text-to-speech",
24
- model="kakao-enterprise/vits-ljs")
25
-
26
- demo = gr.Interface(
27
- fn=get_predictions,
28
- inputs=gr.Image(label="Input image",
29
- type="pil"),
30
- outputs=[gr.Image(label="Output image with predicted instances",
31
- type="pil"), gr.Audio(label="Narration", type="numpy", autoplay=True)]
32
- #outputs=gr.Image(label="Output image with predicted instances",
33
- # type="pil")
34
- )
35
-
36
- #demo.launch(server_name="0.0.0.0", server_port=7860)
37
- demo.launch()
 
1
+ import streamlit as st
2
+ from PIL import Image
3
+ from predictions import get_predictions
4
+
5
def main():
    """Render the "Image Whisper App" Streamlit page.

    Lets the user upload a JPG/PNG image, shows it, and, once "Submit" is
    pressed, runs ``get_predictions`` on it and displays the returned
    processed image, text and audio.
    """
    st.title("Image Whisper App")

    uploaded_image = st.file_uploader("Upload an image", type=["jpg", "jpeg", "png"])

    # Nothing to show until a file has been provided.
    if uploaded_image is None:
        return

    st.subheader("Uploaded Image")
    st.image(uploaded_image, use_column_width=True)

    if not st.button("Submit"):
        return

    # The uploader yields a file-like upload object, not an image, so decode
    # it before handing it to the prediction code.
    # NOTE(review): assumes get_predictions expects a PIL image (this file
    # imports PIL.Image but did not otherwise use it) -- confirm against
    # predictions.py.
    input_image = Image.open(uploaded_image)

    # NOTE(review): three return values are unpacked here; verify that
    # get_predictions really returns (image, text, audio).
    processed_image, text, audio = get_predictions(input_image)

    st.subheader("Detected Objects")
    st.image(processed_image, use_column_width=True)

    st.subheader("Predicted Text")
    st.write(text)

    st.subheader("Audio Output")
    if isinstance(audio, tuple):
        # A tuple is treated as (sample_rate, samples); the rate is passed on
        # explicitly to the audio player.
        sample_rate, audio_data = audio
        st.audio(audio_data, format='audio/wav', sample_rate=sample_rate)
    else:
        st.audio(audio, format='audio/wav')


if __name__ == '__main__':
    main()