Spaces:

SameerR007
/

ImageCaptioning_streamlit

Runtime error

SameerR007 committed on May 15, 2023

Commit

7873a48

•

1 Parent(s): 5c1f193

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -46,8 +46,9 @@ def predict_caption(model, image, tokenizer, max_length):
     return in_text
 from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
 from tensorflow.keras.models import Model
-import pyttsx3
-engine=pyttsx3.init()
 vgg_model = VGG16()
 vgg_model = Model(inputs=vgg_model.inputs, outputs=vgg_model.layers[-2].output)
 from tensorflow.keras.preprocessing.image import img_to_array
@@ -58,6 +59,7 @@ if(uploaded_image!=None):
     display_image=Image.open(uploaded_image)
     st.image(display_image)
     if st.button("Caption"):
         display_image=display_image.resize((224,224))
         image = img_to_array(display_image)
         image = image.reshape((1, image.shape[0], image.shape[1], image.shape[2]))
@@ -65,5 +67,8 @@ if(uploaded_image!=None):
         feature = vgg_model.predict(image, verbose=0)
         final=predict_caption(model, feature, tokenizer, max_length)
         final_output=((" ").join(final.split(" ")[1:len(final.split(" "))-1]))
-        engine.say(final_output)
-        engine.runAndWait()

     return in_text
 from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
 from tensorflow.keras.models import Model
+from gtts import gTTS
+from io import BytesIO
+sound_file = BytesIO()
 vgg_model = VGG16()
 vgg_model = Model(inputs=vgg_model.inputs, outputs=vgg_model.layers[-2].output)
 from tensorflow.keras.preprocessing.image import img_to_array
     display_image=Image.open(uploaded_image)
     st.image(display_image)
     if st.button("Caption"):
+        st.text("Please be patient...")
         display_image=display_image.resize((224,224))
         image = img_to_array(display_image)
         image = image.reshape((1, image.shape[0], image.shape[1], image.shape[2]))
         feature = vgg_model.predict(image, verbose=0)
         final=predict_caption(model, feature, tokenizer, max_length)
         final_output=((" ").join(final.split(" ")[1:len(final.split(" "))-1]))
+        tts = gTTS(final_output, lang='en')
+        tts.write_to_fp(sound_file)
+        st.text("Output:")
+        st.text(final_output)
+        st.audio(sound_file)