import io
import os

# Install dependencies at startup (on Hugging Face Spaces these are normally
# declared in requirements.txt instead; see the note after the code).
os.system('pip install torch')
os.system('pip install transformers')

import streamlit as st
from PIL import Image
from transformers import pipeline

# Visual question answering and text-to-speech pipelines.
# These load on every Streamlit rerun; st.cache_resource could avoid reloading.
vqa_pipeline = pipeline("visual-question-answering", model="microsoft/git-base-vqav2")
tts_pipeline = pipeline("text-to-speech", "suno/bark")


def main():
    st.title("Visual Question Answering & Text-to-Audio App")

    image_file = st.file_uploader("Upload an image", type=["jpg", "png"])
    question = st.text_input("Enter your question")

    if image_file and question:
        image = Image.open(io.BytesIO(image_file.getvalue()))
        vqa_result = vqa_pipeline({"image": image, "question": question})
        answer = vqa_result[0]["answer"]
        st.write(f"Answer: {answer}")

        if st.button("Convert Answer to Audio"):
            tts_result = tts_pipeline(answer)
            # Bark returns a waveform array plus its sampling rate; st.audio needs both.
            st.audio(tts_result["audio"], sample_rate=tts_result["sampling_rate"])


if __name__ == "__main__":
    main()
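
A note on dependencies: Hugging Face Spaces installs packages listed in a requirements.txt at build time, so the os.system pip installs above could be replaced by one. A minimal sketch, inferred from the imports in this app (version pins omitted):

    torch
    transformers
    streamlit
    Pillow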