File size: 881 Bytes
a7ab7f4 7812e4e a7ab7f4 7812e4e a7ab7f4 9c6e277 a7ab7f4 9c6e277 a7ab7f4 9c6e277 a7ab7f4 9c6e277 a7ab7f4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 |
import gradio as gr
from transformers import pipeline
# Captioning stage: turns the input image into a short text description.
img_text_pipe = pipeline(
    task="image-to-text",
    model="Salesforce/blip-image-captioning-base",
)

# Narration stage: turns the generated description into speech audio.
narrator = pipeline(
    task="text-to-speech",
    model="kakao-enterprise/vits-ljs",
)
def describe_image(file_path):
    """Caption an image and return the caption as synthesized speech.

    The image is handed to the module-level ``img_text_pipe`` pipeline, the
    first generated caption is printed to stdout, and that caption is then
    passed to the module-level ``narrator`` pipeline.

    Args:
        file_path: The image to describe; forwarded unchanged to
            ``img_text_pipe``. NOTE(review): despite the name, the Gradio
            image input in this file is declared with ``type="pil"``, so
            this is presumably a PIL image rather than a path -- confirm.

    Returns:
        A ``(sampling_rate, waveform)`` tuple taken from the narrator
        output's ``"sampling_rate"`` entry and the first element of its
        ``"audio"`` entry.
    """
    captions = img_text_pipe(file_path)
    description_text = captions[0]['generated_text']
    print(description_text)

    narrated_text = narrator(description_text)
    # Only element 0 of "audio" is returned; presumably this drops a leading
    # batch/channel axis so the Audio component receives a 1-D waveform --
    # TODO confirm against the TTS pipeline's output shape.
    return (narrated_text["sampling_rate"], narrated_text["audio"][0])
# Input widget: the uploaded image is delivered to the callback as a PIL image.
image_input = gr.Image(label="Input image", type="pil")

# Output widget: receives the callback's (sampling_rate, samples) tuple and
# starts playback automatically.
audio_output = gr.Audio(label="Narration", type="numpy", autoplay=True)

# Wire the image -> narration callback into a single-function web UI.
iface = gr.Interface(
    fn=describe_image,
    inputs=image_input,
    outputs=audio_output,
)

iface.launch()
|