|
import gradio as gr |
|
import os |
|
|
|
# These variables are set before the model package is imported below, so they
# are already in the process environment when that import runs.

# Enumerate GPUs in PCI bus order so that index 0 refers to a stable device.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
# Expose only the first GPU to this process.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
# NOTE(review): presumably a dependency reads USER (e.g. for a cache path) and
# it may be unset in the deployment container — confirm.
os.environ["USER"] = "imagecraft"
|
|
|
|
|
import gradio as gr |
|
from src.model.modules.imagecraft import ImageCraft |
|
|
|
# Load the pretrained checkpoint once at module import; the request handler
# below reuses this single instance for every call.
model = ImageCraft.from_pretrained("nsandiman/imagecraft-ft-co-224")
|
|
|
|
|
def imagecraft_interface(image_path):
    """Run the model on an uploaded image and return speech plus transcript.

    Args:
        image_path: Filesystem path of the uploaded image. Gradio passes
            ``None`` when the form is submitted without an image.

    Returns:
        An ``(audio_buffer, transcript)`` tuple: the audio produced by
        ``model.generate`` with ``output_type="buffer"``, followed by the
        transcript text.

    Raises:
        gr.Error: If no image was provided, so the UI shows a readable
            message instead of an internal failure from the model call.
    """
    if not image_path:
        raise gr.Error("Please upload an image first.")

    # model.generate yields (transcript, audio); the return value swaps them
    # so that the audio comes first.
    transcript, audio_buffer = model.generate(image_path, output_type="buffer")
    return audio_buffer, transcript
|
|
|
|
|
|
|
# Web UI: one image input, with speech audio and its transcript as outputs.
gradio_interface = gr.Interface(
    fn=imagecraft_interface,
    inputs=[
        # type="filepath" hands the callback a path string, not pixel data.
        gr.Image(type="filepath", label="Upload an image"),
    ],
    # Order must match the (audio, transcript) tuple the callback returns.
    outputs=[
        gr.Audio(label="Speech"),
        gr.Textbox(label="Transcript"),
    ],
    title="ImageCraft",
    description="Upload an image and get the speech responses.",
    flagging_mode="never",
)
|
|
|
if __name__ == "__main__":
    # Start the web server only when executed as a script, not on import.
    gradio_interface.launch()
|
|