"""Gradio front-end for ImageCraft: upload an image, get speech and a transcript."""

import gradio as gr
import os

# These variables are set before the model package is imported so that they
# are already in place when the import below runs.
# NOTE(review): presumably the import pulls in a CUDA-backed framework that
# reads them at initialisation time -- confirm before reordering.
# PCI_BUS_ID makes CUDA device indices follow PCI bus order, and
# CUDA_VISIBLE_DEVICES="0" exposes only device 0 to this process.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
# NOTE(review): the reason USER must be set is not visible in this file;
# presumably a dependency looks up the current user name -- confirm.
os.environ["USER"] = "imagecraft"

from src.model.modules.imagecraft import ImageCraft  # noqa: E402

# Loaded once at import time and shared by every request.
model = ImageCraft.from_pretrained("nsandiman/imagecraft-ft-co-224")


def imagecraft_interface(image_path):
    """Generate a spoken response and its transcript for an uploaded image.

    Args:
        image_path: Path of the uploaded image, as supplied by the
            ``gr.Image(type="filepath")`` input. It is ``None`` when the
            form is submitted without an image.

    Returns:
        A ``(audio_buffer, transcript)`` tuple, ordered to match the
        interface outputs (``gr.Audio`` first, ``gr.Textbox`` second).

    Raises:
        gr.Error: If no image was provided.
    """
    if image_path is None:
        # Show a readable message in the UI instead of letting the model
        # fail on a missing path.
        raise gr.Error("Please upload an image first.")

    transcript, audio_buffer = model.generate(image_path, output_type="buffer")
    # model.generate returns (transcript, audio); the UI expects audio first.
    return audio_buffer, transcript


# Define Gradio interface
gradio_interface = gr.Interface(
    fn=imagecraft_interface,
    inputs=[
        gr.Image(type="filepath", label="Upload an image"),
    ],
    outputs=[gr.Audio(label="Speech"), gr.Textbox(label="Transcript")],
    title="ImageCraft",
    description="Upload an image and get the speech responses.",
    flagging_mode="never",
)


if __name__ == "__main__":
    # Launch the Gradio app
    gradio_interface.launch()