import argparse
import os
import uuid

import gradio as gr
from huggingface_hub import snapshot_download

# Run from the script's directory so relative paths (configs/, pretrained_models/) resolve.
os.chdir(os.path.dirname(os.path.abspath(__file__)))

from scripts.inference import inference_process

# Download the Hallo checkpoints into ./pretrained_models (cached across restarts).
hallo_dir = snapshot_download(repo_id="fudan-generative-ai/hallo", local_dir="pretrained_models")


def run_inference(source_image, driving_audio, progress=gr.Progress(track_tqdm=True)):
    # Unique output filename so concurrent requests don't overwrite each other.
    unique_id = uuid.uuid4()
    args = argparse.Namespace(
        config='configs/inference/default.yaml',
        source_image=source_image,
        driving_audio=driving_audio,
        output=f'output-{unique_id}.mp4',
        pose_weight=1.0,
        face_weight=1.0,
        lip_weight=1.0,
        face_expand_ratio=1.2,
        checkpoint=None,
    )
    inference_process(args)
    return f'output-{unique_id}.mp4'


iface = gr.Interface(
    title="Demo for Hallo: Hierarchical Audio-Driven Visual Synthesis for Portrait Image Animation",
    description="Generate talking-head avatars driven by audio. **Every 10 seconds of output takes ~1 minute to generate.** Duplicate the Space for private use, or try it for free on Google Colab.",
    fn=run_inference,
    inputs=[gr.Image(type="filepath"), gr.Audio(type="filepath")],
    outputs="video",
    cache_examples=False,
)

iface.launch(share=True)
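# Note: each request can run for minutes, so the plain launch() above may hit
# HTTP timeouts under concurrent load. A minimal sketch of one mitigation,
# assuming a Gradio version that provides the queue() API, is to serialize
# requests through Gradio's built-in queue by replacing the launch call with:
#
#   iface.queue().launch(share=True)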