|
import os |
|
import shutil |
|
from huggingface_hub import snapshot_download |
|
import gradio as gr |
|
os.chdir(os.path.dirname(os.path.abspath(__file__))) |
|
from scripts.inference import inference_process |
|
import argparse |
|
import uuid |
|
|
|
hallo_dir = snapshot_download(repo_id="fudan-generative-ai/hallo", local_dir="pretrained_models") |
|
|
|
def run_inference(source_image, driving_audio, progress=gr.Progress(track_tqdm=True)):
    """Run Hallo inference on one image/audio pair and return the output video path.

    Args:
        source_image: Path to the portrait image (Gradio supplies a filepath).
        driving_audio: Path to the driving audio file (Gradio supplies a filepath).
        progress: Gradio progress tracker; track_tqdm=True mirrors the
            inference script's tqdm bars into the UI.

    Returns:
        Path of the generated .mp4, unique per call via a random UUID.
    """
    # Unique output name so concurrent/successive runs never overwrite each other.
    run_id = uuid.uuid4()
    output_path = f'output-{run_id}.mp4'

    # inference_process expects argparse-style options; build them directly.
    inference_args = argparse.Namespace(
        config='configs/inference/default.yaml',
        source_image=source_image,
        driving_audio=driving_audio,
        output=output_path,
        pose_weight=1.0,
        face_weight=1.0,
        lip_weight=1.0,
        face_expand_ratio=1.2,
        checkpoint=None,
    )

    inference_process(inference_args)
    return output_path
|
|
|
iface = gr.Interface( |
|
title="Demo for Hallo: Hierarchical Audio-Driven Visual Synthesis for Portrait Image Animation", |
|
description="Generate talking head avatars driven from audio. **every 10 seconds of generation takes ~1 minute** - duplicate the space for private use or try for free on Google Colab", |
|
fn=run_inference, |
|
inputs=[gr.Image(type="filepath"), gr.Audio(type="filepath")], |
|
cache_examples=False, |
|
outputs="video" |
|
) |
|
|
|
iface.launch(share=True) |