LivePortrait

Running on Zero

File size: 5,855 Bytes

7931de6

import pathlib
import re
import subprocess
import tempfile

import av
import numpy as np
from PIL import Image


def alpha_crop_detect(path):
    """Return the last crop rectangle ffmpeg's ``cropdetect`` reports for the alpha plane.

    ffmpeg is run with the ``libvpx`` decoder on *path*; the alpha channel is
    extracted and passed through ``cropdetect``. The combined stdout/stderr
    log is scanned for ``crop=W:H:X:Y`` entries and the last one is returned.

    Args:
        path: Video file to analyse. It is handed to ffmpeg as a single
            argument (no shell involved), so spaces and shell
            metacharacters in the path are safe.

    Returns:
        The last reported rectangle as the string ``"W:H:X:Y"``.

    Raises:
        subprocess.CalledProcessError: The ffmpeg log contained no
            ``crop=`` entry (same exception type the previous
            shell-pipeline implementation raised in that case).
    """
    command = [
        "ffmpeg",
        "-c:v",
        "libvpx",
        "-i",
        str(path),
        "-filter_complex",
        "[0:v]alphaextract, cropdetect=limit=0:round=16:reset=0",
        "-f",
        "null",
        "-",
    ]
    # cropdetect reports through ffmpeg's log, so merge stderr into stdout.
    # ffmpeg's own exit status is deliberately not checked here: only the
    # presence of a crop line decides success, as before.
    completed = subprocess.run(
        command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT
    )
    # The log may contain arbitrary bytes (e.g. file names); never fail on them.
    log = completed.stdout.decode(errors="replace")

    rectangles = re.findall(r"crop=(\d+:\d+:\d+:\d+)", log)
    if not rectangles:
        raise subprocess.CalledProcessError(
            completed.returncode or 1, command, output=completed.stdout
        )
    return rectangles[-1]


def crop_resize_overlay(
    path, background_path, range, out, left=0.5, top=0.15, height=0.85, crf=17
):
    """Crop a video, scale it and overlay it on a background video with ffmpeg.

    Args:
        path: Foreground video; decoded by ffmpeg with ``libvpx``.
        background_path: Background video the foreground is overlaid on.
        range: Crop rectangle as ``"W:H:X:Y"`` (the format returned by
            ``alpha_crop_detect``). The name shadows the builtin but is kept
            because it is part of the function's interface.
        out: Output file path; overwritten if it exists (``-y``).
        left: Horizontal placement. A ``float`` is a fraction of the
            background width and positions the *centre* of the foreground;
            any other value is used as-is as the overlay x offset.
        top: Vertical placement. A ``float`` is a fraction of the background
            height; any other value is used as-is as the overlay y offset.
        height: Target foreground height. A ``float`` is a fraction of the
            background height. The value is rounded down to an even number.
        crf: Value passed to ffmpeg's ``-crf`` option.

    Returns:
        ``(background_width, background_height, int(fps), (left, top, height))``
        where the last tuple holds the resolved pixel values.

    Raises:
        subprocess.CalledProcessError: ffmpeg exited with a non-zero status.
    """
    with av.open(path, "r") as f:
        fps = f.streams.video[0].base_rate

    with av.open(background_path, "r") as f:
        background_stream = f.streams.video[0]
        background_width = background_stream.width
        background_height = background_stream.height

    if isinstance(top, float):
        top = int(background_height * top)

    if isinstance(height, float):
        height = int(background_height * height)

    # Force even dimensions for the scaled foreground.
    height -= height % 2

    crop_w, crop_h, _, _ = map(int, range.split(":"))
    # Keep the crop rectangle's aspect ratio at the requested height.
    width = int(height / crop_h * crop_w)
    width -= width % 2

    if isinstance(left, float):
        left = int(background_width * left) - width // 2

    filter_graph = (
        f"[0:v]crop={range},scale={width}:{height} [vidi]; "
        f"[1:v][vidi] overlay={left}:{top}"
    )
    # Arguments are passed as a list (no shell), so paths containing spaces
    # or shell metacharacters reach ffmpeg intact. check=True surfaces an
    # ffmpeg failure here instead of letting callers read a missing or
    # stale output file.
    subprocess.run(
        [
            "ffmpeg",
            "-y",
            "-c:v",
            "libvpx",
            "-r",
            str(fps),
            "-i",
            str(path),
            "-r",
            str(fps),
            "-i",
            str(background_path),
            "-filter_complex",
            filter_graph,
            "-crf",
            str(crf),
            "-pix_fmt",
            "yuva420p",
            "-c:v",
            "libvpx-vp9",
            "-c:a",
            "copy",
            str(out),
        ],
        check=True,
    )

    return background_width, background_height, int(fps), (left, top, height)


import json
import os
import shutil
import tempfile
from pathlib import Path

import av
import pandas as pd
import stf_alternative
from stf_alternative.util import get_crop_mp4_dir, get_frame_dir, get_preprocess_dir

from stf_tools.silent import create_silent_video
from stf_tools.writers import WebmWriter


def _dump_frames_as_webp(video_path, frames_dir):
    """Decode the first video stream of *video_path* into lossless WebP files.

    Files are written to *frames_dir* (created if missing) and named by the
    zero-padded five-digit frame index, e.g. ``00012.webp``.
    """
    frames_dir.mkdir(exist_ok=True, parents=True)
    with av.open(video_path) as container:
        for frame in container.decode(video=0):
            # Build the name without %-formatting the directory part, so a
            # "%" in the directory path cannot corrupt the file name.
            target = frames_dir / f"{frame.index:05d}.webp"
            Image.fromarray(frame.to_ndarray(format="rgb24"), mode="RGB").save(
                str(target),
                format="webp",
                lossless=True,
            )


def create_template(
    template_video_path,
    background_path,
    out_path,
    config_path,
    reference_face,
    work_root_path,
    checkpoint_path,
    left,
    top,
    height,
    crf=17,
):
    """Composite a template video onto a background and rebuild its preprocess data.

    Steps, in order:

    1. Detect the alpha crop rectangle of the template and write the
       cropped/scaled/overlaid video to *out_path*.
    2. Run ``stf_alternative.preprocess_template`` on the original template
       and create the model.
    3. Write ``metadata.json`` (fps, width, height of the composite) into the
       preprocess directory.
    4. Remap the ``cropped_box`` entries of ``df_fan.pickle`` from template
       coordinates to composite coordinates and save the pickle back.
    5. Dump the composite's frames as lossless WebP into the template frame
       directory.
    6. Render a silent video with ``create_silent_video`` and replace the
       template frame directory with the frames of that silent video.

    Args:
        template_video_path: Source template video.
        background_path: Background video for the composite.
        out_path: Where the composite video is written.
        config_path: Forwarded to ``stf_alternative``.
        reference_face: Forwarded to ``stf_alternative.preprocess_template``.
        work_root_path: Root directory used by ``stf_alternative`` for
            preprocess output.
        checkpoint_path: Forwarded to ``stf_alternative.create_model``.
        left: Forwarded to ``crop_resize_overlay``.
        top: Forwarded to ``crop_resize_overlay``.
        height: Forwarded to ``crop_resize_overlay``.
        crf: ffmpeg/WebM quality value used for both the composite and the
            silent video.
    """
    crop_range = alpha_crop_detect(template_video_path)
    # left/top/height come back resolved to the pixel values actually used.
    result_width, result_height, fps, (left, top, height) = crop_resize_overlay(
        template_video_path,
        background_path,
        crop_range,
        out_path,
        left=left,
        top=top,
        height=height,
        crf=crf,
    )

    stf_alternative.preprocess_template(
        config_path=config_path,
        template_video_path=template_video_path,
        reference_face=reference_face,
        work_root_path=work_root_path,
        template_frame_ratio=1.0,
        template_video_ratio=[1.0],
        silent_video_path=None,
        callback=None,
        device="cuda:0",
        verbose=True,
        save_frames=False,
    )

    model = stf_alternative.create_model(
        config_path=config_path,
        checkpoint_path=checkpoint_path,
        work_root_path=work_root_path,
        device="cuda:0",
        verbose=True,
        wavlm_path="microsoft/wavlm-large",
    )

    preprocess_dir = Path(get_preprocess_dir(work_root_path, model.args.name))
    crop_mp4_dir = Path(get_crop_mp4_dir(preprocess_dir, template_video_path))
    dataset_dir = crop_mp4_dir / f"{Path(template_video_path).stem}_000"
    template_frames_path = Path(
        get_frame_dir(preprocess_dir, template_video_path, ratio=1.0)
    )

    with open(preprocess_dir / "metadata.json", "w") as f:
        json.dump(
            {
                "fps": fps,
                "width": result_width,
                "height": result_height,
            },
            f,
        )

    # NOTE(review): this pickle is expected to be the one produced by the
    # preprocess step above; pickles must never be loaded from untrusted input.
    df = pd.read_pickle(dataset_dir / "df_fan.pickle")

    _, h, x, y = map(int, crop_range.split(":"))
    scale = height / h

    # Boxes are rewritten in place. Several rows can hold the very same box
    # object, so track object identity to transform each one exactly once.
    seen_ids = set()
    for it in df["cropped_box"]:
        if id(it) in seen_ids:
            continue
        seen_ids.add(id(it))
        x1, y1, x2, y2 = it
        # Shift into crop-relative coordinates, scale, then offset to the
        # overlay position inside the composite.
        x1 = (x1 - x) * scale + left
        x2 = (x2 - x) * scale + left
        y1 = (y1 - y) * scale + top
        y2 = (y2 - y) * scale + top
        it[:] = (x1, y1, x2, y2)

    df.to_pickle(dataset_dir / "df_fan.pickle")

    _dump_frames_as_webp(out_path, template_frames_path)

    with tempfile.TemporaryDirectory() as tempdir:
        silent_video_path = f"{tempdir}/silent.webm"
        template = stf_alternative.Template(
            config_path=config_path,
            model=model,
            template_video_path=template_video_path,
            wav_std=False,
            ref_wav=None,
            verbose=True,
        )
        writer = WebmWriter(
            silent_video_path,
            width=result_width,
            height=result_height,
            fps=fps,
            crf=crf,
            audio_sample_rate=16000,
            quiet=False,
        )
        create_silent_video(template, writer)

        silent_frames_path = Path(
            get_frame_dir(preprocess_dir, silent_video_path, ratio=1.0)
        )
        _dump_frames_as_webp(silent_video_path, silent_frames_path)

    # Swap the composite frames for the frames decoded from the silent video.
    shutil.rmtree(template_frames_path, ignore_errors=False)
    silent_frames_path.rename(template_frames_path)