import json
import shutil
import subprocess
import tempfile
from pathlib import Path

import av
import pandas as pd
from PIL import Image

import stf_alternative
from stf_alternative.util import get_crop_mp4_dir, get_frame_dir, get_preprocess_dir
from stf_tools.silent import create_silent_video
from stf_tools.writers import WebmWriter


def alpha_crop_detect(path):
    # Run cropdetect on the extracted alpha plane to find the tight bounding box
    # of the non-transparent region. Returns the last detected "w:h:x:y" string.
    result = subprocess.check_output(
        [
            "bash",
            "-c",
            rf"""ffmpeg -c:v libvpx -i '{path}' -filter_complex "[0:v]alphaextract, cropdetect=limit=0:round=16:reset=0" -f null - 2>&1 | grep -oP 'crop=\K\d+:\d+:\d+:\d+'""",
        ]
    )
    return result.decode().strip().split("\n")[-1]


def crop_resize_overlay(
    path, background_path, crop_range, out, left=0.5, top=0.15, height=0.85, crf=17
):
    # Crop the alpha video to `crop_range`, scale it to the requested height,
    # overlay it on the background at (left, top), and re-encode as VP9 with alpha.
    # `left`, `top`, and `height` may be fractions of the background size (float)
    # or absolute pixel values (int).
    with av.open(path, "r") as f:
        fps = f.streams.video[0].base_rate
    with av.open(background_path, "r") as f:
        background_width, background_height = (
            f.streams.video[0].width,
            f.streams.video[0].height,
        )
    if isinstance(top, float):
        top = int(background_height * top)
    if isinstance(height, float):
        height = int(background_height * height)
    height -= height % 2  # keep dimensions even for yuva420p
    w, h, _, _ = map(int, crop_range.split(":"))
    width = int(height / h * w)
    width -= width % 2
    if isinstance(left, float):
        # A float `left` is the horizontal center of the overlay,
        # as a fraction of the background width.
        left = int(background_width * left) - width // 2
    subprocess.call(
        [
            "bash",
            "-c",
            f"""ffmpeg -y -c:v libvpx -r {fps} -i '{path}' -r {fps} -i '{background_path}' -filter_complex "[0:v]crop={crop_range},scale={width}:{height} [vidi]; [1:v][vidi] overlay={left}:{top}" -crf {crf} -pix_fmt yuva420p -c:v libvpx-vp9 -c:a copy '{out}'""",
        ]
    )
    return background_width, background_height, int(fps), (left, top, height)


def create_template(
    template_video_path,
    background_path,
    out_path,
    config_path,
    reference_face,
    work_root_path,
    checkpoint_path,
    left,
    top,
    height,
    crf=17,
):
    # Composite the alpha template onto the background.
    crop_range = alpha_crop_detect(template_video_path)
    result_width, result_height, fps, (left, top, height) = crop_resize_overlay(
        template_video_path,
        background_path,
        crop_range,
        out_path,
        left=left,
        top=top,
        height=height,
        crf=crf,
    )

    # Preprocess the template and load the model.
    stf_alternative.preprocess_template(
        config_path=config_path,
        template_video_path=template_video_path,
        reference_face=reference_face,
        work_root_path=work_root_path,
        template_frame_ratio=1.0,
        template_video_ratio=[1.0],
        silent_video_path=None,
        callback=None,
        device="cuda:0",
        verbose=True,
        save_frames=False,
    )
    model = stf_alternative.create_model(
        config_path=config_path,
        checkpoint_path=checkpoint_path,
        work_root_path=work_root_path,
        device="cuda:0",
        verbose=True,
        wavlm_path="microsoft/wavlm-large",
    )

    preprocess_dir = Path(get_preprocess_dir(work_root_path, model.args.name))
    crop_mp4_dir = Path(get_crop_mp4_dir(preprocess_dir, template_video_path))
    dataset_dir = crop_mp4_dir / f"{Path(template_video_path).stem}_000"
    template_frames_path = Path(
        get_frame_dir(preprocess_dir, template_video_path, ratio=1.0)
    )

    # Record the geometry of the composited video.
    with open(preprocess_dir / "metadata.json", "w") as f:
        json.dump(
            {
                "fps": fps,
                "width": result_width,
                "height": result_height,
            },
            f,
        )

    # Remap face bounding boxes from template-crop coordinates into
    # composited (background) coordinates.
    df = pd.read_pickle(dataset_dir / "df_fan.pickle")
    w, h, x, y = map(int, crop_range.split(":"))
    scale = height / h
    id_set = set()
    for it in df["cropped_box"]:
        # Rows may share the same box object; remap each object only once.
        if id(it) in id_set:
            continue
        id_set.add(id(it))
        x1, y1, x2, y2 = it
        x1 = (x1 - x) * scale + left
        x2 = (x2 - x) * scale + left
        y1 = (y1 - y) * scale + top
        y2 = (y2 - y) * scale + top
        it[:] = (x1, y1, x2, y2)
    df.to_pickle(dataset_dir / "df_fan.pickle")

    # Dump the composited video's frames as lossless WebP images.
    template_frames_path.mkdir(exist_ok=True, parents=True)
    with av.open(out_path) as container:
        for frame in container.decode(video=0):
            Image.fromarray(frame.to_ndarray(format="rgb24"), mode="RGB").save(
                f"{template_frames_path}/%05d.webp" % frame.index,
                format="webp",
                lossless=True,
            )

    # Render the silent video, then replace the template frames with its frames.
    with tempfile.TemporaryDirectory() as tempdir:
        silent_video_path = f"{tempdir}/silent.webm"
        template = stf_alternative.Template(
            config_path=config_path,
            model=model,
            template_video_path=template_video_path,
            wav_std=False,
            ref_wav=None,
            verbose=True,
        )
        writer = WebmWriter(
            silent_video_path,
            width=result_width,
            height=result_height,
            fps=fps,
            crf=crf,
            audio_sample_rate=16000,
            quiet=False,
        )
        create_silent_video(template, writer)

        silent_frames_path = Path(
            get_frame_dir(preprocess_dir, silent_video_path, ratio=1.0)
        )
        silent_frames_path.mkdir(exist_ok=True, parents=True)
        with av.open(silent_video_path) as container:
            for frame in container.decode(video=0):
                Image.fromarray(frame.to_ndarray(format="rgb24"), mode="RGB").save(
                    f"{silent_frames_path}/%05d.webp" % frame.index,
                    format="webp",
                    lossless=True,
                )

        shutil.rmtree(template_frames_path, ignore_errors=False)
        silent_frames_path.rename(template_frames_path)
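

# Example invocation: a minimal sketch. Every path, the reference face, and the
# checkpoint below are placeholder assumptions, not values shipped with this code.
if __name__ == "__main__":
    create_template(
        template_video_path="template_alpha.webm",  # VP8/VP9 clip with an alpha channel
        background_path="background.webm",          # background the template is overlaid onto
        out_path="composited.webm",
        config_path="config.yaml",
        reference_face="reference_face.png",
        work_root_path="./work",
        checkpoint_path="./checkpoints/model.pth",
        left=0.5,     # horizontal center as a fraction of background width (or absolute pixels)
        top=0.15,     # top offset as a fraction of background height (or absolute pixels)
        height=0.85,  # overlay height as a fraction of background height (or absolute pixels)
        crf=17,
    )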