import glob import json import logging import os.path import shutil from datetime import datetime from pathlib import Path from typing import Annotated, Optional import torch import typer from PIL import Image from tqdm.rich import tqdm from animatediff import __version__, get_dir from animatediff.settings import ModelConfig, get_model_config from animatediff.utils.tagger import get_labels from animatediff.utils.util import (extract_frames, get_resized_image, path_from_cwd, prepare_anime_seg, prepare_groundingDINO, prepare_propainter, prepare_sam_hq, prepare_softsplat) logger = logging.getLogger(__name__) stylize: typer.Typer = typer.Typer( name="stylize", context_settings=dict(help_option_names=["-h", "--help"]), rich_markup_mode="rich", pretty_exceptions_show_locals=False, help="stylize video", ) data_dir = get_dir("data") controlnet_dirs = [ "controlnet_canny", "controlnet_depth", "controlnet_inpaint", "controlnet_ip2p", "controlnet_lineart", "controlnet_lineart_anime", "controlnet_mlsd", "controlnet_normalbae", "controlnet_openpose", "controlnet_scribble", "controlnet_seg", "controlnet_shuffle", "controlnet_softedge", "controlnet_tile", "qr_code_monster_v1", "qr_code_monster_v2", "controlnet_mediapipe_face", "animatediff_controlnet", ] def create_controlnet_dir(controlnet_root): for c in controlnet_dirs: c_dir = controlnet_root.joinpath(c) c_dir.mkdir(parents=True, exist_ok=True) @stylize.command(no_args_is_help=True) def create_config( org_movie: Annotated[ Path, typer.Argument(path_type=Path, file_okay=True, dir_okay=False, exists=True, help="Path to movie file"), ] = ..., config_org: Annotated[ Path, typer.Option( "--config-org", "-c", path_type=Path, dir_okay=False, exists=True, help="Path to original config file", ), ] = Path("config/prompts/prompt_travel.json"), ignore_list: Annotated[ Path, typer.Option( "--ignore-list", "-g", path_type=Path, dir_okay=False, exists=True, help="path to ignore token list file", ), ] = Path("config/prompts/ignore_tokens.txt"), out_dir: Annotated[ Optional[Path], typer.Option( "--out-dir", "-o", path_type=Path, file_okay=False, help="output directory", ), ] = Path("stylize/"), fps: Annotated[ int, typer.Option( "--fps", "-f", min=1, max=120, help="fps", ), ] = 8, duration: Annotated[ int, typer.Option( "--duration", "-d", min=-1, max=3600, help="Video duration in seconds. -1 means that the duration of the input video is used as is", ), ] = -1, offset: Annotated[ int, typer.Option( "--offset", "-of", min=0, max=3600, help="offset in seconds. '-d 30 -of 1200' means to use 1200-1230 seconds of the input video", ), ] = 0, aspect_ratio: Annotated[ float, typer.Option( "--aspect-ratio", "-a", min=-1, max=5.0, help="aspect ratio (width / height). (ex. 512 / 512 = 1.0 , 512 / 768 = 0.6666 , 768 / 512 = 1.5) -1 means that the aspect ratio of the input video is used as is.", ), ] = -1, size_of_short_edge: Annotated[ int, typer.Option( "--short-edge", "-sh", min=100, max=1024, help="size of short edge", ), ] = 512, predicte_interval: Annotated[ int, typer.Option( "--predicte-interval", "-p", min=1, max=120, help="Interval of frames to be predicted", ), ] = 1, general_threshold: Annotated[ float, typer.Option( "--threshold", "-th", min=0.0, max=1.0, help="threshold for general token confidence", ), ] = 0.35, character_threshold: Annotated[ float, typer.Option( "--threshold2", "-th2", min=0.0, max=1.0, help="threshold for character token confidence", ), ] = 0.85, without_confidence: Annotated[ bool, typer.Option( "--no-confidence-format", "-ncf", is_flag=True, help="confidence token format or not. ex. '(close-up:0.57), (monochrome:1.1)' -> 'close-up, monochrome'", ), ] = False, is_no_danbooru_format: Annotated[ bool, typer.Option( "--no-danbooru-format", "-ndf", is_flag=True, help="danbooru token format or not. ex. 'bandaid_on_leg, short_hair' -> 'bandaid on leg, short hair'", ), ] = False, is_img2img: Annotated[ bool, typer.Option( "--img2img", "-i2i", is_flag=True, help="img2img or not(txt2img).", ), ] = False, low_vram: Annotated[ bool, typer.Option( "--low_vram", "-lo", is_flag=True, help="low vram mode", ), ] = False, gradual_latent_hires_fix: Annotated[ bool, typer.Option( "--gradual_latent_hires_fix", "-gh", is_flag=True, help="gradual latent hires fix", ), ] = False, ): """Create a config file for video stylization""" is_danbooru_format = not is_no_danbooru_format with_confidence = not without_confidence logger.info(f"{org_movie=}") logger.info(f"{config_org=}") logger.info(f"{ignore_list=}") logger.info(f"{out_dir=}") logger.info(f"{fps=}") logger.info(f"{duration=}") logger.info(f"{offset=}") logger.info(f"{aspect_ratio=}") logger.info(f"{size_of_short_edge=}") logger.info(f"{predicte_interval=}") logger.info(f"{general_threshold=}") logger.info(f"{character_threshold=}") logger.info(f"{with_confidence=}") logger.info(f"{is_danbooru_format=}") logger.info(f"{is_img2img=}") logger.info(f"{low_vram=}") logger.info(f"{gradual_latent_hires_fix=}") model_config: ModelConfig = get_model_config(config_org) # get a timestamp for the output directory time_str = datetime.now().strftime("%Y-%m-%dT%H-%M-%S") # make the output directory save_dir = out_dir.joinpath(f"{time_str}-{model_config.save_name}") save_dir.mkdir(parents=True, exist_ok=True) logger.info(f"Will save outputs to ./{path_from_cwd(save_dir)}") img2img_dir = save_dir.joinpath("00_img2img") img2img_dir.mkdir(parents=True, exist_ok=True) extract_frames(org_movie, fps, img2img_dir, aspect_ratio, duration, offset, size_of_short_edge, low_vram) controlnet_img_dir = save_dir.joinpath("00_controlnet_image") create_controlnet_dir(controlnet_img_dir) shutil.copytree(img2img_dir, controlnet_img_dir.joinpath("controlnet_openpose"), dirs_exist_ok=True) #shutil.copytree(img2img_dir, controlnet_img_dir.joinpath("controlnet_ip2p"), dirs_exist_ok=True) black_list = [] if ignore_list.is_file(): with open(ignore_list) as f: black_list = [s.strip() for s in f.readlines()] model_config.prompt_map = get_labels( frame_dir=img2img_dir, interval=predicte_interval, general_threshold=general_threshold, character_threshold=character_threshold, ignore_tokens=black_list, with_confidence=with_confidence, is_danbooru_format=is_danbooru_format, is_cpu = False, ) model_config.head_prompt = "" model_config.tail_prompt = "" model_config.controlnet_map["input_image_dir"] = os.path.relpath(controlnet_img_dir.absolute(), data_dir) model_config.controlnet_map["is_loop"] = False model_config.lora_map={} model_config.motion_lora_map={} model_config.controlnet_map["max_samples_on_vram"] = 0 model_config.controlnet_map["max_models_on_vram"] = 0 model_config.controlnet_map["controlnet_openpose"] = { "enable": True, "use_preprocessor":True, "guess_mode":False, "controlnet_conditioning_scale": 1.0, "control_guidance_start": 0.0, "control_guidance_end": 1.0, "control_scale_list":[], "control_region_list":[] } model_config.controlnet_map["controlnet_ip2p"] = { "enable": True, "use_preprocessor":True, "guess_mode":False, "controlnet_conditioning_scale": 0.5, "control_guidance_start": 0.0, "control_guidance_end": 1.0, "control_scale_list":[], "control_region_list":[] } for m in model_config.controlnet_map: if isinstance(model_config.controlnet_map[m] ,dict): if "control_scale_list" in model_config.controlnet_map[m]: model_config.controlnet_map[m]["control_scale_list"]=[] ip_adapter_dir = save_dir.joinpath("00_ipadapter") ip_adapter_dir.mkdir(parents=True, exist_ok=True) model_config.ip_adapter_map = { "enable": True, "input_image_dir": os.path.relpath(ip_adapter_dir.absolute(), data_dir), "prompt_fixed_ratio": 0.5, "save_input_image": True, "resized_to_square": False, "scale": 0.5, "is_full_face": False, "is_plus_face": False, "is_plus": True, "is_light": False } model_config.img2img_map = { "enable": is_img2img, "init_img_dir" : os.path.relpath(img2img_dir.absolute(), data_dir), "save_init_image": True, "denoising_strength" : 0.7 } model_config.region_map = { } model_config.gradual_latent_hires_fix_map = { "enable" : True, "scale" : { "0": 0.5, "0.7": 1.0 }, "reverse_steps": 5, "noise_add_count": 3 } model_config.output = { "format" : "mp4", "fps" : fps, "encode_param":{ "crf": 10 } } img = Image.open( img2img_dir.joinpath("00000000.png") ) W, H = img.size base_size = 768 if gradual_latent_hires_fix else 512 if W < H: width = base_size height = int(base_size * H/W) else: width = int(base_size * W/H) height = base_size width = int(width//8*8) height = int(height//8*8) length = len(glob.glob( os.path.join(img2img_dir, "[0-9]*.png"), recursive=False)) model_config.stylize_config={ "original_video":{ "path":org_movie, "aspect_ratio":aspect_ratio, "offset":offset, }, "create_mask": [ "person" ], "composite": { "fg_list": [ { "path" : " absolute path to frame dir ", "mask_path" : " absolute path to mask dir (this is optional) ", "mask_prompt" : "person" }, { "path" : " absolute path to frame dir ", "mask_path" : " absolute path to mask dir (this is optional) ", "mask_prompt" : "cat" }, ], "bg_frame_dir": "Absolute path to the BG frame directory", "hint": "" }, "0":{ "width": width, "height": height, "length": length, "context": 16, "overlap": 16//4, "stride": 0, }, "1":{ "steps": model_config.steps, "guidance_scale": model_config.guidance_scale, "width": int(width * 1.5 //8*8), "height": int(height * 1.5 //8*8), "length": length, "context": 8, "overlap": 8//4, "stride": 0, "controlnet_tile":{ "enable": True, "use_preprocessor":True, "guess_mode":False, "controlnet_conditioning_scale": 1.0, "control_guidance_start": 0.0, "control_guidance_end": 1.0, "control_scale_list":[] }, "controlnet_ip2p": { "enable": False, "use_preprocessor":True, "guess_mode":False, "controlnet_conditioning_scale": 0.5, "control_guidance_start": 0.0, "control_guidance_end": 1.0, "control_scale_list":[] }, "ip_adapter": False, "reference": False, "img2img": False, "interpolation_multiplier": 1 } } if gradual_latent_hires_fix: model_config.stylize_config.pop("1") save_config_path = save_dir.joinpath("prompt.json") save_config_path.write_text(model_config.json(indent=4), encoding="utf-8") logger.info(f"config = { save_config_path }") logger.info(f"stylize_dir = { save_dir }") logger.info(f"!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!") logger.info(f"Hint. Edit the config file before starting the generation") logger.info(f"!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!") logger.info(f"1. Change 'path' and 'motion_module' as needed") logger.info(f"2. Enter the 'head_prompt' or 'tail_prompt' with your preferred prompt, quality prompt, lora trigger word, or any other prompt you wish to add.") logger.info(f"3. Change 'n_prompt' as needed") logger.info(f"4. Add the lora you need to 'lora_map'") logger.info(f"5. If you do not like the default settings, edit 'ip_adapter_map' or 'controlnet_map'. \nIf you want to change the controlnet type, you need to replace the input image.") logger.info(f"6. Change 'stylize_config' as needed. By default, it is generated twice: once for normal generation and once for upscaling.\nIf you don't need upscaling, delete the whole '1'.") logger.info(f"7. Change 'output' as needed. Changing the 'fps' at this timing is not recommended as it will change the playback speed.\nIf you want to change the fps, specify it with the create-config option") @stylize.command(no_args_is_help=True) def generate( stylize_dir: Annotated[ Path, typer.Argument(path_type=Path, file_okay=False, dir_okay=True, exists=True, help="Path to stylize dir"), ] = ..., length: Annotated[ int, typer.Option( "--length", "-L", min=-1, max=9999, help="Number of frames to generate. -1 means that the value in the config file is referenced.", rich_help_panel="Generation", ), ] = -1, frame_offset: Annotated[ int, typer.Option( "--frame-offset", "-FO", min=0, max=999999, help="Frame offset at generation.", rich_help_panel="Generation", ), ] = 0, ): """Run video stylization""" from animatediff.cli import generate time_str = datetime.now().strftime("%Y-%m-%dT%H-%M-%S") config_org = stylize_dir.joinpath("prompt.json") model_config: ModelConfig = get_model_config(config_org) if length == -1: length = model_config.stylize_config["0"]["length"] model_config.stylize_config["0"]["length"] = min(model_config.stylize_config["0"]["length"] - frame_offset, length) if "1" in model_config.stylize_config: model_config.stylize_config["1"]["length"] = min(model_config.stylize_config["1"]["length"] - frame_offset, length) if frame_offset > 0: #controlnet org_controlnet_img_dir = data_dir.joinpath( model_config.controlnet_map["input_image_dir"] ) new_controlnet_img_dir = org_controlnet_img_dir.parent / "00_tmp_controlnet_image" if new_controlnet_img_dir.is_dir(): shutil.rmtree(new_controlnet_img_dir) new_controlnet_img_dir.mkdir(parents=True, exist_ok=True) for c in controlnet_dirs: src_dir = org_controlnet_img_dir.joinpath(c) dst_dir = new_controlnet_img_dir.joinpath(c) if src_dir.is_dir(): dst_dir.mkdir(parents=True, exist_ok=True) frame_length = model_config.stylize_config["0"]["length"] src_imgs = sorted(glob.glob( os.path.join(src_dir, "[0-9]*.png"), recursive=False)) for img in src_imgs: n = int(Path(img).stem) if n in range(frame_offset, frame_offset + frame_length): dst_img_path = dst_dir.joinpath( f"{n-frame_offset:08d}.png" ) shutil.copy(img, dst_img_path) #img2img org_img2img_img_dir = data_dir.joinpath( model_config.img2img_map["init_img_dir"] ) new_img2img_img_dir = org_img2img_img_dir.parent / "00_tmp_init_img_dir" if new_img2img_img_dir.is_dir(): shutil.rmtree(new_img2img_img_dir) new_img2img_img_dir.mkdir(parents=True, exist_ok=True) src_dir = org_img2img_img_dir dst_dir = new_img2img_img_dir if src_dir.is_dir(): dst_dir.mkdir(parents=True, exist_ok=True) frame_length = model_config.stylize_config["0"]["length"] src_imgs = sorted(glob.glob( os.path.join(src_dir, "[0-9]*.png"), recursive=False)) for img in src_imgs: n = int(Path(img).stem) if n in range(frame_offset, frame_offset + frame_length): dst_img_path = dst_dir.joinpath( f"{n-frame_offset:08d}.png" ) shutil.copy(img, dst_img_path) new_prompt_map = {} for p in model_config.prompt_map: n = int(p) if n in range(frame_offset, frame_offset + frame_length): new_prompt_map[str(n-frame_offset)]=model_config.prompt_map[p] model_config.prompt_map = new_prompt_map model_config.controlnet_map["input_image_dir"] = os.path.relpath(new_controlnet_img_dir.absolute(), data_dir) model_config.img2img_map["init_img_dir"] = os.path.relpath(new_img2img_img_dir.absolute(), data_dir) tmp_config_path = stylize_dir.joinpath("prompt_tmp.json") tmp_config_path.write_text(model_config.json(indent=4), encoding="utf-8") config_org = tmp_config_path output_0_dir = generate( config_path=config_org, width=model_config.stylize_config["0"]["width"], height=model_config.stylize_config["0"]["height"], length=model_config.stylize_config["0"]["length"], context=model_config.stylize_config["0"]["context"], overlap=model_config.stylize_config["0"]["overlap"], stride=model_config.stylize_config["0"]["stride"], out_dir=stylize_dir ) torch.cuda.empty_cache() output_0_dir = output_0_dir.rename(output_0_dir.parent / f"{time_str}_{0:02d}") if "1" not in model_config.stylize_config: logger.info(f"Stylized results are output to {output_0_dir}") return logger.info(f"Intermediate files have been output to {output_0_dir}") output_0_img_dir = glob.glob( os.path.join(output_0_dir, "00-[0-9]*"), recursive=False)[0] interpolation_multiplier = 1 if "interpolation_multiplier" in model_config.stylize_config["1"]: interpolation_multiplier = model_config.stylize_config["1"]["interpolation_multiplier"] if interpolation_multiplier > 1: from animatediff.rife.rife import rife_interpolate rife_img_dir = stylize_dir.joinpath(f"{1:02d}_rife_frame") if rife_img_dir.is_dir(): shutil.rmtree(rife_img_dir) rife_img_dir.mkdir(parents=True, exist_ok=True) rife_interpolate(output_0_img_dir, rife_img_dir, interpolation_multiplier) model_config.stylize_config["1"]["length"] *= interpolation_multiplier if model_config.output: model_config.output["fps"] *= interpolation_multiplier if model_config.prompt_map: model_config.prompt_map = { str(int(i)*interpolation_multiplier): model_config.prompt_map[i] for i in model_config.prompt_map } output_0_img_dir = rife_img_dir controlnet_img_dir = stylize_dir.joinpath("01_controlnet_image") img2img_dir = stylize_dir.joinpath("01_img2img") img2img_dir.mkdir(parents=True, exist_ok=True) create_controlnet_dir(controlnet_img_dir) ip2p_for_upscale = model_config.stylize_config["1"]["controlnet_ip2p"]["enable"] ip_adapter_for_upscale = model_config.stylize_config["1"]["ip_adapter"] ref_for_upscale = model_config.stylize_config["1"]["reference"] shutil.copytree(output_0_img_dir, controlnet_img_dir.joinpath("controlnet_tile"), dirs_exist_ok=True) if ip2p_for_upscale: shutil.copytree(controlnet_img_dir.joinpath("controlnet_tile"), controlnet_img_dir.joinpath("controlnet_ip2p"), dirs_exist_ok=True) shutil.copytree(controlnet_img_dir.joinpath("controlnet_tile"), img2img_dir, dirs_exist_ok=True) model_config.controlnet_map["input_image_dir"] = os.path.relpath(controlnet_img_dir.absolute(), data_dir) model_config.controlnet_map["controlnet_tile"] = model_config.stylize_config["1"]["controlnet_tile"] model_config.controlnet_map["controlnet_ip2p"] = model_config.stylize_config["1"]["controlnet_ip2p"] if "controlnet_ref" in model_config.controlnet_map: model_config.controlnet_map["controlnet_ref"]["enable"] = ref_for_upscale model_config.ip_adapter_map["enable"] = ip_adapter_for_upscale for r in model_config.region_map: reg = model_config.region_map[r] if "condition" in reg: if "ip_adapter_map" in reg["condition"]: reg["condition"]["ip_adapter_map"]["enable"] = ip_adapter_for_upscale model_config.steps = model_config.stylize_config["1"]["steps"] if "steps" in model_config.stylize_config["1"] else model_config.steps model_config.guidance_scale = model_config.stylize_config["1"]["guidance_scale"] if "guidance_scale" in model_config.stylize_config["1"] else model_config.guidance_scale model_config.img2img_map["enable"] = model_config.stylize_config["1"]["img2img"] if model_config.img2img_map["enable"]: model_config.img2img_map["init_img_dir"] = os.path.relpath(Path(output_0_img_dir).absolute(), data_dir) save_config_path = stylize_dir.joinpath("prompt_01.json") save_config_path.write_text(model_config.json(indent=4), encoding="utf-8") output_1_dir = generate( config_path=save_config_path, width=model_config.stylize_config["1"]["width"], height=model_config.stylize_config["1"]["height"], length=model_config.stylize_config["1"]["length"], context=model_config.stylize_config["1"]["context"], overlap=model_config.stylize_config["1"]["overlap"], stride=model_config.stylize_config["1"]["stride"], out_dir=stylize_dir ) output_1_dir = output_1_dir.rename(output_1_dir.parent / f"{time_str}_{1:02d}") logger.info(f"Stylized results are output to {output_1_dir}") @stylize.command(no_args_is_help=True) def interpolate( frame_dir: Annotated[ Path, typer.Argument(path_type=Path, file_okay=False, dir_okay=True, exists=True, help="Path to frame dir"), ] = ..., interpolation_multiplier: Annotated[ int, typer.Option( "--interpolation_multiplier", "-m", min=1, max=10, help="interpolation_multiplier", ), ] = 1, ): """Interpolation with original frames. This function does not work well if the shape of the subject is changed from the original video. Large movements can also ruin the picture.(Since this command is experimental, it is better to use other interpolation methods in most cases.)""" try: import cupy except: logger.info(f"!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!") logger.info(f"cupy is required to run interpolate") logger.info(f"Your CUDA version is {torch.version.cuda}") logger.info(f"Please find the installation method of cupy for your CUDA version from the following URL") logger.info(f"https://docs.cupy.dev/en/latest/install.html#installing-cupy-from-pypi") logger.info(f"!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!") return prepare_softsplat() time_str = datetime.now().strftime("%Y-%m-%dT%H-%M-%S") config_org = frame_dir.parent.joinpath("prompt.json") model_config: ModelConfig = get_model_config(config_org) if "original_video" in model_config.stylize_config: org_video = Path(model_config.stylize_config["original_video"]["path"]) offset = model_config.stylize_config["original_video"]["offset"] aspect_ratio = model_config.stylize_config["original_video"]["aspect_ratio"] else: logger.warn('!!! The following parameters are required !!!') logger.warn('"stylize_config": {') logger.warn(' "original_video": {') logger.warn(' "path": "C:\\my_movie\\test.mp4",') logger.warn(' "aspect_ratio": 0.6666,') logger.warn(' "offset": 0') logger.warn(' },') raise ValueError('model_config.stylize_config["original_video"] not found') save_dir = frame_dir.parent.joinpath(f"optflow_{time_str}") org_frame_dir = save_dir.joinpath("org_frame") org_frame_dir.mkdir(parents=True, exist_ok=True) stylize_frame = sorted(glob.glob( os.path.join(frame_dir, "[0-9]*.png"), recursive=False)) stylize_frame_num = len(stylize_frame) duration = int(stylize_frame_num / model_config.output["fps"]) + 1 extract_frames(org_video, model_config.output["fps"] * interpolation_multiplier, org_frame_dir,aspect_ratio,duration,offset) W, H = Image.open(stylize_frame[0]).size org_frame = sorted(glob.glob( os.path.join(org_frame_dir, "[0-9]*.png"), recursive=False)) for org in tqdm(org_frame): img = get_resized_image(org, W, H) img.save(org) output_dir = save_dir.joinpath("warp_img") output_dir.mkdir(parents=True, exist_ok=True) from animatediff.softmax_splatting.run import estimate2 for sty1,sty2 in tqdm(zip(stylize_frame,stylize_frame[1:]), total=len(stylize_frame[1:])): sty1 = Path(sty1) sty2 = Path(sty2) head = int(sty1.stem) sty1_img = Image.open(sty1) sty2_img = Image.open(sty2) guide_frames=[org_frame_dir.joinpath(f"{g:08d}.png") for g in range(head*interpolation_multiplier, (head+1)*interpolation_multiplier)] guide_frames=[Image.open(g) for g in guide_frames] result = estimate2(sty1_img, sty2_img, guide_frames, "data/models/softsplat/softsplat-lf") shutil.copy( frame_dir.joinpath(f"{head:08d}.png"), output_dir.joinpath(f"{head*interpolation_multiplier:08d}.png")) offset = head*interpolation_multiplier + 1 for i, r in enumerate(result): r.save( output_dir.joinpath(f"{offset+i:08d}.png") ) from animatediff.generate import save_output frames = sorted(glob.glob( os.path.join(output_dir, "[0-9]*.png"), recursive=False)) out_images = [] for f in frames: out_images.append(Image.open(f)) model_config.output["fps"] *= interpolation_multiplier out_file = save_dir.joinpath(f"01_{model_config.output['fps']}fps") save_output(out_images,output_dir,out_file,model_config.output,True,save_frames=None,save_video=None) out_file = save_dir.joinpath(f"00_original") save_output(out_images,org_frame_dir,out_file,model_config.output,True,save_frames=None,save_video=None) @stylize.command(no_args_is_help=True) def create_mask( stylize_dir: Annotated[ Path, typer.Argument(path_type=Path, file_okay=False, dir_okay=True, exists=True, help="Path to stylize dir"), ] = ..., frame_dir: Annotated[ Path, typer.Option( "--frame_dir", "-f", path_type=Path, file_okay=False, help="Path to source frames directory. default is 'STYLIZE_DIR/00_img2img'", ), ] = None, box_threshold: Annotated[ float, typer.Option( "--box_threshold", "-b", min=0.0, max=1.0, help="box_threshold", rich_help_panel="create mask", ), ] = 0.3, text_threshold: Annotated[ float, typer.Option( "--text_threshold", "-t", min=0.0, max=1.0, help="text_threshold", rich_help_panel="create mask", ), ] = 0.25, mask_padding: Annotated[ int, typer.Option( "--mask_padding", "-mp", min=-100, max=100, help="padding pixel value", rich_help_panel="create mask", ), ] = 0, no_gb: Annotated[ bool, typer.Option( "--no_gb", "-ng", is_flag=True, help="no green back", rich_help_panel="create mask", ), ] = False, no_crop: Annotated[ bool, typer.Option( "--no_crop", "-nc", is_flag=True, help="no crop", rich_help_panel="create mask", ), ] = False, use_rembg: Annotated[ bool, typer.Option( "--use_rembg", "-rem", is_flag=True, help="use [rembg] instead of [Sam+GroundingDINO]", rich_help_panel="create mask", ), ] = False, use_animeseg: Annotated[ bool, typer.Option( "--use_animeseg", "-anim", is_flag=True, help="use [anime-segmentation] instead of [Sam+GroundingDINO]", rich_help_panel="create mask", ), ] = False, low_vram: Annotated[ bool, typer.Option( "--low_vram", "-lo", is_flag=True, help="low vram mode", rich_help_panel="create mask/tag", ), ] = False, ignore_list: Annotated[ Path, typer.Option( "--ignore-list", "-g", path_type=Path, dir_okay=False, exists=True, help="path to ignore token list file", rich_help_panel="create tag", ), ] = Path("config/prompts/ignore_tokens.txt"), predicte_interval: Annotated[ int, typer.Option( "--predicte-interval", "-p", min=1, max=120, help="Interval of frames to be predicted", rich_help_panel="create tag", ), ] = 1, general_threshold: Annotated[ float, typer.Option( "--threshold", "-th", min=0.0, max=1.0, help="threshold for general token confidence", rich_help_panel="create tag", ), ] = 0.35, character_threshold: Annotated[ float, typer.Option( "--threshold2", "-th2", min=0.0, max=1.0, help="threshold for character token confidence", rich_help_panel="create tag", ), ] = 0.85, without_confidence: Annotated[ bool, typer.Option( "--no-confidence-format", "-ncf", is_flag=True, help="confidence token format or not. ex. '(close-up:0.57), (monochrome:1.1)' -> 'close-up, monochrome'", rich_help_panel="create tag", ), ] = False, is_no_danbooru_format: Annotated[ bool, typer.Option( "--no-danbooru-format", "-ndf", is_flag=True, help="danbooru token format or not. ex. 'bandaid_on_leg, short_hair' -> 'bandaid on leg, short hair'", rich_help_panel="create tag", ), ] = False, ): """Create mask from prompt""" from animatediff.utils.mask import (create_bg, create_fg, crop_frames, crop_mask_list, save_crop_info) from animatediff.utils.mask_animseg import animseg_create_fg from animatediff.utils.mask_rembg import rembg_create_fg is_danbooru_format = not is_no_danbooru_format with_confidence = not without_confidence if use_animeseg and use_rembg: raise ValueError("use_animeseg and use_rembg cannot be enabled at the same time") prepare_sam_hq(low_vram) prepare_groundingDINO() prepare_propainter() if use_animeseg: prepare_anime_seg() time_str = datetime.now().strftime("%Y-%m-%dT%H-%M-%S") config_org = stylize_dir.joinpath("prompt.json") model_config: ModelConfig = get_model_config(config_org) if frame_dir is None: frame_dir = stylize_dir / "00_img2img" if not frame_dir.is_dir(): raise ValueError(f'{frame_dir=} does not exist.') is_img2img = model_config.img2img_map["enable"] if "enable" in model_config.img2img_map else False create_mask_list = [] if "create_mask" in model_config.stylize_config: create_mask_list = model_config.stylize_config["create_mask"] else: raise ValueError('model_config.stylize_config["create_mask"] not found') output_list = [] stylize_frame = sorted(glob.glob( os.path.join(frame_dir, "[0-9]*.png"), recursive=False)) frame_len = len(stylize_frame) W, H = Image.open(stylize_frame[0]).size org_frame_size = (H,W) masked_area = [None for f in range(frame_len)] if use_rembg: create_mask_list = ["rembg"] elif use_animeseg: create_mask_list = ["anime-segmentation"] for i,mask_token in enumerate(create_mask_list): fg_dir = stylize_dir.joinpath(f"fg_{i:02d}_{time_str}") fg_dir.mkdir(parents=True, exist_ok=True) create_controlnet_dir( fg_dir / "00_controlnet_image" ) fg_masked_dir = fg_dir / "00_img2img" fg_masked_dir.mkdir(parents=True, exist_ok=True) fg_mask_dir = fg_dir / "00_mask" fg_mask_dir.mkdir(parents=True, exist_ok=True) if use_animeseg: masked_area = animseg_create_fg( frame_dir=frame_dir, output_dir=fg_masked_dir, output_mask_dir=fg_mask_dir, masked_area_list=masked_area, mask_padding=mask_padding, bg_color=None if no_gb else (0,255,0), ) elif use_rembg: masked_area = rembg_create_fg( frame_dir=frame_dir, output_dir=fg_masked_dir, output_mask_dir=fg_mask_dir, masked_area_list=masked_area, mask_padding=mask_padding, bg_color=None if no_gb else (0,255,0), ) else: masked_area = create_fg( mask_token=mask_token, frame_dir=frame_dir, output_dir=fg_masked_dir, output_mask_dir=fg_mask_dir, masked_area_list=masked_area, box_threshold=box_threshold, text_threshold=text_threshold, mask_padding=mask_padding, sam_checkpoint= "data/models/SAM/sam_hq_vit_h.pth" if not low_vram else "data/models/SAM/sam_hq_vit_b.pth", bg_color=None if no_gb else (0,255,0), ) if not no_crop: frame_size_hw = (masked_area[0].shape[1],masked_area[0].shape[2]) cropped_mask_list, mask_pos_list, crop_size_hw = crop_mask_list(masked_area) logger.info(f"crop fg_masked_dir") crop_frames(mask_pos_list, crop_size_hw, fg_masked_dir) logger.info(f"crop fg_mask_dir") crop_frames(mask_pos_list, crop_size_hw, fg_mask_dir) save_crop_info(mask_pos_list, crop_size_hw, frame_size_hw, fg_dir / "crop_info.json") else: crop_size_hw = None logger.info(f"mask from [{mask_token}] are output to {fg_dir}") shutil.copytree(fg_masked_dir, fg_dir / "00_controlnet_image/controlnet_openpose", dirs_exist_ok=True) #shutil.copytree(fg_masked_dir, fg_dir / "00_controlnet_image/controlnet_ip2p", dirs_exist_ok=True) if crop_size_hw: if crop_size_hw[0] == 0 or crop_size_hw[1] == 0: crop_size_hw = None output_list.append((fg_dir, crop_size_hw)) torch.cuda.empty_cache() bg_dir = stylize_dir.joinpath(f"bg_{time_str}") bg_dir.mkdir(parents=True, exist_ok=True) create_controlnet_dir( bg_dir / "00_controlnet_image" ) bg_inpaint_dir = bg_dir / "00_img2img" bg_inpaint_dir.mkdir(parents=True, exist_ok=True) create_bg(frame_dir, bg_inpaint_dir, masked_area, use_half = True, raft_iter = 20, subvideo_length=80 if not low_vram else 50, neighbor_length=10 if not low_vram else 8, ref_stride=10 if not low_vram else 8, low_vram = low_vram, ) logger.info(f"background are output to {bg_dir}") shutil.copytree(bg_inpaint_dir, bg_dir / "00_controlnet_image/controlnet_tile", dirs_exist_ok=True) shutil.copytree(bg_inpaint_dir, bg_dir / "00_controlnet_image/controlnet_ip2p", dirs_exist_ok=True) output_list.append((bg_dir,None)) torch.cuda.empty_cache() black_list = [] if ignore_list.is_file(): with open(ignore_list) as f: black_list = [s.strip() for s in f.readlines()] for output, size in output_list: model_config.prompt_map = get_labels( frame_dir= output / "00_img2img", interval=predicte_interval, general_threshold=general_threshold, character_threshold=character_threshold, ignore_tokens=black_list, with_confidence=with_confidence, is_danbooru_format=is_danbooru_format, is_cpu = False, ) model_config.controlnet_map["input_image_dir"] = os.path.relpath((output / "00_controlnet_image" ).absolute(), data_dir) model_config.img2img_map["init_img_dir"] = os.path.relpath((output / "00_img2img" ).absolute(), data_dir) if size is not None: h, w = size height = 1024 * (h/(h+w)) width = 1024 * (w/(h+w)) height = int(height//8 * 8) width = int(width//8 * 8) model_config.stylize_config["0"]["width"]=width model_config.stylize_config["0"]["height"]=height if "1" in model_config.stylize_config: model_config.stylize_config["1"]["width"]=int(width * 1.25 //8*8) model_config.stylize_config["1"]["height"]=int(height * 1.25 //8*8) else: height, width = org_frame_size model_config.stylize_config["0"]["width"]=width model_config.stylize_config["0"]["height"]=height if "1" in model_config.stylize_config: model_config.stylize_config["1"]["width"]=int(width * 1.25 //8*8) model_config.stylize_config["1"]["height"]=int(height * 1.25 //8*8) save_config_path = output.joinpath("prompt.json") save_config_path.write_text(model_config.json(indent=4), encoding="utf-8") @stylize.command(no_args_is_help=True) def composite( stylize_dir: Annotated[ Path, typer.Argument(path_type=Path, file_okay=False, dir_okay=True, exists=True, help="Path to stylize dir"), ] = ..., box_threshold: Annotated[ float, typer.Option( "--box_threshold", "-b", min=0.0, max=1.0, help="box_threshold", rich_help_panel="create mask", ), ] = 0.3, text_threshold: Annotated[ float, typer.Option( "--text_threshold", "-t", min=0.0, max=1.0, help="text_threshold", rich_help_panel="create mask", ), ] = 0.25, mask_padding: Annotated[ int, typer.Option( "--mask_padding", "-mp", min=-100, max=100, help="padding pixel value", rich_help_panel="create mask", ), ] = 0, use_rembg: Annotated[ bool, typer.Option( "--use_rembg", "-rem", is_flag=True, help="use \[rembg] instead of \[Sam+GroundingDINO]", rich_help_panel="create mask", ), ] = False, use_animeseg: Annotated[ bool, typer.Option( "--use_animeseg", "-anim", is_flag=True, help="use \[anime-segmentation] instead of \[Sam+GroundingDINO]", rich_help_panel="create mask", ), ] = False, low_vram: Annotated[ bool, typer.Option( "--low_vram", "-lo", is_flag=True, help="low vram mode", rich_help_panel="create mask/tag", ), ] = False, is_simple_composite: Annotated[ bool, typer.Option( "--simple_composite", "-si", is_flag=True, help="simple composite", rich_help_panel="composite", ), ] = False, ): """composite FG and BG""" from animatediff.utils.composite import composite, simple_composite from animatediff.utils.mask import (create_fg, load_frame_list, load_mask_list, restore_position) from animatediff.utils.mask_animseg import animseg_create_fg from animatediff.utils.mask_rembg import rembg_create_fg if use_animeseg and use_rembg: raise ValueError("use_animeseg and use_rembg cannot be enabled at the same time") prepare_sam_hq(low_vram) if use_animeseg: prepare_anime_seg() time_str = datetime.now().strftime("%Y-%m-%dT%H-%M-%S") config_org = stylize_dir.joinpath("prompt.json") model_config: ModelConfig = get_model_config(config_org) composite_config = {} if "composite" in model_config.stylize_config: composite_config = model_config.stylize_config["composite"] else: raise ValueError('model_config.stylize_config["composite"] not found') save_dir = stylize_dir.joinpath(f"cp_{time_str}") save_dir.mkdir(parents=True, exist_ok=True) save_config_path = save_dir.joinpath("prompt.json") save_config_path.write_text(model_config.json(indent=4), encoding="utf-8") bg_dir = composite_config["bg_frame_dir"] bg_dir = Path(bg_dir) if not bg_dir.is_dir(): raise ValueError('model_config.stylize_config["composite"]["bg_frame_dir"] not valid') frame_len = len(sorted(glob.glob( os.path.join(bg_dir, "[0-9]*.png"), recursive=False))) fg_list = composite_config["fg_list"] for i, fg_param in enumerate(fg_list): mask_token = fg_param["mask_prompt"] frame_dir = Path(fg_param["path"]) if not frame_dir.is_dir(): logger.warn(f"{frame_dir=} not valid -> skip") continue mask_dir = Path(fg_param["mask_path"]) if not mask_dir.is_dir(): logger.info(f"{mask_dir=} not valid -> create mask") fg_tmp_dir = save_dir.joinpath(f"fg_{i:02d}_{time_str}") fg_tmp_dir.mkdir(parents=True, exist_ok=True) masked_area_list = [None for f in range(frame_len)] if use_animeseg: mask_list = animseg_create_fg( frame_dir=frame_dir, output_dir=fg_tmp_dir, output_mask_dir=None, masked_area_list=masked_area_list, mask_padding=mask_padding, ) elif use_rembg: mask_list = rembg_create_fg( frame_dir=frame_dir, output_dir=fg_tmp_dir, output_mask_dir=None, masked_area_list=masked_area_list, mask_padding=mask_padding, ) else: mask_list = create_fg( mask_token=mask_token, frame_dir=frame_dir, output_dir=fg_tmp_dir, output_mask_dir=None, masked_area_list=masked_area_list, box_threshold=box_threshold, text_threshold=text_threshold, mask_padding=mask_padding, sam_checkpoint= "data/models/SAM/sam_hq_vit_h.pth" if not low_vram else "data/models/SAM/sam_hq_vit_b.pth", ) else: logger.info(f"use {mask_dir=} as mask") masked_area_list = [None for f in range(frame_len)] mask_list = load_mask_list(mask_dir, masked_area_list, mask_padding) mask_list = [ m.transpose([1,2,0]) if m is not None else m for m in mask_list] crop_info_path = frame_dir.parent.parent / "crop_info.json" crop_info={} if crop_info_path.is_file(): with open(crop_info_path, mode="rt", encoding="utf-8") as f: crop_info = json.load(f) mask_list = restore_position(mask_list, crop_info) fg_list = [None for f in range(frame_len)] fg_list = load_frame_list(frame_dir, fg_list, crop_info) output_dir = save_dir.joinpath(f"bg_{i:02d}_{time_str}") output_dir.mkdir(parents=True, exist_ok=True) if is_simple_composite: simple_composite(bg_dir, fg_list, output_dir, mask_list) else: composite(bg_dir, fg_list, output_dir, mask_list) bg_dir = output_dir from animatediff.generate import save_output frames = sorted(glob.glob( os.path.join(bg_dir, "[0-9]*.png"), recursive=False)) out_images = [] for f in frames: out_images.append(Image.open(f)) out_file = save_dir.joinpath(f"composite") save_output(out_images,bg_dir,out_file,model_config.output,True,save_frames=None,save_video=None) logger.info(f"output to {out_file}") @stylize.command(no_args_is_help=True) def create_region( stylize_dir: Annotated[ Path, typer.Argument(path_type=Path, file_okay=False, dir_okay=True, exists=True, help="Path to stylize dir"), ] = ..., frame_dir: Annotated[ Path, typer.Option( "--frame_dir", "-f", path_type=Path, file_okay=False, help="Path to source frames directory. default is 'STYLIZE_DIR/00_img2img'", ), ] = None, box_threshold: Annotated[ float, typer.Option( "--box_threshold", "-b", min=0.0, max=1.0, help="box_threshold", rich_help_panel="create mask", ), ] = 0.3, text_threshold: Annotated[ float, typer.Option( "--text_threshold", "-t", min=0.0, max=1.0, help="text_threshold", rich_help_panel="create mask", ), ] = 0.25, mask_padding: Annotated[ int, typer.Option( "--mask_padding", "-mp", min=-100, max=100, help="padding pixel value", rich_help_panel="create mask", ), ] = 0, use_rembg: Annotated[ bool, typer.Option( "--use_rembg", "-rem", is_flag=True, help="use [rembg] instead of [Sam+GroundingDINO]", rich_help_panel="create mask", ), ] = False, use_animeseg: Annotated[ bool, typer.Option( "--use_animeseg", "-anim", is_flag=True, help="use [anime-segmentation] instead of [Sam+GroundingDINO]", rich_help_panel="create mask", ), ] = False, low_vram: Annotated[ bool, typer.Option( "--low_vram", "-lo", is_flag=True, help="low vram mode", rich_help_panel="create mask/tag", ), ] = False, ignore_list: Annotated[ Path, typer.Option( "--ignore-list", "-g", path_type=Path, dir_okay=False, exists=True, help="path to ignore token list file", rich_help_panel="create tag", ), ] = Path("config/prompts/ignore_tokens.txt"), predicte_interval: Annotated[ int, typer.Option( "--predicte-interval", "-p", min=1, max=120, help="Interval of frames to be predicted", rich_help_panel="create tag", ), ] = 1, general_threshold: Annotated[ float, typer.Option( "--threshold", "-th", min=0.0, max=1.0, help="threshold for general token confidence", rich_help_panel="create tag", ), ] = 0.35, character_threshold: Annotated[ float, typer.Option( "--threshold2", "-th2", min=0.0, max=1.0, help="threshold for character token confidence", rich_help_panel="create tag", ), ] = 0.85, without_confidence: Annotated[ bool, typer.Option( "--no-confidence-format", "-ncf", is_flag=True, help="confidence token format or not. ex. '(close-up:0.57), (monochrome:1.1)' -> 'close-up, monochrome'", rich_help_panel="create tag", ), ] = False, is_no_danbooru_format: Annotated[ bool, typer.Option( "--no-danbooru-format", "-ndf", is_flag=True, help="danbooru token format or not. ex. 'bandaid_on_leg, short_hair' -> 'bandaid on leg, short hair'", rich_help_panel="create tag", ), ] = False, ): """Create region from prompt""" from animatediff.utils.mask import create_bg, create_fg from animatediff.utils.mask_animseg import animseg_create_fg from animatediff.utils.mask_rembg import rembg_create_fg is_danbooru_format = not is_no_danbooru_format with_confidence = not without_confidence if use_animeseg and use_rembg: raise ValueError("use_animeseg and use_rembg cannot be enabled at the same time") prepare_sam_hq(low_vram) prepare_groundingDINO() prepare_propainter() if use_animeseg: prepare_anime_seg() time_str = datetime.now().strftime("%Y-%m-%dT%H-%M-%S") config_org = stylize_dir.joinpath("prompt.json") model_config: ModelConfig = get_model_config(config_org) if frame_dir is None: frame_dir = stylize_dir / "00_img2img" if not frame_dir.is_dir(): raise ValueError(f'{frame_dir=} does not exist.') create_mask_list = [] if "create_mask" in model_config.stylize_config: create_mask_list = model_config.stylize_config["create_mask"] else: raise ValueError('model_config.stylize_config["create_mask"] not found') output_list = [] stylize_frame = sorted(glob.glob( os.path.join(frame_dir, "[0-9]*.png"), recursive=False)) frame_len = len(stylize_frame) masked_area = [None for f in range(frame_len)] if use_rembg: create_mask_list = ["rembg"] elif use_animeseg: create_mask_list = ["anime-segmentation"] for i,mask_token in enumerate(create_mask_list): fg_dir = stylize_dir.joinpath(f"r_fg_{i:02d}_{time_str}") fg_dir.mkdir(parents=True, exist_ok=True) fg_masked_dir = fg_dir / "00_tmp_masked" fg_masked_dir.mkdir(parents=True, exist_ok=True) fg_mask_dir = fg_dir / "00_mask" fg_mask_dir.mkdir(parents=True, exist_ok=True) if use_animeseg: masked_area = animseg_create_fg( frame_dir=frame_dir, output_dir=fg_masked_dir, output_mask_dir=fg_mask_dir, masked_area_list=masked_area, mask_padding=mask_padding, bg_color=(0,255,0), ) elif use_rembg: masked_area = rembg_create_fg( frame_dir=frame_dir, output_dir=fg_masked_dir, output_mask_dir=fg_mask_dir, masked_area_list=masked_area, mask_padding=mask_padding, bg_color=(0,255,0), ) else: masked_area = create_fg( mask_token=mask_token, frame_dir=frame_dir, output_dir=fg_masked_dir, output_mask_dir=fg_mask_dir, masked_area_list=masked_area, box_threshold=box_threshold, text_threshold=text_threshold, mask_padding=mask_padding, sam_checkpoint= "data/models/SAM/sam_hq_vit_h.pth" if not low_vram else "data/models/SAM/sam_hq_vit_b.pth", bg_color=(0,255,0), ) logger.info(f"mask from [{mask_token}] are output to {fg_dir}") output_list.append((fg_dir, fg_masked_dir, fg_mask_dir)) torch.cuda.empty_cache() bg_dir = stylize_dir.joinpath(f"r_bg_{time_str}") bg_dir.mkdir(parents=True, exist_ok=True) bg_inpaint_dir = bg_dir / "00_tmp_inpainted" bg_inpaint_dir.mkdir(parents=True, exist_ok=True) create_bg(frame_dir, bg_inpaint_dir, masked_area, use_half = True, raft_iter = 20, subvideo_length=80 if not low_vram else 50, neighbor_length=10 if not low_vram else 8, ref_stride=10 if not low_vram else 8, low_vram = low_vram, ) logger.info(f"background are output to {bg_dir}") output_list.append((bg_dir,bg_inpaint_dir,None)) torch.cuda.empty_cache() black_list = [] if ignore_list.is_file(): with open(ignore_list) as f: black_list = [s.strip() for s in f.readlines()] black_list.append("simple_background") black_list.append("green_background") region_map = {} for i, (output_root, masked_dir, mask_dir) in enumerate(output_list): prompt_map = get_labels( frame_dir= masked_dir, interval=predicte_interval, general_threshold=general_threshold, character_threshold=character_threshold, ignore_tokens=black_list, with_confidence=with_confidence, is_danbooru_format=is_danbooru_format, is_cpu = False, ) if mask_dir: ipadapter_dir = output_root / "00_ipadapter" ipadapter_dir.mkdir(parents=True, exist_ok=True) region_map[str(i)]={ "enable": True, "crop_generation_rate": 0.0, "mask_dir" : os.path.relpath(mask_dir.absolute(), data_dir), "save_mask": True, "is_init_img" : False, "condition" : { "prompt_fixed_ratio": 0.5, "head_prompt": "", "prompt_map": prompt_map, "tail_prompt": "", "ip_adapter_map": { "enable": True, "input_image_dir": os.path.relpath(ipadapter_dir.absolute(), data_dir), "prompt_fixed_ratio": 0.5, "save_input_image": True, "resized_to_square": False } } } else: region_map["background"]={ "is_init_img" : False, "hint" : "background's condition refers to the one in root" } model_config.prompt_map = prompt_map model_config.region_map =region_map config_org.write_text(model_config.json(indent=4), encoding="utf-8")