import os
import cv2
import torch
import spaces
import imageio
import numpy as np
import gradio as gr
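
# Replace torch.jit.script with a no-op identity so decorated model code runs
# eagerly instead of being TorchScript-compiled; presumably a workaround for
# scripting failures in this Spaces environment.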
torch.jit.script = lambda f: f
import argparse

from utils.batch_inference import (
    BSRInferenceLoop, BIDInferenceLoop
)

# import subprocess
# subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)

device = 'cuda' if torch.cuda.is_available() else 'cpu'
def get_example(task):
    case = {
        "dn": [
            ['examples/bus.mp4',],
            ['examples/koala.mp4',],
            ['examples/flamingo.mp4',],
            ['examples/rhino.mp4',],
            ['examples/elephant.mp4',],
            ['examples/sheep.mp4',],
            ['examples/dog-agility.mp4',],
            # ['examples/dog-gooses.mp4',],
        ],
        "sr": [
            ['examples/bus_sr.mp4',],
            ['examples/koala_sr.mp4',],
            ['examples/flamingo_sr.mp4',],
            ['examples/rhino_sr.mp4',],
            ['examples/elephant_sr.mp4',],
            ['examples/sheep_sr.mp4',],
            ['examples/dog-agility_sr.mp4',],
            # ['examples/dog-gooses_sr.mp4',],
        ]
    }
    return case[task]
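
# NOTE: set_default_prompt is not defined in this file; update_prompt is only
# referenced by the commented-out input_video.change callbacks further below.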
def update_prompt(input_video):
    video_name = input_video.split('/')[-1]
    return set_default_prompt(video_name)
# Map bundled example videos to their pre-extracted frame directories
video_to_image = {
    'bus.mp4': ['examples_frames/bus'],
    'koala.mp4': ['examples_frames/koala'],
    'dog-gooses.mp4': ['examples_frames/dog-gooses'],
    'flamingo.mp4': ['examples_frames/flamingo'],
    'rhino.mp4': ['examples_frames/rhino'],
    'elephant.mp4': ['examples_frames/elephant'],
    'sheep.mp4': ['examples_frames/sheep'],
    'dog-agility.mp4': ['examples_frames/dog-agility'],
    'bus_sr.mp4': ['examples_frames/bus_sr'],
    'koala_sr.mp4': ['examples_frames/koala_sr'],
    'dog-gooses_sr.mp4': ['examples_frames/dog_gooses_sr'],
    'flamingo_sr.mp4': ['examples_frames/flamingo_sr'],
    'rhino_sr.mp4': ['examples_frames/rhino_sr'],
    'elephant_sr.mp4': ['examples_frames/elephant_sr'],
    'sheep_sr.mp4': ['examples_frames/sheep_sr'],
    'dog-agility_sr.mp4': ['examples_frames/dog-agility_sr'],
}
def images_to_video(image_list, output_path, fps=10):
    # Convert PIL Images to uint8 numpy arrays
    frames = [np.array(img).astype(np.uint8) for img in image_list]
    # Cap the output at the first 20 frames to keep encoding time short
    frames = frames[:20]
    # Encode the frames as an H.264 video
    writer = imageio.get_writer(output_path, fps=fps, codec='libx264')
    for frame in frames:
        writer.append_data(frame)
    writer.close()
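
# Example usage (the frame list name is hypothetical):
# images_to_video(restored_pil_frames, 'restored.mp4', fps=10)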
def video2frames(video_path):
    # Open the video file
    video = cv2.VideoCapture(video_path)
    # Frames are written to a directory named after the video (extension stripped)
    img_path = video_path[:-4]

    # Initialize frame counter
    frame_count = 0
    os.makedirs(img_path, exist_ok=True)

    while True:
        # Read a frame from the video
        ret, frame = video.read()
        # If the frame was not read successfully, we have reached the end of the video
        if not ret:
            break
        # Write the frame to a JPG file
        frame_file = f"{img_path}/{frame_count:05}.jpg"
        cv2.imwrite(frame_file, frame)
        # Increment the frame counter
        frame_count += 1

    # Release the video file
    video.release()
    return img_path
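
# The unused `spaces` import and the "Running on Zero" Space suggest this entry
# point was meant to run on ZeroGPU hardware; adding the decorator here is an
# assumption based on the standard ZeroGPU pattern of requesting a GPU per call.
@spaces.GPU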
def DiffBIR_restore(input_video, prompt, sr_ratio, n_frames, n_steps, guidance_scale, seed, n_prompt, task):
    video_name = input_video.split('/')[-1]
    if video_name in video_to_image:
        # Use the pre-extracted frames for bundled examples
        frames_path = video_to_image[video_name][0]
    else:
        frames_path = video2frames(input_video)
    print(f"[INFO] input_video: {input_video}")
    print(f"[INFO] Frames path: {frames_path}")

    args = argparse.Namespace()
    args.task = task  # choices: "sr", "dn", "fr", "fr_bg"
    args.upscale = sr_ratio
    ### sampling parameters
    args.steps = n_steps
    args.better_start = True
    args.tiled = False
    args.tile_size = 512
    args.tile_stride = 256
    args.pos_prompt = prompt
    args.neg_prompt = n_prompt
    args.cfg_scale = guidance_scale
    ### input parameters
    args.input = frames_path
    args.n_samples = 1
    args.batch_size = 10
    args.final_size = (480, 854)
    args.config = "configs/inference/my_cldm.yaml"
    ### guidance parameters
    args.guidance = False
    args.g_loss = "w_mse"
    args.g_scale = 0.0
    args.g_start = 1001
    args.g_stop = -1
    args.g_space = "latent"
    args.g_repeat = 1
    ### output parameters
    args.output = " "
    ### common parameters
    args.seed = seed
    args.device = device  # reuse the module-level device instead of hard-coding "cuda"
    args.n_frames = n_frames
    ### latent control parameters (presumably [start, end] fractions of the
    ### denoising schedule during which latent warping / token merging are active)
    args.warp_period = [0, 0.1]
    args.merge_period = [0, 0]
    args.ToMe_period = [0, 1]
    args.merge_ratio = [0.6, 0]

    if args.task == "sr":
        restored_vid_path = BSRInferenceLoop(args).run()
    elif args.task == "dn":
        restored_vid_path = BIDInferenceLoop(args).run()
    else:
        # Guard against tasks this demo does not wire up ("fr", "fr_bg")
        raise ValueError(f"Unsupported task: {args.task}")
    torch.cuda.empty_cache()
    return restored_vid_path
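
# Hypothetical direct invocation for local testing (outside the Gradio UI);
# the prompt below is illustrative only:
# DiffBIR_restore('examples/bus_sr.mp4', 'a bus driving down a street',
#                 sr_ratio=4, n_frames=10, n_steps=5, guidance_scale=4.0,
#                 seed=231, n_prompt='low quality, blurry', task='sr')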
########
# demo #
########

intro = """
<div style="text-align:center">
<h1 style="font-weight: 1400; text-align: center; margin-bottom: 7px;">
    DiffIR2VR
    <br/>
    <small>Zero-shot video restoration and super-resolution</small>
</h1>
<span>[<a target="_blank" href="https://jimmycv07.github.io/DiffIR2VR_web/">Project page</a>] [<a target="_blank" href="https://huggingface.co/papers/2406.06523">arXiv</a>]</span>
<div style="display:flex; justify-content: center; margin-top: 0.5em">Note that this page is a limited demo of DiffIR2VR.
For more configurations, please visit our GitHub page. The code will be released soon!</div>
<div style="display:flex; justify-content: center; margin-top: 0.5em; color: red;">For super-resolution,
we recommend keeping the final frame size (original size * upscale ratio) around 480x854;
otherwise the demo may fail due to long inference times.</div>
</div>
"""
with gr.Blocks(css="style.css") as demo: | |
gr.HTML(intro) | |
with gr.Tab(label="Super-resolution with DiffBIR"): | |
with gr.Row(): | |
input_video = gr.Video(label="Input Video") | |
output_video = gr.Video(label="Restored Video", interactive=False, autoplay=True) | |
with gr.Row(): | |
run_button = gr.Button(value="Restore your video!", variant="primary") | |
with gr.Accordion('Advanced options', open=False): | |
prompt = gr.Textbox( | |
label="Prompt", | |
max_lines=1, | |
placeholder="describe your video content" | |
# value="bear, Van Gogh Style" | |
) | |
sr_ratio = gr.Slider(label='Upscale ratio', | |
minimum=1, | |
maximum=16, | |
value=4, | |
step=0.5) | |
n_frames = gr.Slider(label='Frames', | |
minimum=1, | |
maximum=60, | |
value=10, | |
step=1) | |
n_steps = gr.Slider(label='Steps', | |
minimum=1, | |
maximum=100, | |
value=5, | |
step=1) | |
guidance_scale = gr.Slider(label='Guidance Scale', | |
minimum=0.1, | |
maximum=30.0, | |
value=4.0, | |
step=0.1) | |
seed = gr.Slider(label='Seed', | |
info="-1=result is always different", | |
minimum=-1, | |
maximum=1000, | |
step=1, | |
randomize=True) | |
n_prompt = gr.Textbox( | |
label='Negative Prompt', | |
value="low quality, blurry, spray, low-resolution, noisy, unsharp, weird textures, JPEG artifact, aliasing, over-smooth" | |
) | |
task = gr.Textbox(value="sr", visible=False) | |
# input_video.change( | |
# fn = update_prompt, | |
# inputs = [input_video], | |
# outputs = [prompt], | |
# queue = False) | |
run_button.click(fn = DiffBIR_restore, | |
inputs = [input_video, | |
prompt, | |
sr_ratio, | |
n_frames, | |
n_steps, | |
guidance_scale, | |
seed, | |
n_prompt, | |
task | |
], | |
outputs = [output_video] | |
) | |
gr.Examples( | |
examples=get_example("sr"), | |
label='Examples', | |
inputs=[input_video], | |
outputs=[output_video], | |
examples_per_page=7 | |
) | |
with gr.Tab(label="Denoise with DiffBIR"): | |
with gr.Row(): | |
input_video = gr.Video(label="Input Video") | |
output_video = gr.Video(label="Restored Video", interactive=False, autoplay=True) | |
with gr.Row(): | |
run_button = gr.Button(value="Restore your video!", variant="primary") | |
with gr.Accordion('Advanced options', open=False): | |
prompt = gr.Textbox( | |
label="Prompt", | |
max_lines=1, | |
placeholder="describe your video content" | |
# value="bear, Van Gogh Style" | |
) | |
n_frames = gr.Slider(label='Frames', | |
minimum=1, | |
maximum=60, | |
value=10, | |
step=1) | |
n_steps = gr.Slider(label='Steps', | |
minimum=1, | |
maximum=100, | |
value=5, | |
step=1) | |
guidance_scale = gr.Slider(label='Guidance Scale', | |
minimum=0.1, | |
maximum=30.0, | |
value=4.0, | |
step=0.1) | |
seed = gr.Slider(label='Seed', | |
info="-1=result is always different", | |
minimum=-1, | |
maximum=1000, | |
step=1, | |
randomize=True) | |
n_prompt = gr.Textbox( | |
label='Negative Prompt', | |
value="low quality, blurry, spray, low-resolution, noisy, unsharp, weird textures, JPEG artifact, aliasing, over-smooth" | |
) | |
task = gr.Textbox(value="dn", visible=False) | |
sr_ratio = gr.Number(value=1, visible=False) | |
# input_video.change( | |
# fn = update_prompt, | |
# inputs = [input_video], | |
# outputs = [prompt], | |
# queue = False) | |
run_button.click(fn = DiffBIR_restore, | |
inputs = [input_video, | |
prompt, | |
sr_ratio, | |
n_frames, | |
n_steps, | |
guidance_scale, | |
seed, | |
n_prompt, | |
task | |
], | |
outputs = [output_video] | |
) | |
gr.Examples( | |
examples=get_example("dn"), | |
label='Examples', | |
inputs=[input_video], | |
outputs=[output_video], | |
examples_per_page=7 | |
) | |
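
# Queue requests so long-running restorations are processed in order
# rather than timing out concurrent calls.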
demo.queue()
demo.launch()