depth-anything-v1-vs-v2

Running on Zero

App Files Files Community

depth-anything-v1-vs-v2 / run_video.py

LiheYoung

Bug fix: video write failure on $pred_only == True (#4)

0811d77 verified 14 days ago

raw history blame contribute delete

No virus

3.9 kB

	import argparse
	import cv2
	import glob
	import matplotlib
	import numpy as np
	import os
	import torch

	from depth_anything_v2.dpt import DepthAnythingV2


	if __name__ == '__main__':
	parser = argparse.ArgumentParser(description='Depth Anything V2')

	parser.add_argument('--video-path', type=str)
	parser.add_argument('--input-size', type=int, default=518)
	parser.add_argument('--outdir', type=str, default='./vis_video_depth')

	parser.add_argument('--encoder', type=str, default='vitl', choices=['vits', 'vitb', 'vitl', 'vitg'])

	parser.add_argument('--pred-only', dest='pred_only', action='store_true', help='only display the prediction')
	parser.add_argument('--grayscale', dest='grayscale', action='store_true', help='do not apply colorful palette')

	args = parser.parse_args()

	DEVICE = 'cuda' if torch.cuda.is_available() else 'mps' if torch.backends.mps.is_available() else 'cpu'

	# 'we are undergoing company review procedures to release Depth-Anything-Giant checkpoint
	model_configs = {
	'vits': {'encoder': 'vits', 'features': 64, 'out_channels': [48, 96, 192, 384]},
	'vitb': {'encoder': 'vitb', 'features': 128, 'out_channels': [96, 192, 384, 768]},
	'vitl': {'encoder': 'vitl', 'features': 256, 'out_channels': [256, 512, 1024, 1024]},
	'vitg': {'encoder': 'vitg', 'features': 384, 'out_channels': [1536, 1536, 1536, 1536]}
	}

	depth_anything = DepthAnythingV2(**model_configs[args.encoder])
	depth_anything.load_state_dict(torch.load(f'checkpoints/depth_anything_v2_{args.encoder}.pth', map_location='cpu'))
	depth_anything = depth_anything.to(DEVICE).eval()

	if os.path.isfile(args.video_path):
	if args.video_path.endswith('txt'):
	with open(args.video_path, 'r') as f:
	lines = f.read().splitlines()
	else:
	filenames = [args.video_path]
	else:
	filenames = glob.glob(os.path.join(args.video_path, '*/'), recursive=True)

	os.makedirs(args.outdir, exist_ok=True)

	margin_width = 50
	cmap = matplotlib.colormaps.get_cmap('Spectral_r')

	for k, filename in enumerate(filenames):
	print(f'Progress {k+1}/{len(filenames)}: {filename}')

	raw_video = cv2.VideoCapture(filename)
	frame_width, frame_height = int(raw_video.get(cv2.CAP_PROP_FRAME_WIDTH)), int(raw_video.get(cv2.CAP_PROP_FRAME_HEIGHT))
	frame_rate = int(raw_video.get(cv2.CAP_PROP_FPS))
	output_width = frame_width * 2 + margin_width

	if args.pred_only:
	output_width = frame_width
	else:
	output_width = frame_width * 2 + margin_width

	output_path = os.path.join(args.outdir, os.path.splitext(os.path.basename(filename))[0] + '.mp4')
	out = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*"mp4v"), frame_rate, (output_width, frame_height))

	while raw_video.isOpened():
	ret, raw_frame = raw_video.read()
	if not ret:
	break

	depth = depth_anything.infer_image(raw_frame, args.input_size)

	depth = (depth - depth.min()) / (depth.max() - depth.min()) * 255.0
	depth = depth.astype(np.uint8)

	if args.grayscale:
	depth = np.repeat(depth[..., np.newaxis], 3, axis=-1)
	else:
	depth = (cmap(depth)[:, :, :3] * 255)[:, :, ::-1].astype(np.uint8)

	if args.pred_only:
	out.write(depth)
	else:
	split_region = np.ones((frame_height, margin_width, 3), dtype=np.uint8) * 255
	combined_frame = cv2.hconcat([raw_frame, split_region, depth])

	out.write(combined_frame)

	raw_video.release()
	out.release()