Spaces:
Running
on
Zero
Running
on
Zero
import torch | |
import cv2 | |
import decord | |
from decord import VideoReader, cpu | |
decord.bridge.set_bridge('torch') | |
import numpy as np | |
from PIL import Image | |
from torchvision import transforms | |
from transformers import ProcessorMixin, BatchEncoding | |
from transformers.image_processing_utils import BatchFeature | |
from pytorchvideo.data.encoded_video import EncodedVideo | |
from torchvision.transforms import Compose, Lambda, ToTensor | |
from torchvision.transforms._transforms_video import NormalizeVideo, RandomCropVideo, RandomHorizontalFlipVideo, CenterCropVideo | |
from pytorchvideo.transforms import ApplyTransformToKey, ShortSideScale, UniformTemporalSubsample | |
import os | |
import glob | |
from tqdm import tqdm | |
OPENAI_DATASET_MEAN = (0.48145466, 0.4578275, 0.40821073) | |
OPENAI_DATASET_STD = (0.26862954, 0.26130258, 0.27577711) | |
def get_video_transform(): | |
# import pdb; pdb.set_trace() | |
transform = Compose( | |
[ | |
# UniformTemporalSubsample(num_frames), | |
Lambda(lambda x: x / 255.0), | |
NormalizeVideo(mean=OPENAI_DATASET_MEAN, std=OPENAI_DATASET_STD), | |
ShortSideScale(size=224), | |
CenterCropVideo(224), | |
RandomHorizontalFlipVideo(p=0.5), | |
] | |
) | |
return transform | |
if __name__ == '__main__': | |
directory = '/comp_robot/lushunlin/MotionGPT/video_datasets/videochatgpt/videochatgpt_tune' | |
mp4_files = glob.glob(os.path.join(directory, '*.mp4')) | |
# import pdb; pdb.set_trace() | |
transform = get_video_transform() | |
for video_path in tqdm(mp4_files): | |
try: | |
decord.bridge.set_bridge('torch') | |
decord_vr = VideoReader(video_path, ctx=cpu(0)) | |
duration = len(decord_vr) | |
frame_id_list = np.linspace(0, duration-1, 8, dtype=int) | |
video_data = decord_vr.get_batch(frame_id_list) | |
video_data = video_data.permute(3, 0, 1, 2) # (T, H, W, C) -> (C, T, H, W) | |
video_outputs = transform(video_data) | |
except: | |
with open('/comp_robot/lushunlin/MotionGPT/records/decord_error.txt', 'a') as f: | |
f.write(video_path+'\n') |