File size: 2,101 Bytes
445d3d1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import torch
import cv2
import decord
from decord import VideoReader, cpu
decord.bridge.set_bridge('torch')
import numpy as np
from PIL import Image
from torchvision import transforms
from transformers import ProcessorMixin, BatchEncoding
from transformers.image_processing_utils import BatchFeature
from pytorchvideo.data.encoded_video import EncodedVideo
from torchvision.transforms import Compose, Lambda, ToTensor
from torchvision.transforms._transforms_video import NormalizeVideo, RandomCropVideo, RandomHorizontalFlipVideo, CenterCropVideo
from pytorchvideo.transforms import ApplyTransformToKey, ShortSideScale, UniformTemporalSubsample

import os
import glob
from tqdm import tqdm

OPENAI_DATASET_MEAN = (0.48145466, 0.4578275, 0.40821073)
OPENAI_DATASET_STD = (0.26862954, 0.26130258, 0.27577711)

def get_video_transform():
    # import pdb; pdb.set_trace()


    transform = Compose(
        [
            # UniformTemporalSubsample(num_frames),
            Lambda(lambda x: x / 255.0),
            NormalizeVideo(mean=OPENAI_DATASET_MEAN, std=OPENAI_DATASET_STD),
            ShortSideScale(size=224),
            CenterCropVideo(224),
            RandomHorizontalFlipVideo(p=0.5),
        ]
    )

    return transform

if __name__ == '__main__':
    directory = '/comp_robot/lushunlin/MotionGPT/video_datasets/videochatgpt/videochatgpt_tune'
    mp4_files = glob.glob(os.path.join(directory, '*.mp4'))
    # import pdb; pdb.set_trace()
    transform = get_video_transform()
    for video_path in tqdm(mp4_files):
        try:
            decord.bridge.set_bridge('torch')
            decord_vr = VideoReader(video_path, ctx=cpu(0))
            duration = len(decord_vr)
            frame_id_list = np.linspace(0, duration-1, 8, dtype=int)
            video_data = decord_vr.get_batch(frame_id_list)
            video_data = video_data.permute(3, 0, 1, 2)  # (T, H, W, C) -> (C, T, H, W)
            video_outputs = transform(video_data)
        except:
            with open('/comp_robot/lushunlin/MotionGPT/records/decord_error.txt', 'a') as f:
                f.write(video_path+'\n')