Spaces:

anshuln
/

peekaboo-demo

Sleeping

File size: 9,350 Bytes

44f2ca8

import json
import numpy as np
import random
import csv
import pickle
import tqdm

def clamp(x, min_val, max_val):
    """Limit ``x`` to the range [min_val, max_val] and return it as an int.

    The bounded value is passed through ``int()``, so fractional values are
    truncated toward zero.
    """
    # Cap from above first, then raise to the lower bound.
    capped = max_val if max_val < x else x
    bounded = min_val if min_val > capped else capped
    return int(bounded)

def _jitter(extent):
    """Return a random integer offset of at most 1% of ``extent`` either way."""
    bound = int(0.01 * extent)
    return random.randint(-bound, bound)


def _classify_motion(motion_type):
    """Map a motion label to 'vertical', 'horizontal' or 'zigzag' (case-insensitive)."""
    lowered = motion_type.lower()
    if 'up' in lowered:
        return 'vertical'
    if 'left' in lowered:
        return 'horizontal'
    return 'zigzag'


def _directed_start(init_positions, extent, box_extent, direction, num_frames):
    """Pick a start coordinate on the side the box moves away from, plus its speed cap."""
    if direction == 1:
        # Moving toward larger coordinates: start from one of the two low anchors.
        candidates, end_limit = init_positions[:2], extent - box_extent
    else:
        # Moving toward smaller coordinates: start from one of the two high anchors.
        candidates, end_limit = init_positions[2:], box_extent
    start = np.random.choice(candidates) + _jitter(extent)
    # Largest per-frame step that does not pass end_limit within num_frames steps.
    max_speed = np.abs(end_limit - start) / num_frames
    return start, max_speed


def _capped_speed(direction, max_speed):
    """Draw a speed from {8, 12, 16}, cap it at ``max_speed`` and apply the sign."""
    speed = random.randint(2, 4) * 4
    return direction * min(speed, max_speed)


def _box_mask(canvas_size, top, left, box_height, box_width):
    """Return a zero canvas with the box region, clipped to the canvas, set to 1."""
    canvas = np.zeros(canvas_size)
    row_start, row_stop = (int(v) for v in np.clip((top, top + box_height), 0, canvas_size[0]))
    col_start, col_stop = (int(v) for v in np.clip((left, left + box_width), 0, canvas_size[1]))
    canvas[row_start:row_stop, col_start:col_stop] = 1
    return canvas


def generate_moving_frames_simpler(canvas_size, num_frames, aspect_ratio, bounding_box_size, motion_type, up_to_down_strict=False, keep_in_frame=True):
    """Generate a sequence of masks containing a moving rectangular box.

    The motion is classified once from ``motion_type`` (case-insensitive) and
    that classification drives both the start state and the per-frame step:
    a label containing "up" moves vertically, one containing "left" moves
    horizontally, and anything else is treated as zig-zag (alternating a
    horizontal and a vertical step).

    Args:
        canvas_size: (height, width) of each frame; passed to ``np.zeros``.
        num_frames: Number of frames to generate. Values <= 0 yield ``[]``.
        aspect_ratio: 'Rectangle Vertical', 'Rectangle Horizontal' or 'Square'.
        bounding_box_size: 'Small', 'Medium' or 'Large'.
        motion_type: Motion label, e.g. "Left to right", "Up to down", "Zig-zag".
        up_to_down_strict: For vertical motion, 'up' forces upward and 'down'
            forces downward movement. For horizontal motion any truthy value
            forces left-to-right movement. Ignored for zig-zag. When it does
            not apply, the direction is random.
        keep_in_frame: Accepted for compatibility; currently has no effect.

    Returns:
        A list of ``num_frames`` arrays of shape ``canvas_size`` with the box
        pixels set to 1 and everything else 0.

    Raises:
        KeyError: If ``aspect_ratio`` or ``bounding_box_size`` is not a known key.
    """
    # Box side as a fraction of the canvas side ('Medium' and 'Large' share a value).
    size_mapping = {'Small': 0.25, 'Medium': 0.3, 'Large': 0.3}
    # (height multiplier, width multiplier) per shape.
    aspect_ratio_mapping = {'Rectangle Vertical': (1.33, 1), 'Rectangle Horizontal': (1, 1.33), 'Square': (1, 1)}

    ratio = aspect_ratio_mapping[aspect_ratio]
    box_height = int(canvas_size[0] * size_mapping[bounding_box_size] * ratio[0])
    box_width = int(canvas_size[1] * size_mapping[bounding_box_size] * ratio[1])

    if num_frames <= 0:
        # Nothing to draw; also avoids dividing by zero in the speed cap.
        return []

    canvas_height, canvas_width = canvas_size[0], canvas_size[1]
    anchor_fractions = (0.1, 0.25, 0.45, 0.7)
    x_init_pos = [fraction * canvas_width for fraction in anchor_fractions]
    y_init_pos = [fraction * canvas_height for fraction in anchor_fractions]

    motion = _classify_motion(motion_type)
    speed_dir = random.choice([-1, 1])
    speed_x = speed_y = 0

    if motion == 'vertical':
        # Horizontal start is unconstrained; vertical start depends on direction.
        pos_x = random.choice(x_init_pos) + _jitter(canvas_width)
        if up_to_down_strict == 'up':
            speed_dir = -1
        elif up_to_down_strict == 'down':
            speed_dir = 1
        pos_y, max_speed = _directed_start(y_init_pos, canvas_height, box_height, speed_dir, num_frames)
        speed_y = _capped_speed(speed_dir, max_speed)
    elif motion == 'horizontal':
        # Vertical start is unconstrained; horizontal start depends on direction.
        pos_y = random.choice(y_init_pos) + _jitter(canvas_height)
        if up_to_down_strict:
            speed_dir = 1
        pos_x, max_speed = _directed_start(x_init_pos, canvas_width, box_width, speed_dir, num_frames)
        speed_x = _capped_speed(speed_dir, max_speed)
    else:
        # Zig-zag: independent random direction on each axis.
        speed_dir_y = random.choice([-1, 1])
        pos_x, max_speed_x = _directed_start(x_init_pos, canvas_width, box_width, speed_dir, num_frames)
        pos_y, max_speed_y = _directed_start(y_init_pos, canvas_height, box_height, speed_dir_y, num_frames)
        speed_x = _capped_speed(speed_dir, max_speed_x)
        speed_y = _capped_speed(speed_dir_y, max_speed_y)

    frames = []
    for frame_idx in range(num_frames):
        if motion == 'horizontal':
            pos_x = pos_x + speed_x
            pos_y = pos_y + _jitter(canvas_height)  # small wobble on the idle axis
        elif motion == 'vertical':
            pos_y = pos_y + speed_y
            pos_x = pos_x + _jitter(canvas_width)  # small wobble on the idle axis
        elif frame_idx % 2 == 0:
            # Zig-zag: even frames step horizontally, odd frames vertically.
            pos_x = pos_x + speed_x
        else:
            pos_y = pos_y + speed_y

        frames.append(_box_mask(canvas_size, pos_y, pos_x, box_height, box_width))

    return frames


def generate_stationary_frames_simpler(canvas_size, num_frames, aspect_ratio, bounding_box_size):
    """Generate masks for a box that stays in place apart from small random jitter.

    Args:
        canvas_size: (height, width) of each frame; passed to ``np.zeros``.
        num_frames: Number of frames to generate.
        aspect_ratio: 'Rectangle Vertical', 'Rectangle Horizontal' or 'Square'.
        bounding_box_size: 'Small', 'Medium' or 'Large'.

    Returns:
        A list of ``num_frames`` arrays of shape ``canvas_size`` where the box
        pixels are 1 and all other pixels are 0.
    """
    # Box side as a fraction of the canvas side, and (height, width) multipliers per shape.
    fraction_by_size = {'Small': 0.25, 'Medium': 0.3, 'Large': 0.3}
    ratio_by_shape = {'Rectangle Vertical': (1.33, 1), 'Rectangle Horizontal': (1, 1.33), 'Square': (1, 1)}

    height_ratio, width_ratio = ratio_by_shape[aspect_ratio]
    box_h = int(canvas_size[0] * fraction_by_size[bounding_box_size] * height_ratio)
    box_w = int(canvas_size[1] * fraction_by_size[bounding_box_size] * width_ratio)

    n_rows, n_cols = canvas_size[0], canvas_size[1]
    anchor_fractions = (0.1, 0.25, 0.45, 0.7)
    col_anchors = [n_cols * fraction for fraction in anchor_fractions]
    row_anchors = [n_rows * fraction for fraction in anchor_fractions]

    def wobble(extent):
        # Random integer offset of at most 1% of the given canvas extent.
        return random.randint(int(-0.01 * extent), int(0.01 * extent))

    # Start at a random anchor on each axis, nudged by a little jitter.
    left = np.random.choice(col_anchors) + wobble(n_cols)
    top = np.random.choice(row_anchors) + wobble(n_rows)

    masks = []
    for _ in range(num_frames):
        mask = np.zeros(canvas_size)

        # The box random-walks by a small jitter each frame (rows first, then columns).
        top = top + wobble(n_rows)
        left = left + wobble(n_cols)

        # Clip the box to the canvas before painting it.
        row_span = slice(clamp(top, 0, n_rows), clamp(top + box_h, 0, n_rows))
        col_span = slice(clamp(left, 0, n_cols), clamp(left + box_w, 0, n_cols))
        mask[row_span, col_span] = 1

        masks.append(mask)

    return masks


# Script section: for every whitelisted prompt in the CSV, generate synthetic
# bounding-box mask sequences and pickle one record per sequence.
# Columns as used below: row[0] prompt, row[1] subject, row[2] bounding-box
# size, row[3] aspect ratio, row[4] motion type.
input_file_path = "custom_prompts.csv"
output_file_path = "custom_prompts_with_bb.pkl"
num_videos_per_prompt = 3  # NOTE(review): not referenced below; frames_per_prompts sets the count
video_id = 1100  # id of the first record; incremented once per record
all_records = []
frames_per_prompts = 3  # mask sequences generated per prompt
num_frames = 16  # frames per mask sequence
canvas_size = (224, 224)
# Phrase rewrites applied, in this order, to both the prompt and the subject.
phrase_replacements = (
    ('herd of', ''),
    ('group of', ''),
    ('flock of', ''),
    ('school of', ''),
    ('escalator', 'elevator'),
)

with open('filtered_prompts.txt') as f:
    GOOD_PROMPTS = {line.strip() for line in f}

# newline="" is what the csv module documents for files handed to csv.reader.
with open(input_file_path, "r", newline="") as f:
    data = csv.reader(f)
    for row in tqdm.tqdm(data):
        if not row:
            # csv.reader yields [] for blank lines; indexing them would raise.
            continue

        prompt = row[0]
        for old, new in phrase_replacements:
            prompt = prompt.replace(old, new)
        if prompt not in GOOD_PROMPTS:
            continue

        # NOTE(review): the normalised subject is computed but the record below
        # stores the raw row[1]; confirm which one downstream consumers expect.
        subject = row[1].lower()
        for old, new in phrase_replacements:
            subject = subject.replace(old, new)

        # NOTE(review): stationary detection reads the last column while the
        # motion type is read from row[4]; these agree only for 5-column rows.
        if row[-1] == "Stationary":
            frames = [
                generate_stationary_frames_simpler(canvas_size, num_frames, row[3], row[2])
                for _ in range(frames_per_prompts)
            ]
        else:
            # The direction hint depends only on the prompt, so derive it once.
            # NOTE(review): plain substring match, so e.g. "puppy" also counts as "up".
            lowered_prompt = prompt.lower()
            if "up" in lowered_prompt or 'ascending' in lowered_prompt:
                up_to_down_strict = 'up'
            elif "down" in lowered_prompt or 'descending' in lowered_prompt:
                up_to_down_strict = 'down'
            else:
                up_to_down_strict = False
            frames = [
                generate_moving_frames_simpler(canvas_size, num_frames, row[3], row[2], row[4], up_to_down_strict)
                for _ in range(frames_per_prompts)
            ]

        for clip_frames in frames:
            record_dict = {"video_id": video_id, "prompt": prompt, "frames": clip_frames, "subject": row[1], "motion": row[4], "aspect_ratio": row[3], "bounding_box_size": row[2]}
            all_records.append(record_dict)
            video_id += 1

# Write after the input file is closed, and report only once the dump succeeded.
with open(output_file_path, "wb") as f:
    pickle.dump(all_records, f)
print(f"Wrote {len(all_records)} records to {output_file_path}")