sradc
fix timestamp computation (was rounding fps to int...)
5a9c0da
import base64
import cv2
import pandas as pd
from PIL import Image
from tqdm import tqdm
from pipeline.clip_wrapper import MODEL_DIM, ClipWrapper
from pipeline.download_videos import DATA_DIR, REPO_ROOT, VIDEO_DIR
# Sampling interval, in seconds of video, between two extracted frames.
FRAME_EXTRACT_RATE_SECONDS = 5 # Extract a frame every 5 seconds
# Root directory for extracted frames; one sub-directory per video id is
# created beneath it by process_videos().
IMAGES_DIR = DATA_DIR / "images"
# Output parquet file holding one row per extracted frame.
DATAFRAME_PATH = DATA_DIR / "dataset.parquet"
def process_videos() -> None:
    """Run CLIP on sampled video frames and save the results to a parquet file.

    For every ``*.mp4`` in ``VIDEO_DIR`` that does not yet have a ``complete``
    marker file in its image directory, frames are sampled via
    ``get_clip_vectors``, written as ``<frame_idx>.jpg`` under
    ``IMAGES_DIR / <video_id>``, and collected as rows of
    ``video_id, frame_idx, timestamp, base64_image, dim_0 .. dim_{MODEL_DIM-1}``.

    Rows from this run are merged into an existing ``DATAFRAME_PATH`` instead
    of replacing it, so re-running after some videos are already complete does
    not drop their rows. The ``complete`` markers are only created once the
    dataset has been written, so a marker never exists for a video whose rows
    were not persisted.
    """
    IMAGES_DIR.mkdir(exist_ok=True, parents=True)
    # Keep the extracted frames out of version control.
    (IMAGES_DIR / ".gitignore").write_text("*")

    clip_wrapper = ClipWrapper()
    results = []
    # Markers are touched after the parquet write (see docstring).
    pending_markers = []
    for video_path in tqdm(list(VIDEO_DIR.glob("*.mp4")), desc="Processing videos"):
        video_id = video_path.stem
        extracted_images_dir = IMAGES_DIR / video_id
        extracted_images_dir.mkdir(exist_ok=True, parents=True)
        complete_file = extracted_images_dir / "complete"
        if complete_file.exists():
            # Already processed in an earlier run; its rows live in the
            # existing dataset file.
            continue
        for clip_vector, image, timestamp_secs, frame_idx in get_clip_vectors(
            video_path, clip_wrapper
        ):
            image_path = extracted_images_dir / f"{frame_idx}.jpg"
            image.save(image_path)
            # Encode the JPEG bytes exactly as written to disk.
            encoded_image = base64.b64encode(image_path.read_bytes()).decode()
            results.append(
                [
                    video_id,
                    frame_idx,
                    timestamp_secs,
                    encoded_image,
                    *clip_vector,
                ]
            )
        pending_markers.append(complete_file)

    columns = ["video_id", "frame_idx", "timestamp", "base64_image"] + [
        f"dim_{i}" for i in range(MODEL_DIM)
    ]
    df = pd.DataFrame(results, columns=columns)

    if DATAFRAME_PATH.exists() and df.empty:
        # Nothing new was extracted: overwriting would wipe the rows of the
        # videos that were skipped above.
        print(f"No new frames extracted; leaving {DATAFRAME_PATH} unchanged")
    else:
        if DATAFRAME_PATH.exists():
            previous = pd.read_parquet(DATAFRAME_PATH)
            # Drop stale rows of videos that were re-processed in this run
            # (e.g. their marker was removed) to avoid duplicates.
            previous = previous[~previous["video_id"].isin(df["video_id"])]
            if not previous.empty:
                df = pd.concat([previous, df], ignore_index=True)
        print(f"Saving data to {DATAFRAME_PATH}")
        df.to_parquet(DATAFRAME_PATH, index=False)

    for complete_file in pending_markers:
        complete_file.touch()
def get_clip_vectors(video_path, clip_wrapper):
    """Yield a CLIP vector for one frame every ``FRAME_EXTRACT_RATE_SECONDS``.

    Args:
        video_path: Path of the video file to decode with OpenCV.
        clip_wrapper: Object exposing ``images2vec(list_of_images)``; its
            result is squeezed and converted with ``.numpy()`` (presumably a
            torch tensor -- confirm against ``ClipWrapper``).

    Yields:
        Tuples ``(clip_vector, image, timestamp_secs, frame_idx)`` where
        ``image`` is the PIL image of the frame, ``frame_idx`` its 0-based
        index in the video and ``timestamp_secs`` is ``frame_idx / fps``.

    Raises:
        ValueError: If the video reports frames but a non-positive frame
            rate, since no timestamp can be computed.
    """
    cap = cv2.VideoCapture(str(video_path))
    try:
        num_video_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        fps = cap.get(cv2.CAP_PROP_FPS)
        if num_video_frames > 0 and fps <= 0:
            raise ValueError(
                f"Cannot compute timestamps for {video_path}: reported fps is {fps}"
            )
        # Clamp to 1 so that very low frame rates (fps < 1 / rate) do not
        # produce a zero step and a division by zero in the modulo below.
        extract_every_n_frames = max(1, int(FRAME_EXTRACT_RATE_SECONDS * fps))
        for frame_idx in tqdm(range(num_video_frames), desc="Running CLIP on video"):
            ret, frame = cap.read()
            if not ret:
                # The reported frame count is only an estimate; stop at the
                # first frame that cannot be decoded instead of crashing on
                # a ``None`` frame.
                break
            if frame_idx % extract_every_n_frames != 0:
                continue
            # OpenCV decodes to BGR; reverse the channel axis to get RGB.
            image = Image.fromarray(frame[..., ::-1])
            clip_vector = clip_wrapper.images2vec([image]).squeeze().numpy()
            timestamp_secs = frame_idx / fps
            yield clip_vector, image, timestamp_secs, frame_idx
    finally:
        # Also runs when the consumer stops iterating early or an error is
        # raised, so the capture handle is never leaked.
        cap.release()
# Script entry point: process all downloaded videos when run directly.
if __name__ == "__main__":
    process_videos()