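# Repository page header (not part of the program):
# author: nam_nguyenhoai_AI | commit: b0a48de ("Update algorithm")
# page links: raw / history / blame | scan: no virus | size: 2.65 kB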
from transformers import TimesformerModel, VideoMAEImageProcessor
import torch
import cv2
import numpy as np
from torchvision.transforms import Lambda
from pytorchvideo.transforms import (
Normalize,
)
from torchvision.transforms import (
Lambda,
)
import os
from os.path import isfile, join, basename
def extract_features(frames, device, model, image_processor):
    """Run ``model`` on one clip of frames and return its token-0 output.

    Args:
        frames: Non-empty sequence of equally shaped NumPy arrays, one per
            frame. Each is assumed to be laid out as (height, width, channel)
            with values on a 0-255 scale, since the last axis is moved into
            the channel position and the values are divided by 255.
        device: Torch device (or device string) for the input and the model.
        model: Torch module that is called with a tensor of shape
            (1, num_frames, channel, height, width).
        image_processor: Object exposing per-channel ``image_mean`` and
            ``image_std`` sequences used for normalization.

    Returns:
        ``outputs[0][:, 0]``: token 0 of the model's first output for the
        single-clip batch. The result carries no autograd graph.
    """
    # Stack the frames into one (num_frames, height, width, channel) tensor.
    clip = torch.stack([torch.from_numpy(frame) for frame in frames])
    # Reorder to (num_frames, channel, height, width) and move to the device.
    clip = clip.permute(0, 3, 1, 2).to(device)
    # Scale to [0, 1]; true division promotes integer pixels to float.
    clip = clip / 255.0
    # Per-channel statistics, shaped to broadcast over frames and pixels.
    mean = torch.as_tensor(
        image_processor.image_mean, dtype=clip.dtype, device=clip.device
    ).view(1, -1, 1, 1)
    std = torch.as_tensor(
        image_processor.image_std, dtype=clip.dtype, device=clip.device
    ).view(1, -1, 1, 1)
    clip = (clip - mean) / std
    # Add the batch dimension: (1, num_frames, channel, height, width).
    clip = clip.unsqueeze(0)
    # Make sure the model lives on the same device and is in inference mode.
    model.to(device)
    model.eval()
    # Inference only: skip autograd bookkeeping so no graph is kept alive.
    with torch.no_grad():
        outputs = model(clip)
    # First output, token index 0 (presumably the classification token of
    # the encoder's last hidden state -- confirm against the model used).
    final_output = outputs[0][:, 0]
    return final_output
def to_video(selected_frames, frames, output_path, video_fps):
    """Write the selected frames to ``output_path`` as an 'mp4v' video.

    Args:
        selected_frames: Iterable of indices into ``frames``, written in the
            order given.
        frames: Indexable collection of frame arrays. The output size is
            taken from ``frames[0].shape`` as (shape[1], shape[0]).
        output_path: Destination path handed to ``cv2.VideoWriter``.
        video_fps: Frame rate handed to ``cv2.VideoWriter``.
    """
    print("MP4 Format.")
    first_frame = frames[0]
    frame_size = (first_frame.shape[1], first_frame.shape[0])
    video_writer = cv2.VideoWriter(
        output_path, cv2.VideoWriter_fourcc(*'mp4v'), video_fps, frame_size
    )
    try:
        # selected_frames is a list of indices of frames
        for idx in selected_frames:
            video_writer.write(frames[idx])
    finally:
        # Always release the writer, even if an index lookup or a write
        # fails part-way through, so the file handle is not leaked.
        video_writer.release()
    print("Completed summarizing the video (wait for a moment to load).")
def to_txt(selected_frames, output_path, clip_sample_rate):
    """Write each entry of ``selected_frames`` on its own line.

    Args:
        selected_frames: Iterable of items; each is converted with ``str``.
        output_path: Text file to create or overwrite.
        clip_sample_rate: Accepted but not used by this function.
    """
    with open(output_path, "w") as out_file:
        # One newline-terminated entry per selected frame.
        out_file.writelines(str(entry) + "\n" for entry in selected_frames)
    print("Completed summarizing the txt (wait for a moment to load).")
def load_model():
    """Load the pretrained model and image processor.

    Returns:
        A ``(model, processor, device)`` tuple, where ``device`` is 'cuda'
        when CUDA is available and 'cpu' otherwise, and the model has been
        moved to that device and put in eval mode. If anything in the
        loading sequence raises, the exception is printed and ``None`` is
        returned instead, so callers should check before unpacking.
    """
    try:
        if torch.cuda.is_available():
            device = 'cuda'
        else:
            device = 'cpu'
        model = TimesformerModel.from_pretrained("facebook/timesformer-base-finetuned-k600")
        model = model.to(device).eval()
        processor = VideoMAEImageProcessor.from_pretrained("MCG-NJU/videomae-base")
    except Exception as e:
        print(e)
        return None
    return model, processor, device
def sum_of_squared_difference(vector1, vector2):
    """Return the sum of the squared element-wise differences.

    Computes ``np.sum(np.square(vector1 - vector2))``; the operands must
    support subtraction with each other (for example NumPy arrays of
    broadcast-compatible shapes).
    """
    delta = vector1 - vector2
    return np.sum(np.square(delta))