Upload 3 files
- utils/constants.py +4 -0
- utils/custom_layers.py +67 -0
- utils/predict.py +104 -0
utils/constants.py
ADDED
@@ -0,0 +1,4 @@
+MAX_SEQ_LENGTH = 20
+NUM_FEATURES = 1024
+IMG_SIZE = 128
+CLASS_VOCAB = ['CricketShot', 'PlayingCello', 'Punch', 'ShavingBeard', 'TennisSwing']
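These four values fix the clip geometry the rest of the upload assumes: frames are cropped to IMG_SIZE x IMG_SIZE, at most MAX_SEQ_LENGTH frames per clip are kept, and each frame is reduced to a NUM_FEATURES-dimensional DenseNet feature vector before classification over CLASS_VOCAB. As a quick illustrative check (this snippet is not part of the upload), the per-clip feature tensor that utils/predict.py builds from them looks like this:

import numpy as np

from utils.constants import MAX_SEQ_LENGTH, NUM_FEATURES

# Shape of the single-clip feature batch that prepare_single_video() fills in.
frame_features = np.zeros((1, MAX_SEQ_LENGTH, NUM_FEATURES), dtype="float32")
assert frame_features.shape == (1, 20, 1024)  # one clip, 20 frames, 1024-d features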
utils/custom_layers.py
ADDED
@@ -0,0 +1,67 @@
+import tensorflow as tf
+from tensorflow import keras
+from keras import layers
+
+
+class PositionalEmbedding(layers.Layer):
+    def __init__(self, sequence_length, output_dim, **kwargs):
+        super().__init__(**kwargs)
+        self.position_embeddings = layers.Embedding(
+            input_dim=sequence_length, output_dim=output_dim
+        )
+        self.sequence_length = sequence_length
+        self.output_dim = output_dim
+
+    def call(self, inputs):
+        # The inputs are of shape: `(batch_size, frames, num_features)`
+        length = tf.shape(inputs)[1]
+        positions = tf.range(start=0, limit=length, delta=1)
+        embedded_positions = self.position_embeddings(positions)
+        return inputs + embedded_positions
+
+    def compute_mask(self, inputs, mask=None):
+        mask = tf.reduce_any(tf.cast(inputs, "bool"), axis=-1)
+        return mask
+
+    def get_config(self):
+        config = super().get_config()
+        config.update({
+            "sequence_length": self.sequence_length,
+            "output_dim": self.output_dim,
+        })
+        return config
+
+
+class TransformerEncoder(layers.Layer):
+    def __init__(self, embed_dim, dense_dim, num_heads, **kwargs):
+        super().__init__(**kwargs)
+        self.embed_dim = embed_dim
+        self.dense_dim = dense_dim
+        self.num_heads = num_heads
+        self.attention = layers.MultiHeadAttention(
+            num_heads=num_heads, key_dim=embed_dim, dropout=0.3
+        )
+        self.dense_proj = keras.Sequential(
+            [layers.Dense(dense_dim, activation=tf.nn.gelu), layers.Dense(embed_dim)]
+        )
+        self.layernorm_1 = layers.LayerNormalization()
+        self.layernorm_2 = layers.LayerNormalization()
+
+    def call(self, inputs, mask=None):
+        if mask is not None:
+            mask = mask[:, tf.newaxis, :]
+
+        attention_output = self.attention(inputs, inputs, attention_mask=mask)
+        proj_input = self.layernorm_1(inputs + attention_output)
+        proj_output = self.dense_proj(proj_input)
+        return self.layernorm_2(proj_input + proj_output)
+
+
+    def get_config(self):
+        config = super().get_config()
+        config.update({
+            "embed_dim": self.embed_dim,
+            "dense_dim": self.dense_dim,
+            "num_heads": self.num_heads,
+        })
+        return config
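These two layers are the custom objects that the commented-out from_pretrained_keras call in utils/predict.py would pass via custom_objects, which is why both define get_config for serialization. As a rough guide to how they fit together, here is a minimal sketch of a classifier head in the style of the keras.io Video Transformers example this Space builds on; the fixed input shape and the hyperparameters dense_dim=4 and num_heads=1 are assumptions for illustration, not values taken from this upload:

from tensorflow import keras
from keras import layers

from utils.constants import MAX_SEQ_LENGTH, NUM_FEATURES, CLASS_VOCAB
from utils.custom_layers import PositionalEmbedding, TransformerEncoder


def build_video_classifier(dense_dim=4, num_heads=1):
    # One clip = MAX_SEQ_LENGTH frames, each already reduced to a NUM_FEATURES vector.
    inputs = keras.Input(shape=(MAX_SEQ_LENGTH, NUM_FEATURES))
    x = PositionalEmbedding(MAX_SEQ_LENGTH, NUM_FEATURES, name="frame_position_embedding")(inputs)
    x = TransformerEncoder(NUM_FEATURES, dense_dim, num_heads, name="transformer_layer")(x)
    x = layers.GlobalMaxPooling1D()(x)
    x = layers.Dropout(0.5)(x)
    outputs = layers.Dense(len(CLASS_VOCAB), activation="softmax")(x)
    return keras.Model(inputs, outputs)

The pretrained checkpoint pulled in utils/predict.py already contains such a head, so this sketch is only meant to show the role of PositionalEmbedding (adds learned per-frame position vectors and emits a padding mask) and TransformerEncoder (masked self-attention plus a feed-forward projection with residual connections).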
utils/predict.py
ADDED
@@ -0,0 +1,104 @@
+#from .custom_layers import TransformerEncoder, PositionalEmbedding
+from .constants import MAX_SEQ_LENGTH, NUM_FEATURES, IMG_SIZE, CLASS_VOCAB
+from huggingface_hub import from_pretrained_keras
+from tensorflow import keras
+from keras import layers
+import numpy as np
+import imageio
+import cv2
+
+#model = from_pretrained_keras("shivi/video-classification",custom_objects={"PositionalEmbedding":PositionalEmbedding,"TransformerEncoder": TransformerEncoder})
+
+model = from_pretrained_keras("keras-io/video-transformers")
+
+"""
+The code below is taken from the Video Transformers example on keras.io by Sayak Paul.
+"""
+def build_feature_extractor():
+    feature_extractor = keras.applications.DenseNet121(
+        weights="imagenet",
+        include_top=False,
+        pooling="avg",
+        input_shape=(IMG_SIZE, IMG_SIZE, 3),
+    )
+    preprocess_input = keras.applications.densenet.preprocess_input
+
+    inputs = keras.Input((IMG_SIZE, IMG_SIZE, 3))
+    preprocessed = preprocess_input(inputs)
+
+    outputs = feature_extractor(preprocessed)
+    return keras.Model(inputs, outputs, name="feature_extractor")
+
+
+feature_extractor = build_feature_extractor()
+
+
+def crop_center(frame):
+    center_crop_layer = layers.CenterCrop(IMG_SIZE, IMG_SIZE)
+    cropped = center_crop_layer(frame[None, ...])
+    cropped = cropped.numpy().squeeze()
+    return cropped
+
+
+def load_video(path, max_frames=0):
+    cap = cv2.VideoCapture(path)
+    frames = []
+    try:
+        while True:
+            ret, frame = cap.read()
+            if not ret:
+                break
+            frame = crop_center(frame)
+            frame = frame[:, :, [2, 1, 0]]  # BGR -> RGB
+            frames.append(frame)
+
+            if len(frames) == max_frames:
+                break
+    finally:
+        cap.release()
+    return np.array(frames)
+
+
+def prepare_single_video(frames):
+    frame_features = np.zeros(shape=(1, MAX_SEQ_LENGTH, NUM_FEATURES), dtype="float32")
+
+    # Pad shorter videos with all-zero frames.
+    if len(frames) < MAX_SEQ_LENGTH:
+        diff = MAX_SEQ_LENGTH - len(frames)
+        padding = np.zeros((diff, IMG_SIZE, IMG_SIZE, 3))
+        frames = np.concatenate([frames, padding])
+
+    frames = frames[None, ...]
+
+    # Extract features from the frames of the current video.
+    for i, batch in enumerate(frames):
+        video_length = batch.shape[0]
+        length = min(MAX_SEQ_LENGTH, video_length)
+        for j in range(length):
+            if np.mean(batch[j, :]) > 0.0:
+                frame_features[i, j, :] = feature_extractor.predict(batch[None, j, :])
+            else:
+                frame_features[i, j, :] = 0.0
+
+    return frame_features
+
+
+def predict_action(path):
+    frames = load_video(path)
+    frame_features = prepare_single_video(frames)
+    probabilities = model.predict(frame_features)[0]
+    confidences = {}
+
+    for i in np.argsort(probabilities)[::-1]:
+        confidences[CLASS_VOCAB[i]] = float(probabilities[i])
+
+    gif_out = to_gif(frames[:MAX_SEQ_LENGTH])
+
+    print(confidences)
+    return confidences, gif_out
+
+
+def to_gif(images):
+    converted_images = images.astype(np.uint8)
+    imageio.mimsave("animation.gif", converted_images, fps=10)
+    return "animation.gif"
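This commit does not include the Space's app entry point, so how predict_action is exposed to the UI is not shown here. Below is a minimal sketch of the kind of Gradio wiring that typically sits on top of it; the file name app.py and every component choice are assumptions for illustration, not part of this upload:

# app.py -- hypothetical entry point, not part of this commit.
import gradio as gr

from utils.predict import predict_action


def classify(video_path):
    # predict_action returns ({class_name: probability}, path_to_gif).
    confidences, gif_path = predict_action(video_path)
    return confidences, gif_path


demo = gr.Interface(
    fn=classify,
    inputs=gr.Video(label="Input clip"),
    outputs=[
        gr.Label(num_top_classes=5, label="Predicted action"),
        gr.Image(label="Sampled frames"),
    ],
    title="Video Transformers action recognition",
)

if __name__ == "__main__":
    demo.launch()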