Vincent Claes committed on
Commit
a861406
1 Parent(s): abf0474

working code

Browse files
Makefile.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ install:
2
+ poetry install
3
+ poetry run pip list --format=freeze > requirements.txt
4
+
README.md CHANGED
@@ -10,3 +10,7 @@ pinned: false
10
  ---
11
 
12
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
10
  ---
11
 
12
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
13
+
14
+ # Classify Rooms
15
+
16
+ ##
app.py ADDED
@@ -0,0 +1,151 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import gradio as gr
3
+ from transformers import AutoProcessor, AutoModel
4
+
5
+ from pathlib import Path
6
+ import numpy as np
7
+ from decord import VideoReader
8
+ import imageio
9
+
10
# Stride (in frames) used between sampled frames when the video is long enough;
# get_frame_sampling_rate() falls back to a smaller stride for short videos.
FRAME_SAMPLING_RATE = 4
DEFAULT_MODEL = "microsoft/xclip-base-patch16-zero-shot"

# Loaded once at import time; select_model() rebinds both globals.
processor = AutoProcessor.from_pretrained(DEFAULT_MODEL)
model = AutoModel.from_pretrained(DEFAULT_MODEL)

# Comma-separated candidate labels. predict() splits this string on ",", so no
# whitespace may follow a comma or it becomes part of the label text.
ROOMS = (
    "bathroom,sauna,living room,bedroom,kitchen,toilet,hallway,dressing,attic,basement"
)
# Rows of [video path, labels text], in the order of the UI inputs.
examples = [
    [
        "movies/bathroom.mp4",
        ROOMS,
    ],
]
25
+
26
+
27
def sample_frames_from_video_file(
    file_path: str, num_frames: int = 16, frame_sampling_rate=1
):
    """Decode ``num_frames`` evenly spaced frames from the start of a video.

    The sampled window nominally covers the first
    ``num_frames * frame_sampling_rate`` frames. If that window is empty
    (a sampling rate of 0) or extends past the end of the video, it is
    clamped to the whole clip, so frames are spread over what actually
    exists (repeating frames when the clip is shorter than ``num_frames``).

    Args:
        file_path: Path of the video file to open with decord.
        num_frames: Number of frames to return.
        frame_sampling_rate: Stride, in frames, between consecutive samples.

    Returns:
        The array produced by ``VideoReader.get_batch(...).asnumpy()`` for
        the selected frame indices.

    Raises:
        ValueError: If the video reports zero frames.
    """
    videoreader = VideoReader(file_path)
    videoreader.seek(0)

    last_frame_idx = len(videoreader) - 1
    if last_frame_idx < 0:
        raise ValueError(f"Video contains no frames: {file_path}")

    # sample frames
    start_idx = 0
    end_idx = num_frames * frame_sampling_rate - 1
    # Never ask the decoder for frames that do not exist, and never let the
    # window collapse to a negative end index.
    if end_idx < start_idx or end_idx > last_frame_idx:
        end_idx = last_frame_idx
    indices = np.linspace(start_idx, end_idx, num=num_frames, dtype=np.int64)
    frames = videoreader.get_batch(indices).asnumpy()

    return frames
40
+
41
+
42
def get_num_total_frames(file_path: str):
    """Return the number of frames decord reports for the video at ``file_path``."""
    reader = VideoReader(file_path)
    reader.seek(0)
    frame_count = len(reader)
    return frame_count
46
+
47
+
48
+ # def convert_frames_to_gif(frames, save_path: str = "frames.gif"):
49
+ # converted_frames = frames.astype(np.uint8)
50
+ # Path(save_path).parent.mkdir(parents=True, exist_ok=True)
51
+ # imageio.mimsave(save_path, converted_frames, fps=8)
52
+ # return save_path
53
+
54
+
55
+ # def create_gif_from_video_file(
56
+ # file_path: str,
57
+ # num_frames: int = 16,
58
+ # frame_sampling_rate: int = 1,
59
+ # save_path: str = "frames.gif",
60
+ # ):
61
+ # frames = sample_frames_from_video_file(file_path, num_frames, frame_sampling_rate)
62
+ # return convert_frames_to_gif(frames, save_path)
63
+
64
+
65
def select_model(model_name):
    """Replace the module-level ``processor`` and ``model`` with ``model_name``.

    Both objects are loaded before either global is rebound, so a failure
    while loading the model cannot leave the new processor paired with the
    previous model.

    Args:
        model_name: Identifier passed to ``from_pretrained`` for both the
            processor and the model.
    """
    global processor, model
    new_processor = AutoProcessor.from_pretrained(model_name)
    new_model = AutoModel.from_pretrained(model_name)
    processor, model = new_processor, new_model
69
+
70
def get_frame_sampling_rate(video_path, num_model_input_frames):
    """Choose the stride between sampled frames for ``video_path``.

    Returns ``FRAME_SAMPLING_RATE`` when the video holds at least
    ``FRAME_SAMPLING_RATE * num_model_input_frames`` frames. Otherwise the
    stride shrinks to ``total_frames // num_model_input_frames``, which is 0
    when the video has fewer frames than the model consumes.
    """
    total_frames = get_num_total_frames(video_path)
    long_enough = total_frames >= FRAME_SAMPLING_RATE * num_model_input_frames
    return (
        FRAME_SAMPLING_RATE
        if long_enough
        else total_frames // num_model_input_frames
    )
78
+
79
def predict(video_path, labels_text):
    """Score a video against a comma-separated list of candidate labels.

    Args:
        video_path: Path of the video file to classify.
        labels_text: Candidate labels separated by commas. Surrounding
            whitespace is stripped; empty and repeated entries are dropped
            so they neither reach the model nor dilute the softmax.

    Returns:
        A dict mapping each label to its probability (a float), taken from
        a softmax over the model's ``logits_per_video`` for the video.

    Raises:
        gr.Error: If no video or no usable label was provided.
    """
    if not video_path:
        raise gr.Error("Please upload a video first.")

    stripped = (label.strip() for label in (labels_text or "").split(","))
    # dict.fromkeys removes duplicates while keeping the user's order.
    labels = list(dict.fromkeys(label for label in stripped if label))
    if not labels:
        raise gr.Error("Please provide at least one label, separated by commas.")

    num_model_input_frames = model.config.vision_config.num_frames
    frame_sampling_rate = get_frame_sampling_rate(video_path, num_model_input_frames)
    frames = sample_frames_from_video_file(
        video_path, num_model_input_frames, frame_sampling_rate
    )

    inputs = processor(
        text=labels, videos=list(frames), return_tensors="pt", padding=True
    )
    # forward pass (inference only, so skip gradient tracking)
    with torch.no_grad():
        outputs = model(**inputs)

    probs = outputs.logits_per_video[0].softmax(dim=-1).cpu().numpy()
    return {label: float(prob) for label, prob in zip(labels, probs)}
102
+
103
+
104
# Gradio UI: a video upload plus a labels textbox on the left, the predicted
# label probabilities on the right, and clickable examples underneath.
app = gr.Blocks()
with app:
    gr.Markdown(
        "# **<p align='center'>Classification of Rooms</p>**"
    )
    gr.Markdown(
        "### **<p align='center'>Upload a video of a room and provide a list of type of rooms the model should select from.</p>**"
    )

    with gr.Row():
        with gr.Column():
            video_file = gr.Video(label="Video File:", show_label=True)
            local_video_labels_text = gr.Textbox(
                label="Labels Text:", show_label=True
            )
            submit_button = gr.Button(value="Predict")
        with gr.Column():
            predictions = gr.Label(label="Predictions:", show_label=True)

    gr.Markdown("**Examples:**")
    # Clicking an example fills both inputs. Caching is off so no model
    # inference runs at startup.
    gr.Examples(
        examples,
        inputs=[video_file, local_video_labels_text],
        outputs=predictions,
        fn=predict,
        cache_examples=False,
    )

    submit_button.click(
        predict,
        inputs=[video_file, local_video_labels_text],
        outputs=predictions,
    )
    # gr.Markdown(
    #     """
    #     \n Created by: Vincent Claes, <a href=\"https://www.meet-drift.ai/\">Drift</a>.
    #     \n Inspired by: <a href=\"https://huggingface.co/spaces/fcakyon/zero-shot-video-classification\">fcakyon</a>.
    #     """
    # )

app.launch()
movies/bathroom.mp4 ADDED
Binary file (615 kB). View file
 
movies/bedroom.mp4 ADDED
Binary file (106 kB). View file
 
movies/dressing.mp4 ADDED
Binary file (251 kB). View file
 
movies/home-office.mp4 ADDED
Binary file (336 kB). View file
 
movies/kitchen.mp4 ADDED
Binary file (317 kB). View file
 
movies/living-room.mp4 ADDED
Binary file (376 kB). View file
 
movies/toilet.mp4 ADDED
Binary file (215 kB). View file
 
poetry.lock ADDED
The diff for this file is too large to render. See raw diff
 
pyproject.toml ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [tool.poetry]
2
+ name = "classify-rooms"
3
+ version = "0.1.0"
4
+ description = ""
5
+ authors = ["Vincent Claes <vincent.v.claes@gmail.com>"]
6
+ readme = "README.md"
7
+
8
+ [tool.poetry.dependencies]
9
+ python = "^3.8"
10
+ gradio = "^3.12.0"
11
+ decord = "^0.6.0"
12
+ torch = "^1.13.1"
13
+ transformers = "^4.25.1"
14
+ imageio = "^2.24.0"
15
+
16
+ [tool.poetry.group.dev.dependencies]
17
+ black = "^22.12.0"
18
+
19
+ [build-system]
20
+ requires = ["poetry-core"]
21
+ build-backend = "poetry.core.masonry.api"
22
+ #gradio
23
+ #torch
24
+ #decord
25
+ #pytube
26
+ #imageio
27
+ #transformers @ git+https://github.com/huggingface/transformers.git@799cea64ac1029d66e9e58f18bc6f47892270723
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ gradio
2
+ torch
3
+ decord
4
+ imageio
5
+ transformers @ git+https://github.com/huggingface/transformers.git@799cea64ac1029d66e9e58f18bc6f47892270723