waveydaveygravy committed
Commit 1dda0a5 · 1 Parent(s): 075c482

Upload vid2pose_v2.ipynb

Files changed (1)
  1. vid2pose_v2.ipynb +411 -0
vid2pose_v2.ipynb ADDED
@@ -0,0 +1,411 @@
+ {
+ "nbformat": 4,
+ "nbformat_minor": 0,
+ "metadata": {
+ "colab": {
+ "provenance": [],
+ "gpuType": "T4"
+ },
+ "kernelspec": {
+ "name": "python3",
+ "display_name": "Python 3"
+ },
+ "language_info": {
+ "name": "python"
+ },
+ "accelerator": "GPU"
+ },
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "qbM01EucvW58"
+ },
+ "outputs": [],
+ "source": [
+ "!pip install controlnet-aux==0.0.7\n",
+ "!pip install -U openmim\n",
+ "!pip install cog\n",
+ "!pip install mediapipe\n",
+ "!mim install mmengine\n",
+ "!mim install \"mmcv>=2.0.1\"\n",
+ "!mim install \"mmdet>=3.1.0\"\n",
+ "!mim install \"mmpose>=1.1.0\""
+ ]
+ },
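+ {
+ "cell_type": "code",
+ "source": [
+ "#@title (optional) confirm the GPU runtime is active (quick sanity check before loading the detectors)\n",
+ "!nvidia-smi"
+ ],
+ "metadata": {},
+ "execution_count": null,
+ "outputs": []
+ },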
+ {
+ "cell_type": "code",
+ "source": [
+ "from google.colab import files\n",
+ "uploaded = files.upload()"
+ ],
+ "metadata": {
+ "id": "rvxvKxkiR8ih"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "#@title break video down into frames\n",
+ "import cv2\n",
+ "import os\n",
+ "\n",
+ "# Create the output directory for the extracted frames\n",
+ "os.makedirs('/content/frames', exist_ok=True)\n",
+ "\n",
+ "# Open the video file\n",
+ "cap = cv2.VideoCapture('/content/a.mp4')\n",
+ "\n",
+ "i = 0\n",
+ "while cap.isOpened():\n",
+ "    ret, frame = cap.read()\n",
+ "\n",
+ "    if not ret:\n",
+ "        break\n",
+ "\n",
+ "    # Save each frame of the video\n",
+ "    cv2.imwrite('/content/frames/frame_' + str(i) + '.jpg', frame)\n",
+ "\n",
+ "    i += 1\n",
+ "\n",
+ "cap.release()\n",
+ "cv2.destroyAllWindows()"
+ ],
+ "metadata": {
+ "id": "Kw0hIeYnvjLV"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
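+ {
+ "cell_type": "code",
+ "source": [
+ "#@title (optional) check how many frames were extracted\n",
+ "import os\n",
+ "print(len(os.listdir('/content/frames')), 'frames in /content/frames')"
+ ],
+ "metadata": {},
+ "execution_count": null,
+ "outputs": []
+ },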
+ {
+ "cell_type": "code",
+ "source": [
+ "#==========Interpolate the pose frames==========\n",
+ "!pip install moviepy"
+ ],
+ "metadata": {
+ "id": "jTIwuo4ESGBw"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "#@title interpolate processed frames (best to keep fps same as input video)\n",
+ "!ffmpeg -r 8 -i /content/test/frame_%d.jpg -c:v libx264 -vf \"fps=8,format=yuv420p\" testpose.mp4\n"
+ ],
+ "metadata": {
+ "id": "8kUk-kFPwzmq"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
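+ {
+ "cell_type": "code",
+ "source": [
+ "#@title (optional) read the input video's fps so the -r and fps= values above can match it (a minimal sketch, assuming the input is still at /content/a.mp4)\n",
+ "import cv2\n",
+ "\n",
+ "cap = cv2.VideoCapture('/content/a.mp4')\n",
+ "fps = cap.get(cv2.CAP_PROP_FPS)\n",
+ "frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))\n",
+ "cap.release()\n",
+ "print(f'fps: {fps}, frames: {frame_count}')  # use this fps in the ffmpeg command above"
+ ],
+ "metadata": {},
+ "execution_count": null,
+ "outputs": []
+ },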
+ {
+ "cell_type": "code",
+ "source": [
+ "#=======AVAILABLE PROCESSORS========\n",
+ "# load processor from processor_id\n",
+ "# options are:\n",
+ "# [\"canny\", \"depth_leres\", \"depth_leres++\", \"depth_midas\", \"depth_zoe\", \"lineart_anime\",\n",
+ "#  \"lineart_coarse\", \"lineart_realistic\", \"mediapipe_face\", \"mlsd\", \"normal_bae\", \"normal_midas\",\n",
+ "#  \"openpose\", \"openpose_face\", \"openpose_faceonly\", \"openpose_full\", \"openpose_hand\",\n",
+ "#  \"scribble_hed\", \"scribble_pidinet\", \"shuffle\", \"softedge_hed\", \"softedge_hedsafe\",\n",
+ "#  \"softedge_pidinet\", \"softedge_pidsafe\", \"dwpose\"]"
+ ],
+ "metadata": {
+ "id": "l5aAanvtyMz9"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
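+ {
+ "cell_type": "code",
+ "source": [
+ "#@title (optional) quick test of a processor on a single frame (a minimal sketch, assuming frame_0.jpg exists in /content/frames)\n",
+ "from controlnet_aux.processor import Processor\n",
+ "from PIL import Image\n",
+ "\n",
+ "# pick any processor_id from the list above, e.g. 'openpose_full' or 'dwpose'\n",
+ "processor = Processor('openpose_full')\n",
+ "\n",
+ "image = Image.open('/content/frames/frame_0.jpg').convert('RGB')\n",
+ "result = processor(image, to_pil=True)  # returns a PIL image of the detected pose\n",
+ "result.save('/content/pose_test.png')"
+ ],
+ "metadata": {},
+ "execution_count": null,
+ "outputs": []
+ },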
+ {
+ "cell_type": "code",
+ "source": [
+ "#@title simply change the processor under 'processor' at the bottom, may need to add if not available, see above (go to the v1 notebook if errors start)\n",
+ "#=======AVAILABLE PROCESSORS========\n",
+ "# load processor from processor_id\n",
+ "# options are:\n",
+ "# [\"canny\", \"depth_leres\", \"depth_leres++\", \"depth_midas\", \"depth_zoe\", \"lineart_anime\",\n",
+ "#  \"lineart_coarse\", \"lineart_realistic\", \"mediapipe_face\", \"mlsd\", \"normal_bae\", \"normal_midas\",\n",
+ "#  \"openpose\", \"openpose_face\", \"openpose_faceonly\", \"openpose_full\", \"openpose_hand\",\n",
+ "#  \"scribble_hed\", \"scribble_pidinet\", \"shuffle\", \"softedge_hed\", \"softedge_hedsafe\",\n",
+ "#  \"softedge_pidinet\", \"softedge_pidsafe\", \"dwpose\"]\n",
+ "import torch\n",
+ "import os\n",
+ "import time\n",
+ "from typing import List\n",
+ "from cog import BasePredictor, Input, Path\n",
+ "from PIL import Image\n",
+ "from io import BytesIO\n",
+ "from tqdm import tqdm\n",
+ "from controlnet_aux.processor import Processor\n",
+ "from controlnet_aux import (\n",
+ "    HEDdetector,\n",
+ "    MidasDetector,\n",
+ "    MLSDdetector,\n",
+ "    OpenposeDetector,\n",
+ "    PidiNetDetector,\n",
+ "    NormalBaeDetector,\n",
+ "    LineartDetector,\n",
+ "    LineartAnimeDetector,\n",
+ "    CannyDetector,\n",
+ "    ContentShuffleDetector,\n",
+ "    ZoeDetector,\n",
+ "    MediapipeFaceDetector,\n",
+ "    SamDetector,\n",
+ "    LeresDetector,\n",
+ "    DWposeDetector,\n",
+ ")\n",
+ "\n",
+ "image_dir = '/content/frames'\n",
+ "\n",
+ "class Predictor(BasePredictor):\n",
+ "    def setup(self) -> None:\n",
+ "        \"\"\"Load the annotator models into memory to make running multiple predictions efficient\"\"\"\n",
+ "\n",
+ "        self.annotators = {\n",
+ "            \"canny\": CannyDetector(),\n",
+ "            \"content\": ContentShuffleDetector(),\n",
+ "            \"face_detector\": MediapipeFaceDetector(),\n",
+ "            \"hed\": self.initialize_detector(HEDdetector),\n",
+ "            \"midas\": self.initialize_detector(MidasDetector),\n",
+ "            \"mlsd\": self.initialize_detector(MLSDdetector),\n",
+ "            \"open_pose\": self.initialize_detector(OpenposeDetector),\n",
+ "            \"pidi\": self.initialize_detector(PidiNetDetector),\n",
+ "            \"normal_bae\": self.initialize_detector(NormalBaeDetector),\n",
+ "            \"lineart\": self.initialize_detector(LineartDetector),\n",
+ "            \"lineart_anime\": self.initialize_detector(LineartAnimeDetector),\n",
+ "            # \"zoe\": self.initialize_detector(ZoeDetector),\n",
+ "            # \"mobile_sam\": self.initialize_detector(\n",
+ "            #     SamDetector,\n",
+ "            #     model_name=\"dhkim2810/MobileSAM\",\n",
+ "            #     model_type=\"vit_t\",\n",
+ "            #     filename=\"mobile_sam.pt\",\n",
+ "            # ),\n",
+ "            \"leres\": self.initialize_detector(LeresDetector),\n",
+ "        }\n",
+ "\n",
+ "        torch.device(\"cuda\")\n",
+ "\n",
+ "    def initialize_detector(\n",
+ "        self, detector_class, model_name=\"lllyasviel/Annotators\", **kwargs\n",
+ "    ):\n",
+ "        return detector_class.from_pretrained(\n",
+ "            model_name,\n",
+ "            cache_dir=\"model_cache\",\n",
+ "            **kwargs,\n",
+ "        )\n",
+ "\n",
+ "    def predict(\n",
+ "        self,\n",
+ "        image_dir: str = Input(\n",
+ "            default=\"/content/frames\",\n",
+ "            description=\"Directory containing the images to be processed\",\n",
+ "        ),\n",
+ "        canny: bool = Input(default=True, description=\"Run canny edge detection\"),\n",
+ "        content: bool = Input(default=True, description=\"Run content shuffle detection\"),\n",
+ "        face_detector: bool = Input(default=True, description=\"Run face detection\"),\n",
+ "        hed: bool = Input(default=True, description=\"Run HED detection\"),\n",
+ "        midas: bool = Input(default=True, description=\"Run Midas detection\"),\n",
+ "        mlsd: bool = Input(default=True, description=\"Run MLSD detection\"),\n",
+ "        open_pose: bool = Input(default=True, description=\"Run Openpose detection\"),\n",
+ "        pidi: bool = Input(default=True, description=\"Run PidiNet detection\"),\n",
+ "        normal_bae: bool = Input(default=True, description=\"Run NormalBae detection\"),\n",
+ "        lineart: bool = Input(default=True, description=\"Run Lineart detection\"),\n",
+ "        lineart_anime: bool = Input(default=True, description=\"Run LineartAnime detection\"),\n",
+ "        leres: bool = Input(default=True, description=\"Run Leres detection\"),\n",
+ "    ) -> List[Path]:\n",
+ "        # Load all images into memory\n",
+ "        start_time = time.time()  # Start time for overall processing\n",
+ "        image_names = sorted(os.listdir(image_dir))\n",
+ "        images = [Image.open(os.path.join(image_dir, name)).convert(\"RGB\").resize((512, 512)) for name in image_names]\n",
+ "\n",
+ "        paths = []\n",
+ "        annotator_inputs = {\n",
+ "            \"canny\": canny,\n",
+ "            \"content\": content,\n",
+ "            \"face_detector\": face_detector,\n",
+ "            \"hed\": hed,\n",
+ "            \"midas\": midas,\n",
+ "            \"mlsd\": mlsd,\n",
+ "            \"open_pose\": open_pose,\n",
+ "            \"pidi\": pidi,\n",
+ "            \"normal_bae\": normal_bae,\n",
+ "            \"lineart\": lineart,\n",
+ "            \"lineart_anime\": lineart_anime,\n",
+ "            \"leres\": leres,\n",
+ "        }\n",
+ "        os.makedirs('/content/test1', exist_ok=True)\n",
+ "        for annotator, run_annotator in annotator_inputs.items():\n",
+ "            if run_annotator:\n",
+ "                for image_name, image in zip(image_names, images):\n",
+ "                    processed_image = self.annotators[annotator](image)\n",
+ "                    processed_path = f'/content/test1/{annotator}_{image_name}'\n",
+ "                    processed_image.save(processed_path)\n",
+ "                    paths.append(Path(processed_path))\n",
+ "\n",
+ "        return paths\n",
+ "\n",
+ "# ===== per-frame processing with the controlnet_aux Processor =====\n",
+ "# simply change the processor_id here (see the list above)\n",
+ "processor = Processor(\"openpose_full\")\n",
+ "\n",
+ "# Load images and paths\n",
+ "images = []\n",
+ "image_paths = []\n",
+ "for name in sorted(os.listdir(image_dir)):\n",
+ "    path = os.path.join(image_dir, name)\n",
+ "    image = Image.open(path)\n",
+ "\n",
+ "    images.append(image)\n",
+ "    image_paths.append(path)\n",
+ "\n",
+ "# Process all frames with a progress bar and save the results\n",
+ "os.makedirs('/content/test2', exist_ok=True)\n",
+ "for image_path, image in tqdm(zip(image_paths, images), total=len(image_paths)):\n",
+ "\n",
+ "    # Process the frame with the selected processor\n",
+ "    processed_image = processor(image, to_pil=True)\n",
+ "\n",
+ "    # Extract original name\n",
+ "    original_name = os.path.basename(image_path).split('.')[0]\n",
+ "\n",
+ "    # Save image\n",
+ "    processed_path = f'/content/test2/{original_name}.png'\n",
+ "    processed_image.save(processed_path)\n"
+ ],
+ "metadata": {
+ "id": "qgKAWKrBL5d2"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
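+ {
+ "cell_type": "code",
+ "source": [
+ "#@title (optional) preview one processed frame (assumes the processed frames were saved to /content/test2)\n",
+ "import os\n",
+ "from PIL import Image\n",
+ "from IPython.display import display\n",
+ "\n",
+ "processed_files = sorted(os.listdir('/content/test2'))\n",
+ "display(Image.open(os.path.join('/content/test2', processed_files[0])))"
+ ],
+ "metadata": {},
+ "execution_count": null,
+ "outputs": []
+ },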
+ {
+ "cell_type": "code",
+ "source": [
+ "#@title for seeing what's in controlnet_aux\n",
+ "import controlnet_aux\n",
+ "dir(controlnet_aux)"
+ ],
+ "metadata": {
+ "id": "X08c_PPKTQiq"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "!zip -r nameof.zip <location of files and folder>"
+ ],
+ "metadata": {
+ "id": "Oax1BHwYTZog"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
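+ {
+ "cell_type": "code",
+ "source": [
+ "#@title (optional) example of the zip command above (assumes the processed frames are in /content/test2; adjust the names to taste)\n",
+ "!zip -r poses.zip /content/test2"
+ ],
+ "metadata": {},
+ "execution_count": null,
+ "outputs": []
+ },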
374
+ {
375
+ "cell_type": "code",
376
+ "execution_count": null,
377
+ "metadata": {
378
+ "id": "FaF3RdKdaFa8"
379
+ },
380
+ "outputs": [],
381
+ "source": [
382
+ "#@title Login to HuggingFace 🤗\n",
383
+ "\n",
384
+ "#@markdown You need to accept the model license before downloading or using the Stable Diffusion weights. Please, visit the [model card](https://huggingface.co/runwayml/stable-diffusion-v1-5), read the license and tick the checkbox if you agree. You have to be a registered user in 🤗 Hugging Face Hub, and you'll also need to use an access token for the code to work.\n",
385
+ "# https://huggingface.co/settings/tokens\n",
386
+ "!mkdir -p ~/.huggingface\n",
387
+ "HUGGINGFACE_TOKEN = \"\" #@param {type:\"string\"}\n",
388
+ "!echo -n \"{HUGGINGFACE_TOKEN}\" > ~/.huggingface/token"
389
+ ]
390
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "aEJZoFQ2YHIb"
+ },
+ "outputs": [],
+ "source": [
+ "#@title upload to Hugging Face\n",
+ "from huggingface_hub import HfApi\n",
+ "api = HfApi()\n",
+ "api.upload_file(\n",
+ "    path_or_fileobj=\"\",\n",
+ "    path_in_repo=\"name.zip\",\n",
+ "    repo_id=\"\",\n",
+ "    repo_type=\"dataset\",\n",
+ ")"
+ ]
+ }
+ ]
+ }