{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": { "provenance": [], "gpuType": "T4" },
    "kernelspec": { "name": "python3", "display_name": "Python 3" },
    "language_info": { "name": "python" },
    "accelerator": "GPU"
  },
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": { "id": "qbM01EucvW58" },
      "outputs": [],
      "source": [
        "!pip install controlnet-aux==0.0.7\n",
        "!pip install -U openmim\n",
        "!pip install cog\n",
        "!pip install mediapipe\n",
        "!mim install mmengine\n",
        "!mim install \"mmcv>=2.0.1\"\n",
        "!mim install \"mmdet>=3.1.0\"\n",
        "!mim install \"mmpose>=1.1.0\""
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "#@title upload a video (the frame-extraction cell below expects /content/a.mp4)\n",
        "from google.colab import files\n",
        "uploaded = files.upload()"
      ],
      "metadata": { "id": "rvxvKxkiR8ih" },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "#@title break video down into frames\n",
        "import os\n",
        "import cv2\n",
        "\n",
        "# Create the output directory first; cv2.imwrite fails silently if it is missing\n",
        "os.makedirs('/content/frames', exist_ok=True)\n",
        "\n",
        "# Open the video file\n",
        "cap = cv2.VideoCapture('/content/a.mp4')\n",
        "\n",
        "i = 0\n",
        "while cap.isOpened():\n",
        "    ret, frame = cap.read()\n",
        "    if not ret:\n",
        "        break\n",
        "\n",
        "    # Save each frame of the video\n",
        "    cv2.imwrite(f'/content/frames/frame_{i}.jpg', frame)\n",
        "    i += 1\n",
        "\n",
        "cap.release()"
      ],
      "metadata": { "id": "Kw0hIeYnvjLV" },
      "execution_count": null,
      "outputs": []
    },
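    {
      "cell_type": "code",
      "source": [
        "#@title (optional) check the input video fps\n",
        "# A minimal sketch, not part of the original workflow: reads the fps and frame\n",
        "# count of the uploaded video so you can pass the same fps to the ffmpeg cell\n",
        "# below ('best to keep fps same as input video'). Assumes /content/a.mp4.\n",
        "import cv2\n",
        "\n",
        "cap = cv2.VideoCapture('/content/a.mp4')\n",
        "fps = cap.get(cv2.CAP_PROP_FPS)\n",
        "frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))\n",
        "cap.release()\n",
        "\n",
        "print(f'fps: {fps}, frames: {frame_count}')"
      ],
      "metadata": {},
      "execution_count": null,
      "outputs": []
    },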
    {
      "cell_type": "code",
      "source": [
        "#==========Interpolate the pose frames==========\n",
        "!pip install moviepy"
      ],
      "metadata": { "id": "jTIwuo4ESGBw" },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "#@title interpolate processed frames (best to keep fps the same as the input video)\n",
        "!ffmpeg -r 8 -i /content/test/frame_%d.jpg -c:v libx264 -vf \"fps=8,format=yuv420p\" testpose.mp4\n"
      ],
      "metadata": { "id": "8kUk-kFPwzmq" },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "#=======AVAILABLE PROCESSORS========\n",
        "# load processor from processor_id\n",
        "# options are:\n",
        "# [\"canny\", \"depth_leres\", \"depth_leres++\", \"depth_midas\", \"depth_zoe\", \"lineart_anime\",\n",
        "#  \"lineart_coarse\", \"lineart_realistic\", \"mediapipe_face\", \"mlsd\", \"normal_bae\", \"normal_midas\",\n",
        "#  \"openpose\", \"openpose_face\", \"openpose_faceonly\", \"openpose_full\", \"openpose_hand\",\n",
        "#  \"scribble_hed\", \"scribble_pidinet\", \"shuffle\", \"softedge_hed\", \"softedge_hedsafe\",\n",
        "#  \"softedge_pidinet\", \"softedge_pidsafe\", \"dwpose\"]"
      ],
      "metadata": { "id": "l5aAanvtyMz9" },
      "execution_count": null,
      "outputs": []
    },
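    {
      "cell_type": "code",
      "source": [
        "#@title (optional) preview a single processor on one frame\n",
        "# A minimal sketch, not part of the original workflow: runs one processor_id\n",
        "# from the list above on a single frame so you can check the output before\n",
        "# batch-processing every frame. Assumes /content/frames/frame_0.jpg exists.\n",
        "from PIL import Image\n",
        "from controlnet_aux.processor import Processor\n",
        "\n",
        "preview_processor = Processor('openpose_full')  # swap in any processor_id above\n",
        "frame = Image.open('/content/frames/frame_0.jpg')\n",
        "preview = preview_processor(frame, to_pil=True)\n",
        "preview"
      ],
      "metadata": {},
      "execution_count": null,
      "outputs": []
    },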
    {
      "cell_type": "code",
      "source": [
        "#@title simply change the processor under 'processor' at the bottom; it may need to be added if not available, see above (go to the v1 notebook if errors start)\n",
        "#=======AVAILABLE PROCESSORS========\n",
        "# load processor from processor_id\n",
        "# options are:\n",
        "# [\"canny\", \"depth_leres\", \"depth_leres++\", \"depth_midas\", \"depth_zoe\", \"lineart_anime\",\n",
        "#  \"lineart_coarse\", \"lineart_realistic\", \"mediapipe_face\", \"mlsd\", \"normal_bae\", \"normal_midas\",\n",
        "#  \"openpose\", \"openpose_face\", \"openpose_faceonly\", \"openpose_full\", \"openpose_hand\",\n",
        "#  \"scribble_hed\", \"scribble_pidinet\", \"shuffle\", \"softedge_hed\", \"softedge_hedsafe\",\n",
        "#  \"softedge_pidinet\", \"softedge_pidsafe\", \"dwpose\"]\n",
        "import os\n",
        "import time\n",
        "from typing import List\n",
        "\n",
        "import torch\n",
        "from cog import BasePredictor, Input, Path\n",
        "from PIL import Image\n",
        "from tqdm import tqdm\n",
        "from controlnet_aux.processor import Processor\n",
        "from controlnet_aux import (\n",
        "    HEDdetector,\n",
        "    MidasDetector,\n",
        "    MLSDdetector,\n",
        "    OpenposeDetector,\n",
        "    PidiNetDetector,\n",
        "    NormalBaeDetector,\n",
        "    LineartDetector,\n",
        "    LineartAnimeDetector,\n",
        "    CannyDetector,\n",
        "    ContentShuffleDetector,\n",
        "    ZoeDetector,\n",
        "    MediapipeFaceDetector,\n",
        "    SamDetector,\n",
        "    LeresDetector,\n",
        "    DWposeDetector,\n",
        ")\n",
        "\n",
        "image_dir = '/content/frames'\n",
        "\n",
        "class Predictor(BasePredictor):\n",
        "    def setup(self) -> None:\n",
        "        \"\"\"Load the models into memory to make running multiple predictions efficient\"\"\"\n",
        "        self.annotators = {\n",
        "            \"canny\": CannyDetector(),\n",
        "            \"content\": ContentShuffleDetector(),\n",
        "            \"face_detector\": MediapipeFaceDetector(),\n",
        "            \"hed\": self.initialize_detector(HEDdetector),\n",
        "            \"midas\": self.initialize_detector(MidasDetector),\n",
        "            \"mlsd\": self.initialize_detector(MLSDdetector),\n",
        "            \"open_pose\": self.initialize_detector(OpenposeDetector),\n",
        "            \"pidi\": self.initialize_detector(PidiNetDetector),\n",
        "            \"normal_bae\": self.initialize_detector(NormalBaeDetector),\n",
        "            \"lineart\": self.initialize_detector(LineartDetector),\n",
        "            \"lineart_anime\": self.initialize_detector(LineartAnimeDetector),\n",
        "            # \"zoe\": self.initialize_detector(ZoeDetector),\n",
        "            # \"mobile_sam\": self.initialize_detector(\n",
        "            #     SamDetector,\n",
        "            #     model_name=\"dhkim2810/MobileSAM\",\n",
        "            #     model_type=\"vit_t\",\n",
        "            #     filename=\"mobile_sam.pt\",\n",
        "            # ),\n",
        "            \"leres\": self.initialize_detector(LeresDetector),\n",
        "        }\n",
        "\n",
        "    def initialize_detector(\n",
        "        self, detector_class, model_name=\"lllyasviel/Annotators\", **kwargs\n",
        "    ):\n",
        "        return detector_class.from_pretrained(\n",
        "            model_name,\n",
        "            cache_dir=\"model_cache\",\n",
        "            **kwargs,\n",
        "        )\n",
        "\n",
        "    def predict(\n",
        "        self,\n",
        "        image_dir: str = Input(default=\"/content/frames\", description=\"Directory containing the images to be processed\"),\n",
        "        canny: bool = Input(default=True, description=\"Run canny edge detection\"),\n",
        "        content: bool = Input(default=True, description=\"Run content shuffle detection\"),\n",
        "        face_detector: bool = Input(default=True, description=\"Run face detection\"),\n",
        "        hed: bool = Input(default=True, description=\"Run HED detection\"),\n",
        "        midas: bool = Input(default=True, description=\"Run Midas detection\"),\n",
        "        mlsd: bool = Input(default=True, description=\"Run MLSD detection\"),\n",
        "        open_pose: bool = Input(default=True, description=\"Run Openpose detection\"),\n",
        "        pidi: bool = Input(default=True, description=\"Run PidiNet detection\"),\n",
        "        normal_bae: bool = Input(default=True, description=\"Run NormalBae detection\"),\n",
        "        lineart: bool = Input(default=True, description=\"Run Lineart detection\"),\n",
        "        lineart_anime: bool = Input(default=True, description=\"Run LineartAnime detection\"),\n",
        "        leres: bool = Input(default=True, description=\"Run Leres detection\"),\n",
        "    ) -> List[Path]:\n",
        "        start_time = time.time()  # Start time for overall processing\n",
        "\n",
        "        annotator_inputs = {\n",
        "            \"canny\": canny,\n",
        "            \"content\": content,\n",
        "            \"face_detector\": face_detector,\n",
        "            \"hed\": hed,\n",
        "            \"midas\": midas,\n",
        "            \"mlsd\": mlsd,\n",
        "            \"open_pose\": open_pose,\n",
        "            \"pidi\": pidi,\n",
        "            \"normal_bae\": normal_bae,\n",
        "            \"lineart\": lineart,\n",
        "            \"lineart_anime\": lineart_anime,\n",
        "            \"leres\": leres,\n",
        "        }\n",
        "\n",
        "        os.makedirs('/content/test1', exist_ok=True)\n",
        "        paths = []\n",
        "        for image_name in os.listdir(image_dir):\n",
        "            # Load each image and normalise its size\n",
        "            image = Image.open(os.path.join(image_dir, image_name)).convert(\"RGB\").resize((512, 512))\n",
        "            for annotator, run_annotator in annotator_inputs.items():\n",
        "                if run_annotator:\n",
        "                    processed_image = self.annotators[annotator](image)\n",
        "                    processed_path = f'/content/test1/{annotator}_{image_name}'\n",
        "                    processed_image.save(processed_path)\n",
        "                    paths.append(Path(processed_path))\n",
        "\n",
        "        print(f'Processing took {time.time() - start_time:.1f}s')\n",
        "        return paths\n",
        "\n",
        "# ---- What actually runs in this notebook: batch-process the frames ----\n",
        "# Build the processor once, then apply it to every frame\n",
        "processor = Processor('openpose_full')  # <-- change the processor here\n",
        "\n",
        "os.makedirs('/content/test2', exist_ok=True)\n",
        "\n",
        "# Get a list of filenames in /content/frames\n",
        "filenames = os.listdir('/content/frames')\n",
        "\n",
        "# Process all images with a progress bar and save them\n",
        "for filename in tqdm(filenames):\n",
        "    # Get the full path of the image file\n",
        "    image_path = os.path.join('/content/frames', filename)\n",
        "\n",
        "    # Load the image\n",
        "    image = Image.open(image_path)\n",
        "\n",
        "    # Process the image\n",
        "    processed_image = processor(image, to_pil=True)\n",
        "\n",
        "    # Save under the original frame name\n",
        "    original_name = filename.split('.')[0]\n",
        "    processed_path = f'/content/test2/{original_name}.png'\n",
        "    processed_image.save(processed_path)\n"
      ],
      "metadata": { "id": "qgKAWKrBL5d2" },
      "execution_count": null,
      "outputs": []
    },
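    {
      "cell_type": "code",
      "source": [
        "#@title (optional) sanity-check the processed frames\n",
        "# A minimal sketch, not part of the original workflow: compares the input and\n",
        "# output frame counts and shows one original/processed pair side by side\n",
        "# before you interpolate or zip the results. Assumes the folders used above\n",
        "# (/content/frames and /content/test2) and that frame_0 exists.\n",
        "import os\n",
        "import matplotlib.pyplot as plt\n",
        "from PIL import Image\n",
        "\n",
        "in_dir, out_dir = '/content/frames', '/content/test2'\n",
        "print(len(os.listdir(in_dir)), 'input frames,', len(os.listdir(out_dir)), 'processed frames')\n",
        "\n",
        "fig, axes = plt.subplots(1, 2, figsize=(8, 4))\n",
        "axes[0].imshow(Image.open(os.path.join(in_dir, 'frame_0.jpg')))\n",
        "axes[0].set_title('original')\n",
        "axes[1].imshow(Image.open(os.path.join(out_dir, 'frame_0.png')))\n",
        "axes[1].set_title('processed')\n",
        "for ax in axes:\n",
        "    ax.axis('off')\n",
        "plt.show()"
      ],
      "metadata": {},
      "execution_count": null,
      "outputs": []
    },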
    {
      "cell_type": "code",
      "source": [
        "#@title for seeing what's in controlnet_aux\n",
        "import controlnet_aux\n",
        "dir(controlnet_aux)"
      ],
      "metadata": { "id": "X08c_PPKTQiq" },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "#@title zip the processed frames (append the folder to zip, e.g. /content/test2)\n",
        "!zip -r nameof.zip "
      ],
      "metadata": { "id": "Oax1BHwYTZog" },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": { "id": "FaF3RdKdaFa8" },
      "outputs": [],
      "source": [
        "#@title Login to HuggingFace 🤗\n",
        "\n",
        "#@markdown You need to accept the model license before downloading or using the Stable Diffusion weights. Please visit the [model card](https://huggingface.co/runwayml/stable-diffusion-v1-5), read the license, and tick the checkbox if you agree. You have to be a registered user on the 🤗 Hugging Face Hub, and you'll also need an access token for the code to work.\n",
        "# https://huggingface.co/settings/tokens\n",
        "!mkdir -p ~/.huggingface\n",
        "HUGGINGFACE_TOKEN = \"\" #@param {type:\"string\"}\n",
        "!echo -n \"{HUGGINGFACE_TOKEN}\" > ~/.huggingface/token"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": { "id": "aEJZoFQ2YHIb" },
      "outputs": [],
      "source": [
        "#@title upload to Huggingface\n",
        "from huggingface_hub import HfApi\n",
        "api = HfApi()\n",
        "api.upload_file(\n",
        "    path_or_fileobj=\"\",  # local file to upload, e.g. the zip created above\n",
        "    path_in_repo=\"name.zip\",\n",
        "    repo_id=\"\",  # your dataset repo, e.g. username/repo-name\n",
        "    repo_type=\"dataset\",\n",
        ")"
      ]
    }
  ]
}