{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [] }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" } }, "cells": [ { "cell_type": "code", "execution_count": null, "metadata": { "id": "asNLOn0uIC5o" }, "outputs": [], "source": [ "### based on https://github.com/patrickvonplaten/controlnet_aux\n", "### which is derived from https://github.com/lllyasviel/ControlNet/tree/main/annotator and connected to the 🤗 Hub.\n", "\n", "#All credit & copyright goes to https://github.com/lllyasviel .\n", "#some of the models are large comment them out to save space if not needed" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "qbM01EucvW58" }, "outputs": [], "source": [ "!pip install controlnet-aux==0.0.7\n", "!pip install -U openmim\n", "!pip install cog\n", "!pip install mediapipe\n", "!mim install mmengine\n", "!mim install \"mmcv>=2.0.1\"\n", "!mim install \"mmdet>=3.1.0\"\n", "!mim install \"mmpose>=1.1.0\"\n", "!pip install moviepy\n", "!pip install argparse\n", "\n", "import os\n", "\n", "# Create the directory /content/test\n", "os.makedirs(\"/content/test\", exist_ok=True)\n", "\n", "# Create the directory /content/frames\n", "os.makedirs(\"/content/frames\", exist_ok=True)\n" ] }, { "cell_type": "code", "source": [ "from google.colab import files\n", "uploaded = files.upload()" ], "metadata": { "id": "fy-P7QkwCMBd" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "#@title break video down into frames\n", "import cv2\n", "\n", "# Open the video file\n", "cap = cv2.VideoCapture('/content/a.mp4')\n", "\n", "i = 0\n", "while(cap.isOpened()):\n", " ret, frame = cap.read()\n", "\n", " if ret == False:\n", " break\n", "\n", " # Save each frame of the video\n", " cv2.imwrite('/content/frames/frame_' + str(i) + '.jpg', frame)\n", "\n", " i += 1\n", "\n", "cap.release()\n", "cv2.destroyAllWindows()" ], "metadata": { "id": "Kw0hIeYnvjLV" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "###COMMENT OUT PROCESSORS YOU DONT WANT TO USE ALSO COMMENT OUT ONES WITH LARGE MODELS IF YOU WANT TO SAVE SPACE\n", "### based on https://github.com/patrickvonplaten/controlnet_aux\n", "### which is derived from https://github.com/lllyasviel/ControlNet/tree/main/annotator and connected to the 🤗 Hub.\n", "#All credit & copyright goes to https://github.com/lllyasviel .\n", "#some of the models are large comment them out to save space if not needed\n", "\n", "import torch\n", "import os\n", "import shutil\n", "from PIL import Image\n", "from tqdm import tqdm\n", "from controlnet_aux import (CannyDetector, ContentShuffleDetector, HEDdetector,\n", " LeresDetector, LineartAnimeDetector,\n", " LineartDetector, MediapipeFaceDetector,\n", " MidasDetector, MLSDdetector, NormalBaeDetector,\n", " OpenposeDetector, PidiNetDetector, SamDetector,\n", " ZoeDetector, DWposeDetector)\n", "\n", "# Create the directory /content/test\n", "os.makedirs(\"/content/test\", exist_ok=True)\n", "\n", "INPUT_DIR = \"/content/frames\" # replace with your input directory\n", "OUTPUT_DIR = \"/content/test\" # replace with your output directory\n", "\n", "# Check if CUDA is available and set the device accordingly\n", "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n", "\n", "\n", "def output(filename, img):\n", " img.save(os.path.join(OUTPUT_DIR, filename))\n", "\n", "def process_image(processor, img):\n", " return processor(img)\n", "\n", "def load_images():\n", " if os.path.exists(OUTPUT_DIR):\n", " shutil.rmtree(OUTPUT_DIR)\n", " os.mkdir(OUTPUT_DIR)\n", " images = []\n", " filenames = []\n", " for filename in os.listdir(INPUT_DIR):\n", " if filename.endswith(\".png\") or filename.endswith(\".jpg\"):\n", " img_path = os.path.join(INPUT_DIR, filename)\n", " img = Image.open(img_path).convert(\"RGB\").resize((512, 512))\n", " images.append(img)\n", " filenames.append(filename)\n", " return images, filenames\n", "\n", "def process_images(processor):\n", " images, filenames = load_images()\n", " for img, filename in tqdm(zip(images, filenames), total=len(images), desc=\"Processing images\"):\n", " output_img = process_image(processor, img)\n", " output(filename, output_img)\n", "\n", "# Initialize the detectors\n", "\n", "canny = CannyDetector()\n", "hed = HEDdetector.from_pretrained(\"lllyasviel/Annotators\")\n", "shuffle = ContentShuffleDetector()\n", "leres = LeresDetector.from_pretrained(\"lllyasviel/Annotators\")\n", "lineart_anime = LineartAnimeDetector.from_pretrained(\"lllyasviel/Annotators\")\n", "lineart = LineartDetector.from_pretrained(\"lllyasviel/Annotators\")\n", "mediapipe_face = MediapipeFaceDetector()\n", "midas = MidasDetector.from_pretrained(\"lllyasviel/Annotators\")\n", "mlsd = MLSDdetector.from_pretrained(\"lllyasviel/Annotators\")\n", "normal_bae = NormalBaeDetector.from_pretrained(\"lllyasviel/Annotators\")\n", "openpose = OpenposeDetector.from_pretrained(\"lllyasviel/Annotators\")\n", "pidi_net = PidiNetDetector.from_pretrained(\"lllyasviel/Annotators\")\n", "sam = SamDetector.from_pretrained(\"ybelkada/segment-anything\", subfolder=\"checkpoints\")\n", "#zoe = ZoeDetector.from_pretrained(\"lllyasviel/Annotators\")\n", "#dwpose = DWposeDetector()\n", "\n", "\n", "\n", "# Run the image processing\n", "# Uncomment the line for the detector you want to use\n", "#process_images(canny)\n", "#process_images(hed)\n" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "46d65432-5661-4377-ab34-64e5767f6e91", "id": "pXgCvJvi45mo" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stderr", "text": [ "/usr/local/lib/python3.10/dist-packages/timm/models/_factory.py:117: UserWarning: Mapping deprecated model name vit_base_resnet50_384 to current vit_base_r50_s16_384.orig_in21k_ft_in1k.\n", " model = create_fn(\n" ] }, { "output_type": "stream", "name": "stdout", "text": [ "Loading base model ()...Done.\n", "Removing last two layers (global_pool & classifier).\n" ] }, { "output_type": "stream", "name": "stderr", "text": [ "Processing images: 100%|██████████| 7/7 [00:14<00:00, 2.02s/it]\n" ] } ] }, { "cell_type": "code", "source": [ "#command line version (may need extra work)\n", "!python /content/test.py --processor hed --use_cuda --output_dir /content/test/" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "lsJnu9BiJbId", "outputId": "c68d113f-27bc-4bf0-9c04-625b1fce6aa5" }, "execution_count": 1, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "python3: can't open file '/content/test.py': [Errno 2] No such file or directory\n" ] } ] }, { "cell_type": "code", "source": [ "### COMMAND LINE VERSION test.py\n", "# based on https://github.com/patrickvonplaten/controlnet_aux\n", "### which is derived from https://github.com/lllyasviel/ControlNet/tree/main/annotator and connected to the 🤗 Hub.\n", "\n", "#All credit & copyright goes to https://github.com/lllyasviel .\n", "#some of the models are large comment them out to save space if not needed\n", "import torch\n", "import argparse\n", "import os\n", "import shutil\n", "from PIL import Image\n", "from tqdm import tqdm\n", "from controlnet_aux import (CannyDetector, ContentShuffleDetector, HEDdetector,\n", " LeresDetector, LineartAnimeDetector,\n", " LineartDetector, MediapipeFaceDetector,\n", " MidasDetector, MLSDdetector, NormalBaeDetector,\n", " OpenposeDetector, PidiNetDetector, SamDetector,\n", " ZoeDetector, DWposeDetector)\n", "\n", "# Create the directory /content/test\n", "os.makedirs(\"/content/test\", exist_ok=True)\n", "\n", "INPUT_DIR = \"/content/frames\" # replace with your input directory\n", "OUTPUT_DIR = \"/content/test\" # replace with your output directory\n", "\n", "# Check if CUDA is available and set the device accordingly\n", "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n", "\n", "def output(filename, img):\n", " img.save(os.path.join(OUTPUT_DIR, filename))\n", "\n", "def process_image(processor, img):\n", " return processor(img)\n", "\n", "def load_images():\n", " if os.path.exists(OUTPUT_DIR):\n", " shutil.rmtree(OUTPUT_DIR)\n", " os.mkdir(OUTPUT_DIR)\n", " images = []\n", " filenames = []\n", " for filename in os.listdir(INPUT_DIR):\n", " if filename.endswith(\".png\") or filename.endswith(\".jpg\"):\n", " img_path = os.path.join(INPUT_DIR, filename)\n", " img = Image.open(img_path).convert(\"RGB\").resize((512, 512))\n", " images.append(img)\n", " filenames.append(filename)\n", " return images, filenames\n", "\n", "def process_images(processor):\n", " images, filenames = load_images()\n", " for img, filename in tqdm(zip(images, filenames), total=len(images), desc=\"Processing images\"):\n", " output_img = process_image(processor, img)\n", " output(filename, output_img)\n", "\n", "\n", "\n", "# Initialize the argument parser\n", "parser = argparse.ArgumentParser(description='Choose a processor to run.')\n", "parser.add_argument('--processor', type=str, help='The name of the processor to run.')\n", "parser.add_argument('--use_cuda', action='store_true', help='Use CUDA if available.')\n", "parser.add_argument('--output_dir', type=str, default='./', help='The directory to save the output.')\n", "# Parse the arguments\n", "args = parser.parse_args()\n", "\n", "# Check if CUDA is available and set the device accordingly\n", "device = torch.device(\"cuda\" if args.use_cuda and torch.cuda.is_available() else \"cpu\")\n", "\n", "\n", "# Initialize the detectors\n", "detectors = {\n", " 'canny': CannyDetector(),\n", " 'hed': HEDdetector.from_pretrained(\"lllyasviel/Annotators\"),\n", " 'shuffle': ContentShuffleDetector(),\n", " 'leres': LeresDetector.from_pretrained(\"lllyasviel/Annotators\"),\n", " 'lineart_anime': LineartAnimeDetector.from_pretrained(\"lllyasviel/Annotators\"),\n", " 'lineart': LineartDetector.from_pretrained(\"lllyasviel/Annotators\"),\n", " 'mediapipe_face': MediapipeFaceDetector(),\n", " 'midas': MidasDetector.from_pretrained(\"lllyasviel/Annotators\"),\n", " 'mlsd': MLSDdetector.from_pretrained(\"lllyasviel/Annotators\"),\n", " 'normal_bae': NormalBaeDetector.from_pretrained(\"lllyasviel/Annotators\"),\n", " 'openpose': OpenposeDetector.from_pretrained(\"lllyasviel/Annotators\"),\n", " 'pidi_net': PidiNetDetector.from_pretrained(\"lllyasviel/Annotators\"),\n", " 'sam': SamDetector.from_pretrained(\"ybelkada/segment-anything\", subfolder=\"checkpoints\"),\n", " # 'zoe': ZoeDetector.from_pretrained(\"lllyasviel/Annotators\"),\n", " # 'dwpose': DWposeDetector(),\n", "}\n", "\n", "# Run the chosen processor\n", "if args.processor in detectors:\n", " detector = detectors[args.processor]\n", " # Run your code here with the chosen detector\n", "else:\n", " print(f\"Unknown processor: {args.processor}\")\n" ], "metadata": { "id": "8YYwMuMpJoKB" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "#@title interpolate processed frames (best to keep fps same as input video)\n", "!ffmpeg -r 25 -i /content/test/frame_%d_%d.png -start_number 0 -end_number 6 -c:v libx264 -vf \"fps=25,format=yuv420p\" testpose1.mp4\n" ], "metadata": { "id": "8kUk-kFPwzmq" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "#display video\n", "from IPython.display import HTML\n", "from base64 import b64encode\n", "\n", "# Open the video file and read its contents\n", "mp4 = open('/content/testpose.mp4', 'rb').read()\n", "\n", "# Encode the video data as a base64 string\n", "data_url = \"data:video/mp4;base64,\" + b64encode(mp4).decode()\n", "\n", "# Display the video using an HTML video element\n", "HTML(f\"\"\"\n", "\n", "\"\"\")" ], "metadata": { "id": "6AKmRPK3J7GO" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "!zip -r nameof.zip " ], "metadata": { "id": "Oax1BHwYTZog" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "FaF3RdKdaFa8" }, "outputs": [], "source": [ "#@title Login to HuggingFace 🤗\n", "\n", "#@markdown You need to accept the model license before downloading or using the Stable Diffusion weights. Please, visit the [model card](https://huggingface.co/runwayml/stable-diffusion-v1-5), read the license and tick the checkbox if you agree. You have to be a registered user in 🤗 Hugging Face Hub, and you'll also need to use an access token for the code to work.\n", "# https://huggingface.co/settings/tokens\n", "!mkdir -p ~/.huggingface\n", "HUGGINGFACE_TOKEN = \"\" #@param {type:\"string\"}\n", "!echo -n \"{HUGGINGFACE_TOKEN}\" > ~/.huggingface/token" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "aEJZoFQ2YHIb" }, "outputs": [], "source": [ "@#title upload to Huggingface\n", "from huggingface_hub import HfApi\n", "api = HfApi()\n", "api.upload_file(\n", " path_or_fileobj=\"\",\n", " path_in_repo=\"name.zip\",\n", " repo_id=\"\",\n", " repo_type=\"dataset\",\n", ")" ] }, { "cell_type": "code", "source": [], "metadata": { "id": "lUf1h6FSKlr7" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [], "metadata": { "id": "9DOaoGnnKl_M" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [], "metadata": { "id": "H_iCXpzCKmQl" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "#@title working FAST batch processing CODE TEMPLATE WIP (just doesnt save as og filenames)\n", "\n", "import torch\n", "import os\n", "from typing import List\n", "from cog import BasePredictor, Input, Path\n", "from PIL import Image\n", "from io import BytesIO\n", "import time\n", "from tqdm import tqdm\n", "from controlnet_aux.processor import Processor\n", "from controlnet_aux import (\n", " HEDdetector,\n", " MidasDetector,\n", " MLSDdetector,\n", " OpenposeDetector,\n", " PidiNetDetector,\n", " NormalBaeDetector,\n", " LineartDetector,\n", " LineartAnimeDetector,\n", " CannyDetector,\n", " ContentShuffleDetector,\n", " ZoeDetector,\n", " MediapipeFaceDetector,\n", " SamDetector,\n", " LeresDetector,\n", " DWposeDetector,\n", ")\n", "\n", "#Processor = processor\n", "image_dir = '/content/frames'\n", "\n", "class Predictor(BasePredictor):\n", " def setup(self) -> None:\n", " \"\"\"Load the model into memory to make running multiple predictions efficient\"\"\"\n", "\n", " self.annotators = {\n", " \"canny\": CannyDetector(),\n", " \"content\": ContentShuffleDetector(),\n", " \"face_detector\": MediapipeFaceDetector(),\n", " \"hed\": self.initialize_detector(HEDdetector),\n", " \"midas\": self.initialize_detector(MidasDetector),\n", " \"mlsd\": self.initialize_detector(MLSDdetector),\n", " \"open_pose\": self.initialize_detector(OpenposeDetector),\n", " \"pidi\": self.initialize_detector(PidiNetDetector),\n", " \"normal_bae\": self.initialize_detector(NormalBaeDetector),\n", " \"lineart\": self.initialize_detector(LineartDetector),\n", " \"lineart_anime\": self.initialize_detector(LineartAnimeDetector),\n", " # \"zoe\": self.initialize_detector(ZoeDetector),\n", "\n", "\n", " # \"mobile_sam\": self.initialize_detector(\n", " # SamDetector,\n", " # model_name=\"dhkim2810/MobileSAM\",\n", " # model_type=\"vit_t\",\n", " # filename=\"mobile_sam.pt\",\n", " # ),\n", " \"leres\": self.initialize_detector(LeresDetector),\n", " }\n", "\n", " torch.device(\"cuda\")\n", "\n", " def initialize_detector(\n", " self, detector_class, model_name=\"lllyasviel/Annotators\", **kwargs\n", " ):\n", " return detector_class.from_pretrained(\n", " model_name,\n", " cache_dir=\"model_cache\",\n", " **kwargs,\n", " )\n", "\n", " def process_images(self, image_dir: str) -> List[Path]:\n", " # Start time for overall processing\n", " start_time = time.time()\n", "\n", " # Load all images into memory\n", " images = [Image.open(os.path.join(image_dir, image_name)).convert(\"RGB\").resize((512, 512)) for image_name in os.listdir(image_dir)]\n", "\n", " paths = []\n", "\n", " def predict(\n", " self,\n", " image_dir: str = Input(\n", " default=\"/content/frames\",\n", " description=\"Directory containing the images to be processed\"\n", " )\n", "):\n", "\n", " canny: bool = Input(\n", " default=True,\n", " description=\"Run canny edge detection\",\n", " ),\n", " content: bool = Input(\n", " default=True,\n", " description=\"Run content shuffle detection\",\n", " ),\n", " face_detector: bool = Input(\n", " default=True,\n", " description=\"Run face detection\",\n", " ),\n", " hed: bool = Input(\n", " default=True,\n", " description=\"Run HED detection\",\n", " ),\n", " midas: bool = Input(\n", " default=True,\n", " description=\"Run Midas detection\",\n", " ),\n", " mlsd: bool = Input(\n", " default=True,\n", " description=\"Run MLSD detection\",\n", " ),\n", " open_pose: bool = Input(\n", " default=True,\n", " description=\"Run Openpose detection\",\n", " ),\n", " pidi: bool = Input(\n", " default=True,\n", " description=\"Run PidiNet detection\",\n", " ),\n", " normal_bae: bool = Input(\n", " default=True,\n", " description=\"Run NormalBae detection\",\n", " ),\n", " lineart: bool = Input(\n", " default=True,\n", " description=\"Run Lineart detection\",\n", " ),\n", " lineart_anime: bool = Input(\n", " default=True,\n", " description=\"Run LineartAnime detection\",\n", "\n", " ),\n", " leres: bool = Input(\n", " default=True,\n", " description=\"Run Leres detection\",\n", " ),\n", "\n", "\n", " # Load image\n", " # Load all images into memory\n", " start_time = time.time() # Start time for overall processing\n", " images = [Image.open(os.path.join(image_dir, image_name)).convert(\"RGB\").resize((512, 512)) for image_name in os.listdir(image_dir)]\n", "\n", " paths = []\n", " annotator_inputs = {\n", " \"canny\": canny, \"openpose_full\": openpose_full,\n", " \"content\": content,\n", " \"face_detector\": face_detector,\n", " \"hed\": hed,\n", " \"midas\": midas,\n", " \"mlsd\": mlsd,\n", " \"open_pose\": open_pose,\n", " \"pidi\": pidi,\n", " \"normal_bae\": normal_bae,\n", " \"lineart\": lineart,\n", " \"lineart_anime\": lineart_anime,\n", "\n", " \"leres\": leres,\n", " }\n", " for annotator, run_annotator in annotator_inputs.items():\n", " if run_annotator:\n", " processed_image = self.process_image(image, annotator)\n", " #processed_image.save(f\"/tmp/{annotator}.png\")\n", " processed_path = f'/content/test2/{image_name}'\n", "\n", " return paths\n", "\n", "import time\n", "from tqdm import tqdm\n", "\n", "# Load images and paths\n", "images = []\n", "image_paths = []\n", "for name in os.listdir(image_dir):\n", " path = os.path.join(image_dir, name)\n", " image = Image.open(path)\n", "\n", " images.append(image)\n", " image_paths.append(path)\n", "\n", "# Process images\n", "processed = [\n", " Processor(\"lineart_anime\") for path in tqdm(image_paths)\n", "]\n", "\n", "# Save processed\n", "from PIL import Image\n", "\n", "# Save processed\n", "for name, processor in zip(images, processed):\n", "\n", " # Process image\n", " # Process all images with progress bar\n", " processed_images = [processor(image, to_pil=True) for image in tqdm(images, desc=\"Processing images\")]\n", "\n", " # Save each image\n", " for i, img in enumerate(processed_images):\n", " processed_path = f'/content/test/{name}_{i}.png'\n", " img.save(processed_path)\n", "\n", "from PIL import Image\n", "\n", "\n" ], "metadata": { "id": "ajRzOZtiDrGP", "colab": { "base_uri": "https://localhost:8080/", "height": 213, "referenced_widgets": [ "b7207b0dd06849beb14d8c0cdaebcaa0", "ce42c14100d342f1a1b929fead2c1d60", "8d60c1de06464ac49b383e558e33c8f7", "83779c3a8afc4fb4a4b71bb3b4dae8be", "ea55fcf91d4346c5820079305f5c4752", "2e93ac3132a74f9ea031f94c222230fb", "bb87d0010b71413db38634d4f3d7dc9a", "d97ff4fe72954aa5891a44e48b7eea35", "70fa493960104cf4bc032470ff7f3dcf", "2302182b276c4b3b82d23b35001a5893", "623b93d070da4478abb4039f722af9ec" ] }, "outputId": "d102947c-450c-4dcf-96b1-d8fedf5da525" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stderr", "text": [ "\r 0%| | 0/7 [00:00