{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": { "provenance": [], "gpuType": "T4" },
    "kernelspec": { "name": "python3", "display_name": "Python 3" },
    "language_info": { "name": "python" },
    "accelerator": "GPU"
  },
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": { "id": "qbM01EucvW58" },
      "outputs": [],
      "source": [
        "!pip install controlnet-aux==0.0.7\n",
        "!pip install -U openmim\n",
        "!pip install cog\n",
        "!pip install mediapipe\n",
        "!mim install mmengine\n",
        "!mim install \"mmcv>=2.0.1\"\n",
        "!mim install \"mmdet>=3.1.0\"\n",
        "!mim install \"mmpose>=1.1.0\""
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "#@title upload a video (the frame-extraction cell below expects /content/a.mp4)\n",
        "from google.colab import files\n",
        "uploaded = files.upload()"
      ],
      "metadata": { "id": "rvxvKxkiR8ih" },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "#@title break video down into frames\n",
        "import os\n",
        "import cv2\n",
        "\n",
        "# Create the output directory first; cv2.imwrite fails silently if it is missing\n",
        "os.makedirs('/content/frames', exist_ok=True)\n",
        "\n",
        "# Open the video file\n",
        "cap = cv2.VideoCapture('/content/a.mp4')\n",
        "\n",
        "i = 0\n",
        "while cap.isOpened():\n",
        "    ret, frame = cap.read()\n",
        "    if not ret:\n",
        "        break\n",
        "\n",
        "    # Save each frame of the video\n",
        "    cv2.imwrite(f'/content/frames/frame_{i}.jpg', frame)\n",
        "    i += 1\n",
        "\n",
        "cap.release()"
      ],
      "metadata": { "id": "Kw0hIeYnvjLV" },
      "execution_count": null,
      "outputs": []
    },
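    {
      "cell_type": "code",
      "source": [
        "#@title (optional) check the input video fps\n",
        "# A minimal sketch, not part of the original workflow: reads the fps and frame\n",
        "# count of the uploaded video so you can pass the same fps to the ffmpeg cell\n",
        "# below ('best to keep fps same as input video'). Assumes /content/a.mp4.\n",
        "import cv2\n",
        "\n",
        "cap = cv2.VideoCapture('/content/a.mp4')\n",
        "fps = cap.get(cv2.CAP_PROP_FPS)\n",
        "frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))\n",
        "cap.release()\n",
        "\n",
        "print(f'fps: {fps}, frames: {frame_count}')"
      ],
      "metadata": {},
      "execution_count": null,
      "outputs": []
    },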
    {
      "cell_type": "code",
      "source": [
        "#==========Interpolate the pose frames==========\n",
        "!pip install moviepy"
      ],
      "metadata": { "id": "jTIwuo4ESGBw" },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "#@title interpolate processed frames (best to keep fps the same as the input video)\n",
        "!ffmpeg -r 8 -i /content/test/frame_%d.jpg -c:v libx264 -vf \"fps=8,format=yuv420p\" testpose.mp4\n"
      ],
      "metadata": { "id": "8kUk-kFPwzmq" },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "#=======AVAILABLE PROCESSORS========\n",
        "# load processor from processor_id\n",
        "# options are:\n",
        "# [\"canny\", \"depth_leres\", \"depth_leres++\", \"depth_midas\", \"depth_zoe\", \"lineart_anime\",\n",
        "#  \"lineart_coarse\", \"lineart_realistic\", \"mediapipe_face\", \"mlsd\", \"normal_bae\", \"normal_midas\",\n",
        "#  \"openpose\", \"openpose_face\", \"openpose_faceonly\", \"openpose_full\", \"openpose_hand\",\n",
        "#  \"scribble_hed\", \"scribble_pidinet\", \"shuffle\", \"softedge_hed\", \"softedge_hedsafe\",\n",
        "#  \"softedge_pidinet\", \"softedge_pidsafe\", \"dwpose\"]"
      ],
      "metadata": { "id": "l5aAanvtyMz9" },
      "execution_count": null,
      "outputs": []
    },
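    {
      "cell_type": "code",
      "source": [
        "#@title (optional) preview a single processor on one frame\n",
        "# A minimal sketch, not part of the original workflow: runs one processor_id\n",
        "# from the list above on a single frame so you can check the output before\n",
        "# batch-processing every frame. Assumes /content/frames/frame_0.jpg exists.\n",
        "from PIL import Image\n",
        "from controlnet_aux.processor import Processor\n",
        "\n",
        "preview_processor = Processor('openpose_full')  # swap in any processor_id above\n",
        "frame = Image.open('/content/frames/frame_0.jpg')\n",
        "preview = preview_processor(frame, to_pil=True)\n",
        "preview"
      ],
      "metadata": {},
      "execution_count": null,
      "outputs": []
    },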
    {
      "cell_type": "code",
      "source": [
        "#@title simply change the processor under 'processor' at the bottom; it may need to be added if not available, see above (go to the v1 notebook if errors start)\n",
        "#=======AVAILABLE PROCESSORS========\n",
        "# load processor from processor_id\n",
        "# options are:\n",
        "# [\"canny\", \"depth_leres\", \"depth_leres++\", \"depth_midas\", \"depth_zoe\", \"lineart_anime\",\n",
        "#  \"lineart_coarse\", \"lineart_realistic\", \"mediapipe_face\", \"mlsd\", \"normal_bae\", \"normal_midas\",\n",
        "#  \"openpose\", \"openpose_face\", \"openpose_faceonly\", \"openpose_full\", \"openpose_hand\",\n",
        "#  \"scribble_hed\", \"scribble_pidinet\", \"shuffle\", \"softedge_hed\", \"softedge_hedsafe\",\n",
        "#  \"softedge_pidinet\", \"softedge_pidsafe\", \"dwpose\"]\n",
        "import os\n",
        "import time\n",
        "from typing import List\n",
        "\n",
        "import torch\n",
        "from cog import BasePredictor, Input, Path\n",
        "from PIL import Image\n",
        "from tqdm import tqdm\n",
        "from controlnet_aux.processor import Processor\n",
        "from controlnet_aux import (\n",
        "    HEDdetector,\n",
        "    MidasDetector,\n",
        "    MLSDdetector,\n",
        "    OpenposeDetector,\n",
        "    PidiNetDetector,\n",
        "    NormalBaeDetector,\n",
        "    LineartDetector,\n",
        "    LineartAnimeDetector,\n",
        "    CannyDetector,\n",
        "    ContentShuffleDetector,\n",
        "    ZoeDetector,\n",
        "    MediapipeFaceDetector,\n",
        "    SamDetector,\n",
        "    LeresDetector,\n",
        "    DWposeDetector,\n",
        ")\n",
        "\n",
        "image_dir = '/content/frames'\n",
        "\n",
        "class Predictor(BasePredictor):\n",
        "    def setup(self) -> None:\n",
        "        \"\"\"Load the models into memory to make running multiple predictions efficient\"\"\"\n",
        "        self.annotators = {\n",
        "            \"canny\": CannyDetector(),\n",
        "            \"content\": ContentShuffleDetector(),\n",
        "            \"face_detector\": MediapipeFaceDetector(),\n",
        "            \"hed\": self.initialize_detector(HEDdetector),\n",
        "            \"midas\": self.initialize_detector(MidasDetector),\n",
        "            \"mlsd\": self.initialize_detector(MLSDdetector),\n",
        "            \"open_pose\": self.initialize_detector(OpenposeDetector),\n",
        "            \"pidi\": self.initialize_detector(PidiNetDetector),\n",
        "            \"normal_bae\": self.initialize_detector(NormalBaeDetector),\n",
        "            \"lineart\": self.initialize_detector(LineartDetector),\n",
        "            \"lineart_anime\": self.initialize_detector(LineartAnimeDetector),\n",
        "            # \"zoe\": self.initialize_detector(ZoeDetector),\n",
        "            # \"mobile_sam\": self.initialize_detector(\n",
        "            #     SamDetector,\n",
        "            #     model_name=\"dhkim2810/MobileSAM\",\n",
        "            #     model_type=\"vit_t\",\n",
        "            #     filename=\"mobile_sam.pt\",\n",
        "            # ),\n",
        "            \"leres\": self.initialize_detector(LeresDetector),\n",
        "        }\n",
        "\n",
        "    def initialize_detector(\n",
        "        self, detector_class, model_name=\"lllyasviel/Annotators\", **kwargs\n",
        "    ):\n",
        "        return detector_class.from_pretrained(\n",
        "            model_name,\n",
        "            cache_dir=\"model_cache\",\n",
        "            **kwargs,\n",
        "        )\n",
        "\n",
        "    def predict(\n",
        "        self,\n",
        "        image_dir: str = Input(default=\"/content/frames\", description=\"Directory containing the images to be processed\"),\n",
        "        canny: bool = Input(default=True, description=\"Run canny edge detection\"),\n",
        "        content: bool = Input(default=True, description=\"Run content shuffle detection\"),\n",
        "        face_detector: bool = Input(default=True, description=\"Run face detection\"),\n",
        "        hed: bool = Input(default=True, description=\"Run HED detection\"),\n",
        "        midas: bool = Input(default=True, description=\"Run Midas detection\"),\n",
        "        mlsd: bool = Input(default=True, description=\"Run MLSD detection\"),\n",
        "        open_pose: bool = Input(default=True, description=\"Run Openpose detection\"),\n",
        "        pidi: bool = Input(default=True, description=\"Run PidiNet detection\"),\n",
        "        normal_bae: bool = Input(default=True, description=\"Run NormalBae detection\"),\n",
        "        lineart: bool = Input(default=True, description=\"Run Lineart detection\"),\n",
        "        lineart_anime: bool = Input(default=True, description=\"Run LineartAnime detection\"),\n",
        "        leres: bool = Input(default=True, description=\"Run Leres detection\"),\n",
        "    ) -> List[Path]:\n",
        "        start_time = time.time()  # Start time for overall processing\n",
        "\n",
        "        annotator_inputs = {\n",
        "            \"canny\": canny,\n",
        "            \"content\": content,\n",
        "            \"face_detector\": face_detector,\n",
        "            \"hed\": hed,\n",
        "            \"midas\": midas,\n",
        "            \"mlsd\": mlsd,\n",
        "            \"open_pose\": open_pose,\n",
        "            \"pidi\": pidi,\n",
        "            \"normal_bae\": normal_bae,\n",
        "            \"lineart\": lineart,\n",
        "            \"lineart_anime\": lineart_anime,\n",
        "            \"leres\": leres,\n",
        "        }\n",
        "\n",
        "        os.makedirs('/content/test1', exist_ok=True)\n",
        "        paths = []\n",
        "        for image_name in os.listdir(image_dir):\n",
        "            # Load each image and normalise its size\n",
        "            image = Image.open(os.path.join(image_dir, image_name)).convert(\"RGB\").resize((512, 512))\n",
        "            for annotator, run_annotator in annotator_inputs.items():\n",
        "                if run_annotator:\n",
        "                    processed_image = self.annotators[annotator](image)\n",
        "                    processed_path = f'/content/test1/{annotator}_{image_name}'\n",
        "                    processed_image.save(processed_path)\n",
        "                    paths.append(Path(processed_path))\n",
        "\n",
        "        print(f'Processing took {time.time() - start_time:.1f}s')\n",
        "        return paths\n",
        "\n",
        "# ---- What actually runs in this notebook: batch-process the frames ----\n",
        "# Build the processor once, then apply it to every frame\n",
        "processor = Processor('openpose_full')  # <-- change the processor here\n",
        "\n",
        "os.makedirs('/content/test2', exist_ok=True)\n",
        "\n",
        "# Get a list of filenames in /content/frames\n",
        "filenames = os.listdir('/content/frames')\n",
        "\n",
        "# Process all images with a progress bar and save them\n",
        "for filename in tqdm(filenames):\n",
        "    # Get the full path of the image file\n",
        "    image_path = os.path.join('/content/frames', filename)\n",
        "\n",
        "    # Load the image\n",
        "    image = Image.open(image_path)\n",
        "\n",
        "    # Process the image\n",
        "    processed_image = processor(image, to_pil=True)\n",
        "\n",
        "    # Save under the original frame name\n",
        "    original_name = filename.split('.')[0]\n",
        "    processed_path = f'/content/test2/{original_name}.png'\n",
        "    processed_image.save(processed_path)\n"
      ],
      "metadata": { "id": "qgKAWKrBL5d2" },
      "execution_count": null,
      "outputs": []
    },
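    {
      "cell_type": "code",
      "source": [
        "#@title (optional) sanity-check the processed frames\n",
        "# A minimal sketch, not part of the original workflow: compares the input and\n",
        "# output frame counts and shows one original/processed pair side by side\n",
        "# before you interpolate or zip the results. Assumes the folders used above\n",
        "# (/content/frames and /content/test2) and that frame_0 exists.\n",
        "import os\n",
        "import matplotlib.pyplot as plt\n",
        "from PIL import Image\n",
        "\n",
        "in_dir, out_dir = '/content/frames', '/content/test2'\n",
        "print(len(os.listdir(in_dir)), 'input frames,', len(os.listdir(out_dir)), 'processed frames')\n",
        "\n",
        "fig, axes = plt.subplots(1, 2, figsize=(8, 4))\n",
        "axes[0].imshow(Image.open(os.path.join(in_dir, 'frame_0.jpg')))\n",
        "axes[0].set_title('original')\n",
        "axes[1].imshow(Image.open(os.path.join(out_dir, 'frame_0.png')))\n",
        "axes[1].set_title('processed')\n",
        "for ax in axes:\n",
        "    ax.axis('off')\n",
        "plt.show()"
      ],
      "metadata": {},
      "execution_count": null,
      "outputs": []
    },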
    {
      "cell_type": "code",
      "source": [
        "#@title for seeing what's in controlnet_aux\n",
        "import controlnet_aux\n",
        "dir(controlnet_aux)"
      ],
      "metadata": { "id": "X08c_PPKTQiq" },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "#@title zip the processed frames (append the folder to zip, e.g. /content/test2)\n",
        "!zip -r nameof.zip "
      ],
      "metadata": { "id": "Oax1BHwYTZog" },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": { "id": "FaF3RdKdaFa8" },
      "outputs": [],
      "source": [
        "#@title Login to HuggingFace 🤗\n",
        "\n",
        "#@markdown You need to accept the model license before downloading or using the Stable Diffusion weights. Please visit the [model card](https://huggingface.co/runwayml/stable-diffusion-v1-5), read the license, and tick the checkbox if you agree. You have to be a registered user on the 🤗 Hugging Face Hub, and you'll also need an access token for the code to work.\n",
        "# https://huggingface.co/settings/tokens\n",
        "!mkdir -p ~/.huggingface\n",
        "HUGGINGFACE_TOKEN = \"\" #@param {type:\"string\"}\n",
        "!echo -n \"{HUGGINGFACE_TOKEN}\" > ~/.huggingface/token"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": { "id": "aEJZoFQ2YHIb" },
      "outputs": [],
      "source": [
        "#@title upload to Huggingface\n",
        "from huggingface_hub import HfApi\n",
        "api = HfApi()\n",
        "api.upload_file(\n",
        "    path_or_fileobj=\"\",  # local file to upload, e.g. the zip created above\n",
        "    path_in_repo=\"name.zip\",\n",
        "    repo_id=\"\",  # your dataset repo, e.g. username/repo-name\n",
        "    repo_type=\"dataset\",\n",
        ")"
      ]
    }
  ]
}