waveydaveygravy committed
Commit 1dda0a5 · 1 Parent(s): 075c482

Upload vid2pose_v2.ipynb

Files changed (1)
  1. vid2pose_v2.ipynb +411 -0
vid2pose_v2.ipynb ADDED
@@ -0,0 +1,411 @@
+ {
+ "nbformat": 4,
+ "nbformat_minor": 0,
+ "metadata": {
+ "colab": {
+ "provenance": [],
+ "gpuType": "T4"
+ },
+ "kernelspec": {
+ "name": "python3",
+ "display_name": "Python 3"
+ },
+ "language_info": {
+ "name": "python"
+ },
+ "accelerator": "GPU"
+ },
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "qbM01EucvW58"
+ },
+ "outputs": [],
+ "source": [
+ "!pip install controlnet-aux==0.0.7\n",
+ "!pip install -U openmim\n",
+ "!pip install cog\n",
+ "!pip install mediapipe\n",
+ "!mim install mmengine\n",
+ "!mim install \"mmcv>=2.0.1\"\n",
+ "!mim install \"mmdet>=3.1.0\"\n",
+ "!mim install \"mmpose>=1.1.0\""
+ ]
+ },
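+ {
+ "cell_type": "code",
+ "source": [
+ "#@title (optional) confirm the GPU runtime is active (quick sanity check before loading the detectors)\n",
+ "!nvidia-smi"
+ ],
+ "metadata": {},
+ "execution_count": null,
+ "outputs": []
+ },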
+ {
+ "cell_type": "code",
+ "source": [
+ "from google.colab import files\n",
+ "uploaded = files.upload()"
+ ],
+ "metadata": {
+ "id": "rvxvKxkiR8ih"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "#@title break video down into frames\n",
+ "import cv2\n",
+ "import os\n",
+ "\n",
+ "# Create the output directory for the extracted frames\n",
+ "os.makedirs('/content/frames', exist_ok=True)\n",
+ "\n",
+ "# Open the video file\n",
+ "cap = cv2.VideoCapture('/content/a.mp4')\n",
+ "\n",
+ "i = 0\n",
+ "while cap.isOpened():\n",
+ "    ret, frame = cap.read()\n",
+ "\n",
+ "    if not ret:\n",
+ "        break\n",
+ "\n",
+ "    # Save each frame of the video\n",
+ "    cv2.imwrite('/content/frames/frame_' + str(i) + '.jpg', frame)\n",
+ "\n",
+ "    i += 1\n",
+ "\n",
+ "cap.release()\n",
+ "cv2.destroyAllWindows()"
+ ],
+ "metadata": {
+ "id": "Kw0hIeYnvjLV"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
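+ {
+ "cell_type": "code",
+ "source": [
+ "#@title (optional) check how many frames were extracted\n",
+ "import os\n",
+ "print(len(os.listdir('/content/frames')), 'frames in /content/frames')"
+ ],
+ "metadata": {},
+ "execution_count": null,
+ "outputs": []
+ },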
+ {
+ "cell_type": "code",
+ "source": [
+ "#==========Interpolate the pose frames==========\n",
+ "!pip install moviepy"
+ ],
+ "metadata": {
+ "id": "jTIwuo4ESGBw"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "#@title interpolate processed frames (best to keep fps same as input video)\n",
+ "!ffmpeg -r 8 -i /content/test/frame_%d.jpg -c:v libx264 -vf \"fps=8,format=yuv420p\" testpose.mp4\n"
+ ],
+ "metadata": {
+ "id": "8kUk-kFPwzmq"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
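+ {
+ "cell_type": "code",
+ "source": [
+ "#@title (optional) read the input video's fps so the -r and fps= values above can match it (a minimal sketch, assuming the input is still at /content/a.mp4)\n",
+ "import cv2\n",
+ "\n",
+ "cap = cv2.VideoCapture('/content/a.mp4')\n",
+ "fps = cap.get(cv2.CAP_PROP_FPS)\n",
+ "frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))\n",
+ "cap.release()\n",
+ "print(f'fps: {fps}, frames: {frame_count}')  # use this fps in the ffmpeg command above"
+ ],
+ "metadata": {},
+ "execution_count": null,
+ "outputs": []
+ },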
+ {
+ "cell_type": "code",
+ "source": [
+ "#=======AVAILABLE PROCESSORS========\n",
+ "# load processor from processor_id\n",
+ "# options are:\n",
+ "# [\"canny\", \"depth_leres\", \"depth_leres++\", \"depth_midas\", \"depth_zoe\", \"lineart_anime\",\n",
+ "#  \"lineart_coarse\", \"lineart_realistic\", \"mediapipe_face\", \"mlsd\", \"normal_bae\", \"normal_midas\",\n",
+ "#  \"openpose\", \"openpose_face\", \"openpose_faceonly\", \"openpose_full\", \"openpose_hand\",\n",
+ "#  \"scribble_hed\", \"scribble_pidinet\", \"shuffle\", \"softedge_hed\", \"softedge_hedsafe\",\n",
+ "#  \"softedge_pidinet\", \"softedge_pidsafe\", \"dwpose\"]"
+ ],
+ "metadata": {
+ "id": "l5aAanvtyMz9"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
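+ {
+ "cell_type": "code",
+ "source": [
+ "#@title (optional) quick test of a processor on a single frame (a minimal sketch, assuming frame_0.jpg exists in /content/frames)\n",
+ "from controlnet_aux.processor import Processor\n",
+ "from PIL import Image\n",
+ "\n",
+ "# pick any processor_id from the list above, e.g. 'openpose_full' or 'dwpose'\n",
+ "processor = Processor('openpose_full')\n",
+ "\n",
+ "image = Image.open('/content/frames/frame_0.jpg').convert('RGB')\n",
+ "result = processor(image, to_pil=True)  # returns a PIL image of the detected pose\n",
+ "result.save('/content/pose_test.png')"
+ ],
+ "metadata": {},
+ "execution_count": null,
+ "outputs": []
+ },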
+ {
+ "cell_type": "code",
+ "source": [
+ "#@title simply change the processor under 'processor' at the bottom, may need to add if not available, see above (go to the v1 notebook if errors start)\n",
+ "#=======AVAILABLE PROCESSORS========\n",
+ "# load processor from processor_id\n",
+ "# options are:\n",
+ "# [\"canny\", \"depth_leres\", \"depth_leres++\", \"depth_midas\", \"depth_zoe\", \"lineart_anime\",\n",
+ "#  \"lineart_coarse\", \"lineart_realistic\", \"mediapipe_face\", \"mlsd\", \"normal_bae\", \"normal_midas\",\n",
+ "#  \"openpose\", \"openpose_face\", \"openpose_faceonly\", \"openpose_full\", \"openpose_hand\",\n",
+ "#  \"scribble_hed\", \"scribble_pidinet\", \"shuffle\", \"softedge_hed\", \"softedge_hedsafe\",\n",
+ "#  \"softedge_pidinet\", \"softedge_pidsafe\", \"dwpose\"]\n",
+ "import torch\n",
+ "import os\n",
+ "import time\n",
+ "from typing import List\n",
+ "from cog import BasePredictor, Input, Path\n",
+ "from PIL import Image\n",
+ "from io import BytesIO\n",
+ "from tqdm import tqdm\n",
+ "from controlnet_aux.processor import Processor\n",
+ "from controlnet_aux import (\n",
+ "    HEDdetector,\n",
+ "    MidasDetector,\n",
+ "    MLSDdetector,\n",
+ "    OpenposeDetector,\n",
+ "    PidiNetDetector,\n",
+ "    NormalBaeDetector,\n",
+ "    LineartDetector,\n",
+ "    LineartAnimeDetector,\n",
+ "    CannyDetector,\n",
+ "    ContentShuffleDetector,\n",
+ "    ZoeDetector,\n",
+ "    MediapipeFaceDetector,\n",
+ "    SamDetector,\n",
+ "    LeresDetector,\n",
+ "    DWposeDetector,\n",
+ ")\n",
+ "\n",
+ "image_dir = '/content/frames'\n",
+ "\n",
+ "class Predictor(BasePredictor):\n",
+ "    def setup(self) -> None:\n",
+ "        \"\"\"Load the annotator models into memory to make running multiple predictions efficient\"\"\"\n",
+ "\n",
+ "        self.annotators = {\n",
+ "            \"canny\": CannyDetector(),\n",
+ "            \"content\": ContentShuffleDetector(),\n",
+ "            \"face_detector\": MediapipeFaceDetector(),\n",
+ "            \"hed\": self.initialize_detector(HEDdetector),\n",
+ "            \"midas\": self.initialize_detector(MidasDetector),\n",
+ "            \"mlsd\": self.initialize_detector(MLSDdetector),\n",
+ "            \"open_pose\": self.initialize_detector(OpenposeDetector),\n",
+ "            \"pidi\": self.initialize_detector(PidiNetDetector),\n",
+ "            \"normal_bae\": self.initialize_detector(NormalBaeDetector),\n",
+ "            \"lineart\": self.initialize_detector(LineartDetector),\n",
+ "            \"lineart_anime\": self.initialize_detector(LineartAnimeDetector),\n",
+ "            # \"zoe\": self.initialize_detector(ZoeDetector),\n",
+ "            # \"mobile_sam\": self.initialize_detector(\n",
+ "            #     SamDetector,\n",
+ "            #     model_name=\"dhkim2810/MobileSAM\",\n",
+ "            #     model_type=\"vit_t\",\n",
+ "            #     filename=\"mobile_sam.pt\",\n",
+ "            # ),\n",
+ "            \"leres\": self.initialize_detector(LeresDetector),\n",
+ "        }\n",
+ "\n",
+ "        torch.device(\"cuda\")\n",
+ "\n",
+ "    def initialize_detector(\n",
+ "        self, detector_class, model_name=\"lllyasviel/Annotators\", **kwargs\n",
+ "    ):\n",
+ "        return detector_class.from_pretrained(\n",
+ "            model_name,\n",
+ "            cache_dir=\"model_cache\",\n",
+ "            **kwargs,\n",
+ "        )\n",
+ "\n",
+ "    def predict(\n",
+ "        self,\n",
+ "        image_dir: str = Input(\n",
+ "            default=\"/content/frames\",\n",
+ "            description=\"Directory containing the images to be processed\",\n",
+ "        ),\n",
+ "        canny: bool = Input(default=True, description=\"Run canny edge detection\"),\n",
+ "        content: bool = Input(default=True, description=\"Run content shuffle detection\"),\n",
+ "        face_detector: bool = Input(default=True, description=\"Run face detection\"),\n",
+ "        hed: bool = Input(default=True, description=\"Run HED detection\"),\n",
+ "        midas: bool = Input(default=True, description=\"Run Midas detection\"),\n",
+ "        mlsd: bool = Input(default=True, description=\"Run MLSD detection\"),\n",
+ "        open_pose: bool = Input(default=True, description=\"Run Openpose detection\"),\n",
+ "        pidi: bool = Input(default=True, description=\"Run PidiNet detection\"),\n",
+ "        normal_bae: bool = Input(default=True, description=\"Run NormalBae detection\"),\n",
+ "        lineart: bool = Input(default=True, description=\"Run Lineart detection\"),\n",
+ "        lineart_anime: bool = Input(default=True, description=\"Run LineartAnime detection\"),\n",
+ "        leres: bool = Input(default=True, description=\"Run Leres detection\"),\n",
+ "    ) -> List[Path]:\n",
+ "        # Load all images into memory\n",
+ "        start_time = time.time()  # Start time for overall processing\n",
+ "        image_names = sorted(os.listdir(image_dir))\n",
+ "        images = [Image.open(os.path.join(image_dir, name)).convert(\"RGB\").resize((512, 512)) for name in image_names]\n",
+ "\n",
+ "        paths = []\n",
+ "        annotator_inputs = {\n",
+ "            \"canny\": canny,\n",
+ "            \"content\": content,\n",
+ "            \"face_detector\": face_detector,\n",
+ "            \"hed\": hed,\n",
+ "            \"midas\": midas,\n",
+ "            \"mlsd\": mlsd,\n",
+ "            \"open_pose\": open_pose,\n",
+ "            \"pidi\": pidi,\n",
+ "            \"normal_bae\": normal_bae,\n",
+ "            \"lineart\": lineart,\n",
+ "            \"lineart_anime\": lineart_anime,\n",
+ "            \"leres\": leres,\n",
+ "        }\n",
+ "        os.makedirs('/content/test1', exist_ok=True)\n",
+ "        for annotator, run_annotator in annotator_inputs.items():\n",
+ "            if run_annotator:\n",
+ "                for image_name, image in zip(image_names, images):\n",
+ "                    processed_image = self.annotators[annotator](image)\n",
+ "                    processed_path = f'/content/test1/{annotator}_{image_name}'\n",
+ "                    processed_image.save(processed_path)\n",
+ "                    paths.append(Path(processed_path))\n",
+ "\n",
+ "        return paths\n",
+ "\n",
+ "# ===== per-frame processing with the controlnet_aux Processor =====\n",
+ "# simply change the processor_id here (see the list above)\n",
+ "processor = Processor(\"openpose_full\")\n",
+ "\n",
+ "# Load images and paths\n",
+ "images = []\n",
+ "image_paths = []\n",
+ "for name in sorted(os.listdir(image_dir)):\n",
+ "    path = os.path.join(image_dir, name)\n",
+ "    image = Image.open(path)\n",
+ "\n",
+ "    images.append(image)\n",
+ "    image_paths.append(path)\n",
+ "\n",
+ "# Process all frames with a progress bar and save the results\n",
+ "os.makedirs('/content/test2', exist_ok=True)\n",
+ "for image_path, image in tqdm(zip(image_paths, images), total=len(image_paths)):\n",
+ "\n",
+ "    # Process the frame with the selected processor\n",
+ "    processed_image = processor(image, to_pil=True)\n",
+ "\n",
+ "    # Extract original name\n",
+ "    original_name = os.path.basename(image_path).split('.')[0]\n",
+ "\n",
+ "    # Save image\n",
+ "    processed_path = f'/content/test2/{original_name}.png'\n",
+ "    processed_image.save(processed_path)\n"
+ ],
+ "metadata": {
+ "id": "qgKAWKrBL5d2"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
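+ {
+ "cell_type": "code",
+ "source": [
+ "#@title (optional) preview one processed frame (assumes the processed frames were saved to /content/test2)\n",
+ "import os\n",
+ "from PIL import Image\n",
+ "from IPython.display import display\n",
+ "\n",
+ "processed_files = sorted(os.listdir('/content/test2'))\n",
+ "display(Image.open(os.path.join('/content/test2', processed_files[0])))"
+ ],
+ "metadata": {},
+ "execution_count": null,
+ "outputs": []
+ },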
+ {
+ "cell_type": "code",
+ "source": [
+ "#@title for seeing what's in controlnet_aux\n",
+ "import controlnet_aux\n",
+ "dir(controlnet_aux)"
+ ],
+ "metadata": {
+ "id": "X08c_PPKTQiq"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "!zip -r nameof.zip <location of files and folder>"
+ ],
+ "metadata": {
+ "id": "Oax1BHwYTZog"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
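+ {
+ "cell_type": "code",
+ "source": [
+ "#@title (optional) example of the zip command above (assumes the processed frames are in /content/test2; adjust the names to taste)\n",
+ "!zip -r poses.zip /content/test2"
+ ],
+ "metadata": {},
+ "execution_count": null,
+ "outputs": []
+ },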
374
+ {
375
+ "cell_type": "code",
376
+ "execution_count": null,
377
+ "metadata": {
378
+ "id": "FaF3RdKdaFa8"
379
+ },
380
+ "outputs": [],
381
+ "source": [
382
+ "#@title Login to HuggingFace 🤗\n",
383
+ "\n",
384
+ "#@markdown You need to accept the model license before downloading or using the Stable Diffusion weights. Please, visit the [model card](https://huggingface.co/runwayml/stable-diffusion-v1-5), read the license and tick the checkbox if you agree. You have to be a registered user in 🤗 Hugging Face Hub, and you'll also need to use an access token for the code to work.\n",
385
+ "# https://huggingface.co/settings/tokens\n",
386
+ "!mkdir -p ~/.huggingface\n",
387
+ "HUGGINGFACE_TOKEN = \"\" #@param {type:\"string\"}\n",
388
+ "!echo -n \"{HUGGINGFACE_TOKEN}\" > ~/.huggingface/token"
389
+ ]
390
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "aEJZoFQ2YHIb"
+ },
+ "outputs": [],
+ "source": [
+ "#@title upload to Hugging Face\n",
+ "from huggingface_hub import HfApi\n",
+ "api = HfApi()\n",
+ "api.upload_file(\n",
+ "    path_or_fileobj=\"\",\n",
+ "    path_in_repo=\"name.zip\",\n",
+ "    repo_id=\"\",\n",
+ "    repo_type=\"dataset\",\n",
+ ")"
+ ]
+ }
+ ]
+ }