shengz committed on
Commit f42acd8
1 Parent(s): 86ec378

Update ipynb.

Files changed (1)
  1. biomed_clip_example.ipynb +23 -137
biomed_clip_example.ipynb CHANGED
@@ -15,7 +15,7 @@
  },
  {
  "cell_type": "code",
- "execution_count": 1,
+ "execution_count": 19,
  "metadata": {
  "colab": {
  "base_uri": "https://localhost:8080/"
@@ -99,7 +99,7 @@
  },
  {
  "cell_type": "code",
- "execution_count": 2,
+ "execution_count": 20,
  "metadata": {
  "colab": {
  "base_uri": "https://localhost:8080/"
@@ -107,132 +107,12 @@
  "id": "V8Yv9g_8EQ1W",
  "outputId": "3ec24c9b-4c4f-4c17-8d76-6cfd74bb8bdf"
  },
- "outputs": [
+ "outputs": [],
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "/home/shezhan/anaconda3/envs/biomedclip/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
- " from .autonotebook import tqdm as notebook_tqdm\n"
- ]
- }
- ],
  "source": [
- "import open_clip\n",
+ "from open_clip import create_model_from_pretrained, get_tokenizer # works on open-clip-torch>=2.23.0, timm>=0.9.8\n",
  "\n",
- "model, preprocess_train, preprocess_val = open_clip.create_model_and_transforms('hf-hub:microsoft/BiomedCLIP-PubMedBERT_256-vit_base_patch16_224')\n",
+ "model, preprocess = create_model_from_pretrained('hf-hub:microsoft/BiomedCLIP-PubMedBERT_256-vit_base_patch16_224')\n",
- "tokenizer = open_clip.get_tokenizer('hf-hub:microsoft/BiomedCLIP-PubMedBERT_256-vit_base_patch16_224')"
+ "tokenizer = get_tokenizer('hf-hub:microsoft/BiomedCLIP-PubMedBERT_256-vit_base_patch16_224')"
- ]
- },
- {
- "attachments": {},
- "cell_type": "markdown",
- "metadata": {
- "id": "bk0hm1R7qqU_"
- },
- "source": [
- "# Download sample images"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 3,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 67,
- "referenced_widgets": [
- "692f8c386f9743a1a12f7d6c7959ca67",
- "3e0f188e73294f6ea4d1e28640cfdc22",
- "b754e18c5c49499d92db4803cfa426b7",
- "6743cbc5ca2c47e7be565e0d6cd933c9",
- "02aa2c49f2a94a7eb48794ed783c93e8",
- "ad84c0ed082d4ab7abf2815fc1910efa",
- "87a18840cc2c45ac824e8fe3d83d5150",
- "0b3b4fc0e99a47d0a494aee20166337f",
- "2de24c12eebd4054a3e6163fb6951986",
- "1c9af9a39e594c689590d09ae71baeb3",
- "182cc15b918a45d081543a6b3f182a07"
- ]
- },
- "id": "qqafKW1kqgc4",
- "outputId": "34c29f78-32c5-4a6f-914e-30e8a07840a6"
- },
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "README.md: 100%|██████████| 4.13k/4.13k [00:00<00:00, 7.84MB/s]\n",
- "biomed-vlp-eval.svg: 100%|██████████| 63.4k/63.4k [00:00<00:00, 9.46MB/s]\n",
- "\n",
- "(…)e_data/adenocarcinoma_histopathology.jpg: 100%|██████████| 26.9k/26.9k [00:00<00:00, 8.89MB/s]\n",
- "\n",
- "\n",
- "(…)tion_example_data/IHC_histopathology.jpg: 100%|██████████| 181k/181k [00:00<00:00, 11.9MB/s]\n",
- "(…)_example_data/H_and_E_histopathology.jpg: 100%|██████████| 177k/177k [00:00<00:00, 5.38MB/s]\n",
- "\n",
- "\n",
- "biomed_clip_example.ipynb: 100%|██████████| 2.88M/2.88M [00:00<00:00, 26.7MB/s]\n",
- "LICENSE.md: 100%|██████████| 1.07k/1.07k [00:00<00:00, 9.03MB/s]\n",
- ".gitattributes: 100%|██████████| 1.48k/1.48k [00:00<00:00, 8.57MB/s]\n",
- "(…)assification_example_data/bone_X-ray.jpg: 100%|██████████| 7.44k/7.44k [00:00<00:00, 13.3MB/s]\n",
- "(…)lassification_example_data/brain_MRI.jpg: 100%|██████████| 128k/128k [00:00<00:00, 27.3MB/s]\n",
- "(…)cation_example_data/covid_line_chart.png: 100%|██████████| 6.30k/6.30k [00:00<00:00, 10.7MB/s]\n",
- "\n",
- "(…)lassification_example_data/pie_chart.png: 100%|██████████| 371k/371k [00:00<00:00, 29.6MB/s]\n",
- "\n",
- "special_tokens_map.json: 100%|██████████| 125/125 [00:00<00:00, 941kB/s]\n",
- "\n",
- "(…)ssification_example_data/chest_X-ray.jpg: 100%|██████████| 906k/906k [00:00<00:00, 4.06MB/s]\n",
- "vocab.txt: 100%|██████████| 225k/225k [00:00<00:00, 84.4MB/s]it/s]\n",
- "(…)amous_cell_carcinoma_histopathology.jpeg: 100%|██████████| 17.2k/17.2k [00:00<00:00, 24.9MB/s]\n",
- "tokenizer_config.json: 100%|██████████| 394/394 [00:00<00:00, 1.78MB/s]\n",
- "\n",
- "tokenizer.json: 100%|██████████| 679k/679k [00:00<00:00, 3.06MB/s]\n",
- "Fetching 20 files: 100%|██████████| 20/20 [00:01<00:00, 19.93it/s]\n"
- ]
- },
- {
- "data": {
- "text/plain": [
- "'/home/shezhan/repos/biomedclip/biomed-clip-share'"
- ]
- },
- "execution_count": 3,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "from huggingface_hub import snapshot_download\n",
- "snapshot_download(\"microsoft/BiomedCLIP-PubMedBERT_256-vit_base_patch16_224\", local_dir=\"biomed-clip-share\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 4,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "4WOxBdKr0e_m",
- "outputId": "2a05beae-6f5f-4c3c-ef59-b23210b6e1b5"
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "biomed_clip_example.ipynb open_clip_config.json\ttokenizer_config.json\n",
- "biomed-vlp-eval.svg\t open_clip_pytorch_model.bin\ttokenizer.json\n",
- "example_data\t\t README.md\t\t\tvocab.txt\n",
- "LICENSE.md\t\t special_tokens_map.json\n"
- ]
- }
- ],
- "source": [
- "!ls biomed-clip-share"
  ]
  },
  {
@@ -247,7 +127,7 @@
  },
  {
  "cell_type": "code",
- "execution_count": 5,
+ "execution_count": 21,
  "metadata": {
  "colab": {
  "base_uri": "https://localhost:8080/"
@@ -372,14 +252,10 @@
  }
  ],
  "source": [
- "import glob\n",
- "from collections import OrderedDict\n",
- "\n",
  "import torch\n",
+ "from urllib.request import urlopen\n",
  "from PIL import Image\n",
- "import open_clip\n",
  "\n",
- "dataset_path = 'biomed-clip-share/example_data/biomed_image_classification_example_data'\n",
  "template = 'this is a photo of '\n",
  "labels = [\n",
  " 'adenocarcinoma histopathology',\n",
@@ -393,15 +269,25 @@
  " 'hematoxylin and eosin histopathology'\n",
  "]\n",
  "\n",
- "test_imgs = glob.glob(dataset_path + '/*')\n",
- "\n",
+ "dataset_url = 'https://huggingface.co/microsoft/BiomedCLIP-PubMedBERT_256-vit_base_patch16_224/resolve/main/example_data/biomed_image_classification_example_data/'\n",
+ "test_imgs = [\n",
+ " 'squamous_cell_carcinoma_histopathology.jpeg',\n",
+ " 'H_and_E_histopathology.jpg',\n",
+ " 'bone_X-ray.jpg',\n",
+ " 'adenocarcinoma_histopathology.jpg',\n",
+ " 'covid_line_chart.png',\n",
+ " 'IHC_histopathology.jpg',\n",
+ " 'chest_X-ray.jpg',\n",
+ " 'brain_MRI.jpg',\n",
+ " 'pie_chart.png'\n",
+ "]\n",
  "device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')\n",
  "model.to(device)\n",
  "model.eval()\n",
  "\n",
  "context_length = 256\n",
  "\n",
- "images = torch.stack([preprocess_val(Image.open(img)) for img in test_imgs]).to(device)\n",
+ "images = torch.stack([preprocess(Image.open(urlopen(dataset_url + img))) for img in test_imgs]).to(device)\n",
  "texts = tokenizer([template + l for l in labels], context_length=context_length).to(device)\n",
  "with torch.no_grad():\n",
  " image_features, text_features, logit_scale = model(images, texts)\n",
@@ -547,7 +433,7 @@
  },
  {
  "cell_type": "code",
- "execution_count": 6,
+ "execution_count": 22,
  "metadata": {
  "colab": {
  "base_uri": "https://localhost:8080/",
@@ -576,7 +462,7 @@
  " fig, axes = plt.subplots(nrows=num_images, ncols=1, figsize=(5, 5 * num_images))\n",
  "\n",
  " for i, (img_path, metadata) in enumerate(zip(images, metadata)):\n",
- " img = Image.open(img_path)\n",
+ " img = Image.open(urlopen(dataset_url + img_path))\n",
  " ax = axes[i]\n",
  " ax.imshow(img)\n",
  " ax.axis('off')\n",