{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 73,
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch    \n",
    "import pickle\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import time"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "metadata": {},
   "outputs": [],
   "source": [
    "IMAGE_SIZE = 224 # We need to resize the images given resnet takes input of image size >= 224\n",
    "\n",
    "mean, std = [0.4914, 0.4822, 0.4465], [0.247, 0.243, 0.261]\n",
    "classes = ('airplane', \n",
    "           'automobile', \n",
    "           'bird',\n",
    "           'cat',\n",
    "           'deer',\n",
    "           'dog', \n",
    "           'frog', \n",
    "           'horse', \n",
    "           'ship',\n",
    "           'truck')\n",
    "\n",
    "if torch.cuda.is_available():\n",
    "    torch.set_default_device('cuda')\n",
    "\n",
    "def show_data(img):\n",
    "    try:\n",
    "        plt.imshow(img[0])\n",
    "    except Exception as e:\n",
    "        print(e)\n",
    "    print(img[0].shape, img[0].permute(1,2,0).shape)\n",
    "    plt.imshow(img[0].permute(1,2,0))\n",
    "    plt.title('y = '+ str(img[1]))\n",
    "    plt.show()\n",
    "    \n",
    "# We need to convert the images to numpy arrays as tensors are not compatible with matplotlib.\n",
    "def im_convert(tensor):\n",
    "    #Lets\n",
    "    img = tensor.cpu().clone().detach().numpy() #\n",
    "    img = img.transpose(1, 2, 0)\n",
    "    img = img * np.array(tuple(mean)) + np.array(tuple(std))\n",
    "    img = img.clip(0, 1) # Clipping the size to print the images later\n",
    "    return img"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "metadata": {},
   "outputs": [],
   "source": [
    "def unpickle(file):\n",
    "    with open(file, 'rb') as fo:\n",
    "        data_dict = pickle.load(fo, encoding='bytes')\n",
    "        \n",
    "        # Decode keys from bytes to strings\n",
    "        decoded_dict = {}\n",
    "        for key, value in data_dict.items():\n",
    "            decoded_key = key.decode('utf-8')  # Assuming UTF-8 encoding\n",
    "            decoded_dict[decoded_key] = value\n",
    "        \n",
    "    return decoded_dict\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 76,
   "metadata": {},
   "outputs": [],
   "source": [
    "decoded_dict = unpickle('./test_batch')\n",
    "decoded_dict\n",
    "data = torch.tensor(decoded_dict['data']).reshape([10000,3,32,32])\n",
    "dataset = {\"image\":data, \"target\": torch.tensor(decoded_dict[\"labels\"])}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 77,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "dict_keys(['batch_label', 'labels', 'data', 'filenames'])"
      ]
     },
     "execution_count": 77,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "decoded_dict.keys()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "metadata": {},
   "outputs": [],
   "source": [
    "idx = 0\n",
    "image = dataset['image'][idx]\n",
    "label = dataset[\"target\"][idx].item()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 79,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'cat'"
      ]
     },
     "execution_count": 79,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "classes[label]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 82,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "cat\n",
      "Time taken: 0.013 s\n"
     ]
    }
   ],
   "source": [
    "# Load model directly\n",
    "from transformers import AutoImageProcessor, AutoModelForImageClassification\n",
    "\n",
    "processor = AutoImageProcessor.from_pretrained(\"heyitskim1912/AML_A2_Q4\")\n",
    "model = AutoModelForImageClassification.from_pretrained(\"heyitskim1912/AML_A2_Q4\")\n",
    "\n",
    "inputs = processor(image, return_tensors=\"pt\")\n",
    "\n",
    "start_time = time.time()\n",
    "with torch.no_grad():\n",
    "    logits = model(**inputs).logits\n",
    "\n",
    "# model predicts one of the 1000 ImageNet classes\n",
    "predicted_label = logits.argmax(-1).item()\n",
    "print(model.config.id2label[predicted_label])\n",
    "end_time = time.time()\n",
    "time_taken = round(end_time - start_time, 3)\n",
    "print(f\"Time taken: {time_taken} s\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "PyTorchenv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}