{ "cells": [ { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "view-in-github" }, "source": [ "\"Open" ] }, { "cell_type": "markdown", "metadata": { "id": "hRdpoWePeYHn" }, "source": [ "## Importing Libraries and models" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "0LBvFtYGCNgJ" }, "outputs": [], "source": [ "%%capture\n", "!pip install wandb" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "zkZTzr7OCPBM" }, "outputs": [], "source": [ "import wandb" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "z4ZVrIumZcDt" }, "outputs": [], "source": [ "from __future__ import unicode_literals, print_function, division\n", "from io import open\n", "import unicodedata\n", "import string\n", "import re\n", "import random\n", "import pandas as pd\n", "import torch\n", "import torch.nn as nn\n", "from torch import optim\n", "import torch.nn.functional as F\n", "\n", "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n", "torch.cuda.empty_cache()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "qwL09v65CIse", "outputId": "f1dcbc80-5110-48f9-d0c5-836a2daa05b4" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "cuda\n" ] } ], "source": [ "print(device)" ] }, { "cell_type": "markdown", "metadata": { "id": "44xIRolL_T_d" }, "source": [ "## Load Dataset" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "-XRMpx9eBzRK", "outputId": "177ee7ae-bb7d-46ea-9269-fa3aa045a89e" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Mounted at /content/drive\n" ] } ], "source": [ "from google.colab import drive\n", "drive.mount('/content/drive')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "Y4zemXiyE6Fi" }, "outputs": [], "source": [ "class Lang:\n", " def __init__(self, name):\n", " self.name = name\n", " self.char2index = {'#': 0, '$': 1, '^': 2}\n", " self.char2count = {'#': 1, '$': 1, '^': 1}\n", " self.index2char = {0: '#', 1: '$', 2: '^'}\n", " self.n_chars = 3 # Count\n", " self.data = {}\n", " \n", "\n", " def addWord(self, word):\n", " for char in word:\n", " self.addChar(char)\n", "\n", " def addChar(self, char):\n", " if char not in self.char2index:\n", " self.char2index[char] = self.n_chars\n", " self.char2count[char] = 1\n", " self.index2char[self.n_chars] = char\n", " self.n_chars += 1\n", " else:\n", " self.char2count[char] += 1\n", "\n", " \n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "dCR658yRvXpy" }, "outputs": [], "source": [ "# return max length of input and output words\n", "def maxLength(data):\n", " ip_mlen, op_mlen = 0, 0\n", "\n", " for i in range(len(data)):\n", " input = data[0][i]\n", " output = data[1][i]\n", " if(len(input)>ip_mlen):\n", " ip_mlen=len(input)\n", "\n", " if(len(output)>op_mlen):\n", " op_mlen=len(output)\n", "\n", " return ip_mlen, op_mlen" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "IDGaCO8DkYpc" }, "outputs": [], "source": [ "import numpy\n", "input_shape = 0\n", "from torch.utils.data import TensorDataset, DataLoader\n", "def preprocess(data, input_lang, output_lang):\n", " maxlenInput, maxlenOutput = maxLength(data)\n", " # we use maxlenInput as 26 since it is the maximum of all input len\n", " maxlenInput = 26\n", " input = numpy.zeros((len(data), maxlenInput + 1))\n", " 
{ "cell_type": "code", "execution_count": null, "metadata": { "id": "IDGaCO8DkYpc" }, "outputs": [], "source": [
 "import numpy\n",
 "from torch.utils.data import TensorDataset, DataLoader\n",
 "\n",
 "def preprocess(data, input_lang, output_lang):\n",
 "    maxlenInput, maxlenOutput = maxLength(data)\n",
 "    # fix maxlenInput at 26, the maximum input length across all splits,\n",
 "    # so train/val/test tensors share the same width\n",
 "    maxlenInput = 26\n",
 "    input = numpy.zeros((len(data), maxlenInput + 1))\n",
 "    output = numpy.zeros((len(data), maxlenOutput + 2))\n",
 "    unknown = input_lang.char2index['$']\n",
 "\n",
 "    for i in range(len(data)):\n",
 "        op = '^' + data[1][i]\n",
 "        ip = data[0][i].ljust(maxlenInput + 1, '#')\n",
 "        op = op.ljust(maxlenOutput + 2, '#')\n",
 "\n",
 "        for index, char in enumerate(ip):\n",
 "            if input_lang.char2index.get(char) is not None:\n",
 "                input[i][index] = input_lang.char2index[char]\n",
 "            else:\n",
 "                input[i][index] = unknown\n",
 "\n",
 "        for index, char in enumerate(op):\n",
 "            if output_lang.char2index.get(char) is not None:\n",
 "                output[i][index] = output_lang.char2index[char]\n",
 "            else:\n",
 "                output[i][index] = unknown\n",
 "\n",
 "    print(input.shape)\n",
 "    print(output.shape)\n",
 "\n",
 "    return TensorDataset(torch.from_numpy(input), torch.from_numpy(output))"
] },
{ "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "PdS5OXKxfdCX", "outputId": "178f1d73-5b0c-431d-ca9b-d9435b924c41" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(51200, 27)\n", "(51200, 22)\n", "(4096, 27)\n", "(4096, 22)\n", "(4096, 27)\n", "(4096, 22)\n" ] } ], "source": [
 "def loadData(lang):\n",
 "    train_df = pd.read_csv(f\"drive/MyDrive/aksharantar_sampled/{lang}/{lang}_train.csv\", header = None)\n",
 "    val_df = pd.read_csv(f\"drive/MyDrive/aksharantar_sampled/{lang}/{lang}_valid.csv\", header = None)\n",
 "    test_df = pd.read_csv(f\"drive/MyDrive/aksharantar_sampled/{lang}/{lang}_test.csv\", header = None)\n",
 "\n",
 "    input_lang = Lang('eng')\n",
 "    output_lang = Lang(lang)\n",
 "\n",
 "    # add the words to the respective languages\n",
 "    for i in range(len(train_df)):\n",
 "        input_lang.addWord(train_df[0][i])\n",
 "        output_lang.addWord(train_df[1][i])\n",
 "\n",
 "    # print(input_lang.char2index)\n",
 "    # print(input_lang.index2char)\n",
 "    trainDataset = preprocess(train_df, input_lang, output_lang)\n",
 "    testDataset = preprocess(test_df, input_lang, output_lang)\n",
 "    valDataset = preprocess(val_df, input_lang, output_lang)\n",
 "\n",
 "    return trainDataset, testDataset, valDataset, input_lang, output_lang\n",
 "\n",
 "\n",
 "trainData, testData, valData, ipLang, opLang = loadData('hin')\n"
] },
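{ "cell_type": "markdown", "metadata": {}, "source": [ "A hedged sketch of what `preprocess` does to one source word (illustrative only; `ipLang` comes from the cell above): characters map to their indices, anything unseen falls back to `$`, and `#` pads on the right to the fixed width of 27." ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
 "# Illustrative only: encode a single word the way preprocess() does\n",
 "word = 'ghar'\n",
 "unk = ipLang.char2index['$']\n",
 "encoded = [ipLang.char2index.get(c, unk) for c in word.ljust(27, '#')]\n",
 "print(encoded)  # indices for g, h, a, r followed by 0s (the '#' pad)"
] },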
{ "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "SvmzS5Lt_Jnl", "outputId": "33defb60-5aee-46cb-e683-ee2df9e98436" }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "\u001b[34m\u001b[1mwandb\u001b[0m: W&B API key is configured. Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n", "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m If you're specifying your api key in code, ensure this code is not shared publicly.\n", "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m Consider setting the WANDB_API_KEY environment variable, or running `wandb login` from the command line.\n", "\u001b[34m\u001b[1mwandb\u001b[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc\n" ] }, { "data": { "text/plain": [ "True" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "wandb.login(key =\"\")" ] },
{ "cell_type": "markdown", "metadata": { "id": "Q1TioafYgICa" }, "source": [ "# seq2seq model" ] },
{ "cell_type": "markdown", "metadata": { "id": "svxssm9Havhb" }, "source": [ "## Encoder" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "id": "YTwk8nKNcbkb" }, "outputs": [], "source": [
 "class EncoderRNN(nn.Module):\n",
 "    def __init__(self, input_size, hidden_size, embedding_size,  # input_size is the size of the input language dictionary\n",
 "                 num_layers, cell_type,\n",
 "                 bidirectional, dropout, batch_size):\n",
 "        super(EncoderRNN, self).__init__()\n",
 "        self.hidden_size = hidden_size  # size of a hidden state representation\n",
 "        self.num_layers = num_layers\n",
 "        self.bidirectional = True if bidirectional == 'Yes' else False\n",
 "        self.batch_size = batch_size\n",
 "        self.cell_type = cell_type\n",
 "        self.embedding_size = embedding_size\n",
 "\n",
 "        # this adds the embedding layer\n",
 "        self.embedding = nn.Embedding(num_embeddings=input_size, embedding_dim=embedding_size)\n",
 "        self.dropout = nn.Dropout(dropout)\n",
 "\n",
 "        # this adds the recurrent layer for the encoder\n",
 "        if self.cell_type == \"GRU\":\n",
 "            self.rnn = nn.GRU(embedding_size, hidden_size, num_layers=num_layers, bidirectional=self.bidirectional, dropout=dropout)\n",
 "        elif self.cell_type == \"LSTM\":\n",
 "            self.rnn = nn.LSTM(embedding_size, hidden_size, num_layers=num_layers, bidirectional=self.bidirectional, dropout=dropout)\n",
 "        else:\n",
 "            self.rnn = nn.RNN(embedding_size, hidden_size, num_layers=num_layers, bidirectional=self.bidirectional, dropout=dropout)\n",
 "\n",
 "    def forward(self, input, hidden):  # input shape (seq_len, batch_size); hidden is a tuple for LSTM, a single tensor otherwise\n",
 "        embedded = self.embedding(input.long()).view(-1, self.batch_size, self.embedding_size)\n",
 "        output = self.dropout(embedded)  # output shape (seq_len, batch_size, embedding_size)\n",
 "\n",
 "        output, hidden = self.rnn(output, hidden)  # for LSTM hidden is a tuple\n",
 "        if self.bidirectional:\n",
 "            # hidden has shape (num_layers * 2, batch, hidden) with the two directions\n",
 "            # of each layer adjacent, so group it as (num_layers, 2, batch, hidden)\n",
 "            # and average over the direction axis\n",
 "            if self.cell_type == \"LSTM\":\n",
 "                hidden_state = hidden[0].view(self.num_layers, 2, self.batch_size, self.hidden_size)\n",
 "                cell_state = hidden[1].view(self.num_layers, 2, self.batch_size, self.hidden_size)\n",
 "                hidden = ((hidden_state[:, 0] + hidden_state[:, 1]) / 2, (cell_state[:, 0] + cell_state[:, 1]) / 2)\n",
 "            else:\n",
 "                hidden = hidden.view(self.num_layers, 2, self.batch_size, self.hidden_size)\n",
 "                hidden = (hidden[:, 0] + hidden[:, 1]) / 2\n",
 "\n",
 "            # likewise average the forward and backward halves of the outputs\n",
 "            split_tensor = torch.split(output, self.hidden_size, dim=-1)\n",
 "            output = (split_tensor[0] + split_tensor[1]) / 2\n",
 "        return output, hidden\n",
 "\n",
 "    # initializing the initial hidden state for the encoder\n",
 "    def initHidden(self):\n",
 "        num_directions = 2 if self.bidirectional else 1\n",
 "        if self.cell_type == \"LSTM\":\n",
 "            return (torch.zeros(self.num_layers * num_directions, self.batch_size, self.hidden_size, 
device=device),\n", " torch.zeros(self.num_layers * num_directions, self.batch_size, self.hidden_size, device=device))\n", " else:\n", " return torch.zeros(self.num_layers * num_directions, self.batch_size, self.hidden_size, device=device)\n" ] }, { "cell_type": "markdown", "metadata": { "id": "J56aq1J6a07q" }, "source": [ "## Decoder" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "53ki6eJUH2u2" }, "outputs": [], "source": [ "class DecoderRNN(nn.Module):\n", " def __init__(self, hidden_size, output_size, embedding_size, num_layers, # output size is the size of output language dictionary\n", " cell_type, dropout, batch_size):\n", " super(DecoderRNN, self).__init__()\n", " self.hidden_size = hidden_size\n", " self.num_layers = num_layers\n", " self.cell_type = cell_type.lower()\n", " self.batch_size = batch_size\n", " self.embedding_size=embedding_size\n", "\n", " self.embedding = nn.Embedding(output_size, embedding_size)\n", " # self.dropout = nn.Dropout(dropout)\n", " \n", " if self.cell_type == \"gru\":\n", " self.rnn = nn.GRU(embedding_size, hidden_size, num_layers=num_layers)\n", " elif self.cell_type == \"lstm\":\n", " self.rnn = nn.LSTM(embedding_size, hidden_size, num_layers=num_layers)\n", " else:\n", " self.rnn = nn.RNN(embedding_size, hidden_size, num_layers=num_layers)\n", "\n", " self.out = nn.Linear(hidden_size, output_size)\n", " self.softmax = nn.LogSoftmax(dim=2)\n", "\n", " def forward(self, input, hidden): # input shape (1, batch_size)\n", " embedded = self.embedding(input.long()).view(-1, self.batch_size, self.embedding_size)\n", " # # shape (1, batch_size, embedding_size)\n", " output = F.relu(embedded)\n", " output, hidden = self.rnn(output, hidden) # output shape (1, batch_size, hidden_size)\n", " output = self.softmax(self.out(output)) # shape (1, batch_size, output_size)\n", " return output, hidden\n", "\n", " # not needed since hidden will be provided by the encoder" ] }, { "cell_type": "markdown", "metadata": { "id": "5JcQdylzI_Fc" }, "source": [ "## Attention Decoder" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "R1Xysuv9I-Qr" }, "outputs": [], "source": [ "class AttentionDecoderRNN(nn.Module):\n", " def __init__(self, hidden_size, output_size, embedding_size, num_layers,\n", " cell_type, dropout, batch_size, max_length):\n", " super(AttentionDecoderRNN, self).__init__()\n", " self.hidden_size = hidden_size\n", " self.num_layers = num_layers\n", " self.cell_type = cell_type\n", " self.batch_size = batch_size\n", " self.embedding_size = embedding_size\n", " self.max_length = max_length\n", " self.dropout = dropout\n", "\n", " self.embedding = nn.Embedding(output_size, embedding_size)\n", " self.dropout = nn.Dropout(self.dropout)\n", " self.attention = nn.Linear(hidden_size + embedding_size, self.max_length)\n", " self.attention_combine = nn.Linear(hidden_size + embedding_size, hidden_size)\n", "\n", " if self.cell_type == \"GRU\":\n", " self.rnn = nn.GRU(hidden_size, hidden_size, num_layers=num_layers)\n", " elif self.cell_type == \"LSTM\":\n", " self.rnn = nn.LSTM(hidden_size, hidden_size, num_layers=num_layers)\n", " else:\n", " self.rnn = nn.RNN(hidden_size, hidden_size, num_layers=num_layers)\n", "\n", " self.out = nn.Linear(hidden_size, output_size)\n", " self.softmax = nn.LogSoftmax(dim=2)\n", "\n", " def forward(self, input, hidden, encoder_outputs): #input shape (1, batch_size)\n", " embedded = self.embedding(input.long()).view(-1, self.batch_size, self.embedding_size) \n", " # embedded shape (1, 
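batch_size, embedding_size)\n",
 "        # (annotation) attention here is a learned score over source positions:\n",
 "        #   score = attention([embedded ; mean over layers of hidden])  -> (1, batch_size, max_length)\n",
 "        #   alpha = softmax(score); context = sum_j alpha_j * encoder_output_j  (the bmm below)\n",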
 "        embedded = F.relu(embedded)\n",
 "\n",
 "        # Compute attention scores\n",
 "        if self.cell_type == \"LSTM\":\n",
 "            attn_hidden = torch.mean(hidden[0], dim=0)\n",
 "        else:\n",
 "            attn_hidden = torch.mean(hidden, dim=0)\n",
 "        attn_scores = self.attention(torch.cat((embedded, attn_hidden.unsqueeze(0)), dim=2))  # attn_scores shape (1, batch_size, max_length)\n",
 "\n",
 "        attn_weights = F.softmax(attn_scores, dim=-1)  # attn_weights shape (1, batch_size, max_length)\n",
 "\n",
 "        # Apply attention weights to encoder outputs\n",
 "        attn_applied = torch.bmm(attn_weights.transpose(0, 1), encoder_outputs.transpose(0, 1))\n",
 "\n",
 "        # Combine attention output and embedded input\n",
 "        combined = torch.cat((embedded, attn_applied.transpose(0, 1)), dim=2)\n",
 "        combined = self.attention_combine(combined)\n",
 "        combined = F.relu(combined)  # shape (1, batch_size, hidden_size)\n",
 "\n",
 "        # Run through the RNN\n",
 "        output, hidden = self.rnn(combined, hidden)\n",
 "        # output shape: (1, batch_size, hidden_size)\n",
 "\n",
 "        # Pass through linear layer and softmax activation\n",
 "        output = self.out(output)  # shape: (1, batch_size, output_size)\n",
 "        output = self.softmax(output)\n",
 "        return output, hidden, attn_weights.transpose(0, 1)\n"
] },
{ "cell_type": "code", "execution_count": null, "metadata": { "id": "LJ2Papj_jTX8" }, "outputs": [], "source": [] },
{ "cell_type": "markdown", "metadata": { "id": "658W9RARGEUf" }, "source": [ "# Helper functions" ] },
{ "cell_type": "markdown", "metadata": { "id": "q7fAgs5uQni_" }, "source": [ "## count matches" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "id": "8fzy8U6_lbug" }, "outputs": [], "source": [
 "def count_exact_matches(pred, target):\n",
 "    \"\"\"\n",
 "    Counts the rows of pred that match the corresponding row of target exactly.\n",
 "    pred: tensor of shape (batch_size, seq_len-1)\n",
 "    target: tensor of shape (batch_size, seq_len-1)\n",
 "    \"\"\"\n",
 "    count = 0\n",
 "    for i in range(pred.shape[0]):\n",
 "        flag = True\n",
 "        for j in range(pred.shape[1]):\n",
 "            if target[i][j] != pred[i][j]:\n",
 "                flag = False\n",
 "                break\n",
 "\n",
 "        if flag:\n",
 "            count += 1\n",
 "\n",
 "    return count"
] },
{ "cell_type": "markdown", "metadata": { "id": "n4rGh7vuQqaa" }, "source": [ "## evaluation" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "id": "zp6gvWmDlWoB" }, "outputs": [], "source": [
 "def evaluate(data, encoder, decoder, output_size, batch_size, hidden_size, num_layers_encoder, num_layers_decoder, cell_type, attention):\n",
 "    running_loss = 0\n",
 "    correct = 0\n",
 "\n",
 "    loader = DataLoader(data, batch_size=batch_size)\n",
 "    loss_fun = nn.CrossEntropyLoss(reduction=\"sum\")\n",
 "    seq_len = 0\n",
 "\n",
 "    atten_weights = torch.zeros(1, 21, 27).to(device)  # (1, output seq_len - 1, padded input len); needed to return the attention weights\n",
 "    predictions = torch.zeros(22-1, 1).to(device)  # seed column; the output seq_len is 22\n",
 "    with torch.no_grad():\n",
 "        for j, (x, y) in enumerate(loader):\n",
 "            loss = 0\n",
 "            encoder.eval()\n",
 "            decoder.eval()\n",
 "\n",
 "            x = x.to(device)\n",
 "            y = y.to(device)\n",
 "\n",
 "            x = x.T\n",
 "            y = y.T\n",
 "            seq_len = len(y)\n",
 "\n",
 "            encoder_hidden = encoder.initHidden()\n",
 "            encoder_output, encoder_hidden = encoder(x, encoder_hidden)\n",
 "\n",
 "            decoder_input = y[0]\n",
 "\n",
 "            # Handle different numbers of layers in the encoder and decoder\n",
 "            if num_layers_encoder != num_layers_decoder:\n",
 "                if num_layers_encoder < num_layers_decoder:\n",
 "                    remaining_layers = num_layers_decoder - num_layers_encoder\n",
 "\n",
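 "                    # (annotation) Bridging mismatched depths: e.g. a 2-layer encoder\n",
 "                    # feeding a 4-layer decoder keeps both encoder states and repeats\n",
 "                    # the top one twice, giving a (4, batch_size, hidden_size) initial state.\n",
 "                    # Copy all 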
encoder hidden layers and then repeat the top layer\n", " if cell_type == \"LSTM\":\n", " top_layer_hidden = (encoder_hidden[0][-1].unsqueeze(0), encoder_hidden[1][-1].unsqueeze(0))\n", " extra_hidden = (top_layer_hidden[0].repeat(remaining_layers, 1, 1), top_layer_hidden[1].repeat(remaining_layers, 1, 1))\n", " decoder_hidden = (torch.cat((encoder_hidden[0], extra_hidden[0]), dim=0), torch.cat((encoder_hidden[1], extra_hidden[1]), dim=0))\n", " else:\n", " top_layer_hidden = encoder_hidden[-1].unsqueeze(0) #top_layer_hidden shape (1, batch_size, hidden_size)\n", " extra_hidden = top_layer_hidden.repeat(remaining_layers, 1, 1)\n", " decoder_hidden = torch.cat((encoder_hidden, extra_hidden), dim=0)\n", "\n", " else:\n", " # Slice the hidden states of the encoder to match the decoder layers\n", " if cell_type == \"LSTM\":\n", " decoder_hidden = (encoder_hidden[0][-num_layers_decoder:], encoder_hidden[1][-num_layers_decoder:])\n", " else :\n", " decoder_hidden = encoder_hidden[-num_layers_decoder:]\n", " else:\n", " decoder_hidden = encoder_hidden\n", "\n", " pred=torch.zeros(len(y)-1, batch_size).to(device)\n", " atten_weight_default = torch.zeros(batch_size,1, 27).to(device)\n", " for k in range(1,len(y)):\n", " if attention == \"Yes\":\n", " \n", " decoder_output, decoder_hidden, atten_weight = decoder(decoder_input, decoder_hidden, encoder_output)\n", " atten_weight_default = torch.cat((atten_weight_default, atten_weight), dim = 1)\n", " else:\n", " decoder_output, decoder_hidden= decoder(decoder_input, decoder_hidden)\n", " max_prob, index = decoder_output.topk(1) # max_prob shape (1, batch_size, 1)\n", " decoder_output = torch.squeeze(decoder_output)\n", " loss += loss_fun(decoder_output, y[k].long())\n", " pred[k-1]= torch.squeeze(index)\n", " decoder_input = index\n", " if attention == \"Yes\":\n", " atten_weights = torch.cat((atten_weights, atten_weight_default[:, 1:, :]), dim = 0)\n", "\n", " running_loss += loss.item()\n", " correct += count_exact_matches(pred.T,y[1:,:].T)\n", " predictions = torch.cat((predictions, pred), dim=1)\n", "\n", " \n", " avg_loss = running_loss / (len(data) * seq_len)\n", " print(\"correct =\", correct)\n", " avg_acc = 100 * (correct / (len(data)))\n", " if attention == \"Yes\":\n", " return avg_loss, avg_acc, predictions, atten_weights[1:, :, :]\n", " else:\n", " return avg_loss, avg_acc, predictions\n", " \n", " \n", " " ] }, { "cell_type": "markdown", "metadata": { "id": "0SsnRWlgQmCI" }, "source": [ "# Training function" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "PhDgsZG0QqPW" }, "outputs": [], "source": [ "def train(sweeps = True, test = False):\n", "\n", " if sweeps == False: \n", " configs = config_defaults # use the default configuration which has the best hyperparameters\n", " else:\n", " wandb.init(config= config_defaults, project='DL_assign_3') # if not test then run wandb sweeps\n", " configs=wandb.config\n", " \n", "\n", " learn_rate = configs['learn_rate']\n", " batch_size = configs['batch_size']\n", " hidden_size = configs['hidden_size']\n", " embedding_size = configs['embedding_size']\n", " num_layers_encoder = configs['num_layers_encoder']\n", " num_layers_decoder = configs['num_layers_decoder']\n", " cell_type = configs['cell_type']\n", " bidirectional = configs['bidirectional']\n", " dropout = configs['dropout']\n", " teach_ratio = configs['teach_ratio']\n", " epochs = configs['epochs']\n", " attention = configs['attention']\n", "\n", " if sweeps:\n", " 
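# name the run after its key hyperparameters, e.g.\n",
 "        # 'hidden_1024_batch_256_embed_size_32_dropout_0.4_cell_LSTM'\n",
 "        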
wandb.run.name = 'hidden_'+str(hidden_size)+'_batch_'+str(batch_size)+'_embed_size_'+str(embedding_size)+'_dropout_'+str(dropout)+'_cell_'+str(cell_type)\n",
 "\n",
 "    input_len = ipLang.n_chars\n",
 "    output_len = opLang.n_chars\n",
 "\n",
 "    encoder = EncoderRNN(input_len, hidden_size, embedding_size,\n",
 "                         num_layers_encoder, cell_type,\n",
 "                         bidirectional, dropout, batch_size)\n",
 "\n",
 "    if attention == \"Yes\":\n",
 "        decoder = AttentionDecoderRNN(hidden_size, output_len, embedding_size, num_layers_decoder,\n",
 "                                      cell_type, dropout, batch_size, 27)  # 27 = padded input length\n",
 "    else:\n",
 "        decoder = DecoderRNN(hidden_size, output_len, embedding_size, num_layers_decoder,\n",
 "                             cell_type, dropout, batch_size)  # dropout is unused in the vanilla decoder\n",
 "\n",
 "    train_loader = DataLoader(trainData, batch_size=batch_size, shuffle=True)\n",
 "    val_loader = DataLoader(valData, batch_size=batch_size, shuffle=True)\n",
 "\n",
 "    encoder_optimizer = optim.Adam(encoder.parameters(), learn_rate)\n",
 "    decoder_optimizer = optim.Adam(decoder.parameters(), learn_rate)\n",
 "    loss_fun = nn.CrossEntropyLoss(reduction=\"sum\")\n",
 "\n",
 "    encoder.to(device)\n",
 "    decoder.to(device)\n",
 "    seq_len = 0\n",
 "\n",
 "    # Initialize variables for early stopping\n",
 "    best_val_loss = float('inf')\n",
 "    patience = 5\n",
 "    epochs_without_improvement = 0\n",
 "\n",
 "    for i in range(epochs):\n",
 "        running_loss = 0.0\n",
 "        train_correct = 0\n",
 "\n",
 "        encoder.train()\n",
 "        decoder.train()\n",
 "\n",
 "        for j, (train_x, train_y) in enumerate(train_loader):\n",
 "            train_x = train_x.to(device)\n",
 "            train_y = train_y.to(device)\n",
 "\n",
 "            encoder_optimizer.zero_grad()\n",
 "            decoder_optimizer.zero_grad()\n",
 "\n",
 "            train_x = train_x.T\n",
 "            train_y = train_y.T\n",
 "            seq_len = len(train_y)\n",
 "            encoder_hidden = encoder.initHidden()\n",
 "            # for LSTM, encoder_hidden is a tuple of two (num_layers * num_directions, batch_size, hidden_size) tensors\n",
 "            encoder_output, encoder_hidden = encoder(train_x, encoder_hidden)\n",
 "            # encoder_hidden shape (num_layers, batch_size, hidden_size)\n",
 "\n",
 "            # now move to the decoder\n",
 "            decoder_input = train_y[0]  # shape (1, batch_size)\n",
 "\n",
 "            # Handle different numbers of layers in the encoder and decoder\n",
 "            if num_layers_encoder != num_layers_decoder:\n",
 "                if num_layers_encoder < num_layers_decoder:\n",
 "                    remaining_layers = num_layers_decoder - num_layers_encoder\n",
 "                    # Copy all encoder hidden layers and then repeat the top layer\n",
 "                    if cell_type == \"LSTM\":\n",
 "                        top_layer_hidden = (encoder_hidden[0][-1].unsqueeze(0), encoder_hidden[1][-1].unsqueeze(0))\n",
 "                        extra_hidden = (top_layer_hidden[0].repeat(remaining_layers, 1, 1), top_layer_hidden[1].repeat(remaining_layers, 1, 1))\n",
 "                        decoder_hidden = (torch.cat((encoder_hidden[0], extra_hidden[0]), dim=0), torch.cat((encoder_hidden[1], extra_hidden[1]), dim=0))\n",
 "                    else:\n",
 "                        top_layer_hidden = encoder_hidden[-1].unsqueeze(0)  # top_layer_hidden shape (1, batch_size, hidden_size)\n",
 "                        extra_hidden = top_layer_hidden.repeat(remaining_layers, 1, 1)\n",
 "                        decoder_hidden = torch.cat((encoder_hidden, extra_hidden), dim=0)\n",
 "\n",
 "                else:\n",
 "                    # Slice the hidden states of the encoder to match the decoder layers\n",
 "                    if cell_type == \"LSTM\":\n",
 "                        decoder_hidden = (encoder_hidden[0][-num_layers_decoder:], encoder_hidden[1][-num_layers_decoder:])\n",
 "                    else:\n",
 "                        decoder_hidden = encoder_hidden[-num_layers_decoder:]\n",
 "            else:\n",
 "                decoder_hidden = encoder_hidden\n",
 "\n",
 "            loss = 
0\n", " correct = 0\n", " \n", " for k in range(0, len(train_y)-1):\n", " \n", " if attention == \"Yes\":\n", " decoder_output, decoder_hidden, atten_weights = decoder(decoder_input, decoder_hidden, encoder_output)\n", " else:\n", " decoder_output, decoder_hidden= decoder(decoder_input, decoder_hidden) # decoder_output shape (1, batch_size, output_size)\n", "\n", " max_prob, index = decoder_output.topk(1) # max_prob shape (1, batch_size, 1)\n", " index = torch.squeeze(index) # shape (batch_size)\n", " decoder_output = torch.squeeze(decoder_output)\n", " loss += loss_fun(decoder_output, train_y[k+1].long())\n", " \n", " correct += (index == train_y[k+1]).sum().item()\n", "\n", " # Apply teacher forcing\n", " use_teacher_forcing = True if random.random() < teach_ratio else False\n", "\n", " if use_teacher_forcing:\n", " decoder_input = train_y[k+1]\n", " \n", " else:\n", " decoder_input = index\n", "\n", " running_loss += loss.item()\n", " train_correct += correct\n", " loss.backward()\n", " encoder_optimizer.step()\n", " decoder_optimizer.step()\n", " \n", "\n", " # find train loss and accuracy and print + log to wandb\n", " if attention == \"Yes\":\n", " _, train_accuracy,_, _ = evaluate(trainData,encoder, decoder,output_len,batch_size,hidden_size,num_layers_encoder,num_layers_decoder, cell_type, attention)\n", " else:\n", " _, train_accuracy,_= evaluate(trainData,encoder, decoder,output_len,batch_size,hidden_size,num_layers_encoder,num_layers_decoder, cell_type, attention)\n", " \n", " print(f\"epoch {i}, training loss {running_loss/(len(trainData)* seq_len)}, training accuracy {train_accuracy}\")\n", " if sweeps:\n", " wandb.log({\"epoch\": i, \"train_loss\": running_loss/(len(trainData)* seq_len), \"train_accuracy\": train_accuracy})\n", " \n", " # # find validation loss and accuracy and print + log to wandb\n", " if attention == \"Yes\":\n", " val_loss, val_accuracy,_, _ = evaluate(valData,encoder, decoder,output_len,batch_size,hidden_size,num_layers_encoder,num_layers_decoder, cell_type, attention)\n", " else:\n", " val_loss, val_accuracy,_ = evaluate(valData,encoder, decoder,output_len,batch_size,hidden_size,num_layers_encoder,num_layers_decoder, cell_type, attention)\n", " \n", " print(f\"epoch {i}, validation loss {val_loss}, validation accuracy {val_accuracy}\")\n", " if sweeps:\n", " wandb.log({\"val_loss\": val_loss, \"val_accuracy\": val_accuracy})\n", "\n", " # Check for early stopping\n", " if val_loss < best_val_loss:\n", " best_val_loss = val_loss\n", " epochs_without_improvement = 0\n", " # Save the model weights\n", " torch.save(encoder.state_dict(), 'best_encoder.pt')\n", " torch.save(decoder.state_dict(), 'best_decoder.pt')\n", " else:\n", " epochs_without_improvement += 1\n", " if epochs_without_improvement >= patience:\n", " print(\"Early stopping triggered. 
No improvement in validation loss.\")\n",
 "                break\n",
 "\n",
 "    # if testing mode is on, print the test accuracy\n",
 "    if test:\n",
 "        # Load the best model weights\n",
 "        encoder.load_state_dict(torch.load('best_encoder.pt'))\n",
 "        decoder.load_state_dict(torch.load('best_decoder.pt'))\n",
 "        if attention == \"Yes\":\n",
 "            _, test_accuracy, pred, atten_weights = evaluate(testData, encoder, decoder, output_len, batch_size, hidden_size, num_layers_encoder, num_layers_decoder, cell_type, attention)\n",
 "        else:\n",
 "            _, test_accuracy, pred = evaluate(testData, encoder, decoder, output_len, batch_size, hidden_size, num_layers_encoder, num_layers_decoder, cell_type, attention)\n",
 "        print(f\"test accuracy {test_accuracy}\")\n",
 "\n",
 "        if attention == \"Yes\":\n",
 "            return pred, atten_weights\n",
 "        else:\n",
 "            return pred\n"
] },
{ "cell_type": "markdown", "metadata": { "id": "nvyRJWUUbR2f" }, "source": [ "# Translating predictions to words\n" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "id": "Hd3zCTnSbSaL" }, "outputs": [], "source": [
 "def translate_prediction(input_dict, input, output_dict, pred, target):\n",
 "    '''pred arrives as (seq_len-1, 1 + dataset_size); the extra first column is the\n",
 "    seed column created in evaluate(). target arrives as (dataset_size, seq_len).\n",
 "    '''\n",
 "    pred = pred.T  # shape (1 + dataset_size, seq_len-1)\n",
 "    pred = pred[1:, :-1]  # drop the seed row and the final pad column\n",
 "    input = input[:, :-1]  # drop the trailing pad column\n",
 "    target = target[:, 1:-1]  # drop the leading '^' and the trailing pad column\n",
 "    print(f\"pred shape {pred.shape}, input shape {input.shape}, target shape {target.shape}\")\n",
 "    predictions = []\n",
 "    Input = []\n",
 "    Target = []\n",
 "    for i in range(len(pred)):\n",
 "        pred_word = \"\"\n",
 "        input_word = \"\"\n",
 "        target_word = \"\"\n",
 "\n",
 "        for j in range(pred.shape[1]):\n",
 "            # Ignore padding\n",
 "            if target[i][j].item() != 0:\n",
 "                pred_word += output_dict[pred[i][j].item()]\n",
 "                target_word += output_dict[target[i][j].item()]\n",
 "\n",
 "        for j in range(input.shape[1]):\n",
 "            if input[i][j].item() != 0:\n",
 "                input_word += input_dict[input[i][j].item()]\n",
 "\n",
 "        # Append the words to the respective lists\n",
 "        predictions.append(pred_word)\n",
 "        Input.append(input_word)\n",
 "        Target.append(target_word)\n",
 "\n",
 "    # Create a DataFrame\n",
 "    df = pd.DataFrame({\"input\": Input, \"predicted\": predictions, \"Actual\": Target})\n",
 "    return df\n"
] },
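{ "cell_type": "markdown", "metadata": {}, "source": [ "A shape sketch for the trimming above (illustrative only; the sizes mirror this notebook's test split: 22-step targets, 4096 words, plus the seed column from `evaluate`)." ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
 "# Illustrative only: why translate_prediction slices the way it does\n",
 "toy_pred = torch.zeros(21, 1 + 4096)  # (seq_len-1, seed column + n_words), as returned by evaluate()\n",
 "print(toy_pred.T[1:, :-1].shape)      # torch.Size([4096, 20]) -- matches the print below"
] },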
{ "cell_type": "markdown", "metadata": { "id": "8ETW0BG_Pa24" }, "source": [ "# Call train" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "id": "pgGp7MoGzfPg" }, "outputs": [], "source": [ "# train(sweeps = False, test = True)" ] },
{ "cell_type": "markdown", "metadata": { "id": "MQPGy32rnD3V" }, "source": [ "# Running sweeps for models without Attention\n", "\n" ] },
{ "cell_type": "markdown", "metadata": { "id": "z_aYZvDD1OHU" }, "source": [ "## Sweep Config" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "id": "SVv8bI-D1Q_I" }, "outputs": [], "source": [
 "sweep_config = {\n",
 "    'name': 'sweepDL',\n",
 "    'method': 'bayes',\n",
 "    'metric': {\n",
 "        'name': 'val_accuracy',\n",
 "        'goal': 'maximize'\n",
 "    },\n",
 "    'parameters': {\n",
 "        'learn_rate': {\n",
 "            'values': [0.01, 0.001, 0.0001]\n",
 "        },\n",
 "        'embedding_size': {\n",
 "            'values': [32, 64, 128, 256, 512, 1024]\n",
 "        },\n",
 "        'batch_size': {\n",
 "            'values': [16, 32, 64, 128, 256]\n",
 "        },\n",
 "        'hidden_size': {\n",
 "            'values': [32, 64, 128, 256, 512, 1024]\n",
 "        },\n",
 "        'teach_ratio': {\n",
 "            'values': [0.4, 0.5, 0.6]\n",
 "        },\n",
 "        'dropout': {\n",
 "            'values': [0, 0.2, 0.4]\n",
 "        },\n",
 "        'cell_type': {\n",
 "            'values': [\"RNN\", \"LSTM\", \"GRU\"]\n",
 "        },\n",
 "        'bidirectional': {\n",
 "            'values': [\"Yes\", \"No\"]\n",
 "        },\n",
 "        'num_layers_decoder': {\n",
 "            'values': [1, 2, 3, 4]\n",
 "        },\n",
 "        'num_layers_encoder': {\n",
 "            'values': [1, 2, 3, 4]\n",
 "        },\n",
 "        'epochs': {\n",
 "            'values': [10, 15, 20, 25, 30]\n",
 "        },\n",
 "        'attention': {\n",
 "            'values': [\"No\"]\n",
 "        }\n",
 "    }\n",
 "}\n",
 "config_defaults = {\n",
 "    'learn_rate': 0.001,\n",
 "    'embedding_size': 32,\n",
 "    'batch_size': 256,\n",
 "    'hidden_size': 1024,\n",
 "    'num_layers_encoder': 3,\n",
 "    'num_layers_decoder': 3,\n",
 "    'bidirectional': 'No',\n",
 "    'cell_type': \"LSTM\",\n",
 "    'teach_ratio': 0.6,\n",
 "    'dropout': 0.4,\n",
 "    'epochs': 15,\n",
 "    'attention': \"No\"\n",
 "}"
] },
{ "cell_type": "code", "execution_count": null, "metadata": { "id": "4KxsOOpvr1oi" }, "outputs": [], "source": [
 "sweep_id = wandb.sweep(sweep_config, project=\"CS6910_Assignment_3\")\n",
 "wandb.agent(sweep_id, function=train)"
] },
{ "cell_type": "markdown", "metadata": { "id": "pKvBd5mKf0Hf" }, "source": [ "# Testing the Best Model (without Attention) on Test Data\n", "Set the default hyperparameters to the best values obtained from the sweep hyperparameter tuning" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "id": "kMQvZjZl0q4U" }, "outputs": [], "source": [
 "config_defaults = {\n",
 "    'learn_rate': 0.001,\n",
 "    'embedding_size': 32,\n",
 "    'batch_size': 256,\n",
 "    'hidden_size': 1024,\n",
 "    'num_layers_encoder': 3,\n",
 "    'num_layers_decoder': 3,\n",
 "    'bidirectional': 'No',\n",
 "    'cell_type': \"LSTM\",\n",
 "    'teach_ratio': 0.6,\n",
 "    'dropout': 0.4,\n",
 "    'epochs': 15,\n",
 "    'attention': \"No\"\n",
 "}"
] },
{ "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "ygtFpEvp8jFU", "outputId": "1a71d3be-f17f-498c-8844-3c115c411f0a" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "correct = 1490\n", "test accuracy 36.376953125\n" ] } ], "source": [ "pred = train(sweeps = False, test = True)" ] },
{ "cell_type": "markdown", "metadata": { "id": "hMf0OAuscOJx" }, "source": [ "# Saving the predictions of the vanilla model to a CSV file" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "1cgUOUdsfzUB", "outputId": "8784a3aa-315e-476f-cced-c38ebb8434b3" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "pred shape torch.Size([4096, 20]), input shape torch.Size([4096, 26]), target shape torch.Size([4096, 20])\n" ] } ], "source": [
 "# save the predictions\n",
 "dataframe = translate_prediction(ipLang.index2char, testData[:][0], opLang.index2char, pred, testData[:][1])\n",
 "dataframe.to_csv(\"predictions.csv\")"
] },
{ "cell_type": "code", "execution_count": null, "metadata": { "id": "ZZW-IEWZ5syU" }, "outputs": [], "source": [
 "import pandas as pd\n",
 "data = pd.read_csv(\"predictions.csv\")"
] },
{ "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 424 }, "id": "2sOkc_0vmDlB", "outputId": "750d06b5-fee2-4eb8-d7e6-a7043cd0c15a" }, "outputs": [], "source": [ "data" ] },
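{ "cell_type": "markdown", "metadata": {}, "source": [ "A hedged cross-check (illustrative only): the word-level exact-match rate can be recomputed straight from the saved CSV, using the column names written by `translate_prediction`. It should agree with the test accuracy printed above." ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
 "# Illustrative only: recompute exact-match accuracy from predictions.csv\n",
 "exact = (data['predicted'] == data['Actual']).mean()\n",
 "print(f'exact-match rate: {100 * exact:.2f}%')"
] },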
{ "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 142 }, "id": "AkG1vCpZ_vjG", "outputId": "d64b794c-d173-4871-80fc-93b8211ebedc" }, "outputs": [], "source": [
 "# We also want to log the prediction table to wandb\n",
 "wandb.init(project=\"CS6910_Assignment_3\")"
] },
{ "cell_type": "code", "execution_count": null, "metadata": { "id": "MmKDX6V5_kGu" }, "outputs": [], "source": [
 "table = wandb.Table(dataframe=data)\n",
 "wandb.log({\"data\": table})"
] },
{ "cell_type": "markdown", "metadata": { "id": "FYMa5jTQRUaB" }, "source": [ "## Plotting the confusion matrix in wandb" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "id": "YBaJZCIBRAGZ" }, "outputs": [], "source": [
 "import numpy as np\n",
 "# character-level confusion over the output alphabet: rows = true character, columns = predicted character\n",
 "CM = np.zeros((opLang.n_chars, opLang.n_chars))\n",
 "\n",
 "targets = testData[:][1][:, 1:]  # drop the leading '^'\n",
 "preds = pred.T[1:, :]            # drop the seed column added by evaluate()\n",
 "\n",
 "for i in range(targets.shape[0]):\n",
 "    for j in range(targets.shape[1]):\n",
 "        p = int(preds[i][j])\n",
 "        t = int(targets[i][j])\n",
 "        CM[t][p] += 1\n",
 "\n",
 "classes = []\n",
 "\n",
 "for i in range(len(CM)):\n",
 "    classes.append(opLang.index2char[i])\n",
 "\n",
 "percentages = 100 * (CM / np.sum(CM))\n",
 "\n",
 "# Define the text for each cell\n",
 "cell_text = []\n",
 "for i in range(len(classes)):\n",
 "    row_text = []\n",
 "    for j in range(len(classes)):\n",
 "        txt = f'Total {CM[i, j]:.0f} ({percentages[i, j]:.3f}%)'\n",
 "        if i == j:\n",
 "            txt = 'Correctly predicted ' + classes[i] + ': ' + txt\n",
 "        else:\n",
 "            txt = 'Predicted ' + classes[j] + ' for ' + classes[i] + ': ' + txt\n",
 "        row_text.append(txt)\n",
 "    cell_text.append(row_text)\n",
 "\n",
 "import plotly.graph_objs as go\n",
 "\n",
 "# Define the trace\n",
 "trace = go.Heatmap(z=percentages,\n",
 "                   x=classes,\n",
 "                   y=classes,\n",
 "                   colorscale='Blues',\n",
 "                   colorbar=dict(title='Percentage'),\n",
 "                   hovertemplate='%{text}',\n",
 "                   text=cell_text,\n",
 "                   )\n",
 "\n",
 "# Define the layout\n",
 "layout = go.Layout(title='Confusion Matrix',\n",
 "                   xaxis=dict(title='Predicted Character'),\n",
 "                   yaxis=dict(title='True Character'),\n",
 "                   )\n",
 "\n",
 "# Plot the figure\n",
 "fig = go.Figure(data=[trace], layout=layout)\n",
 "wandb.log({'confusion_matrix': (fig)})"
] },
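{ "cell_type": "markdown", "metadata": {}, "source": [ "A hedged follow-up sketch (illustrative only): the same matrix can be queried for the most-confused character pairs, i.e. the largest off-diagonal counts." ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
 "# Illustrative only: top-5 off-diagonal confusions from the CM built above\n",
 "off = CM.copy()\n",
 "np.fill_diagonal(off, 0)\n",
 "for idx in np.argsort(off, axis=None)[::-1][:5]:\n",
 "    i, j = np.unravel_index(idx, off.shape)\n",
 "    print(f'true {opLang.index2char[i]} -> predicted {opLang.index2char[j]}: {int(off[i, j])}')"
] },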
{ "cell_type": "markdown", "metadata": { "id": "zfuv5FoA1wt2" }, "source": [ "# Running sweeps for models with Attention\n" ] },
{ "cell_type": "markdown", "metadata": { "id": "tsHS0PkNGHdV" }, "source": [ "## Sweep Config" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "id": "HwCn-Ci5xkTb" }, "outputs": [], "source": [
 "sweep_config = {\n",
 "    'name': 'sweepDL',\n",
 "    'method': 'bayes',\n",
 "    'metric': {\n",
 "        'name': 'val_accuracy',\n",
 "        'goal': 'maximize'\n",
 "    },\n",
 "    'parameters': {\n",
 "        'learn_rate': {\n",
 "            'values': [0.01, 0.001, 0.0001]\n",
 "        },\n",
 "        'embedding_size': {\n",
 "            'values': [32, 64, 128, 256, 512, 1024]\n",
 "        },\n",
 "        'batch_size': {\n",
 "            'values': [16, 32, 64, 128, 256]\n",
 "        },\n",
 "        'hidden_size': {\n",
 "            'values': [32, 64, 128, 256, 512, 1024]\n",
 "        },\n",
 "        'teach_ratio': {\n",
 "            'values': [0.4, 0.5, 0.6]\n",
 "        },\n",
 "        'dropout': {\n",
 "            'values': [0, 0.2, 0.4]\n",
 "        },\n",
 "        'cell_type': {\n",
 "            'values': [\"RNN\", \"LSTM\", \"GRU\"]\n",
 "        },\n",
 "        'bidirectional': {\n",
 "            'values': [\"Yes\", \"No\"]\n",
 "        },\n",
 "        'num_layers_decoder': {\n",
 "            'values': [1, 2, 3, 4]\n",
 "        },\n",
 "        'num_layers_encoder': {\n",
 "            'values': [1, 2, 3, 4]\n",
 "        },\n",
 "        'epochs': {\n",
 "            'values': [10, 15, 20, 25, 30]\n",
 "        },\n",
 "        'attention': {\n",
 "            'values': [\"Yes\"]\n",
 "        }\n",
 "    }\n",
 "}\n",
 "config_defaults = {\n",
 "    'learn_rate': 0.001,\n",
 "    'embedding_size': 32,\n",
 "    'batch_size': 64,\n",
 "    'hidden_size': 1024,\n",
 "    'num_layers_encoder': 1,\n",
 "    'num_layers_decoder': 1,\n",
 "    'bidirectional': 'Yes',\n",
 "    'cell_type': \"LSTM\",\n",
 "    'teach_ratio': 0.5,\n",
 "    'dropout': 0.4,\n",
 "    'epochs': 20,\n",
 "    'attention': \"Yes\"\n",
 "}"
] },
{ "cell_type": "code", "execution_count": null, "metadata": { "id": "3ADMwinqaQVF" }, "outputs": [], "source": [
 "sweep_id = wandb.sweep(sweep_config, project=\"CS6910_Assignment_3\")\n",
 "wandb.agent(sweep_id, function=train)\n",
 "# wandb.agent(sweep_id= \"xiyggu44\",function=train, project=\"CS6910_Assignment_3\")"
] },
{ "cell_type": "markdown", "metadata": { "id": "W7CYNChRGuGK" }, "source": [ "# Testing the Best Model (with Attention) on Test Data\n", "Set the default hyperparameters to the best values obtained from the sweep hyperparameter tuning" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "id": "C9MUrsXu_Rr4" }, "outputs": [], "source": [
 "config_defaults = {\n",
 "    'learn_rate': 0.001,\n",
 "    'embedding_size': 32,\n",
 "    'batch_size': 64,\n",
 "    'hidden_size': 1024,\n",
 "    'num_layers_encoder': 1,\n",
 "    'num_layers_decoder': 1,\n",
 "    'bidirectional': 'Yes',\n",
 "    'cell_type': \"LSTM\",\n",
 "    'teach_ratio': 0.5,\n",
 "    'dropout': 0.4,\n",
 "    'epochs': 20,\n",
 "    'attention': \"Yes\"\n",
 "}"
] },
{ "cell_type": "code", "execution_count": null, "metadata": { "id": "u7XAB4Q5Hpxj" }, "outputs": [], "source": [ "pred, atten_weights = train(sweeps = False, test = True)" ] },
{ "cell_type": "markdown", "metadata": { "id": "fld21YRZdRdG" }, "source": [ "# Saving the predictions of the Attention model to a CSV file" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "BpDQ1mrydYWg", "outputId": "8784a3aa-315e-476f-cced-c38ebb8434b3" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "pred shape torch.Size([4096, 20]), input shape torch.Size([4096, 26]), target shape torch.Size([4096, 20])\n" ] } ], "source": [
 "# save the predictions\n",
 "dataframe = translate_prediction(ipLang.index2char, testData[:][0], opLang.index2char, pred, testData[:][1])\n",
 "dataframe.to_csv(\"predictions.csv\")"
] },
{ "cell_type": "code", "execution_count": null, "metadata": { "id": "PKMYPZdtdbDh" }, "outputs": [], "source": [
 "import pandas as pd\n",
 "data = pd.read_csv(\"predictions.csv\")"
] },
{ "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 142 }, "id": "8gCL1rXCdgYp", "outputId": "d64b794c-d173-4871-80fc-93b8211ebedc" }, "outputs": [], "source": [
 "# We also want to log the prediction table to wandb\n",
 "wandb.init(project=\"CS6910_Assignment_3\")"
] },
{ "cell_type": "code", "execution_count": null, "metadata": { "id": "N1r2ownhdjbz" }, "outputs": [], "source": [
 "table = wandb.Table(dataframe=data)\n",
 "wandb.log({\"data\": table})"
] },
{ "cell_type": "markdown", "metadata": { "id": "LDP4KvWdFnIL" }, "source": [ "# Plotting the Attention HeatMaps" ] },
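{ "cell_type": "markdown", "metadata": {}, "source": [ "The heatmap cell below needs a Devanagari-capable font so the predicted characters render on the axes; it assumes `TiroDevanagariHindi-Regular.ttf` sits in the working directory. The check here is an illustrative guard, not part of the original pipeline." ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
 "# Illustrative only: warn early if the font file the next cell expects is missing\n",
 "import os\n",
 "if not os.path.exists('TiroDevanagariHindi-Regular.ttf'):\n",
 "    print('TiroDevanagariHindi-Regular.ttf not found; upload it to the working directory first.')"
] },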
{ "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 1000 }, "id": "4WfJEdcgFmiI", "outputId": "ff266529-4345-4cdc-9860-11914b099052" }, "outputs": [], "source": [
 "import matplotlib.pyplot as plt\n",
 "import numpy as np\n",
 "from matplotlib.font_manager import FontProperties\n",
 "hindi_font = FontProperties(fname = 'TiroDevanagariHindi-Regular.ttf')\n",
 "# atten_weights has shape (dataset_size, output_seq_len-1, input_len) from evaluate();\n",
 "# pred holds the predicted index matrix and testData the padded inputs\n",
 "\n",
 "# Define the grid dimensions\n",
 "rows = int(np.ceil(np.sqrt(12)))\n",
 "cols = int(np.ceil(12 / rows))\n",
 "\n",
 "# Create a figure and subplots\n",
 "fig, axes = plt.subplots(rows, cols, figsize=(9, 9))\n",
 "\n",
 "for i, ax in enumerate(axes.flatten()):\n",
 "    if i < 12:\n",
 "        prediction = [opLang.index2char[j.item()] for j in pred.T[i+1]]  # column i+1 skips the seed column\n",
 "\n",
 "        pred_word = \"\"\n",
 "        input_word = \"\"\n",
 "\n",
 "        for j in range(len(prediction)):\n",
 "            # Ignore padding\n",
 "            if prediction[j] != '#':\n",
 "                pred_word += prediction[j]\n",
 "            else:\n",
 "                break\n",
 "        input_seq = [ipLang.index2char[j.item()] for j in testData[i][0]]\n",
 "\n",
 "        for j in range(len(input_seq)):\n",
 "            if input_seq[j] != '#':\n",
 "                input_word += input_seq[j]\n",
 "            else:\n",
 "                break\n",
 "        attn_weights = atten_weights[i, :len(pred_word), :len(input_word)].detach().cpu().numpy()\n",
 "        ax.imshow(attn_weights.T, cmap='hot', interpolation='nearest')\n",
 "        ax.xaxis.set_label_position('top')\n",
 "        ax.set_title(f'Example {i+1}')\n",
 "        ax.set_xlabel('Output predicted')\n",
 "        ax.set_ylabel('Input word')\n",
 "        ax.set_xticks(np.arange(len(pred_word)))\n",
 "        ax.set_xticklabels(pred_word, rotation = 90, fontproperties = hindi_font, fontdict={'fontsize':8})\n",
 "        ax.xaxis.tick_top()\n",
 "\n",
 "        ax.set_yticks(np.arange(len(input_word)))\n",
 "        ax.set_yticklabels(input_word, rotation=90)\n",
 "\n",
 "# Adjust the spacing between subplots\n",
 "plt.tight_layout()\n",
 "\n",
 "# Show the plot\n",
 "plt.show()\n",
 "wandb.init(project='CS6910_Assignment_3')\n",
 "\n",
 "# Convert the matplotlib figure to an image\n",
 "fig.canvas.draw()\n",
 "image = np.frombuffer(fig.canvas.tostring_rgb(), dtype='uint8')\n",
 "image = image.reshape(fig.canvas.get_width_height()[::-1] + (3,))\n",
 "\n",
 "# Log the image in wandb\n",
 "wandb.log({\"attention_heatmaps\": [wandb.Image(image)]})"
] },
{ "cell_type": "code", "execution_count": null, "metadata": { "id": "FnHR_oql6-S4" }, "outputs": [], "source": [] }
], "metadata": { "accelerator": "GPU", "colab": { "collapsed_sections": [ "hRdpoWePeYHn", "44xIRolL_T_d", "Q1TioafYgICa", "svxssm9Havhb", "J56aq1J6a07q", "5JcQdylzI_Fc", "658W9RARGEUf", "q7fAgs5uQni_", "n4rGh7vuQqaa", "0SsnRWlgQmCI", "nvyRJWUUbR2f", "8ETW0BG_Pa24", "MQPGy32rnD3V", "z_aYZvDD1OHU", "pKvBd5mKf0Hf", "FYMa5jTQRUaB", "zfuv5FoA1wt2", "W7CYNChRGuGK" ], "gpuType": "T4", "include_colab_link": true, "provenance": [] }, "gpuClass": "standard", "kernelspec": { "display_name": "Python 3", "name": "python3" }, "language_info": { "name": "python" }
"_model_module_version": "1.5.0", "_model_name": "VBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "VBoxView", "box_style": "", "children": [ "IPY_MODEL_3aa935a6db14483d8aaada58a84a3e47", "IPY_MODEL_eabcea7a8bbf42f6aaa3995c0dece721" ], "layout": "IPY_MODEL_b3b7711edb5542e08c53c4f37da10203" } }, "39a8a3a9b6f1495ea17fd1b3d86b67c0": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "3aa935a6db14483d8aaada58a84a3e47": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "LabelModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "LabelModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "LabelView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_39a8a3a9b6f1495ea17fd1b3d86b67c0", "placeholder": "​", "style": "IPY_MODEL_18a8e2e817b947f9aad87b1ccaf96ea6", "value": "0.071 MB of 0.071 MB uploaded (0.000 MB deduped)\r" } }, "9b5bb4f7f4a846c28ab967b64107726e": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "ProgressStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "b3b7711edb5542e08c53c4f37da10203": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": 
null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "da62d6e5ad0a462b98e1591d39038e1e": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "eabcea7a8bbf42f6aaa3995c0dece721": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "FloatProgressModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_da62d6e5ad0a462b98e1591d39038e1e", "max": 1, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_9b5bb4f7f4a846c28ab967b64107726e", "value": 1 } } } } }, "nbformat": 4, "nbformat_minor": 0 }