{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Setup" ] }, { "cell_type": "code", "execution_count": 61, "metadata": {}, "outputs": [], "source": [ "import torch\n", "\n", "import torch.nn as nn\n", "import torch.nn.functional as F\n", "import torch.optim as optim\n", "\n", "from torch.utils.data import Dataset, DataLoader\n", "\n", "from datasets import load_dataset\n", "from transformers import DistilBertForSequenceClassification, DistilBertTokenizer\n", "\n", "import numpy as np\n", "import pandas as pd\n", "import matplotlib.pyplot as plt\n", "import seaborn as sns\n", "from sklearn.metrics import accuracy_score, f1_score\n", "\n", "import os\n", "import pickle" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Since we are operating on a Mac with an M2 chip, CUDA is not available. However, we can get GPU acceleration through PyTorch's MPS backend (falling back to CUDA or the CPU on other machines):" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "CUDA? False\n", "MPS available? True\n", "MPS built? True\n", "Device: mps\n" ] } ], "source": [ "print(\"CUDA? \", torch.cuda.is_available())\n", "\n", "print(\"MPS available? \", torch.backends.mps.is_available()) # True when macOS is 12.3+ and an MPS-enabled device is present\n", "print(\"MPS built? 
\", torch.backends.mps.is_built()) # True when this PyTorch build was compiled with MPS support\n", "\n", "# Prefer Apple's MPS backend, then CUDA, and fall back to the CPU so the notebook runs on any machine.\n", "if torch.backends.mps.is_available():\n", "    device = torch.device(\"mps\")\n", "elif torch.cuda.is_available():\n", "    device = torch.device(\"cuda\")\n", "else:\n", "    device = torch.device(\"cpu\")\n", "print(\"Device: \", device)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# EDA" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "First, we will load the dataset: https://huggingface.co/datasets/dair-ai/emotion" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "No config specified, defaulting to: emotion/split\n", "Found cached dataset emotion (/Users/david/.cache/huggingface/datasets/emotion/split/1.0.0/cca5efe2dfeb58c1d098e0f9eeb200e9927d889b5a03c67097275dfb5fe463bd)\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "861120cd48de4646996c7bc8e7daf92a", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/3 [00:00, ?it/s]" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "emotions = load_dataset(\"emotion\")" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "DatasetDict({\n", " train: Dataset({\n", " features: ['text', 'label'],\n", " num_rows: 16000\n", " })\n", " validation: Dataset({\n", " features: ['text', 'label'],\n", " num_rows: 2000\n", " })\n", " test: Dataset({\n", " features: ['text', 'label'],\n", " num_rows: 2000\n", " })\n", "})" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "emotions" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{'text': Value(dtype='string', id=None), 'label': ClassLabel(names=['sadness', 'joy', 'love', 'anger', 'fear', 'surprise'], id=None)}\n", "\n", "{'text': ['i didnt feel humiliated', 'i can go from feeling so hopeless to so damned hopeful just from being around someone who cares and is awake', 'im grabbing a minute to post i feel greedy wrong', 'i am ever feeling 
nostalgic about the fireplace i will know that it is still on the property', 'i am feeling grouchy'], 'label': [0, 0, 3, 2, 3]}\n" ] } ], "source": [ "train_ds = emotions[\"train\"]\n", "val_ds = emotions[\"validation\"]\n", "test_ds = emotions[\"test\"]\n", "\n", "print(train_ds.features)\n", "print()\n", "print(train_ds[:5])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Next, we will build a DataFrame to use for further data analysis." ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | text | \n", "label | \n", "label_names | \n", "
---|---|---|---|
0 | \n", "i didnt feel humiliated | \n", "0 | \n", "sadness | \n", "
1 | \n", "i can go from feeling so hopeless to so damned... | \n", "0 | \n", "sadness | \n", "
2 | \n", "im grabbing a minute to post i feel greedy wrong | \n", "3 | \n", "anger | \n", "
3 | \n", "i am ever feeling nostalgic about the fireplac... | \n", "2 | \n", "love | \n", "
4 | \n", "i am feeling grouchy | \n", "3 | \n", "anger | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "
15995 | \n", "i just had a very brief time in the beanbag an... | \n", "0 | \n", "sadness | \n", "
15996 | \n", "i am now turning and i feel pathetic that i am... | \n", "0 | \n", "sadness | \n", "
15997 | \n", "i feel strong and good overall | \n", "1 | \n", "joy | \n", "
15998 | \n", "i feel like this was such a rude comment and i... | \n", "3 | \n", "anger | \n", "
15999 | \n", "i know a lot but i feel so stupid because i ca... | \n", "0 | \n", "sadness | \n", "
16000 rows × 3 columns
\n", "