{"cells":[{"cell_type":"markdown","metadata":{"id":"QbM7x-5UEzUR"},"source":["# Sentiment Analysis using BERT\n","\n","BERT (Bidirectional Encoder Representations from Transformers) is a “new method of pre-training language representations” developed by Google and released in late 2018."]},{"cell_type":"markdown","metadata":{"id":"Q6hKNfAlEzUS"},"source":["### Import Libraries and Set the initial variables"]},{"cell_type":"code","execution_count":1,"metadata":{"execution":{"iopub.execute_input":"2023-07-10T16:50:49.495093Z","iopub.status.busy":"2023-07-10T16:50:49.494748Z","iopub.status.idle":"2023-07-10T16:50:58.493961Z","shell.execute_reply":"2023-07-10T16:50:58.493016Z","shell.execute_reply.started":"2023-07-10T16:50:49.495058Z"},"id":"vezpsX-7GphM","outputId":"a7164402-8a7b-4e4c-e118-ce6498ba4f2e","trusted":true},"outputs":[{"name":"stdout","output_type":"stream","text":["Requirement already satisfied: transformers in /opt/conda/lib/python3.7/site-packages (3.5.1)\n","Requirement already satisfied: regex!=2019.12.17 in /opt/conda/lib/python3.7/site-packages (from transformers) (2020.4.4)\n","Requirement already satisfied: requests in /opt/conda/lib/python3.7/site-packages (from transformers) (2.23.0)\n","Requirement already satisfied: numpy in /opt/conda/lib/python3.7/site-packages (from transformers) (1.18.5)\n","Requirement already satisfied: tqdm>=4.27 in /opt/conda/lib/python3.7/site-packages (from transformers) (4.45.0)\n","Requirement already satisfied: sacremoses in /opt/conda/lib/python3.7/site-packages (from transformers) (0.0.43)\n","Requirement already satisfied: protobuf in /opt/conda/lib/python3.7/site-packages (from transformers) (3.14.0)\n","Requirement already satisfied: sentencepiece==0.1.91 in /opt/conda/lib/python3.7/site-packages (from transformers) (0.1.91)\n","Requirement already satisfied: filelock in /opt/conda/lib/python3.7/site-packages (from transformers) (3.0.10)\n","Requirement already satisfied: packaging in 
/opt/conda/lib/python3.7/site-packages (from transformers) (20.1)\n","Requirement already satisfied: tokenizers==0.9.3 in /opt/conda/lib/python3.7/site-packages (from transformers) (0.9.3)\n","Requirement already satisfied: six in /opt/conda/lib/python3.7/site-packages (from packaging->transformers) (1.14.0)\n","Requirement already satisfied: pyparsing>=2.0.2 in /opt/conda/lib/python3.7/site-packages (from packaging->transformers) (2.4.7)\n","Requirement already satisfied: six in /opt/conda/lib/python3.7/site-packages (from packaging->transformers) (1.14.0)\n","Requirement already satisfied: idna<3,>=2.5 in /opt/conda/lib/python3.7/site-packages (from requests->transformers) (2.9)\n","Requirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.7/site-packages (from requests->transformers) (2020.12.5)\n","Requirement already satisfied: chardet<4,>=3.0.2 in /opt/conda/lib/python3.7/site-packages (from requests->transformers) (3.0.4)\n","Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /opt/conda/lib/python3.7/site-packages (from requests->transformers) (1.25.9)\n","Requirement already satisfied: six in /opt/conda/lib/python3.7/site-packages (from packaging->transformers) (1.14.0)\n","Requirement already satisfied: joblib in /opt/conda/lib/python3.7/site-packages (from sacremoses->transformers) (0.14.1)\n","Requirement already satisfied: regex!=2019.12.17 in /opt/conda/lib/python3.7/site-packages (from transformers) (2020.4.4)\n","Requirement already satisfied: click in /opt/conda/lib/python3.7/site-packages (from sacremoses->transformers) (7.1.1)\n","Requirement already satisfied: tqdm>=4.27 in /opt/conda/lib/python3.7/site-packages (from transformers) (4.45.0)\n","\u001b[33mWARNING: You are using pip version 20.3.1; however, version 23.1.2 is available.\n","You should consider upgrading via the '/opt/conda/bin/python3.7 -m pip install --upgrade pip' command.\u001b[0m\n"]}],"source":["!pip install 
transformers"]},{"cell_type":"code","execution_count":2,"metadata":{"execution":{"iopub.execute_input":"2023-07-10T16:50:58.497414Z","iopub.status.busy":"2023-07-10T16:50:58.497019Z","iopub.status.idle":"2023-07-10T16:51:07.643083Z","shell.execute_reply":"2023-07-10T16:51:07.642100Z","shell.execute_reply.started":"2023-07-10T16:50:58.497372Z"},"id":"WtQykqrfEzUT","trusted":true},"outputs":[],"source":["# Import necessary libraries\n","import numpy as np\n","import pandas as pd\n","import seaborn as sns\n","from pylab import rcParams\n","import matplotlib.pyplot as plt\n","from matplotlib import rc\n","from sklearn.model_selection import train_test_split\n","from sklearn.metrics import confusion_matrix, classification_report\n","from collections import defaultdict\n","from textwrap import wrap\n","\n","# Torch ML libraries\n","import transformers\n","from transformers import BertModel, BertTokenizer, AdamW, get_linear_schedule_with_warmup\n","import torch\n","from torch import nn, optim\n","from torch.utils.data import Dataset, DataLoader\n","\n","# Misc.\n","import warnings\n","warnings.filterwarnings('ignore')"]},{"cell_type":"code","execution_count":3,"metadata":{"execution":{"iopub.execute_input":"2023-07-10T16:51:07.645102Z","iopub.status.busy":"2023-07-10T16:51:07.644736Z","iopub.status.idle":"2023-07-10T16:51:07.673480Z","shell.execute_reply":"2023-07-10T16:51:07.672727Z","shell.execute_reply.started":"2023-07-10T16:51:07.645062Z"},"id":"gRINjFWWEzUb","trusted":true},"outputs":[],"source":["# Set initial variables and constants\n","%config InlineBackend.figure_format='retina'\n","\n","# Graph Designs\n","sns.set(style='whitegrid', palette='muted', font_scale=1.2)\n","HAPPY_COLORS_PALETTE = [\"#01BEFE\", \"#FFDD00\", \"#FF7D00\", \"#FF006D\", \"#ADFF02\", \"#8F00FF\"]\n","sns.set_palette(sns.color_palette(HAPPY_COLORS_PALETTE))\n","rcParams['figure.figsize'] = 12, 8\n","\n","# Random seed for reproducibility\n","RANDOM_SEED = 
42\n","np.random.seed(RANDOM_SEED)\n","torch.manual_seed(RANDOM_SEED)\n","\n","# Set GPU\n","device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")"]},{"cell_type":"markdown","metadata":{"id":"LQOFO5MSEzUf"},"source":["### Load the data"]},{"cell_type":"code","execution_count":4,"metadata":{"execution":{"iopub.execute_input":"2023-07-10T16:51:07.675860Z","iopub.status.busy":"2023-07-10T16:51:07.675332Z","iopub.status.idle":"2023-07-10T16:51:07.906167Z","shell.execute_reply":"2023-07-10T16:51:07.905252Z","shell.execute_reply.started":"2023-07-10T16:51:07.675813Z"},"id":"g6b5ajqzEzUg","outputId":"a7888891-5d69-42b7-a1e0-ccdbe75f04e4","trusted":true},"outputs":[{"data":{"text/plain":["(12495, 12)"]},"execution_count":4,"metadata":{},"output_type":"execute_result"}],"source":["df = pd.read_csv('../input/google-play-store-reviews/reviews.csv')\n","df.shape"]},{"cell_type":"code","execution_count":5,"metadata":{"execution":{"iopub.execute_input":"2023-07-10T16:51:07.909988Z","iopub.status.busy":"2023-07-10T16:51:07.909702Z","iopub.status.idle":"2023-07-10T16:51:07.939908Z","shell.execute_reply":"2023-07-10T16:51:07.939226Z","shell.execute_reply.started":"2023-07-10T16:51:07.909959Z"},"id":"eQ5Uwg8xEzUk","outputId":"2b23324b-d208-462f-d011-95c2f7847390","trusted":true},"outputs":[{"data":{"text/html":["
\n"," | reviewId | \n","userName | \n","userImage | \n","content | \n","score | \n","thumbsUpCount | \n","reviewCreatedVersion | \n","at | \n","replyContent | \n","repliedAt | \n","sortOrder | \n","appId | \n","
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n","gp:AOqpTOEhZuqSqqWnaKRgv-9ABYdajFUB0WugPGh-SG-... | \n","Eric Tie | \n","https://play-lh.googleusercontent.com/a-/AOh14... | \n","I cannot open the app anymore | \n","1 | \n","0 | \n","5.4.0.6 | \n","2020-10-27 21:24:41 | \n","NaN | \n","NaN | \n","newest | \n","com.anydo | \n","
1 | \n","gp:AOqpTOH0WP4IQKBZ2LrdNmFy_YmpPCVrV3diEU9KGm3... | \n","john alpha | \n","https://play-lh.googleusercontent.com/a-/AOh14... | \n","I have been begging for a refund from this app... | \n","1 | \n","0 | \n","NaN | \n","2020-10-27 14:03:28 | \n","Please note that from checking our records, yo... | \n","2020-10-27 15:05:52 | \n","newest | \n","com.anydo | \n","
2 | \n","gp:AOqpTOEMCkJB8Iq1p-r9dPwnSYadA5BkPWTf32Z1azu... | \n","Sudhakar .S | \n","https://play-lh.googleusercontent.com/a-/AOh14... | \n","Very costly for the premium version (approx In... | \n","1 | \n","0 | \n","NaN | \n","2020-10-27 08:18:40 | \n","NaN | \n","NaN | \n","newest | \n","com.anydo | \n","
3 | \n","gp:AOqpTOGFrUWuKGycpje8kszj3uwHN6tU_fd4gLVFy9z... | \n","SKGflorida@bellsouth.net DAVID S | \n","https://play-lh.googleusercontent.com/-75aK0WF... | \n","Used to keep me organized, but all the 2020 UP... | \n","1 | \n","0 | \n","NaN | \n","2020-10-26 13:28:07 | \n","What do you find troublesome about the update?... | \n","2020-10-26 14:58:29 | \n","newest | \n","com.anydo | \n","
4 | \n","gp:AOqpTOHls7DW8wmDFzTkHwxuqFkdNQtKHmO6Pt9jhZE... | \n","Louann Stoker | \n","https://play-lh.googleusercontent.com/-pBcY_Z-... | \n","Dan Birthday Oct 28 | \n","1 | \n","0 | \n","5.6.0.7 | \n","2020-10-26 06:10:50 | \n","NaN | \n","NaN | \n","newest | \n","com.anydo | \n","