{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from transformers import AutoModelForSequenceClassification, AutoTokenizer\n",
"import pandas as pd\n",
"import torch"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 One of the other reviewers has mentioned that ...\n",
"1 A wonderful little production. <br /><br />The...\n",
"2 I thought this was a wonderful way to spend ti...\n",
"3 Basically there's a family where a little boy ...\n",
"4 Petter Mattei's \"Love in the Time of Money\" is...\n",
" ... \n",
"495 \"American Nightmare\" is officially tied, in my...\n",
"496 First off, I have to say that I loved the book...\n",
"497 This movie was extremely boring. I only laughe...\n",
"498 I was disgusted by this movie. No it wasn't be...\n",
"499 Such a joyous world has been created for us in...\n",
"Name: review, Length: 500, dtype: object"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Load only what the notebook uses: the first N_REVIEWS rows of the \"review\"\n",
"# column. Passing usecols/nrows lets read_csv stop early instead of parsing the\n",
"# whole dataset and slicing it afterwards, and `reviews` is bound once (to a\n",
"# Series) rather than first to a DataFrame and then to a Series.\n",
"N_REVIEWS = 500\n",
"reviews = pd.read_csv(\n",
"    '../Datasets/IMDB Dataset.csv', usecols=[\"review\"], nrows=N_REVIEWS\n",
")[\"review\"]\n",
"reviews"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']\n",
"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
]
},
{
"data": {
"text/html": [
"
\n", " | label | \n", "score | \n", "
---|---|---|
0 | \n", "POSITIVE | \n", "0.508178 | \n", "
1 | \n", "POSITIVE | \n", "0.521151 | \n", "
2 | \n", "POSITIVE | \n", "0.528036 | \n", "
3 | \n", "POSITIVE | \n", "0.517413 | \n", "
4 | \n", "POSITIVE | \n", "0.520384 | \n", "
... | \n", "... | \n", "... | \n", "
495 | \n", "POSITIVE | \n", "0.528022 | \n", "
496 | \n", "POSITIVE | \n", "0.512645 | \n", "
497 | \n", "POSITIVE | \n", "0.524352 | \n", "
498 | \n", "POSITIVE | \n", "0.503319 | \n", "
499 | \n", "POSITIVE | \n", "0.526241 | \n", "
500 rows × 2 columns
\n", "