{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "# dataset link (Turkis)\n", "# https://sites.google.com/site/offensevalsharedtask/more-datasets" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/home/sebit/anaconda3/envs/dl_env/lib/python3.9/site-packages/neptune/internal/backends/hosted_client.py:51: NeptuneDeprecationWarning: The 'neptune-client' package has been deprecated and will be removed in the future. Install the 'neptune' package instead. For more, see https://docs.neptune.ai/setup/upgrading/\n", " from neptune.version import version as neptune_client_version\n", "/home/sebit/anaconda3/envs/dl_env/lib/python3.9/site-packages/pytorch_lightning/loggers/neptune.py:39: NeptuneDeprecationWarning: You're importing the Neptune client library via the deprecated `neptune.new` module, which will be removed in a future release. Import directly from `neptune` instead.\n", " from neptune import new as neptune\n" ] } ], "source": [ "import os\n", "import numpy as np\n", "import pandas as pd\n", "import pytorch_lightning as pl\n", "import random\n", "import torch\n", "import emoji\n", "\n", "\n", "import datetime\n", "import numpy as np\n", "import torch.optim as optim\n", "\n", "\n", "import torch.nn as nn\n", "\n", "from torch.utils.data import DataLoader,Dataset,random_split,TensorDataset ,RandomSampler, SequentialSampler\n", "from torchmetrics import Accuracy, F1Score \n", "from sklearn.preprocessing import LabelEncoder\n", "from pytorch_lightning.callbacks import EarlyStopping,ModelCheckpoint\n", "from pytorch_lightning.loggers import TensorBoardLogger,MLFlowLogger\n", "from sklearn.model_selection import train_test_split\n", "\n", "from sklearn.preprocessing import LabelEncoder\n", "from transformers import BertForSequenceClassification, AdamW, BertConfig,BertTokenizer,get_linear_schedule_with_warmup" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "device(type='cuda', index=0)" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n", "device" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "True" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "torch.cuda.is_available()" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "seed_val = 42\n", "random.seed(seed_val)\n", "np.random.seed(seed_val)\n", "torch.manual_seed(seed_val)\n", "torch.cuda.manual_seed_all(seed_val)" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "# load dataaset\n" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "# train_df=pd.read_csv('SemEval-2020 dataset/offenseval2020-turkish/offenseval2020-turkish/offenseval-tr-training-v1/offenseval-tr-training-v1.tsv',sep='\\t')\n", "# test_df=pd.read_csv('SemEval-2020 dataset/offenseval2020-turkish/offenseval2020-turkish/offenseval-tr-testset-v1/offenseval-tr-testset-v1.tsv',sep='\\t')" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "ename": "NameError", "evalue": "name 'train_df' is not defined", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", "Cell \u001b[0;32mIn[6], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m train_df\u001b[39m=\u001b[39mpd\u001b[39m.\u001b[39mconcat([train_df,test_df], axis\u001b[39m=\u001b[39m\u001b[39m0\u001b[39m)\n\u001b[1;32m 2\u001b[0m train_df\u001b[39m=\u001b[39mtrain_df\u001b[39m.\u001b[39mdrop([\u001b[39m'\u001b[39m\u001b[39mid\u001b[39m\u001b[39m'\u001b[39m], axis\u001b[39m=\u001b[39m\u001b[39m1\u001b[39m)\n", "\u001b[0;31mNameError\u001b[0m: name 'train_df' is not defined" ] } ], "source": [ "train_df=pd.concat([train_df,test_df], axis=0)\n", "train_df=train_df.drop(['id'], axis=1)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "subtask_a\n", "NOT 25231\n", "OFF 6046\n", "Name: count, dtype: int64" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train_df['subtask_a'].value_counts()" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "data=train_df['tweet'].tolist()\n", "for i in range(len(data)):\n", " data[i] = data[i].replace('@USER','')\n", " data[i] = data[i].replace('#','')\n", " data[i] = data[i].replace('$','')\n", " data[i] = emoji.demojize(data[i])\n", " \n", "train_df['tweet'] = data" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "lab = LabelEncoder()\n", "train_df['subtask_a'] = lab.fit_transform(train_df['subtask_a'])" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "subtask_a\n", "0 25231\n", "1 6046\n", "2 3515\n", "Name: count, dtype: int64" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train_df['subtask_a'].value_counts()" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "train_df.drop(train_df[train_df['subtask_a'] == 2].index, inplace = True)" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "subtask_a\n", "0 22345\n", "1 5417\n", "Name: count, dtype: int64" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train_df['subtask_a'].value_counts()" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
tweetsubtask_a
3515holstein ineği (alacalı siyah-beyaz inek, yani...0
3516Haaaa. O zaman oylar Binali'ye demek.0
3517Disk genel merkez yönetimine HDP'nin hiç etki...0
3518Bir insanı zorla kaliteli yapamazsın. Sen elin...0
3519Sus yaa açtım sonra korkudan telefon elimden ...0
.........
31272Bu ödül sunan kızı kim giydirdiyse, kızın en b...0
31273Bunu sana beddua olarak etmiyorum bunlar ilerd...0
31274CHP'liler sandıkları bırakmıyor üstüne oturmuş...1
31275karanlığın içinde yalnız kalsam ne oluuuuurr0
31276Ne yalan söyleyeyim bu haftalıkta fitil olara...0
\n", "

27762 rows × 2 columns

\n", "
" ], "text/plain": [ " tweet subtask_a\n", "3515 holstein ineği (alacalı siyah-beyaz inek, yani... 0\n", "3516 Haaaa. O zaman oylar Binali'ye demek. 0\n", "3517 Disk genel merkez yönetimine HDP'nin hiç etki... 0\n", "3518 Bir insanı zorla kaliteli yapamazsın. Sen elin... 0\n", "3519 Sus yaa açtım sonra korkudan telefon elimden ... 0\n", "... ... ...\n", "31272 Bu ödül sunan kızı kim giydirdiyse, kızın en b... 0\n", "31273 Bunu sana beddua olarak etmiyorum bunlar ilerd... 0\n", "31274 CHP'liler sandıkları bırakmıyor üstüne oturmuş... 1\n", "31275 karanlığın içinde yalnız kalsam ne oluuuuurr 0\n", "31276 Ne yalan söyleyeyim bu haftalıkta fitil olara... 0\n", "\n", "[27762 rows x 2 columns]" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train_df" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": [ "data = train_df.tweet.values\n", "labels = train_df.subtask_a.values" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "# BERT Tokenizer" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [], "source": [ "tokenizer = BertTokenizer.from_pretrained(\"bert-base-multilingual-cased\", do_basic_tokenize=True)\n", "# tokenizer.add_tokens(data)" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " Original: Sallandık diyorum, merkezi bilmiyorum, sokağa fırlamadım, duruyorum. Senden bir açıklama gelmeden, ben bu sandığı terketmiyorum \n", "Tokenized: ['Sal', '##landı', '##k', 'di', '##yor', '##um', ',', 'merkezi', 'bil', '##mi', '##yor', '##um', ',', 'sok', '##a', '##ğa', 'f', '##ır', '##lama', '##dı', '##m', ',', 'dur', '##uy', '##orum', '.', 'Sen', '##den', 'bir', 'açık', '##lama', 'gel', '##mede', '##n', ',', 'ben', 'bu', 'sand', '##ığı', 'ter', '##ket', '##mi', '##yor', '##um']\n", "Token IDs: [64831, 35783, 10174, 10120, 26101, 10465, 117, 47522, 13897, 10500, 26101, 10465, 117, 29509, 10113, 25163, 174, 17145, 24540, 17532, 10147, 117, 28959, 53452, 28048, 119, 18082, 10633, 10561, 71769, 24540, 74458, 59268, 10115, 117, 11015, 11499, 45989, 28581, 12718, 13650, 10500, 26101, 10465]\n" ] } ], "source": [ "print(' Original: ', data[78])\n", "print('Tokenized: ', tokenizer.tokenize(data[78]))\n", "print('Token IDs: ', tokenizer.convert_tokens_to_ids(tokenizer.tokenize(data[78])))" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "# Tokenize Dataset" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Token indices sequence length is longer than the specified maximum sequence length for this model (1277 > 512). Running this sequence through the model will result in indexing errors\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Max sentence length: 6906\n" ] } ], "source": [ "max_len = 0\n", "for sent in data:\n", "\n", " input_ids = tokenizer.encode(sent, add_special_tokens=True)\n", " max_len = max(max_len, len(input_ids))\n", "\n", "print('Max sentence length: ', max_len)" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.\n", "/home/sebit/anaconda3/envs/testenv/lib/python3.9/site-packages/transformers/tokenization_utils_base.py:2418: FutureWarning: The `pad_to_max_length` argument is deprecated and will be removed in a future version, use `padding=True` or `padding='longest'` to pad to the longest sequence in the batch, or use `padding='max_length'` to pad to a max length. In this case, you can give a specific length with `max_length` (e.g. `max_length=45`) or leave max_length to None to pad to the maximal input size of the model (e.g. 512 for Bert).\n", " warnings.warn(\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Original: holstein ineği (alacalı siyah-beyaz inek, yani hollanda ineği) en verimli süt alınan inek ırkıymış, trt belgesel'de öyle söylediler\n", "Token IDs: tensor([ 101, 110516, 16206, 10106, 10112, 16054, 113, 21739, 15794,\n", " 10713, 34543, 10237, 118, 110744, 10106, 10707, 117, 84251,\n", " 46232, 41971, 10106, 10112, 16054, 114, 10110, 55011, 98373,\n", " 187, 41559, 10164, 65890, 10106, 10707, 321, 16299, 10713,\n", " 16889, 19733, 117, 32221, 10123, 34831, 12912, 112, 10104,\n", " 276, 18369, 100721, 18369, 28113, 10165, 102, 0, 0,\n", " 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", " 0])\n" ] } ], "source": [ "input_ids = []\n", "attention_masks = []\n", "\n", "for sent in data:\n", " encoded_dict = tokenizer.encode_plus(\n", " sent, \n", " add_special_tokens = True, \n", " max_length = 64, \n", " pad_to_max_length = True,\n", " return_attention_mask = True, \n", " return_tensors = 'pt', \n", " )\n", " \n", " \n", " input_ids.append(encoded_dict['input_ids'])\n", " attention_masks.append(encoded_dict['attention_mask'])\n", "\n", "\n", "input_ids = torch.cat(input_ids, dim=0)\n", "attention_masks = torch.cat(attention_masks, dim=0)\n", "labels = torch.tensor(labels)\n", "\n", "\n", "print('Original: ', data[0])\n", "print('Token IDs:', input_ids[0])" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "# Split Dataset" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "24,985 training samples\n", "2,777 validation samples\n" ] } ], "source": [ "dataset = TensorDataset(input_ids, attention_masks, labels)\n", "train_size = int(0.9 * len(dataset))\n", "val_size = len(dataset) - train_size\n", "\n", "\n", "train_dataset, val_dataset = random_split(dataset, [train_size, val_size])\n", "\n", "print('{:>5,} training samples'.format(train_size))\n", "print('{:>5,} validation samples'.format(val_size))" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']\n", "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n" ] }, { "data": { "text/plain": [ "BertForSequenceClassification(\n", " (bert): BertModel(\n", " (embeddings): BertEmbeddings(\n", " (word_embeddings): Embedding(119547, 768, padding_idx=0)\n", " (position_embeddings): Embedding(512, 768)\n", " (token_type_embeddings): Embedding(2, 768)\n", " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", " (dropout): Dropout(p=0.1, inplace=False)\n", " )\n", " (encoder): BertEncoder(\n", " (layer): ModuleList(\n", " (0-11): 12 x BertLayer(\n", " (attention): BertAttention(\n", " (self): BertSelfAttention(\n", " (query): Linear(in_features=768, out_features=768, bias=True)\n", " (key): Linear(in_features=768, out_features=768, bias=True)\n", " (value): Linear(in_features=768, out_features=768, bias=True)\n", " (dropout): Dropout(p=0.1, inplace=False)\n", " )\n", " (output): BertSelfOutput(\n", " (dense): Linear(in_features=768, out_features=768, bias=True)\n", " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", " (dropout): Dropout(p=0.1, inplace=False)\n", " )\n", " )\n", " (intermediate): BertIntermediate(\n", " (dense): Linear(in_features=768, out_features=3072, bias=True)\n", " (intermediate_act_fn): GELUActivation()\n", " )\n", " (output): BertOutput(\n", " (dense): Linear(in_features=3072, out_features=768, bias=True)\n", " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", " (dropout): Dropout(p=0.1, inplace=False)\n", " )\n", " )\n", " )\n", " )\n", " (pooler): BertPooler(\n", " (dense): Linear(in_features=768, out_features=768, bias=True)\n", " (activation): Tanh()\n", " )\n", " )\n", " (dropout): Dropout(p=0.1, inplace=False)\n", " (classifier): Linear(in_features=768, out_features=2, bias=True)\n", ")" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from transformers import BertForSequenceClassification, AdamW, BertConfig\n", "\n", "model = BertForSequenceClassification.from_pretrained(\n", " \"bert-base-multilingual-cased\",\n", " num_labels = 2, \n", " output_attentions = False,\n", " output_hidden_states = False, \n", ")\n", "\n", "model.cuda()" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "The BERT model has 201 different named parameters.\n", "\n", "==== Embedding Layer ====\n", "\n", "bert.embeddings.word_embeddings.weight (119547, 768)\n", "bert.embeddings.position_embeddings.weight (512, 768)\n", "bert.embeddings.token_type_embeddings.weight (2, 768)\n", "bert.embeddings.LayerNorm.weight (768,)\n", "bert.embeddings.LayerNorm.bias (768,)\n", "\n", "==== First Transformer ====\n", "\n", "bert.encoder.layer.0.attention.self.query.weight (768, 768)\n", "bert.encoder.layer.0.attention.self.query.bias (768,)\n", "bert.encoder.layer.0.attention.self.key.weight (768, 768)\n", "bert.encoder.layer.0.attention.self.key.bias (768,)\n", "bert.encoder.layer.0.attention.self.value.weight (768, 768)\n", "bert.encoder.layer.0.attention.self.value.bias (768,)\n", "bert.encoder.layer.0.attention.output.dense.weight (768, 768)\n", "bert.encoder.layer.0.attention.output.dense.bias (768,)\n", "bert.encoder.layer.0.attention.output.LayerNorm.weight (768,)\n", "bert.encoder.layer.0.attention.output.LayerNorm.bias (768,)\n", "bert.encoder.layer.0.intermediate.dense.weight (3072, 768)\n", "bert.encoder.layer.0.intermediate.dense.bias (3072,)\n", "bert.encoder.layer.0.output.dense.weight (768, 3072)\n", "bert.encoder.layer.0.output.dense.bias (768,)\n", "bert.encoder.layer.0.output.LayerNorm.weight (768,)\n", "bert.encoder.layer.0.output.LayerNorm.bias (768,)\n", "\n", "==== Output Layer ====\n", "\n", "bert.pooler.dense.weight (768, 768)\n", "bert.pooler.dense.bias (768,)\n", "classifier.weight (2, 768)\n", "classifier.bias (2,)\n" ] } ], "source": [ "params = list(model.named_parameters())\n", "\n", "print('The BERT model has {:} different named parameters.\\n'.format(len(params)))\n", "\n", "print('==== Embedding Layer ====\\n')\n", "\n", "for p in params[0:5]:\n", " print(\"{:<55} {:>12}\".format(p[0], str(tuple(p[1].size()))))\n", "\n", "print('\\n==== First Transformer ====\\n')\n", "\n", "for p in params[5:21]:\n", " print(\"{:<55} {:>12}\".format(p[0], str(tuple(p[1].size()))))\n", "\n", "print('\\n==== Output Layer ====\\n')\n", "\n", "for p in params[-4:]:\n", " print(\"{:<55} {:>12}\".format(p[0], str(tuple(p[1].size()))))" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/home/sebit/anaconda3/envs/testenv/lib/python3.9/site-packages/transformers/optimization.py:411: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n", " warnings.warn(\n" ] } ], "source": [ "optimizer = AdamW(model.parameters(),\n", " lr = 2e-5,\n", " eps = 1e-8\n", " )" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [], "source": [ "def flat_accuracy(preds, labels):\n", " pred_flat = np.argmax(preds, axis=1).flatten()\n", " labels_flat = labels.flatten()\n", " return np.sum(pred_flat == labels_flat) / len(labels_flat)" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [], "source": [ "def format_time(elapsed):\n", "\n", " elapsed_rounded = int(round((elapsed)))\n", " return str(datetime.timedelta(seconds=elapsed_rounded))\n" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [], "source": [ "class sinKafModel(pl.LightningModule):\n", " def __init__(self, model, optimizer, scheduler):\n", " super().__init__()\n", " self.model = model\n", " self.optimizer = optimizer\n", " self.scheduler = scheduler\n", "\n", "\n", " def forward(self, input_ids, attention_mask, labels):\n", " outputs = self.model(input_ids, attention_mask=attention_mask, labels=labels)\n", " return outputs\n", "\n", " def training_step(self, batch, batch_idx):\n", " input_ids, input_mask, labels = batch\n", " outputs = self(input_ids, input_mask, labels)\n", " loss = outputs.loss\n", " self.log('train_loss', loss)\n", " return loss\n", "\n", " def validation_step(self, batch, batch_idx):\n", " input_ids, input_mask, labels = batch\n", " outputs = self(input_ids, input_mask, labels)\n", " loss = outputs.loss\n", " logits = outputs.logits\n", " preds = torch.argmax(logits, dim=1)\n", " acc = (preds == labels).sum().item() / len(labels)\n", " self.log('val_loss', loss)\n", " self.log('val_acc', acc)\n", " return loss\n", "\n", " def configure_optimizers(self):\n", " return [self.optimizer], [self.scheduler]\n", "\n", " # def train_dataloader(self):\n", " # return self.train_dataloader\n", "\n", " # def val_dataloader(self):\n", " # return self.validation_dataloader\n" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [], "source": [ "train_dataloader = DataLoader(train_dataset, sampler = RandomSampler(train_dataset), batch_size = 2 )\n", "validation_dataloader = DataLoader(val_dataset, sampler = SequentialSampler(val_dataset), batch_size = 2 )" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [], "source": [ "epochs = 4\n", "total_steps = len(train_dataloader) * epochs\n", "scheduler = get_linear_schedule_with_warmup(optimizer, \n", " num_warmup_steps = 0, \n", " num_training_steps = total_steps)" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "GPU available: True (cuda), used: True\n", "TPU available: False, using: 0 TPU cores\n", "IPU available: False, using: 0 IPUs\n", "HPU available: False, using: 0 HPUs\n", "/home/sebit/anaconda3/envs/testenv/lib/python3.9/site-packages/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py:67: UserWarning: Starting from v1.9.0, `tensorboardX` has been removed as a dependency of the `pytorch_lightning` package, due to potential conflicts with other packages in the ML ecosystem. For this reason, `logger=True` will use `CSVLogger` as the default logger, unless the `tensorboard` or `tensorboardX` packages are found. Please `pip install lightning[extra]` or one of them to enable TensorBoard support by default\n", " warning_cache.warn(\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n", "\n", " | Name | Type | Params\n", "--------------------------------------------------------\n", "0 | model | BertForSequenceClassification | 177 M \n", "--------------------------------------------------------\n", "177 M Trainable params\n", "0 Non-trainable params\n", "177 M Total params\n", "711.420 Total estimated model params size (MB)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Sanity Checking DataLoader 0: 0%| | 0/2 [00:00> allocated memory try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mOutOfMemoryError\u001b[0m Traceback (most recent call last)", "Cell \u001b[0;32mIn[28], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m model \u001b[39m=\u001b[39m sinKafModel(model, optimizer, scheduler)\n\u001b[1;32m 2\u001b[0m trainer \u001b[39m=\u001b[39m pl\u001b[39m.\u001b[39mTrainer( max_epochs\u001b[39m=\u001b[39mepochs, limit_train_batches\u001b[39m=\u001b[39m\u001b[39m0.1\u001b[39m, devices\u001b[39m=\u001b[39m\u001b[39m1\u001b[39m, accelerator\u001b[39m=\u001b[39m\u001b[39m'\u001b[39m\u001b[39mgpu\u001b[39m\u001b[39m'\u001b[39m) \n\u001b[0;32m----> 3\u001b[0m trainer\u001b[39m.\u001b[39;49mfit(model,train_dataloader,validation_dataloader )\n", "File \u001b[0;32m~/anaconda3/envs/testenv/lib/python3.9/site-packages/pytorch_lightning/trainer/trainer.py:532\u001b[0m, in \u001b[0;36mTrainer.fit\u001b[0;34m(self, model, train_dataloaders, val_dataloaders, datamodule, ckpt_path)\u001b[0m\n\u001b[1;32m 530\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mstrategy\u001b[39m.\u001b[39m_lightning_module \u001b[39m=\u001b[39m model\n\u001b[1;32m 531\u001b[0m _verify_strategy_supports_compile(model, \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mstrategy)\n\u001b[0;32m--> 532\u001b[0m call\u001b[39m.\u001b[39;49m_call_and_handle_interrupt(\n\u001b[1;32m 533\u001b[0m \u001b[39mself\u001b[39;49m, \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_fit_impl, model, train_dataloaders, val_dataloaders, datamodule, ckpt_path\n\u001b[1;32m 534\u001b[0m )\n", "File \u001b[0;32m~/anaconda3/envs/testenv/lib/python3.9/site-packages/pytorch_lightning/trainer/call.py:43\u001b[0m, in \u001b[0;36m_call_and_handle_interrupt\u001b[0;34m(trainer, trainer_fn, *args, **kwargs)\u001b[0m\n\u001b[1;32m 41\u001b[0m \u001b[39mif\u001b[39;00m trainer\u001b[39m.\u001b[39mstrategy\u001b[39m.\u001b[39mlauncher \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[1;32m 42\u001b[0m \u001b[39mreturn\u001b[39;00m trainer\u001b[39m.\u001b[39mstrategy\u001b[39m.\u001b[39mlauncher\u001b[39m.\u001b[39mlaunch(trainer_fn, \u001b[39m*\u001b[39margs, trainer\u001b[39m=\u001b[39mtrainer, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n\u001b[0;32m---> 43\u001b[0m \u001b[39mreturn\u001b[39;00m trainer_fn(\u001b[39m*\u001b[39;49margs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n\u001b[1;32m 45\u001b[0m \u001b[39mexcept\u001b[39;00m _TunerExitException:\n\u001b[1;32m 46\u001b[0m _call_teardown_hook(trainer)\n", "File \u001b[0;32m~/anaconda3/envs/testenv/lib/python3.9/site-packages/pytorch_lightning/trainer/trainer.py:571\u001b[0m, in \u001b[0;36mTrainer._fit_impl\u001b[0;34m(self, model, train_dataloaders, val_dataloaders, datamodule, ckpt_path)\u001b[0m\n\u001b[1;32m 561\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_data_connector\u001b[39m.\u001b[39mattach_data(\n\u001b[1;32m 562\u001b[0m model, train_dataloaders\u001b[39m=\u001b[39mtrain_dataloaders, val_dataloaders\u001b[39m=\u001b[39mval_dataloaders, datamodule\u001b[39m=\u001b[39mdatamodule\n\u001b[1;32m 563\u001b[0m )\n\u001b[1;32m 565\u001b[0m ckpt_path \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_checkpoint_connector\u001b[39m.\u001b[39m_select_ckpt_path(\n\u001b[1;32m 566\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mstate\u001b[39m.\u001b[39mfn,\n\u001b[1;32m 567\u001b[0m ckpt_path,\n\u001b[1;32m 568\u001b[0m model_provided\u001b[39m=\u001b[39m\u001b[39mTrue\u001b[39;00m,\n\u001b[1;32m 569\u001b[0m model_connected\u001b[39m=\u001b[39m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mlightning_module \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m,\n\u001b[1;32m 570\u001b[0m )\n\u001b[0;32m--> 571\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_run(model, ckpt_path\u001b[39m=\u001b[39;49mckpt_path)\n\u001b[1;32m 573\u001b[0m \u001b[39massert\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mstate\u001b[39m.\u001b[39mstopped\n\u001b[1;32m 574\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mtraining \u001b[39m=\u001b[39m \u001b[39mFalse\u001b[39;00m\n", "File \u001b[0;32m~/anaconda3/envs/testenv/lib/python3.9/site-packages/pytorch_lightning/trainer/trainer.py:980\u001b[0m, in \u001b[0;36mTrainer._run\u001b[0;34m(self, model, ckpt_path)\u001b[0m\n\u001b[1;32m 975\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_signal_connector\u001b[39m.\u001b[39mregister_signal_handlers()\n\u001b[1;32m 977\u001b[0m \u001b[39m# ----------------------------\u001b[39;00m\n\u001b[1;32m 978\u001b[0m \u001b[39m# RUN THE TRAINER\u001b[39;00m\n\u001b[1;32m 979\u001b[0m \u001b[39m# ----------------------------\u001b[39;00m\n\u001b[0;32m--> 980\u001b[0m results \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_run_stage()\n\u001b[1;32m 982\u001b[0m \u001b[39m# ----------------------------\u001b[39;00m\n\u001b[1;32m 983\u001b[0m \u001b[39m# POST-Training CLEAN UP\u001b[39;00m\n\u001b[1;32m 984\u001b[0m \u001b[39m# ----------------------------\u001b[39;00m\n\u001b[1;32m 985\u001b[0m log\u001b[39m.\u001b[39mdebug(\u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m{\u001b[39;00m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39m\u001b[39m__class__\u001b[39m\u001b[39m.\u001b[39m\u001b[39m__name__\u001b[39m\u001b[39m}\u001b[39;00m\u001b[39m: trainer tearing down\u001b[39m\u001b[39m\"\u001b[39m)\n", "File \u001b[0;32m~/anaconda3/envs/testenv/lib/python3.9/site-packages/pytorch_lightning/trainer/trainer.py:1023\u001b[0m, in \u001b[0;36mTrainer._run_stage\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1021\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_run_sanity_check()\n\u001b[1;32m 1022\u001b[0m \u001b[39mwith\u001b[39;00m torch\u001b[39m.\u001b[39mautograd\u001b[39m.\u001b[39mset_detect_anomaly(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_detect_anomaly):\n\u001b[0;32m-> 1023\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mfit_loop\u001b[39m.\u001b[39;49mrun()\n\u001b[1;32m 1024\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mNone\u001b[39;00m\n\u001b[1;32m 1025\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mRuntimeError\u001b[39;00m(\u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mUnexpected state \u001b[39m\u001b[39m{\u001b[39;00m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mstate\u001b[39m}\u001b[39;00m\u001b[39m\"\u001b[39m)\n", "File \u001b[0;32m~/anaconda3/envs/testenv/lib/python3.9/site-packages/pytorch_lightning/loops/fit_loop.py:202\u001b[0m, in \u001b[0;36m_FitLoop.run\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 200\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m 201\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mon_advance_start()\n\u001b[0;32m--> 202\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49madvance()\n\u001b[1;32m 203\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mon_advance_end()\n\u001b[1;32m 204\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_restarting \u001b[39m=\u001b[39m \u001b[39mFalse\u001b[39;00m\n", "File \u001b[0;32m~/anaconda3/envs/testenv/lib/python3.9/site-packages/pytorch_lightning/loops/fit_loop.py:355\u001b[0m, in \u001b[0;36m_FitLoop.advance\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 353\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_data_fetcher\u001b[39m.\u001b[39msetup(combined_loader)\n\u001b[1;32m 354\u001b[0m \u001b[39mwith\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mtrainer\u001b[39m.\u001b[39mprofiler\u001b[39m.\u001b[39mprofile(\u001b[39m\"\u001b[39m\u001b[39mrun_training_epoch\u001b[39m\u001b[39m\"\u001b[39m):\n\u001b[0;32m--> 355\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mepoch_loop\u001b[39m.\u001b[39;49mrun(\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_data_fetcher)\n", "File \u001b[0;32m~/anaconda3/envs/testenv/lib/python3.9/site-packages/pytorch_lightning/loops/training_epoch_loop.py:133\u001b[0m, in \u001b[0;36m_TrainingEpochLoop.run\u001b[0;34m(self, data_fetcher)\u001b[0m\n\u001b[1;32m 131\u001b[0m \u001b[39mwhile\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mdone:\n\u001b[1;32m 132\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m--> 133\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49madvance(data_fetcher)\n\u001b[1;32m 134\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mon_advance_end()\n\u001b[1;32m 135\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_restarting \u001b[39m=\u001b[39m \u001b[39mFalse\u001b[39;00m\n", "File \u001b[0;32m~/anaconda3/envs/testenv/lib/python3.9/site-packages/pytorch_lightning/loops/training_epoch_loop.py:219\u001b[0m, in \u001b[0;36m_TrainingEpochLoop.advance\u001b[0;34m(self, data_fetcher)\u001b[0m\n\u001b[1;32m 216\u001b[0m \u001b[39mwith\u001b[39;00m trainer\u001b[39m.\u001b[39mprofiler\u001b[39m.\u001b[39mprofile(\u001b[39m\"\u001b[39m\u001b[39mrun_training_batch\u001b[39m\u001b[39m\"\u001b[39m):\n\u001b[1;32m 217\u001b[0m \u001b[39mif\u001b[39;00m trainer\u001b[39m.\u001b[39mlightning_module\u001b[39m.\u001b[39mautomatic_optimization:\n\u001b[1;32m 218\u001b[0m \u001b[39m# in automatic optimization, there can only be one optimizer\u001b[39;00m\n\u001b[0;32m--> 219\u001b[0m batch_output \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mautomatic_optimization\u001b[39m.\u001b[39;49mrun(trainer\u001b[39m.\u001b[39;49moptimizers[\u001b[39m0\u001b[39;49m], kwargs)\n\u001b[1;32m 220\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m 221\u001b[0m batch_output \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mmanual_optimization\u001b[39m.\u001b[39mrun(kwargs)\n", "File \u001b[0;32m~/anaconda3/envs/testenv/lib/python3.9/site-packages/pytorch_lightning/loops/optimization/automatic.py:188\u001b[0m, in \u001b[0;36m_AutomaticOptimization.run\u001b[0;34m(self, optimizer, kwargs)\u001b[0m\n\u001b[1;32m 181\u001b[0m closure()\n\u001b[1;32m 183\u001b[0m \u001b[39m# ------------------------------\u001b[39;00m\n\u001b[1;32m 184\u001b[0m \u001b[39m# BACKWARD PASS\u001b[39;00m\n\u001b[1;32m 185\u001b[0m \u001b[39m# ------------------------------\u001b[39;00m\n\u001b[1;32m 186\u001b[0m \u001b[39m# gradient update with accumulated gradients\u001b[39;00m\n\u001b[1;32m 187\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[0;32m--> 188\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_optimizer_step(kwargs\u001b[39m.\u001b[39;49mget(\u001b[39m\"\u001b[39;49m\u001b[39mbatch_idx\u001b[39;49m\u001b[39m\"\u001b[39;49m, \u001b[39m0\u001b[39;49m), closure)\n\u001b[1;32m 190\u001b[0m result \u001b[39m=\u001b[39m closure\u001b[39m.\u001b[39mconsume_result()\n\u001b[1;32m 191\u001b[0m \u001b[39mif\u001b[39;00m result\u001b[39m.\u001b[39mloss \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n", "File \u001b[0;32m~/anaconda3/envs/testenv/lib/python3.9/site-packages/pytorch_lightning/loops/optimization/automatic.py:266\u001b[0m, in \u001b[0;36m_AutomaticOptimization._optimizer_step\u001b[0;34m(self, batch_idx, train_step_and_backward_closure)\u001b[0m\n\u001b[1;32m 263\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39moptim_progress\u001b[39m.\u001b[39moptimizer\u001b[39m.\u001b[39mstep\u001b[39m.\u001b[39mincrement_ready()\n\u001b[1;32m 265\u001b[0m \u001b[39m# model hook\u001b[39;00m\n\u001b[0;32m--> 266\u001b[0m call\u001b[39m.\u001b[39;49m_call_lightning_module_hook(\n\u001b[1;32m 267\u001b[0m trainer,\n\u001b[1;32m 268\u001b[0m \u001b[39m\"\u001b[39;49m\u001b[39moptimizer_step\u001b[39;49m\u001b[39m\"\u001b[39;49m,\n\u001b[1;32m 269\u001b[0m trainer\u001b[39m.\u001b[39;49mcurrent_epoch,\n\u001b[1;32m 270\u001b[0m batch_idx,\n\u001b[1;32m 271\u001b[0m optimizer,\n\u001b[1;32m 272\u001b[0m train_step_and_backward_closure,\n\u001b[1;32m 273\u001b[0m )\n\u001b[1;32m 275\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m should_accumulate:\n\u001b[1;32m 276\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39moptim_progress\u001b[39m.\u001b[39moptimizer\u001b[39m.\u001b[39mstep\u001b[39m.\u001b[39mincrement_completed()\n", "File \u001b[0;32m~/anaconda3/envs/testenv/lib/python3.9/site-packages/pytorch_lightning/trainer/call.py:146\u001b[0m, in \u001b[0;36m_call_lightning_module_hook\u001b[0;34m(trainer, hook_name, pl_module, *args, **kwargs)\u001b[0m\n\u001b[1;32m 143\u001b[0m pl_module\u001b[39m.\u001b[39m_current_fx_name \u001b[39m=\u001b[39m hook_name\n\u001b[1;32m 145\u001b[0m \u001b[39mwith\u001b[39;00m trainer\u001b[39m.\u001b[39mprofiler\u001b[39m.\u001b[39mprofile(\u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m[LightningModule]\u001b[39m\u001b[39m{\u001b[39;00mpl_module\u001b[39m.\u001b[39m\u001b[39m__class__\u001b[39m\u001b[39m.\u001b[39m\u001b[39m__name__\u001b[39m\u001b[39m}\u001b[39;00m\u001b[39m.\u001b[39m\u001b[39m{\u001b[39;00mhook_name\u001b[39m}\u001b[39;00m\u001b[39m\"\u001b[39m):\n\u001b[0;32m--> 146\u001b[0m output \u001b[39m=\u001b[39m fn(\u001b[39m*\u001b[39;49margs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n\u001b[1;32m 148\u001b[0m \u001b[39m# restore current_fx when nested context\u001b[39;00m\n\u001b[1;32m 149\u001b[0m pl_module\u001b[39m.\u001b[39m_current_fx_name \u001b[39m=\u001b[39m prev_fx_name\n", "File \u001b[0;32m~/anaconda3/envs/testenv/lib/python3.9/site-packages/pytorch_lightning/core/module.py:1270\u001b[0m, in \u001b[0;36mLightningModule.optimizer_step\u001b[0;34m(self, epoch, batch_idx, optimizer, optimizer_closure)\u001b[0m\n\u001b[1;32m 1232\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39moptimizer_step\u001b[39m(\n\u001b[1;32m 1233\u001b[0m \u001b[39mself\u001b[39m,\n\u001b[1;32m 1234\u001b[0m epoch: \u001b[39mint\u001b[39m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1237\u001b[0m optimizer_closure: Optional[Callable[[], Any]] \u001b[39m=\u001b[39m \u001b[39mNone\u001b[39;00m,\n\u001b[1;32m 1238\u001b[0m ) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m \u001b[39mNone\u001b[39;00m:\n\u001b[1;32m 1239\u001b[0m \u001b[39m \u001b[39m\u001b[39mr\u001b[39m\u001b[39m\"\"\"Override this method to adjust the default way the :class:`~pytorch_lightning.trainer.trainer.Trainer`\u001b[39;00m\n\u001b[1;32m 1240\u001b[0m \u001b[39m calls the optimizer.\u001b[39;00m\n\u001b[1;32m 1241\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1268\u001b[0m \u001b[39m pg[\"lr\"] = lr_scale * self.learning_rate\u001b[39;00m\n\u001b[1;32m 1269\u001b[0m \u001b[39m \"\"\"\u001b[39;00m\n\u001b[0;32m-> 1270\u001b[0m optimizer\u001b[39m.\u001b[39;49mstep(closure\u001b[39m=\u001b[39;49moptimizer_closure)\n", "File \u001b[0;32m~/anaconda3/envs/testenv/lib/python3.9/site-packages/pytorch_lightning/core/optimizer.py:161\u001b[0m, in \u001b[0;36mLightningOptimizer.step\u001b[0;34m(self, closure, **kwargs)\u001b[0m\n\u001b[1;32m 158\u001b[0m \u001b[39mraise\u001b[39;00m MisconfigurationException(\u001b[39m\"\u001b[39m\u001b[39mWhen `optimizer.step(closure)` is called, the closure should be callable\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[1;32m 160\u001b[0m \u001b[39massert\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_strategy \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m\n\u001b[0;32m--> 161\u001b[0m step_output \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_strategy\u001b[39m.\u001b[39;49moptimizer_step(\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_optimizer, closure, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n\u001b[1;32m 163\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_on_after_step()\n\u001b[1;32m 165\u001b[0m \u001b[39mreturn\u001b[39;00m step_output\n", "File \u001b[0;32m~/anaconda3/envs/testenv/lib/python3.9/site-packages/pytorch_lightning/strategies/strategy.py:231\u001b[0m, in \u001b[0;36mStrategy.optimizer_step\u001b[0;34m(self, optimizer, closure, model, **kwargs)\u001b[0m\n\u001b[1;32m 229\u001b[0m \u001b[39m# TODO(fabric): remove assertion once strategy's optimizer_step typing is fixed\u001b[39;00m\n\u001b[1;32m 230\u001b[0m \u001b[39massert\u001b[39;00m \u001b[39misinstance\u001b[39m(model, pl\u001b[39m.\u001b[39mLightningModule)\n\u001b[0;32m--> 231\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mprecision_plugin\u001b[39m.\u001b[39;49moptimizer_step(optimizer, model\u001b[39m=\u001b[39;49mmodel, closure\u001b[39m=\u001b[39;49mclosure, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n", "File \u001b[0;32m~/anaconda3/envs/testenv/lib/python3.9/site-packages/pytorch_lightning/plugins/precision/precision_plugin.py:116\u001b[0m, in \u001b[0;36mPrecisionPlugin.optimizer_step\u001b[0;34m(self, optimizer, model, closure, **kwargs)\u001b[0m\n\u001b[1;32m 114\u001b[0m \u001b[39m\u001b[39m\u001b[39m\"\"\"Hook to run the optimizer step.\"\"\"\u001b[39;00m\n\u001b[1;32m 115\u001b[0m closure \u001b[39m=\u001b[39m partial(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_wrap_closure, model, optimizer, closure)\n\u001b[0;32m--> 116\u001b[0m \u001b[39mreturn\u001b[39;00m optimizer\u001b[39m.\u001b[39;49mstep(closure\u001b[39m=\u001b[39;49mclosure, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n", "File \u001b[0;32m~/anaconda3/envs/testenv/lib/python3.9/site-packages/torch/optim/lr_scheduler.py:69\u001b[0m, in \u001b[0;36mLRScheduler.__init__..with_counter..wrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 67\u001b[0m instance\u001b[39m.\u001b[39m_step_count \u001b[39m+\u001b[39m\u001b[39m=\u001b[39m \u001b[39m1\u001b[39m\n\u001b[1;32m 68\u001b[0m wrapped \u001b[39m=\u001b[39m func\u001b[39m.\u001b[39m\u001b[39m__get__\u001b[39m(instance, \u001b[39mcls\u001b[39m)\n\u001b[0;32m---> 69\u001b[0m \u001b[39mreturn\u001b[39;00m wrapped(\u001b[39m*\u001b[39;49margs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n", "File \u001b[0;32m~/anaconda3/envs/testenv/lib/python3.9/site-packages/torch/optim/optimizer.py:280\u001b[0m, in \u001b[0;36mOptimizer.profile_hook_step..wrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 276\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m 277\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mRuntimeError\u001b[39;00m(\u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m{\u001b[39;00mfunc\u001b[39m}\u001b[39;00m\u001b[39m must return None or a tuple of (new_args, new_kwargs),\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 278\u001b[0m \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mbut got \u001b[39m\u001b[39m{\u001b[39;00mresult\u001b[39m}\u001b[39;00m\u001b[39m.\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[0;32m--> 280\u001b[0m out \u001b[39m=\u001b[39m func(\u001b[39m*\u001b[39;49margs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n\u001b[1;32m 281\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_optimizer_step_code()\n\u001b[1;32m 283\u001b[0m \u001b[39m# call optimizer step post hooks\u001b[39;00m\n", "File \u001b[0;32m~/anaconda3/envs/testenv/lib/python3.9/site-packages/torch/utils/_contextlib.py:115\u001b[0m, in \u001b[0;36mcontext_decorator..decorate_context\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 112\u001b[0m \u001b[39m@functools\u001b[39m\u001b[39m.\u001b[39mwraps(func)\n\u001b[1;32m 113\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mdecorate_context\u001b[39m(\u001b[39m*\u001b[39margs, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs):\n\u001b[1;32m 114\u001b[0m \u001b[39mwith\u001b[39;00m ctx_factory():\n\u001b[0;32m--> 115\u001b[0m \u001b[39mreturn\u001b[39;00m func(\u001b[39m*\u001b[39;49margs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n", "File \u001b[0;32m~/anaconda3/envs/testenv/lib/python3.9/site-packages/transformers/optimization.py:468\u001b[0m, in \u001b[0;36mAdamW.step\u001b[0;34m(self, closure)\u001b[0m\n\u001b[1;32m 466\u001b[0m exp_avg\u001b[39m.\u001b[39mmul_(beta1)\u001b[39m.\u001b[39madd_(grad, alpha\u001b[39m=\u001b[39m(\u001b[39m1.0\u001b[39m \u001b[39m-\u001b[39m beta1))\n\u001b[1;32m 467\u001b[0m exp_avg_sq\u001b[39m.\u001b[39mmul_(beta2)\u001b[39m.\u001b[39maddcmul_(grad, grad, value\u001b[39m=\u001b[39m\u001b[39m1.0\u001b[39m \u001b[39m-\u001b[39m beta2)\n\u001b[0;32m--> 468\u001b[0m denom \u001b[39m=\u001b[39m exp_avg_sq\u001b[39m.\u001b[39;49msqrt()\u001b[39m.\u001b[39madd_(group[\u001b[39m\"\u001b[39m\u001b[39meps\u001b[39m\u001b[39m\"\u001b[39m])\n\u001b[1;32m 470\u001b[0m step_size \u001b[39m=\u001b[39m group[\u001b[39m\"\u001b[39m\u001b[39mlr\u001b[39m\u001b[39m\"\u001b[39m]\n\u001b[1;32m 471\u001b[0m \u001b[39mif\u001b[39;00m group[\u001b[39m\"\u001b[39m\u001b[39mcorrect_bias\u001b[39m\u001b[39m\"\u001b[39m]: \u001b[39m# No bias correction for Bert\u001b[39;00m\n", "\u001b[0;31mOutOfMemoryError\u001b[0m: CUDA out of memory. Tried to allocate 352.00 MiB (GPU 0; 4.00 GiB total capacity; 2.67 GiB already allocated; 0 bytes free; 2.80 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF" ] } ], "source": [ "model = sinKafModel(model, optimizer, scheduler)\n", "trainer = pl.Trainer( max_epochs=epochs, limit_train_batches=0.1, devices=1, accelerator='gpu') \n", "trainer.fit(model,train_dataloader,validation_dataloader )" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "sent = 'Koyunlar hasta'" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "input_ids = []\n", "attention_masks = []\n", "\n", "encoded_dict = tokenizer.encode_plus(\n", " sent,\n", " add_special_tokens = True,\n", " max_length = 64,\n", " pad_to_max_length = True,\n", " return_attention_mask = True,\n", " return_tensors = 'pt',\n", " )\n", "\n", "\n", "input_ids = encoded_dict['input_ids']\n", "attention_masks = encoded_dict['attention_mask']\n", "\n", "\n", "input_ids = torch.cat([input_ids], dim=0)\n", "input_mask = torch.cat([attention_masks], dim=0)\n", "labels = torch.tensor(labels)\n", "\n", "\n", "\n", "\n", "print('Original: ', sent)\n", "print('Token IDs:', input_ids)\n", "print('Token IDs:', input_mask)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "outputs = model(input_ids, input_mask, labels[0])" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "outputs[0]" ] } ], "metadata": { "kernelspec": { "display_name": "sbtenv", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.0" }, "orig_nbformat": 4 }, "nbformat": 4, "nbformat_minor": 2 }