{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "a825ba6b", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "===================================BUG REPORT===================================\n", "Welcome to bitsandbytes. For bug reports, please submit your error trace to: https://github.com/TimDettmers/bitsandbytes/issues\n", "For effortless bug reporting copy-paste your error into this form: https://docs.google.com/forms/d/e/1FAIpQLScPB8emS3Thkp66nvqwmjTEgxp8Y9ufuWTzFyr9kJ5AoI47dQ/viewform?usp=sf_link\n", "================================================================================\n", "CUDA SETUP: CUDA runtime path found: /home/sourab/miniconda3/envs/ml/lib/libcudart.so\n", "CUDA SETUP: Highest compute capability among GPUs detected: 7.5\n", "CUDA SETUP: Detected CUDA version 117\n", "CUDA SETUP: Loading binary /home/sourab/miniconda3/envs/ml/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda117.so...\n" ] } ], "source": [ "import argparse\n", "import os\n", "\n", "import torch\n", "from torch.optim import AdamW\n", "from torch.utils.data import DataLoader\n", "from peft import (\n", " get_peft_config,\n", " get_peft_model,\n", " get_peft_model_state_dict,\n", " set_peft_model_state_dict,\n", " PeftType,\n", " PrefixTuningConfig,\n", " PromptEncoderConfig,\n", ")\n", "\n", "import evaluate\n", "from datasets import load_dataset\n", "from transformers import AutoModelForSequenceClassification, AutoTokenizer, get_linear_schedule_with_warmup, set_seed\n", "from tqdm import tqdm" ] }, { "cell_type": "code", "execution_count": 2, "id": "2bd7cbb2", "metadata": {}, "outputs": [], "source": [ "# Run configuration: the values a reader is expected to tune live in this cell.\n", "batch_size = 32\n", "model_name_or_path = \"roberta-large\"\n", "task = \"mrpc\"\n", "peft_type = PeftType.PREFIX_TUNING\n", "# Fall back to CPU so the notebook still runs on a machine without a CUDA device.\n", "device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n", "num_epochs = 20\n", "\n", "# Fix the random seed before any model or dataloader is created, for reproducible behavior.\n", "seed = 42\n", "set_seed(seed)" ] }, { "cell_type": "code", "execution_count": 3, "id": "33d9b62e", "metadata": {}, "outputs": [], "source": [ "peft_config = 
PrefixTuningConfig(task_type=\"SEQ_CLS\", num_virtual_tokens=20)\n", "lr = 1e-2" ] }, { "cell_type": "code", "execution_count": 4, "id": "152b6177", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Found cached dataset glue (/home/sourab/.cache/huggingface/datasets/glue/mrpc/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "be1eddbb9a7d4e6dae32fd026e167f96", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/3 [00:00 use the model max length (it's actually the default)\n", " outputs = tokenizer(examples[\"sentence1\"], examples[\"sentence2\"], truncation=True, max_length=None)\n", " return outputs\n", "\n", "\n", "tokenized_datasets = datasets.map(\n", " tokenize_function,\n", " batched=True,\n", " remove_columns=[\"idx\", \"sentence1\", \"sentence2\"],\n", ")\n", "\n", "# We also rename the 'label' column to 'labels' which is the expected name for labels by the models of the\n", "# transformers library\n", "tokenized_datasets = tokenized_datasets.rename_column(\"label\", \"labels\")\n", "\n", "\n", "def collate_fn(examples):\n", " return tokenizer.pad(examples, padding=\"longest\", return_tensors=\"pt\")\n", "\n", "\n", "# Instantiate dataloaders.\n", "train_dataloader = DataLoader(tokenized_datasets[\"train\"], shuffle=True, collate_fn=collate_fn, batch_size=batch_size)\n", "eval_dataloader = DataLoader(\n", " tokenized_datasets[\"validation\"], shuffle=False, collate_fn=collate_fn, batch_size=batch_size\n", ")" ] }, { "cell_type": "code", "execution_count": null, "id": "f6bc8144", "metadata": {}, "outputs": [], "source": [ "# Load the pretrained sequence classifier and wrap it with the PEFT config in one expression.\n", "model = get_peft_model(\n", "    AutoModelForSequenceClassification.from_pretrained(model_name_or_path, return_dict=True),\n", "    peft_config,\n", ")\n", "model.print_trainable_parameters()\n", "# Bare last expression: the wrapped model is displayed as the cell output.\n", "model" ] }, { "cell_type": "code", "execution_count": 6, "id": "af41c571", "metadata": {}, "outputs": 
[], "source": [ "optimizer = AdamW(params=model.parameters(), lr=lr)\n", "\n", "# Number of batches per epoch times the number of epochs.\n", "num_training_steps = len(train_dataloader) * num_epochs\n", "# Fraction of the schedule used for linear warmup. The scheduler documents\n", "# num_warmup_steps as an int, so round the product to a whole step count.\n", "warmup_ratio = 0.06\n", "num_warmup_steps = round(warmup_ratio * num_training_steps)\n", "\n", "# Instantiate scheduler\n", "lr_scheduler = get_linear_schedule_with_warmup(\n", "    optimizer=optimizer,\n", "    num_warmup_steps=num_warmup_steps,\n", "    num_training_steps=num_training_steps,\n", ")" ] }, { "cell_type": "code", "execution_count": 7, "id": "90993c93", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ " 0%| | 0/115 [00:00