{ "cells": [ { "cell_type": "markdown", "metadata": { "id": "0-7S1J6Jq7nc" }, "source": [ "# Fine-Tuning BERT as a `RewardModel`\n", "\n", "1. First, intall `transformers`, `tlr`, and `codecarbon`." ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "Fx7pg9eT62-d", "outputId": "6beb9be4-0b50-4c4e-ea8b-6aafe07e75c7" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", "Collecting transformers\n", " Downloading transformers-4.30.1-py3-none-any.whl (7.2 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.2/7.2 MB\u001b[0m \u001b[31m52.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from transformers) (3.12.0)\n", "Collecting huggingface-hub<1.0,>=0.14.1 (from transformers)\n", " Downloading huggingface_hub-0.15.1-py3-none-any.whl (236 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m236.8/236.8 kB\u001b[0m \u001b[31m23.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from transformers) (1.22.4)\n", "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from transformers) (23.1)\n", "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (6.0)\n", "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers) (2022.10.31)\n", "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from transformers) (2.27.1)\n", "Collecting tokenizers!=0.11.3,<0.14,>=0.11.1 (from transformers)\n", " Downloading tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.8/7.8 MB\u001b[0m \u001b[31m92.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting safetensors>=0.3.1 (from transformers)\n", " Downloading safetensors-0.3.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m71.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.10/dist-packages (from transformers) (4.65.0)\n", "Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.14.1->transformers) (2023.4.0)\n", "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.14.1->transformers) (4.5.0)\n", "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (1.26.15)\n", "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (2022.12.7)\n", "Requirement already satisfied: charset-normalizer~=2.0.0 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (2.0.12)\n", "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (3.4)\n", "Installing collected 
packages: tokenizers, safetensors, huggingface-hub, transformers\n", "Successfully installed huggingface-hub-0.15.1 safetensors-0.3.1 tokenizers-0.13.3 transformers-4.30.1\n" ] } ], "source": [ "%pip install transformers\n", "%pip install trl\n", "%pip install codecarbon" ] }, { "cell_type": "markdown", "metadata": { "id": "Y6xzGtxPrMaF" }, "source": [ "2. Download the `reward-aira-dataset` from the Hub." ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "DtCgCgEr62C9", "outputId": "08428c5a-f363-44fc-8da5-10e1c4c9d3d6" }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Found cached dataset parquet (C:/Users/CWLINK/.cache/huggingface/datasets/nicholasKluge___parquet/nicholasKluge--reward-aira-dataset-9a1556f180412706/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Dataset loaded.\n" ] } ], "source": [ "from datasets import load_dataset\n", "\n", "dataset = load_dataset(\"nicholasKluge/reward-aira-dataset\", split=\"portuguese\")\n", "\n", "print(\"Dataset loaded.\")" ] }, { "cell_type": "markdown", "metadata": { "id": "kQQ1DkB5rjpS" }, "source": [ "3. Download your base model for fine-tuning. Here we are using `bert-base-cased` for the English reward model and `bert-base-portuguese-cased` for the Portuguese version." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 357, "referenced_widgets": [ "156d84739bd24ad69728147036fcfe15", "e9d1e8cf5fb24ae5bf77fff494cb19be", "9667517f0c5d488fa438af0a9194a56a", "ae94488d8cd240ab8d1df56b22373d90", "4873769002374f2999d2f0d98eea4292", "888e8e9350ef4e4da1704f4f24f91313", "696496958f524be682e8a535c3590d19", "7ac859e3ac7345288fcc6ee78320f42e", "d1f94722f882476f86ccc5514ea1d0d2", "eee9df2eac924fd5a5cf1af74a64e42c", "974c6bad10b94b6d9693a24902badc27", "43a1b3f35d4c4d1abb75f02e4c040789", "a16898eda30d4d75a6e0c2c512869137", "46cb332062024f1f8cff246b90e7c725", "e701f34d18b3470490590fe33307a030", "1ffde46bb56c4c91a3213eb19abf1cdb", "5a84b6dba3734d6a94bfddba7fe60597", "b896fcccb74047dcab871b34e8bd2068", "a7111164cac442dfb0e533cbff4f363b", "321c0523ced14f08af825cdd3ee1bb30", "12f77f4404c34ce7849f9823dc852b7a", "5c416b5821fb4ad491e6fcc47100e187", "13a74debb1244846b84f7efb6fbf8220", "f7f18b55aeb54406875646174240bcde", "a9d5ae2631f146ca8832ea365cafd44b", "25794ab9c76742cd911fba5828a07b55", "b28d4940b8d84bcd85dda20595bb78e8", "8f360e5e80dc493a91c4dbc5b89ea7ee", "5c0b409becc04c9391032793874207d6", "3f4e2f7663054c1988b8d4981a92da46", "6d8dfb3e01f24d199925d5b84beff125", "c82ad873691f4d7e8059423a2a6be328", "aab499acbe144705bc57ff18989f4b3e", "153b7cc478bb4d9e9c981572de075848", "6acf79c2fd0d4420b0b9307ac2b11e3b", "758a5d93359e496b8b0a4ebd3a3f606b", "b057a27133da4f6a85c3b51956fe52db", "4000bacb63914286803b7ada8ac94bea", "c4b438a528db4431b21efe0e720a3c32", "60f6b14d68864c98aa9c786f7da55306", "2748cefbcec3483fbd053694c674ebfc", "7c186a738943475281633caf2a925ae9", "1514af6070f445aa86530e424a6eafc3", "25db2786cede4d31b118b1ca66afd7f0", "c12c1669e7b04106ba949c3330fdb89b", "35d6487007594eadbd63ffc561cc3a26", "18944f718b8144db935c890cffde4fa5", "f8645f100ee643b6b47381678fda0e3b", "0dda003be9bd4ac38fc1e1f38bd5caea", "531cd094572347a491c811816f02269a", "67938079109640f492d94527aec5ca09", "ca98152355644f90a9d44bdbacb5ff88", "b89eceaa13fd4b47ac270f709d35485f", "28937f1429984e2886507ce98944641a", 
"3c2c5c93035e4b58b2726d48f10aea71" ] }, "id": "W3d0nlgO62DB", "outputId": "f680cdb9-32a5-447a-f09d-456a2717ce79" }, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "156d84739bd24ad69728147036fcfe15", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Downloading (…)lve/main/config.json: 0%| | 0.00/570 [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "43a1b3f35d4c4d1abb75f02e4c040789", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Downloading model.safetensors: 0%| | 0.00/436M [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stderr", "output_type": "stream", "text": [ "Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']\n", "- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", "- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", "Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.weight', 'classifier.bias']\n", "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "13a74debb1244846b84f7efb6fbf8220", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Downloading (…)okenizer_config.json: 0%| | 0.00/29.0 [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "153b7cc478bb4d9e9c981572de075848", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Downloading (…)solve/main/vocab.txt: 0%| | 0.00/213k [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "c12c1669e7b04106ba949c3330fdb89b", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Downloading (…)/main/tokenizer.json: 0%| | 0.00/436k [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Model (bert-base-cased) ready.\n" ] } ], "source": [ "from transformers import AutoModelForSequenceClassification, AutoTokenizer\n", "import torch\n", "\n", "model_name = \"bert-base-cased\" # \"neuralmind/bert-base-portuguese-cased\" bert-base-cased\n", "\n", "model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=1)\n", "tokenizer = AutoTokenizer.from_pretrained(model_name)\n", "\n", "if tokenizer.pad_token is None:\n", " tokenizer.pad_token = tokenizer.eos_token\n", " model.config.pad_token_id = model.config.eos_token_id\n", "\n", "print(f\"Model ({model_name}) ready.\")" ] }, { "cell_type": "markdown", "metadata": { "id": "kvq6hWQ4sAlw" }, "source": [ "4. 
Preprocess the dataset to be compatible with the `RewardTrainer` from `trl`." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 17, "referenced_widgets": [ "ca2f225fdf53485f95f878a6de8cc471", "d2367b8814824b108a250bc935770720", "68610ec59cdc4ffda0461f03ff17ae0f", "d6867cbeba504574b3fd0dd0622925eb", "a3050b0a67ed4f0ab2a8d4801fb608c2", "5529e7881782416f9f4030f9848eae5a", "6659d18fc6c14f1ebd4fcd181e09ea9f", "acb193199ff3418e827d9776914e93fc", "1796651a4ec846c291e54feb4bd1d803", "f50f691208214f92869aa286a32b4ffd", "3c8058abf64d40d0ba7c61b415e6c694" ] }, "id": "JwrPjIbo62DC", "outputId": "96deaafd-478b-420e-b19c-6b3b86e2356a" }, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "ca2f225fdf53485f95f878a6de8cc471", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Map: 0%| | 0/34234 [00:00, ? examples/s]" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "def preprocess(examples):\n", " kwargs = {\"padding\": \"max_length\", \"truncation\": True, \"max_length\": 350, \"return_tensors\": \"pt\"}\n", "\n", " prompt_plus_chosen_response = examples[\"instruction\"] + \"[SEP]\" + examples[\"chosen_response\"]\n", " prompt_plus_rejected_response = examples[\"instruction\"] + \"[SEP]\" + examples[\"rejected_response\"]\n", "\n", " # Then tokenize these modified fields.\n", " tokens_chosen = tokenizer.encode_plus(prompt_plus_chosen_response, **kwargs)\n", " tokens_rejected = tokenizer.encode_plus(prompt_plus_rejected_response, **kwargs)\n", "\n", " return {\n", " \"input_ids_chosen\": tokens_chosen[\"input_ids\"][0], \"attention_mask_chosen\": tokens_chosen[\"attention_mask\"][0],\n", " \"input_ids_rejected\": tokens_rejected[\"input_ids\"][0], \"attention_mask_rejected\": tokens_rejected[\"attention_mask\"][0]\n", " }\n", "\n", "formatted_dataset = dataset.map(preprocess)\n", "formatted_dataset = formatted_dataset.train_test_split()" ] }, { "cell_type": "markdown", "metadata": { "id": "rpytDiCusMk4" }, "source": [ "5. Train your model while tracking the CO2 emissions. 🌱" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "background_save": true, "base_uri": "https://localhost:8080/", "height": 1000 }, "id": "KwAoxTTF62DD", "outputId": "757673d1-dfe6-43eb-8f57-dea8454ac0bd" }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).\n", "/usr/local/lib/python3.10/dist-packages/trl/trainer/reward_trainer.py:125: UserWarning: When using RewardDataCollatorWithPadding, you should set `max_length` in the RewardTrainer's init it will be set to `512` by default, but you should do it yourself in the future.\n", " warnings.warn(\n", "/usr/local/lib/python3.10/dist-packages/trl/trainer/reward_trainer.py:136: UserWarning: When using RewardDataCollatorWithPadding, you should set `remove_unused_columns=False` in your TrainingArguments we have set it for you, but you should do it yourself in the future.\n", " warnings.warn(\n", "[codecarbon INFO @ 13:43:43] [setup] RAM Tracking...\n", "[codecarbon INFO @ 13:43:43] [setup] GPU Tracking...\n", "[codecarbon INFO @ 13:43:43] Tracking Nvidia GPU via pynvml\n", "[codecarbon INFO @ 13:43:43] [setup] CPU Tracking...\n", "[codecarbon WARNING @ 13:43:43] No CPU tracking mode found. 
Falling back on CPU constant mode.\n", "[codecarbon WARNING @ 13:43:44] We saw that you have a Intel(R) Xeon(R) CPU @ 2.20GHz but we don't know it. Please contact us.\n", "[codecarbon INFO @ 13:43:44] CPU Model on constant consumption mode: Intel(R) Xeon(R) CPU @ 2.20GHz\n", "[codecarbon INFO @ 13:43:44] >>> Tracker's metadata:\n", "[codecarbon INFO @ 13:43:44] Platform system: Linux-5.15.107+-x86_64-with-glibc2.31\n", "[codecarbon INFO @ 13:43:44] Python version: 3.10.12\n", "[codecarbon INFO @ 13:43:44] CodeCarbon version: 2.2.3\n", "[codecarbon INFO @ 13:43:44] Available RAM : 83.481 GB\n", "[codecarbon INFO @ 13:43:44] CPU count: 12\n", "[codecarbon INFO @ 13:43:44] CPU model: Intel(R) Xeon(R) CPU @ 2.20GHz\n", "[codecarbon INFO @ 13:43:44] GPU count: 1\n", "[codecarbon INFO @ 13:43:44] GPU model: 1 x NVIDIA A100-SXM4-40GB\n", "/usr/local/lib/python3.10/dist-packages/transformers/optimization.py:411: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n", " warnings.warn(\n", "You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n", "/usr/local/lib/python3.10/dist-packages/transformers/tokenization_utils_base.py:2395: UserWarning: `max_length` is ignored when `padding`=`True` and there is no truncation strategy. To pad to max length, use `padding='max_length'`.\n", " warnings.warn(\n", "Could not estimate the number of tokens of the input, floating-point operations will not be computed\n" ] }, { "data": { "text/html": [ "\n", "
<table border=\"1\">\n", "  <tr> <th>Step</th> <th>Training Loss</th> <th>Validation Loss</th> <th>Accuracy</th> </tr>\n",
"  <tr> <td>200</td> <td>0.080300</td> <td>0.037106</td> <td>0.987499</td> </tr>\n",
"  <tr> <td>400</td> <td>0.039300</td> <td>0.036421</td> <td>0.988433</td> </tr>\n",
"  <tr> <td>600</td> <td>0.037200</td> <td>0.041799</td> <td>0.986447</td> </tr>\n",
"  <tr> <td>800</td> <td>0.011400</td> <td>0.039411</td> <td>0.989602</td> </tr>\n",
"  <tr> <td>1000</td> <td>0.013800</td> <td>0.039781</td> <td>0.989718</td> </tr>\n",
"  <tr> <td>1200</td> <td>0.012700</td> <td>0.034337</td> <td>0.990887</td> </tr>\n",
"  <tr> <td>1400</td> <td>0.005200</td> <td>0.037403</td> <td>0.991120</td> </tr>\n",
"  <tr> <td>1600</td> <td>0.001800</td> <td>0.047661</td> <td>0.990653</td> </tr>\n",
"  <tr> <td>1800</td> <td>0.000900</td> <td>0.051354</td> <td>0.991237</td> </tr>\n",
"  <tr> <td>2000</td> <td>0.001000</td> <td>0.046224</td> <td>0.990419</td> </tr>\n",
"  <tr> <td>2200</td> <td>0.000200</td> <td>0.046582</td> <td>0.991120</td> </tr>\n",
"  <tr> <td>2400</td> <td>0.000600</td> <td>0.046632</td> <td>0.990536</td> </tr>\n",
"  <tr> <td>2600</td> <td>0.000100</td> <td>0.051437</td> <td>0.990770</td> </tr>\n",
"  <tr> <td>2800</td> <td>0.000500</td> <td>0.049085</td> <td>0.990887</td> </tr>\n",
"  <tr> <td>3000</td> <td>0.000400</td> <td>0.049938</td> <td>0.991004</td> </tr>\n",
"</table>"
],
"text/plain": [
"