{ "cells": [ { "cell_type": "markdown", "metadata": { "id": "IqM-T1RTzY6C" }, "source": [ "To run this, press \"*Runtime*\" and press \"*Run all*\" on a **free** Tesla T4 Google Colab instance!" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "2eSvM9zX_2d3" }, "outputs": [], "source": [ "%%capture\n", "!pip install unsloth\n", "# Also get the latest nightly Unsloth!\n", "!pip uninstall unsloth -y && pip install --upgrade --no-cache-dir \"unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git\"\n", "\n", "# Install Flash Attention 2 for softcapping support\n", "import torch\n", "if torch.cuda.get_device_capability()[0] >= 8:\n", " !pip install --no-deps packaging ninja einops \"flash-attn>=2.6.3\"" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 331, "referenced_widgets": [ "b98deac402f94f32b982529906fdd059", "3ff6801117ae4a8484ba5b441db6a622", "7ec126f592914cb6bf22afc1624cf51d", "2d13343db0a842d5ba00e94ae9ebcd0f", "5319abc2828c4ab29453c84ed1d1f101", "b9b5d4bd07eb49dcae4c78f90f065c59", "dc99982081b84763be5779b328d8d74a", "daec0bd9f5fd4dbe89a4b309b80828cb", "ca42671e31114c218c98d1b80a0ab8b5", "3e85e215583947a1a1ea4d31d4d046fa", "d2586a6a8c55471faac9381585dc32da", "02c9b28ba88d4e23863dc1b2c072becc", "894d23fb562a45ffb986e22b3b19e3eb", "afb183d59ecb4ac69e74a1e92aa9797a", "28ece098deee41da820e28f81541e837", "4ffeab0cb3c446c9b76f390f69ef775e", "3e426520ba3a4475b1260b6991fafe6b", "432faf95d3154d6eabe6dd68d0ff910d", "bfa724bb20dc449a89cd56f15b33a490", "dad9b5ed1ec94c97b4c1de069a608f5b", "a887c7be6da84583aa79bcdb6509817e", "8a8a7a84bdf6434db4c9dac503008062", "823230c7f64f4fc5b61271ab9014b1fe", "5cba789481864bc584f65c8a1f15b583", "0b9703507d004f8ab23c58cfd53a21bd", "29d935841e734155aaa1243bab3fb616", "32c01cf0bea341ffa0acc6ac65820806", "408f4a8ed2cf4d8589a063171ed17d7c", "d241b67c2e794f0a878bdc3f0c04e308", "4bd1e7bcf694462bb20d70645367f4fc", 
"c8cdb91d2f0a4a308ab8adea0e94f7d7", "5a7a036995c84940877213b94c905dbb", "2dbfe2911050487e8ec5fa6f5daad0c5", "e26b322930f24b92ab8cfeafc5751939", "3c641afa9dd44470a7906a0588a3ca67", "a36d44d16c944deb850103a33f4cdc6b", "68f1b0a406134eaabee4ab1932d9739f", "1f481be6624e463ca083e3e80f8fef1d", "66be70eca39540c9b471b1f73bcac010", "2a62c9e49d9a4a36b3c266479f780637", "199088a9cbd14b79ad5e6d96e4ca3a8f", "daeb4c06806c461ba1d7e5663ff129df", "70c0a17b4e9a4ee19328ce750c30e790", "752546010193451db8392e32a0a36417", "16ca3f914d104fbeab198bc5f37904cc", "2a35a35c7b9e4a67963b8d3aaaaf1e53", "f02ecfd039ec47b1aed81cea46303fe3", "3858fb08ec884c9f87113734ee3cbc9d", "bf482fa43ab7478299d44f461f21c2e4", "a49ecaf08fe74cdc80e76650c0a7d689", "88300f63e24948f487846e9ce30be81f", "d54c70aa11994073b0fd70cb1c0c5deb", "03ce807cb7fa4727b7c027aaf839554d", "9fd04745a2de47a0b73293be4007d503", "7fcf255ea4174670aa3daf4f7f7dfb58", "50a88e08676e4c378d68a0e34ea3f89d", "707b692233f14b91b48dac15f0bb3923", "b599319cfafe4948aa7f454ddf8b0f5a", "cf0ce4efdc30421b824129574eebe7b2", "b379f3423186498c8c82054ddbfd009d", "dd4d3b973a4b4ed58e4f29359ff93b74", "583faab9a5c3458e9f662e5903632425", "9eee537fa1d84b36bfce481112150576", "6a5c151f7ac04b489199c4d66b69d90f", "ce1ac7f9b4364a4c962f09398fec6d3f", "95cbc3010002406faf8154812b5a1a8c" ] }, "id": "QmUBVEnvCDJv", "outputId": "10436b44-7e7b-4b8d-8d55-59ea9f5830bd" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "π¦₯ Unsloth: Will patch your computer to enable 2x faster free finetuning.\n", "==((====))== Unsloth 2024.9.post3: Fast Gemma2 patching. Transformers = 4.44.2.\n", " \\\\ /| GPU: Tesla T4. Max memory: 14.748 GB. Platform = Linux.\n", "O^O/ \\_/ \\ Pytorch: 2.4.1+cu121. CUDA = 7.5. CUDA Toolkit = 12.1.\n", "\\ / Bfloat16 = FALSE. FA [Xformers = 0.0.28.post1. 
FA2 = False]\n", " \"-____-\" Free Apache license: http://github.com/unslothai/unsloth\n", "Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "b98deac402f94f32b982529906fdd059", "version_major": 2, "version_minor": 0 }, "text/plain": [ "model.safetensors: 0%| | 0.00/2.22G [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "02c9b28ba88d4e23863dc1b2c072becc", "version_major": 2, "version_minor": 0 }, "text/plain": [ "generation_config.json: 0%| | 0.00/190 [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "823230c7f64f4fc5b61271ab9014b1fe", "version_major": 2, "version_minor": 0 }, "text/plain": [ "tokenizer_config.json: 0%| | 0.00/46.4k [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "e26b322930f24b92ab8cfeafc5751939", "version_major": 2, "version_minor": 0 }, "text/plain": [ "tokenizer.model: 0%| | 0.00/4.24M [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "16ca3f914d104fbeab198bc5f37904cc", "version_major": 2, "version_minor": 0 }, "text/plain": [ "special_tokens_map.json: 0%| | 0.00/636 [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "50a88e08676e4c378d68a0e34ea3f89d", "version_major": 2, "version_minor": 0 }, "text/plain": [ "tokenizer.json: 0%| | 0.00/17.5M [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "from unsloth import FastLanguageModel\n", "import torch\n", "max_seq_length = 2048 # Choose any! 
We auto support RoPE Scaling internally!\n", "dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+\n", "load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.\n", "\n", "# 4bit pre quantized models we support for 4x faster downloading + no OOMs.\n", "fourbit_models = [\n", " \"unsloth/Meta-Llama-3.1-8B-bnb-4bit\", # Llama-3.1 15 trillion tokens model 2x faster!\n", " \"unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit\",\n", " \"unsloth/Meta-Llama-3.1-70B-bnb-4bit\",\n", " \"unsloth/Meta-Llama-3.1-405B-bnb-4bit\", # We also uploaded 4bit for 405b!\n", " \"unsloth/Mistral-Nemo-Base-2407-bnb-4bit\", # New Mistral 12b 2x faster!\n", " \"unsloth/Mistral-Nemo-Instruct-2407-bnb-4bit\",\n", " \"unsloth/mistral-7b-v0.3-bnb-4bit\", # Mistral v3 2x faster!\n", " \"unsloth/mistral-7b-instruct-v0.3-bnb-4bit\",\n", " \"unsloth/Phi-3-mini-4k-instruct\", # Phi-3 2x faster!\n", " \"unsloth/Phi-3-medium-4k-instruct\",\n", " \"unsloth/gemma-2-9b-bnb-4bit\",\n", " \"unsloth/gemma-2-27b-bnb-4bit\", # Gemma 2x faster!\n", " \"unsloth/gemma-2-2b-bnb-4bit\", # New small Gemma model!\n", "] # More models at https://huggingface.co/unsloth\n", "\n", "model, tokenizer = FastLanguageModel.from_pretrained(\n", " model_name = \"unsloth/gemma-2-2b\",\n", " max_seq_length = max_seq_length,\n", " dtype = dtype,\n", " load_in_4bit = load_in_4bit,\n", " # token = \"hf_...\", # use one if using gated models like meta-llama/Llama-2-7b-hf\n", ")" ] }, { "cell_type": "markdown", "metadata": { "id": "SXd9bTZd1aaL" }, "source": [ "We now add LoRA adapters so we only need to update 1 to 10% of all parameters!" 
] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "6bZsfBuZDeCL", "outputId": "222ba5d2-b328-435f-d240-fe109af81335" }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Unsloth 2024.9.post3 patched 26 layers with 26 QKV layers, 26 O layers and 26 MLP layers.\n" ] } ], "source": [ "model = FastLanguageModel.get_peft_model(\n", " model,\n", " r = 16, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128\n", " target_modules = [\"q_proj\", \"k_proj\", \"v_proj\", \"o_proj\",\n", " \"gate_proj\", \"up_proj\", \"down_proj\",],\n", " lora_alpha = 16,\n", " lora_dropout = 0, # Supports any, but = 0 is optimized\n", " bias = \"none\", # Supports any, but = \"none\" is optimized\n", " # [NEW] \"unsloth\" uses 30% less VRAM, fits 2x larger batch sizes!\n", " use_gradient_checkpointing = \"unsloth\", # True or \"unsloth\" for very long context\n", " random_state = 3157,\n", " use_rslora = False, # We support rank stabilized LoRA\n", " loftq_config = None, # And LoftQ\n", ")" ] }, { "cell_type": "markdown", "metadata": { "id": "vITh0KVJ10qX" }, "source": [ "\n", "### Data Prep\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "EbUIVm1jLt6P" }, "outputs": [], "source": [ "from datasets import Dataset\n", "import pandas as pd\n", "dataset = Dataset.from_pandas(pd.read_csv('train.csv'))" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 49, "referenced_widgets": [ "17c04d54063249cbb7ca949fce48442b", "8a4b784bbcf54fa69c69f71228e2eab7", "1d1ce5c649f1457494e90e140532dd9e", "f1d0cb9c3b2a45f08d4115db9abb50ca", "2c17d5e9b5da49de95327e12ea6c613d", "7083a705495a4e7a83b8ef1c71ffdca2", "41edab5d78f6466e83cfa339cd94c924", "ee8083c8429c442ca161901385c868a0", "a8beca360e244523b592feae6e5a0fe3", "de020cd0340b491fad4737067d7de827", "ed85db3b61ab457eadc7d5cb04a2403e" ] }, "id": "LjY75GoYUCB8", 
"outputId": "c3722c4f-6f58-4c56-f36f-6233dc92ae3b" }, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "17c04d54063249cbb7ca949fce48442b", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Map: 0%| | 0/2700 [00:00, ? examples/s]" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "my_prompt = \"\"\"\n", "### Instruction:\n", "{}\n", "\n", "### Response:\n", "{}\"\"\"\n", "\n", "\n", "EOS_TOKEN = tokenizer.eos_token # Must add EOS_TOKEN\n", "def formatting_prompts_func(examples):\n", " instructions = examples[\"instruction\"]\n", " outputs = examples[\"output\"]\n", " texts = []\n", " for instruction, output in zip(instructions, outputs):\n", " # Must add EOS_TOKEN, otherwise your generation will go on forever!\n", " text = my_prompt.format(instruction, output) + EOS_TOKEN\n", " texts.append(text)\n", " return { \"text\" : texts, }\n", "pass\n", "\n", "# from datasets import load_dataset\n", "# dataset = load_dataset(\"yahma/alpaca-cleaned\", split = \"train\")\n", "dataset = dataset.map(formatting_prompts_func, batched = True,)" ] }, { "cell_type": "markdown", "metadata": { "id": "idAEIeSQ3xdS" }, "source": [ "\n", "### Train the model\n", "Now let's use Hugging Face TRL's `SFTTrainer`! More docs here: [TRL SFT docs](https://huggingface.co/docs/trl/sft_trainer). The cell below trains for `max_steps = 3000` optimizer steps; for exactly one full pass over the dataset instead, set `num_train_epochs = 1` and remove the `max_steps` argument (when `max_steps` is given it overrides `num_train_epochs`). We also support TRL's `DPOTrainer`!" 
] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 66, "referenced_widgets": [ "a6001deecd1b441cad4fb9ef73dea593", "4719bb75e6754b8fbc260c3f413a018f", "c57687e753c141cf8f897c06643fe6f6", "48680faf594742299b3663fe36b21612", "67d5e68310854e96a86156eeaf96c4e2", "34a00fe6fbe64d06969c29a14ac0794d", "0411f0e428b4435b976e22df6b8790a0", "41a585c2b6e4440dbc88f28ddeb1c7c2", "a0ffa168614740c89074ec0aa2f76493", "0e39a5ac20d9435ea186ccf715203dba", "a9a9a824579a456c8feae80de7d58481" ] }, "id": "95_Nn-89DhsL", "outputId": "1d9946b8-fabb-49b1-d08c-9be1df0aff4a" }, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "a6001deecd1b441cad4fb9ef73dea593", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Map (num_proc=2): 0%| | 0/2700 [00:00, ? examples/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stderr", "output_type": "stream", "text": [ "max_steps is given, it will override any value given in num_train_epochs\n" ] } ], "source": [ "from trl import SFTTrainer\n", "from transformers import TrainingArguments\n", "from unsloth import is_bfloat16_supported\n", "\n", "trainer = SFTTrainer(\n", " model = model,\n", " tokenizer = tokenizer,\n", " train_dataset = dataset,\n", " dataset_text_field = \"text\",\n", " max_seq_length = max_seq_length,\n", " dataset_num_proc = 2,\n", " packing = False, # Can make training 5x faster for short sequences.\n", " args = TrainingArguments(\n", " per_device_train_batch_size = 2,\n", " gradient_accumulation_steps = 4,\n", " warmup_steps = 5,\n", " # num_train_epochs = 1, # Set this for 1 full training run.\n", " max_steps = 3000,\n", " learning_rate = 2e-4,\n", " fp16 = not is_bfloat16_supported(),\n", " bf16 = is_bfloat16_supported(),\n", " logging_steps = 100,\n", " optim = \"adamw_8bit\",\n", " weight_decay = 0.01,\n", " lr_scheduler_type = \"linear\",\n", " seed = 3407,\n", " output_dir = \"outputs\",\n", " 
),\n", ")" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 1000 }, "id": "yqxqAZ7KJ4oL", "outputId": "b4e7ff26-27c6-4197-c71e-81ee8b738b0a" }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "==((====))== Unsloth - 2x faster free finetuning | Num GPUs = 1\n", " \\\\ /| Num examples = 2,700 | Num Epochs = 9\n", "O^O/ \\_/ \\ Batch size per device = 2 | Gradient Accumulation steps = 4\n", "\\ / Total batch size = 8 | Total steps = 3,000\n", " \"-____-\" Number of trainable parameters = 20,766,720\n" ] }, { "data": { "text/html": [ "\n", "
Step | \n", "Training Loss | \n", "
---|---|
100 | \n", "0.809900 | \n", "
200 | \n", "0.471200 | \n", "
300 | \n", "0.452000 | \n", "
400 | \n", "0.419900 | \n", "
500 | \n", "0.389400 | \n", "
600 | \n", "0.396300 | \n", "
700 | \n", "0.376600 | \n", "
800 | \n", "0.331700 | \n", "
900 | \n", "0.338700 | \n", "
1000 | \n", "0.335900 | \n", "
1100 | \n", "0.282800 | \n", "
1200 | \n", "0.281600 | \n", "
1300 | \n", "0.283500 | \n", "
1400 | \n", "0.259900 | \n", "
1500 | \n", "0.230700 | \n", "
1600 | \n", "0.235800 | \n", "
1700 | \n", "0.232200 | \n", "
1800 | \n", "0.197000 | \n", "
1900 | \n", "0.199700 | \n", "
2000 | \n", "0.203000 | \n", "
2100 | \n", "0.175900 | \n", "
2200 | \n", "0.171800 | \n", "
2300 | \n", "0.171600 | \n", "
2400 | \n", "0.160500 | \n", "
2500 | \n", "0.144100 | \n", "
2600 | \n", "0.145600 | \n", "
2700 | \n", "0.146000 | \n", "
2800 | \n", "0.119600 | \n", "
2900 | \n", "0.118700 | \n", "
3000 | \n", "0.118800 | \n", "
"
],
"text/plain": [
"
Copy a token from your Hugging Face\ntokens page and paste it below.
Immediately click login after copying\nyour token or it might be stored in plain text in this notebook file.