{ "cells": [ { "cell_type": "code", "execution_count": 3, "metadata": { "id": "2eSvM9zX_2d3" }, "outputs": [], "source": [ "%%capture\n", "# Installs Unsloth, Xformers (Flash Attention) and all other packages!\n", "!pip install \"unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git\"\n", "\n", "# We have to check which Torch version for Xformers (2.3 -> 0.0.27)\n", "from torch import __version__; from packaging.version import Version as V\n", "xformers = \"xformers==0.0.27\" if V(__version__) < V(\"2.4.0\") else \"xformers\"\n", "!pip install --no-deps {xformers} trl peft accelerate bitsandbytes triton" ] }, { "cell_type": "markdown", "metadata": { "id": "r2v_X2fA0Df5" }, "source": [ "* We support Llama, Mistral, Phi-3, Gemma, Yi, DeepSeek, Qwen, TinyLlama, Vicuna, Open Hermes etc\n", "* We support 16bit LoRA or 4bit QLoRA. Both 2x faster.\n", "* `max_seq_length` can be set to anything, since we do automatic RoPE Scaling via [kaiokendev's](https://kaiokendev.github.io/til) method.\n", "* With [PR 26037](https://github.com/huggingface/transformers/pull/26037), we support downloading 4bit models **4x faster**! [Our repo](https://huggingface.co/unsloth) has Llama, Mistral 4bit models.\n", "* [**NEW**] We make Phi-3 Medium / Mini **2x faster**! 
See our [Phi-3 Medium notebook](https://colab.research.google.com/drive/1hhdhBa1j_hsymiW9m-WzxQtgqTH_NHqi?usp=sharing)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 531, "referenced_widgets": [ "d02e7fd6db8443aaba33f5c92409c008", "85f2c68a14074e778ce4314cc8af98f0", "ac4920c4534c421c971e515caf349b12", "eda939372e5c4c168386cef64bf40636", "c6e0f67f04b7449784b481309594e789", "e08fb1944c8a4b2f8f9017e875036bee", "7e4f1bd255f9468385fa23a694b93ece", "7236c426bfa54443a03452eb18101409", "52caacae4afd40ca9e8aa931746ec0c4", "3ed97edcb46740f283861db76ff9d8fd", "c83329fa445a4917b2d0748cd6dc84df", "0456b3d2bd084d0aacc7f9b13f87fab1", "abe11efdea8f491cb5808c4413e35724", "ffca90f4afdf464c8b93a78ca7be390e", "a0b0d7c078b74a02b4eae4be91593549", "27c376fd959a47b2ae5dee0af4257a38", "8c5ebc058cc946c2ab962ef69929bf54", "b2115fa9f27346c2a271f4614669e55d", "95bcb12cac7948a696abbde96e00a2ee", "e6ba37135ff8460a9d6f69f76dad7660", "fea7bf39b5f745f8a7511c70d6118670", "ae6efd8a31674b50a1383b59a161ef45", "154dd96089c346ec89f8b2af2fdf1013", "734d9d74eab44584964e2a3841dd9e40", "4c08f80700b8410688a9f4c38de72d20", "91bd8b1c56ea4451b6799b37a35fbd0a", "3c4a5863ddc34c489141908d79a4af57", "6b3741cf38ed4b79bc14f486bbd2cf65", "d27958462e5f4f07b6171d5768c02c7a", "45e96652eaf0497f93aa8fc17eb98434", "dd1802d1951543939297cdc29b16f3a0", "032f7643aca54b35bd3ac40a868eeb64", "a707331403664a4fa0eb096471a948a2", "7957d5afcd784ab39660867128dc5b33", "05eedcea87e54cebaeca0847421b3f39", "058a01509b6f4ae2bc3eaf155fe9a11d", "92baba9fc9e04e98bfc68b8cbc70ce17", "ddaaa03d303e42aead782ec10af49749", "053faf50ee4d4fbc83aeb6b3b456c6f7", "c660a0ad590e4a4aa187583b0bc0ac05", "f038959f25094f4a8415820b45ea61a6", "59572873b3c8421293623ca2a312a695", "e15f13b5b1784e49a89a64ea366b08f7", "e83297bad69845f989764b2e724ff037", "dd8a59a431934aa4a8542bef3e1b03cc", "7012f6da38444a88ab13fac24484f42d", "0e0158304e91455b841005e67215d1ef", 
"9d1483d3eb3842acbf8517526c86da54", "da14f6d9fa16493f8b565e0fcbbe9eb6", "c722650b4c6448d1a30641ee95d60c5d", "cd8947fd79f848c598dbe99357825f43", "84e783239c4448c5a4c52c410a037d7b", "6748263596d843f486e218137f0841f9", "6118dbdf5bdd4f14a68d521394a55c32", "8e305d156470464c9c6f66cd2624c008", "b8c8c167b1494ee1b9f612083556ee29", "ae86858a6f6649a189f45ac74ebd0f12", "7c2f83e31db3429e9542828feeaf0eb8", "7b547f9704954d1a8b0e621558322bbc", "e8d9c403f1f54e8b890cb43fa42627e5", "22c88a4dd2eb4be5aadf7e1cb8341416", "2449d931a2d84941883cae30ffef1c5e", "ff83829a055948cd8a46d1c30a100706", "7680773dcf0b4f6180240cbbbef7cfa2", "736fab037b4345288c317341652ebfcc", "0d0a06aeda4e451f8c5acd8ddd730610", "dfbaa3850dfa473cad45a21eedda2647", "cfa6206fa65f43368a67c8db4c7681be", "ad5593aa53154444a76fa255f3a8aded", "7630a16dafc145edbbd6967b97bf4b40", "a023f7d779444dfda14a9514440ce322", "ff4612ba3e084068b9baaf758e89e5fa", "dfe1a3d8d62146f1bf0bcc145023fd16", "abc52da02f574307911664b50fecceb9", "dd21902778cd4547b4ba2c01dfd9834d", "b194d9ca3d324638a7bf94ce89311851", "30819a0c189e4d9faf83bf8861bec8ab", "28895eedd87445edae9aeb951660ee60", "3dd4549228a440c0870d4772ce393a8a", "96759b0c24f04d6c9eb13bd65a9bd58a", "6101b1c53f3f4c7f82dcbe3492292702", "4533659224b04407a1cc46e1938837c5", "99b66efd718844d7a9c6ff08948d9ed6", "12f703cf17e54eb1a133cb46cf03e610", "4540ef70431a4db2ab9d43994d78f0ca", "5edbb685f0684a85b2b2fb7942604727", "e6236c639fe04273856cb3aa9f322a47", "64787fddab704048ae847b9e8ac61a78", "81dbc48041d04dff92da91970e4cba62", "b2a12aeec3814c1282fb501ff0615df2", "385d421be4ea4dd0a10b74a9fd2b5c71", "34959cfa4c5a4c239e225842b397245a", "429bde12d4084de6a2a1304c9f50dd48", "175df900a7734abcb20d66bb5d67ec70", "97cc4784a1cb4de2b1491f194976b128", "b658bdc0f70b4304873e4d65d1bc34c6", "254d93a15bcf4d7189d86211754ad227", "3b488153e813410fb7b73b5ae867b32c", "375c2d7650ed47fca2397c58471966d4" ] }, "id": "QmUBVEnvCDJv", "outputId": "16ff43ac-ca80-4270-d56b-87bdd052c184" }, "outputs": [ { "name": "stdout", "output_type": 
"stream", "text": [ "==((====))== Unsloth 2024.10.2: Fast Gemma patching. Transformers = 4.44.2.\n", " \\\\ /| GPU: Tesla T4. Max memory: 14.748 GB. Platform = Linux.\n", "O^O/ \\_/ \\ Pytorch: 2.4.1+cu121. CUDA = 7.5. CUDA Toolkit = 12.1.\n", "\\ / Bfloat16 = FALSE. FA [Xformers = 0.0.28.post1. FA2 = False]\n", " \"-____-\" Free Apache license: http://github.com/unslothai/unsloth\n", "Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "d02e7fd6db8443aaba33f5c92409c008", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Downloading shards: 0%| | 0/2 [00:00, ?it/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "0456b3d2bd084d0aacc7f9b13f87fab1", "version_major": 2, "version_minor": 0 }, "text/plain": [ "model-00001-of-00002.safetensors: 0%| | 0.00/4.95G [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "154dd96089c346ec89f8b2af2fdf1013", "version_major": 2, "version_minor": 0 }, "text/plain": [ "model-00002-of-00002.safetensors: 0%| | 0.00/67.1M [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stderr", "output_type": "stream", "text": [ "`config.hidden_act` is ignored, you should use `config.hidden_activation` instead.\n", "Gemma's activation function will be set to `gelu_pytorch_tanh`. 
Please, use\n", "`config.hidden_activation` if you want to override this behaviour.\n", "See https://github.com/huggingface/transformers/pull/29402 for more details.\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "7957d5afcd784ab39660867128dc5b33", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Loading checkpoint shards: 0%| | 0/2 [00:00, ?it/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "dd8a59a431934aa4a8542bef3e1b03cc", "version_major": 2, "version_minor": 0 }, "text/plain": [ "generation_config.json: 0%| | 0.00/137 [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "b8c8c167b1494ee1b9f612083556ee29", "version_major": 2, "version_minor": 0 }, "text/plain": [ "tokenizer_config.json: 0%| | 0.00/33.6k [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "dfbaa3850dfa473cad45a21eedda2647", "version_major": 2, "version_minor": 0 }, "text/plain": [ "tokenizer.model: 0%| | 0.00/4.24M [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "28895eedd87445edae9aeb951660ee60", "version_major": 2, "version_minor": 0 }, "text/plain": [ "special_tokens_map.json: 0%| | 0.00/636 [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "81dbc48041d04dff92da91970e4cba62", "version_major": 2, "version_minor": 0 }, "text/plain": [ "tokenizer.json: 0%| | 0.00/17.5M [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stderr", "output_type": "stream", "text": [ "Unsloth: We fixed a gradient accumulation bug, but it seems like you don't have the latest transformers version!\n", "Please update 
transformers via:\n", "`pip uninstall transformers -y && pip install --upgrade --no-cache-dir \"git+https://github.com/huggingface/transformers.git\"`\n" ] } ], "source": [ "from unsloth import FastLanguageModel\n", "import os\n", "import torch\n", "max_seq_length = 2048 # Choose any! We auto support RoPE Scaling internally!\n", "dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+\n", "load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.\n", "\n", "\n", "model, tokenizer = FastLanguageModel.from_pretrained(\n", " model_name = \"google/gemma-2b\",\n", " max_seq_length = max_seq_length,\n", " dtype = dtype,\n", " load_in_4bit = load_in_4bit,\n", " # Never hardcode credentials in a notebook: gated models (like Gemma) need a\n", " # Hugging Face token, so read it from the HF_TOKEN environment variable instead.\n", " token = os.environ.get(\"HF_TOKEN\"),\n", ")" ] }, { "cell_type": "markdown", "metadata": { "id": "SXd9bTZd1aaL" }, "source": [ "We now add LoRA adapters so we only need to update 1 to 10% of all parameters!" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "6bZsfBuZDeCL", "outputId": "a2885e8f-af98-4158-a33e-476e8a4d4aeb" }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Unsloth 2024.10.2 patched 18 layers with 18 QKV layers, 18 O layers and 18 MLP layers.\n" ] } ], "source": [ "model = FastLanguageModel.get_peft_model(\n", " model,\n", " r = 16, # Choose any number > 0 ! 
Suggested 8, 16, 32, 64, 128\n", " target_modules = [\"q_proj\", \"k_proj\", \"v_proj\", \"o_proj\",\n", " \"gate_proj\", \"up_proj\", \"down_proj\",],\n", " lora_alpha = 16,\n", " lora_dropout = 0, # Supports any, but = 0 is optimized\n", " bias = \"none\", # Supports any, but = \"none\" is optimized\n", " # [NEW] \"unsloth\" uses 30% less VRAM, fits 2x larger batch sizes!\n", " use_gradient_checkpointing = \"unsloth\", # True or \"unsloth\" for very long context\n", " random_state = 3407,\n", " use_rslora = False, # We support rank stabilized LoRA\n", " loftq_config = None, # And LoftQ\n", ")" ] }, { "cell_type": "markdown", "metadata": { "id": "vITh0KVJ10qX" }, "source": [ "\n", "### Data Prep\n", "We now use the Alpaca dataset from [yahma](https://huggingface.co/datasets/yahma/alpaca-cleaned), which is a filtered version of 52K of the original [Alpaca dataset](https://crfm.stanford.edu/2023/03/13/alpaca.html). You can replace this code section with your own data prep.\n", "\n", "**[NOTE]** To train only on completions (ignoring the user's input) read TRL's docs [here](https://huggingface.co/docs/trl/sft_trainer#train-on-completions-only).\n", "\n", "**[NOTE]** Remember to add the **EOS_TOKEN** to the tokenized output!! Otherwise you'll get infinite generations!\n", "\n", "If you want to use the `llama-3` template for ShareGPT datasets, try our conversational [notebook](https://colab.research.google.com/drive/1XamvWYinY6FOSX9GLvnqSjjsNflxdhNc?usp=sharing).\n", "\n", "For text completions like novel writing, try this [notebook](https://colab.research.google.com/drive/1ef-tab5bhkvWmBOObepl1WgJvfvSzn5Q?usp=sharing)." 
] }, { "cell_type": "code", "execution_count": 6, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 177, "referenced_widgets": [ "f56eabceed6d43c09991a5d45ae91ca7", "278528a1f9be4f4489d5e9f079c140fc", "eefe0d2480b241d790f5af2dd0608c56", "cf0c9a30ecf2479e9eabcd6897cd2ae4", "bdc21cb998cc46c0b5bbb164e97caa3c", "4c6e92a233d846e891361505fcb662f6", "250e9ac8e09647048c4d3d547c7d308b", "7f58e340056d4093b16b78764cb9b250", "9644fcbdddd74d03a4ea5dd4d08ab503", "2607a6cb68324b2e914b2215ff21718c", "c51aeb5cc9134b2eb14be937f62f5324", "087b8ea60c154db78830e2f0951f1149", "864144823c3f412e9a9e633433335de4", "2a88ee04ec8a45b2b7501d818a24e590", "1fee9ee653b1410ba830f529c91a81cd", "dedd0dad381e4eada83049c050d928ef", "a001e114cb814ad1a365b8573db7e5d1", "25223801e9fe4259bfc2fb6806126925", "2bb29b0a85d24902921d9c775a043610", "5dbffddd42d74c2395a8780eb97d8621", "cddb97a4075d4f7c800a4adf2b3d51ad", "de3846b29fb74247bb54c162e9d269e1", "cd16fe5996ad436eaa71f061eb636080", "346ffd6c0a8d4294ae3b34039cd24d35", "0faa89bfff194caeb94c8aafbe01ef8d", "5ebb319b66fa4654938f632e4c835873", "298ecc6277b940d09a54dd3e40b9eea2", "d504a1c60ce34be6b69ca3f71350b441", "74c2344516804d3aa1fc5790ae0be7c8", "fb3312065b134aea8828e5ea365a034b", "5949f23ddee64ef58372bde70afe326e", "a1f7c20dcbfe4985901ccaa9e72d0e8f", "48ca9d738b594844a0629a32c6d46f00", "390ebf5b320044599a1f9745ac097920", "fbf09dd6e12b44f2af819103adf27bde", "f41f16ce15844fe2aaeab58ec2247169", "ad032e4f771d4e0badf64b954b890954", "82635117e1584dac84f12bb3f32aefe8", "f7ca3324dd5f4aa496c08c5cb68dec7d", "99300f8f51af4986b96d661ed4e3ab96", "b7dd1814ee244ab2aa713e69a8949757", "57d7e970f82b459caadee910456f0a3d", "eec90c452b094e4aac9a361d27ee6f1c", "6a9632cbe57642f094d213b338a2b9fb", "d120909e42be4e09a5a9a0c65dd1b8c9", "cb217356b0484cb9b05d10527101ba23", "c8ce6a5e365049a2b7a5f4233f96dc75", "01df545e3c734457b40343fd1ff92235", "83994791f7f648a299ea04d5a1892287", "acea1929d3684ed2bf879325196984ba", "31f4e7ccbb7b416c8d2d9c8f961ca21e", 
"0ea7da5f90894fb6bc81214c3d47e397", "76da0a2ba14a49e1b6042d3684dc2762", "c8581dca214c412caf7950ab81b66f8a", "14dd20bafc03480a9c6bb8553419a10c" ] }, "id": "LjY75GoYUCB8", "outputId": "af248563-7622-40cc-9b0f-f40a10f822e2" }, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "f56eabceed6d43c09991a5d45ae91ca7", "version_major": 2, "version_minor": 0 }, "text/plain": [ "README.md: 0%| | 0.00/450 [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "087b8ea60c154db78830e2f0951f1149", "version_major": 2, "version_minor": 0 }, "text/plain": [ "train-00000-of-00002.parquet: 0%| | 0.00/158M [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "cd16fe5996ad436eaa71f061eb636080", "version_major": 2, "version_minor": 0 }, "text/plain": [ "train-00001-of-00002.parquet: 0%| | 0.00/144M [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "390ebf5b320044599a1f9745ac097920", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Generating train split: 0%| | 0/172026 [00:00, ? examples/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "d120909e42be4e09a5a9a0c65dd1b8c9", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Map: 0%| | 0/172026 [00:00, ? examples/s]" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "alpaca_prompt = \"\"\"Below is an instruction that describes a task, paired with an input that provides further context. 
Write a response that appropriately completes the request.\n", "\n", "### Instruction:\n", "{}\n", "\n", "### Input:\n", "{}\n", "\n", "### Response:\n", "{}\"\"\"\n", "\n", "EOS_TOKEN = tokenizer.eos_token # Must add EOS_TOKEN\n", "def formatting_prompts_func(examples):\n", " instructions = examples[\"instruction\"]\n", " inputs = examples[\"input\"]\n", " outputs = examples[\"output\"]\n", " texts = []\n", " for instruction, input, output in zip(instructions, inputs, outputs):\n", " # Must add EOS_TOKEN, otherwise your generation will go on forever!\n", " text = alpaca_prompt.format(instruction, input, output) + EOS_TOKEN\n", " texts.append(text)\n", " return { \"text\" : texts, }\n", "pass\n", "\n", "from datasets import load_dataset\n", "dataset = load_dataset(\"BanglaLLM/bangla-alpaca-orca\", split = \"train\")\n", "dataset = dataset.map(formatting_prompts_func, batched = True,)" ] }, { "cell_type": "markdown", "metadata": { "id": "idAEIeSQ3xdS" }, "source": [ "\n", "### Train the model\n", "Now let's use Huggingface TRL's `SFTTrainer`! More docs here: [TRL SFT docs](https://huggingface.co/docs/trl/sft_trainer). We do 160 steps to speed things up, but you can set `num_train_epochs=1` for a full run and turn off `max_steps` by setting it to `None`. We also support TRL's `DPOTrainer`!" 
] }, { "cell_type": "code", "execution_count": 7, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 66, "referenced_widgets": [ "38b006b55c644ebdb6b010f1483d2dd5", "2f66d9ba81ff44e8b8a75186b06f1dab", "cfd7d85c85f44fbe87a72ff8174e5a47", "f4bc584c945546528904de08cee38f9c", "8e13342de0cb4657894ce5ea95c899dd", "627a338e41144f36a8a830a57281ee0b", "03b3e9dc42b145e485f628ff914ed9e5", "09758d6007f748b8897a3dfc1325efaf", "09a65291c7684e0eb41299243c6d1854", "eb83291e2c104a498dfc1e307edea512", "fda7d087bc0c4fa8a7d69ac87ad857b7" ] }, "id": "95_Nn-89DhsL", "outputId": "18daabf1-0894-4321-c34e-0907c3f7cfb3" }, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "38b006b55c644ebdb6b010f1483d2dd5", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Map (num_proc=2): 0%| | 0/172026 [00:00, ? examples/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stderr", "output_type": "stream", "text": [ "max_steps is given, it will override any value given in num_train_epochs\n" ] } ], "source": [ "from trl import SFTTrainer\n", "from transformers import TrainingArguments\n", "from unsloth import is_bfloat16_supported\n", "\n", "trainer = SFTTrainer(\n", " model = model,\n", " tokenizer = tokenizer,\n", " train_dataset = dataset,\n", " dataset_text_field = \"text\",\n", " max_seq_length = max_seq_length,\n", " dataset_num_proc = 2,\n", " packing = False, # Can make training 5x faster for short sequences.\n", " args = TrainingArguments(\n", " per_device_train_batch_size = 2,\n", " gradient_accumulation_steps = 4,\n", " warmup_steps = 5,\n", " max_steps = 160,\n", " learning_rate = 2e-4,\n", " fp16 = not is_bfloat16_supported(),\n", " bf16 = is_bfloat16_supported(),\n", " logging_steps = 1,\n", " optim = \"adamw_8bit\",\n", " weight_decay = 0.01,\n", " lr_scheduler_type = \"linear\",\n", " seed = 3407,\n", " output_dir = \"outputs\",\n", " ),\n", ")" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { 
"cellView": "form", "colab": { "base_uri": "https://localhost:8080/" }, "id": "2ejIt2xSNKKp", "outputId": "0ada3646-adbf-459f-991e-61186268a5fe" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "GPU = Tesla T4. Max memory = 14.748 GB.\n", "2.34 GB of memory reserved.\n" ] } ], "source": [ "#@title Show current memory stats\n", "gpu_stats = torch.cuda.get_device_properties(0)\n", "start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)\n", "max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)\n", "print(f\"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.\")\n", "print(f\"{start_gpu_memory} GB of memory reserved.\")" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 1000 }, "id": "yqxqAZ7KJ4oL", "outputId": "95d4d03c-200d-42ec-f8b5-190ab2606264" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "**** Unsloth: Please use our fixed gradient_accumulation_steps by updating transformers and Unsloth!\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "==((====))== Unsloth - 2x faster free finetuning | Num GPUs = 1\n", " \\\\ /| Num examples = 172,026 | Num Epochs = 1\n", "O^O/ \\_/ \\ Batch size per device = 2 | Gradient Accumulation steps = 4\n", "\\ / Total batch size = 8 | Total steps = 160\n", " \"-____-\" Number of trainable parameters = 19,611,648\n" ] }, { "data": { "text/html": [ "\n", "
Step | \n", "Training Loss | \n", "
---|---|
1 | \n", "1.675500 | \n", "
2 | \n", "1.778500 | \n", "
3 | \n", "2.056900 | \n", "
4 | \n", "1.955700 | \n", "
5 | \n", "1.856800 | \n", "
6 | \n", "1.785000 | \n", "
7 | \n", "1.896900 | \n", "
8 | \n", "1.718600 | \n", "
9 | \n", "1.768200 | \n", "
10 | \n", "1.407700 | \n", "
11 | \n", "1.649500 | \n", "
12 | \n", "1.342500 | \n", "
13 | \n", "1.529600 | \n", "
14 | \n", "1.449200 | \n", "
15 | \n", "1.439700 | \n", "
16 | \n", "1.137000 | \n", "
17 | \n", "1.436500 | \n", "
18 | \n", "1.501400 | \n", "
19 | \n", "1.310200 | \n", "
20 | \n", "1.245700 | \n", "
21 | \n", "1.205500 | \n", "
22 | \n", "1.322600 | \n", "
23 | \n", "1.573100 | \n", "
24 | \n", "1.172300 | \n", "
25 | \n", "1.360100 | \n", "
26 | \n", "1.248600 | \n", "
27 | \n", "1.005900 | \n", "
28 | \n", "1.294200 | \n", "
29 | \n", "1.507300 | \n", "
30 | \n", "1.290300 | \n", "
31 | \n", "1.035400 | \n", "
32 | \n", "1.219600 | \n", "
33 | \n", "1.320200 | \n", "
34 | \n", "1.560700 | \n", "
35 | \n", "1.717500 | \n", "
36 | \n", "1.284100 | \n", "
37 | \n", "1.344400 | \n", "
38 | \n", "1.239000 | \n", "
39 | \n", "1.223900 | \n", "
40 | \n", "0.851200 | \n", "
41 | \n", "1.513000 | \n", "
42 | \n", "1.361100 | \n", "
43 | \n", "0.828300 | \n", "
44 | \n", "0.911600 | \n", "
45 | \n", "1.201600 | \n", "
46 | \n", "0.865500 | \n", "
47 | \n", "0.934000 | \n", "
48 | \n", "1.231800 | \n", "
49 | \n", "1.268500 | \n", "
50 | \n", "1.344600 | \n", "
51 | \n", "1.365500 | \n", "
52 | \n", "1.300500 | \n", "
53 | \n", "1.251400 | \n", "
54 | \n", "1.167400 | \n", "
55 | \n", "1.396300 | \n", "
56 | \n", "1.325000 | \n", "
57 | \n", "1.214300 | \n", "
58 | \n", "0.823800 | \n", "
59 | \n", "1.290900 | \n", "
60 | \n", "1.393100 | \n", "
61 | \n", "1.085100 | \n", "
62 | \n", "1.122200 | \n", "
63 | \n", "0.821200 | \n", "
64 | \n", "1.174700 | \n", "
65 | \n", "1.503200 | \n", "
66 | \n", "1.187100 | \n", "
67 | \n", "1.421600 | \n", "
68 | \n", "0.937800 | \n", "
69 | \n", "1.355600 | \n", "
70 | \n", "1.004400 | \n", "
71 | \n", "1.092600 | \n", "
72 | \n", "0.764000 | \n", "
73 | \n", "1.459200 | \n", "
74 | \n", "1.306600 | \n", "
75 | \n", "1.431100 | \n", "
76 | \n", "0.920500 | \n", "
77 | \n", "1.162500 | \n", "
78 | \n", "1.095700 | \n", "
79 | \n", "1.414600 | \n", "
80 | \n", "1.334700 | \n", "
81 | \n", "1.092000 | \n", "
82 | \n", "1.433400 | \n", "
83 | \n", "0.868900 | \n", "
84 | \n", "1.260300 | \n", "
85 | \n", "0.944800 | \n", "
86 | \n", "1.423900 | \n", "
87 | \n", "1.094900 | \n", "
88 | \n", "1.252000 | \n", "
89 | \n", "1.168800 | \n", "
90 | \n", "1.027600 | \n", "
91 | \n", "1.259300 | \n", "
92 | \n", "1.369600 | \n", "
93 | \n", "1.026800 | \n", "
94 | \n", "1.270400 | \n", "
95 | \n", "1.282100 | \n", "
96 | \n", "1.340300 | \n", "
97 | \n", "1.171200 | \n", "
98 | \n", "1.471200 | \n", "
99 | \n", "1.032800 | \n", "
100 | \n", "1.374100 | \n", "
101 | \n", "1.135700 | \n", "
102 | \n", "0.997500 | \n", "
103 | \n", "1.421100 | \n", "
104 | \n", "1.102700 | \n", "
105 | \n", "1.072300 | \n", "
106 | \n", "1.324000 | \n", "
107 | \n", "1.189700 | \n", "
108 | \n", "1.228900 | \n", "
109 | \n", "1.217100 | \n", "
110 | \n", "1.244500 | \n", "
111 | \n", "0.803400 | \n", "
112 | \n", "1.211600 | \n", "
113 | \n", "1.172700 | \n", "
114 | \n", "1.350600 | \n", "
115 | \n", "1.199600 | \n", "
116 | \n", "1.155800 | \n", "
117 | \n", "1.126200 | \n", "
118 | \n", "1.199100 | \n", "
119 | \n", "1.094700 | \n", "
120 | \n", "1.342100 | \n", "
121 | \n", "0.935500 | \n", "
122 | \n", "1.205900 | \n", "
123 | \n", "1.217400 | \n", "
124 | \n", "1.180500 | \n", "
125 | \n", "1.209800 | \n", "
126 | \n", "0.995800 | \n", "
127 | \n", "1.342800 | \n", "
128 | \n", "1.412200 | \n", "
129 | \n", "1.071800 | \n", "
130 | \n", "0.845400 | \n", "
131 | \n", "0.858500 | \n", "
132 | \n", "1.128800 | \n", "
133 | \n", "1.043600 | \n", "
134 | \n", "1.176000 | \n", "
135 | \n", "1.143900 | \n", "
136 | \n", "1.269200 | \n", "
137 | \n", "1.309100 | \n", "
138 | \n", "1.182100 | \n", "
139 | \n", "1.228500 | \n", "
140 | \n", "1.079800 | \n", "
141 | \n", "1.193200 | \n", "
142 | \n", "0.869800 | \n", "
143 | \n", "1.181900 | \n", "
144 | \n", "1.087600 | \n", "
145 | \n", "1.343700 | \n", "
146 | \n", "1.288300 | \n", "
147 | \n", "1.187700 | \n", "
148 | \n", "1.190000 | \n", "
149 | \n", "1.382100 | \n", "
150 | \n", "1.226200 | \n", "
151 | \n", "0.994000 | \n", "
152 | \n", "1.172000 | \n", "
153 | \n", "1.481800 | \n", "
154 | \n", "1.274100 | \n", "
155 | \n", "1.173100 | \n", "
156 | \n", "1.375400 | \n", "
157 | \n", "1.176000 | \n", "
158 | \n", "1.210200 | \n", "
159 | \n", "1.283300 | \n", "
160 | \n", "1.183500 | \n", "
"
],
"text/plain": [
"