diff --git "a/notebooks/07r2_tune-lf-py3.11.ipynb" "b/notebooks/07r2_tune-lf-py3.11.ipynb" deleted file mode 100644--- "a/notebooks/07r2_tune-lf-py3.11.ipynb" +++ /dev/null @@ -1,9938 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "application/vnd.databricks.v1+cell": { - "cellMetadata": {}, - "inputWidgets": {}, - "nuid": "0ea8b46b-839b-445b-8043-ccdf4e920ace", - "showTitle": false, - "title": "" - } - }, - "outputs": [], - "source": [ - "%load_ext autoreload\n", - "%autoreload 2" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "application/vnd.databricks.v1+cell": { - "cellMetadata": {}, - "inputWidgets": {}, - "nuid": "6d394937-6c99-4a7c-9d32-7600a280032f", - "showTitle": false, - "title": "" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "workding dir: /home/inflaton/code/projects/courses/llm-finetuning\n" - ] - } - ], - "source": [ - "import os\n", - "import sys\n", - "from pathlib import Path\n", - "\n", - "workding_dir = str(Path.cwd().parent)\n", - "os.chdir(workding_dir)\n", - "sys.path.append(workding_dir)\n", - "print(\"workding dir:\", workding_dir)" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "application/vnd.databricks.v1+cell": { - "cellMetadata": {}, - "inputWidgets": {}, - "nuid": "9f67ec60-2f24-411c-84eb-0dd664b44775", - "showTitle": false, - "title": "" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "loading env vars from: /home/inflaton/code/projects/courses/llm-finetuning/.env\n" - ] - }, - { - "data": { - "text/plain": [ - "True" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from dotenv import find_dotenv, load_dotenv\n", - "\n", - "found_dotenv = find_dotenv(\".env\")\n", - "\n", - "if len(found_dotenv) == 0:\n", - " found_dotenv = find_dotenv(\".env.example\")\n", - "print(f\"loading env vars from: {found_dotenv}\")\n", - "load_dotenv(found_dotenv, override=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "application/vnd.databricks.v1+cell": { - "cellMetadata": {}, - "inputWidgets": {}, - "nuid": "f1597656-8042-4878-9d3b-9ebfb8dd86dc", - "showTitle": false, - "title": "" - } - }, - "outputs": [ - { - "data": { - "text/plain": [ - "('unsloth/Qwen2-0.5B-Instruct',\n", - " True,\n", - " None,\n", - " None,\n", - " 2048,\n", - " 6,\n", - " None,\n", - " 'datasets/mac/mac.tsv',\n", - " 'results/mac-results_lf-r2.csv')" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "import os\n", - "\n", - "model_name = os.getenv(\"MODEL_NAME\")\n", - "token = os.getenv(\"HF_TOKEN\") or None\n", - "load_in_4bit = os.getenv(\"LOAD_IN_4BIT\") == \"true\"\n", - "local_model = os.getenv(\"LOCAL_MODEL\")\n", - "hub_model = os.getenv(\"HUB_MODEL\")\n", - "num_train_epochs = int(os.getenv(\"NUM_TRAIN_EPOCHS\") or 0)\n", - "data_path = os.getenv(\"DATA_PATH\")\n", - "results_path = os.getenv(\"RESULTS_PATH\")\n", - "\n", - "max_seq_length = 2048 # Choose any! We auto support RoPE Scaling internally!\n", - "dtype = (\n", - " None # None for auto detection. 
Float16 for Tesla T4, V100, Bfloat16 for Ampere+\n", - ")\n", - "\n", - "model_name, load_in_4bit, local_model, hub_model, max_seq_length, num_train_epochs, dtype, data_path, results_path" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Thu Jul 4 11:06:16 2024 \n", - "+---------------------------------------------------------------------------------------+\n", - "| NVIDIA-SMI 545.23.07 Driver Version: 546.12 CUDA Version: 12.3 |\n", - "|-----------------------------------------+----------------------+----------------------+\n", - "| GPU Name Persistence-M | Bus-Id Disp.A | Volatile Uncorr. ECC |\n", - "| Fan Temp Perf Pwr:Usage/Cap | Memory-Usage | GPU-Util Compute M. |\n", - "| | | MIG M. |\n", - "|=========================================+======================+======================|\n", - "| 0 NVIDIA GeForce RTX 4080 ... On | 00000000:01:00.0 Off | N/A |\n", - "| N/A 52C P8 3W / 150W | 355MiB / 12282MiB | 0% Default |\n", - "| | | N/A |\n", - "+-----------------------------------------+----------------------+----------------------+\n", - " \n", - "+---------------------------------------------------------------------------------------+\n", - "| Processes: |\n", - "| GPU GI CI PID Type Process name GPU Memory |\n", - "| ID ID Usage |\n", - "|=======================================================================================|\n", - "| No running processes found |\n", - "+---------------------------------------------------------------------------------------+\n" - ] - } - ], - "source": [ - "!nvidia-smi" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "๐Ÿฆฅ Unsloth: Will patch your computer to enable 2x faster free finetuning.\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[nltk_data] Downloading package wordnet to /home/inflaton/nltk_data...\n", - "[nltk_data] Package wordnet is already up-to-date!\n", - "[nltk_data] Downloading package punkt to /home/inflaton/nltk_data...\n", - "[nltk_data] Package punkt is already up-to-date!\n", - "[nltk_data] Downloading package omw-1.4 to /home/inflaton/nltk_data...\n", - "[nltk_data] Package omw-1.4 is already up-to-date!\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "loading /home/inflaton/code/projects/courses/llm-finetuning/llm_toolkit/translation_engine.py\n", - "loading train/test data files\n", - "DatasetDict({\n", - " train: Dataset({\n", - " features: ['chinese', 'english'],\n", - " num_rows: 4528\n", - " })\n", - " test: Dataset({\n", - " features: ['chinese', 'english'],\n", - " num_rows: 1133\n", - " })\n", - "})\n" - ] - } - ], - "source": [ - "from llm_toolkit.translation_engine import load_translation_dataset\n", - "\n", - "dataset = load_translation_dataset(data_path)" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [], - "source": [ - "df = dataset[\"train\"].to_pandas()" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd\n", - "\n", - "df_alpaca = pd.DataFrame({\"instruction\": [\"Please translate the following Chinese text into English and provide only the translated content, nothing else.\"]*len(df)})" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ 
- "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
instructioninputoutput
0Please translate the following Chinese text in...ๅ…จไป—็€็‹ไป™ๆญๆ•‘ใ€‚Because I was protected by a fox fairy.
1Please translate the following Chinese text in...่ฟ‡ๅŽ๏ผŒ่กจๅ“ฅๅ‘Š่ฏ‰ๅฅนไฟฉ๏ผŒ่ฟ™ไบบๆ˜ฏๅฏผๆผ”๏ผŒๅœจๅค–ๅ›ฝ็•™่ฟ‡ๅญฆ็š„๏ผŒ่ฟ˜ไผš็ผ–ๅ‰ง๏ผŒไปŠๅคฉๆ‹็š„่ฟ™ๆˆ๏ผŒๅฐฑๆ˜ฏไป–่‡ช็ผ–่‡ชๅฏผ็š„ใ€‚He was the director, the cousin later told the...
2Please translate the following Chinese text in...่ฟ™ๅ‡คๅงๅฟฝ็„ถๆƒณ่ตทไธ€ไปถไบ‹ๆฅ๏ผŒไพฟๅ‘็ช—ๅค–ๅซ๏ผšโ€œ่“‰ๅ„ฟๅ›žๆฅ๏ผโ€Xi-feng suddenly seemed to remember something,...
3Please translate the following Chinese text in...ไธ‰ไธช่€็บขๅซๅ…ต่ตฐๅˆฐๅถๆ–‡ๆด้ขๅ‰๏ผŒ้ขๅฏน็€ๅฅน็ซ™ๆˆไบ†ไธ€ๆŽ’โ€”โ€”ๅฝ“ๅนด๏ผŒๅฅนไปฌไนŸๆ˜ฏ่ฟ™ๆ ท้ขๅฏนๅถๅ“ฒๆณฐ็š„โ€”โ€”่ฏ•ๅ›พๅ†็Žฐ...The three old Red Guards stood in front of Ye ...
4Please translate the following Chinese text in...็จ‹ๅ…ˆ็”Ÿ็…งๅ•ๅ…จๆ”ถ๏ผŒ้ƒฝๆ˜ฏไธ€ไธชโ€œ่ฐขโ€ๅญ—๏ผŒ็„ถๅŽ้—ฎ็Ž‹็ฆ็‘ถๆœ‰ไป€ไนˆ่ฏ่ฏดใ€‚Mr. Cheng accepted their toast with equanimity...
............
4523Please translate the following Chinese text in...ๅค–่พนๆœ‰ไธคๅผ ่…ฟๆญช้ข่ฃ‚็š„ๅ…ซไป™ๆกŒๅญ๏ผŒๆกŒๆ—่ƒกไนฑๆก็€ๅ‡ ๆก็‹ญ็ช„็š„ๆœจๅ‡ณใ€‚Two rickety tables with scarred tops and a few...
4524Please translate the following Chinese text in...่ดพ็‘žๅฌไบ†๏ผŒๅ–œ็š„ๆŠ“่€ณๆŒ ่…ฎใ€‚At this last remark Jia Rui positively scratch...
4525Please translate the following Chinese text in...ๅฌไบ†่ฟ™ๆ ท็š„่ฏ„ไปท๏ผŒๆˆ‘ไปฌๅฟƒๆƒ…ๆฟ€ๅŠจ๏ผŒๅ’Œๅคงๅฎถไธ€่ตทๆŒฏ่‡‚้ซ˜ๅ‘ผ๏ผšๆ‰“ๅ€’็Ž‹ไบŒ๏ผHearing comments like this, our emotions were ...
4526Please translate the following Chinese text in...ๆตท่€ๅ…ฌ้“๏ผšโ€œ่ฎฐไฝไบ†ๅ—๏ผŸโ€'Can you remember that?'
4527Please translate the following Chinese text in...ไธŠ้ข่ฏด๏ผŒ่ฟ™ๆ ทๅ†™็ผบๅฐ‘็ป†่Š‚ใ€‚This time the opinions from above said it need...
\n", - "

4528 rows ร— 3 columns

\n", - "
" - ], - "text/plain": [ - " instruction \\\n", - "0 Please translate the following Chinese text in... \n", - "1 Please translate the following Chinese text in... \n", - "2 Please translate the following Chinese text in... \n", - "3 Please translate the following Chinese text in... \n", - "4 Please translate the following Chinese text in... \n", - "... ... \n", - "4523 Please translate the following Chinese text in... \n", - "4524 Please translate the following Chinese text in... \n", - "4525 Please translate the following Chinese text in... \n", - "4526 Please translate the following Chinese text in... \n", - "4527 Please translate the following Chinese text in... \n", - "\n", - " input \\\n", - "0 ๅ…จไป—็€็‹ไป™ๆญๆ•‘ใ€‚ \n", - "1 ่ฟ‡ๅŽ๏ผŒ่กจๅ“ฅๅ‘Š่ฏ‰ๅฅนไฟฉ๏ผŒ่ฟ™ไบบๆ˜ฏๅฏผๆผ”๏ผŒๅœจๅค–ๅ›ฝ็•™่ฟ‡ๅญฆ็š„๏ผŒ่ฟ˜ไผš็ผ–ๅ‰ง๏ผŒไปŠๅคฉๆ‹็š„่ฟ™ๆˆ๏ผŒๅฐฑๆ˜ฏไป–่‡ช็ผ–่‡ชๅฏผ็š„ใ€‚ \n", - "2 ่ฟ™ๅ‡คๅงๅฟฝ็„ถๆƒณ่ตทไธ€ไปถไบ‹ๆฅ๏ผŒไพฟๅ‘็ช—ๅค–ๅซ๏ผšโ€œ่“‰ๅ„ฟๅ›žๆฅ๏ผโ€ \n", - "3 ไธ‰ไธช่€็บขๅซๅ…ต่ตฐๅˆฐๅถๆ–‡ๆด้ขๅ‰๏ผŒ้ขๅฏน็€ๅฅน็ซ™ๆˆไบ†ไธ€ๆŽ’โ€”โ€”ๅฝ“ๅนด๏ผŒๅฅนไปฌไนŸๆ˜ฏ่ฟ™ๆ ท้ขๅฏนๅถๅ“ฒๆณฐ็š„โ€”โ€”่ฏ•ๅ›พๅ†็Žฐ... \n", - "4 ็จ‹ๅ…ˆ็”Ÿ็…งๅ•ๅ…จๆ”ถ๏ผŒ้ƒฝๆ˜ฏไธ€ไธชโ€œ่ฐขโ€ๅญ—๏ผŒ็„ถๅŽ้—ฎ็Ž‹็ฆ็‘ถๆœ‰ไป€ไนˆ่ฏ่ฏดใ€‚ \n", - "... ... \n", - "4523 ๅค–่พนๆœ‰ไธคๅผ ่…ฟๆญช้ข่ฃ‚็š„ๅ…ซไป™ๆกŒๅญ๏ผŒๆกŒๆ—่ƒกไนฑๆก็€ๅ‡ ๆก็‹ญ็ช„็š„ๆœจๅ‡ณใ€‚ \n", - "4524 ่ดพ็‘žๅฌไบ†๏ผŒๅ–œ็š„ๆŠ“่€ณๆŒ ่…ฎใ€‚ \n", - "4525 ๅฌไบ†่ฟ™ๆ ท็š„่ฏ„ไปท๏ผŒๆˆ‘ไปฌๅฟƒๆƒ…ๆฟ€ๅŠจ๏ผŒๅ’Œๅคงๅฎถไธ€่ตทๆŒฏ่‡‚้ซ˜ๅ‘ผ๏ผšๆ‰“ๅ€’็Ž‹ไบŒ๏ผ \n", - "4526 ๆตท่€ๅ…ฌ้“๏ผšโ€œ่ฎฐไฝไบ†ๅ—๏ผŸโ€ \n", - "4527 ไธŠ้ข่ฏด๏ผŒ่ฟ™ๆ ทๅ†™็ผบๅฐ‘็ป†่Š‚ใ€‚ \n", - "\n", - " output \n", - "0 Because I was protected by a fox fairy. \n", - "1 He was the director, the cousin later told the... \n", - "2 Xi-feng suddenly seemed to remember something,... \n", - "3 The three old Red Guards stood in front of Ye ... \n", - "4 Mr. Cheng accepted their toast with equanimity... \n", - "... ... \n", - "4523 Two rickety tables with scarred tops and a few... \n", - "4524 At this last remark Jia Rui positively scratch... \n", - "4525 Hearing comments like this, our emotions were ... \n", - "4526 'Can you remember that?' \n", - "4527 This time the opinions from above said it need... \n", - "\n", - "[4528 rows x 3 columns]" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df_alpaca[\"input\"] = df[\"chinese\"]\n", - "df_alpaca[\"output\"] = df[\"english\"]\n", - "df_alpaca" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [], - "source": [ - "df_alpaca.to_json(\n", - " \"llama-factory/data/alpaca_mac.json\", orient=\"records\", lines=False, indent=2\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [], - "source": [ - "df = pd.read_json(\"llama-factory/data/alpaca_mac.json\", orient=\"records\", lines=False)" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
instructioninputoutput
0Please translate the following Chinese text in...ๅ…จไป—็€็‹ไป™ๆญๆ•‘ใ€‚Because I was protected by a fox fairy.
1Please translate the following Chinese text in...่ฟ‡ๅŽ๏ผŒ่กจๅ“ฅๅ‘Š่ฏ‰ๅฅนไฟฉ๏ผŒ่ฟ™ไบบๆ˜ฏๅฏผๆผ”๏ผŒๅœจๅค–ๅ›ฝ็•™่ฟ‡ๅญฆ็š„๏ผŒ่ฟ˜ไผš็ผ–ๅ‰ง๏ผŒไปŠๅคฉๆ‹็š„่ฟ™ๆˆ๏ผŒๅฐฑๆ˜ฏไป–่‡ช็ผ–่‡ชๅฏผ็š„ใ€‚He was the director, the cousin later told the...
2Please translate the following Chinese text in...่ฟ™ๅ‡คๅงๅฟฝ็„ถๆƒณ่ตทไธ€ไปถไบ‹ๆฅ๏ผŒไพฟๅ‘็ช—ๅค–ๅซ๏ผšโ€œ่“‰ๅ„ฟๅ›žๆฅ๏ผโ€Xi-feng suddenly seemed to remember something,...
3Please translate the following Chinese text in...ไธ‰ไธช่€็บขๅซๅ…ต่ตฐๅˆฐๅถๆ–‡ๆด้ขๅ‰๏ผŒ้ขๅฏน็€ๅฅน็ซ™ๆˆไบ†ไธ€ๆŽ’โ€”โ€”ๅฝ“ๅนด๏ผŒๅฅนไปฌไนŸๆ˜ฏ่ฟ™ๆ ท้ขๅฏนๅถๅ“ฒๆณฐ็š„โ€”โ€”่ฏ•ๅ›พๅ†็Žฐ...The three old Red Guards stood in front of Ye ...
4Please translate the following Chinese text in...็จ‹ๅ…ˆ็”Ÿ็…งๅ•ๅ…จๆ”ถ๏ผŒ้ƒฝๆ˜ฏไธ€ไธชโ€œ่ฐขโ€ๅญ—๏ผŒ็„ถๅŽ้—ฎ็Ž‹็ฆ็‘ถๆœ‰ไป€ไนˆ่ฏ่ฏดใ€‚Mr. Cheng accepted their toast with equanimity...
\n", - "
" - ], - "text/plain": [ - " instruction \\\n", - "0 Please translate the following Chinese text in... \n", - "1 Please translate the following Chinese text in... \n", - "2 Please translate the following Chinese text in... \n", - "3 Please translate the following Chinese text in... \n", - "4 Please translate the following Chinese text in... \n", - "\n", - " input \\\n", - "0 ๅ…จไป—็€็‹ไป™ๆญๆ•‘ใ€‚ \n", - "1 ่ฟ‡ๅŽ๏ผŒ่กจๅ“ฅๅ‘Š่ฏ‰ๅฅนไฟฉ๏ผŒ่ฟ™ไบบๆ˜ฏๅฏผๆผ”๏ผŒๅœจๅค–ๅ›ฝ็•™่ฟ‡ๅญฆ็š„๏ผŒ่ฟ˜ไผš็ผ–ๅ‰ง๏ผŒไปŠๅคฉๆ‹็š„่ฟ™ๆˆ๏ผŒๅฐฑๆ˜ฏไป–่‡ช็ผ–่‡ชๅฏผ็š„ใ€‚ \n", - "2 ่ฟ™ๅ‡คๅงๅฟฝ็„ถๆƒณ่ตทไธ€ไปถไบ‹ๆฅ๏ผŒไพฟๅ‘็ช—ๅค–ๅซ๏ผšโ€œ่“‰ๅ„ฟๅ›žๆฅ๏ผโ€ \n", - "3 ไธ‰ไธช่€็บขๅซๅ…ต่ตฐๅˆฐๅถๆ–‡ๆด้ขๅ‰๏ผŒ้ขๅฏน็€ๅฅน็ซ™ๆˆไบ†ไธ€ๆŽ’โ€”โ€”ๅฝ“ๅนด๏ผŒๅฅนไปฌไนŸๆ˜ฏ่ฟ™ๆ ท้ขๅฏนๅถๅ“ฒๆณฐ็š„โ€”โ€”่ฏ•ๅ›พๅ†็Žฐ... \n", - "4 ็จ‹ๅ…ˆ็”Ÿ็…งๅ•ๅ…จๆ”ถ๏ผŒ้ƒฝๆ˜ฏไธ€ไธชโ€œ่ฐขโ€ๅญ—๏ผŒ็„ถๅŽ้—ฎ็Ž‹็ฆ็‘ถๆœ‰ไป€ไนˆ่ฏ่ฏดใ€‚ \n", - "\n", - " output \n", - "0 Because I was protected by a fox fairy. \n", - "1 He was the director, the cousin later told the... \n", - "2 Xi-feng suddenly seemed to remember something,... \n", - "3 The three old Red Guards stood in front of Ye ... \n", - "4 Mr. Cheng accepted their toast with equanimity... " - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Python 3.11.9\n", - "\u001b[33mWARNING: Package(s) not found: flash-attn\u001b[0m\u001b[33m\n", - "\u001b[0mCPU times: user 5.39 ms, sys: 19.5 ms, total: 24.9 ms\n", - "Wall time: 527 ms\n" - ] - } - ], - "source": [ - "%%time\n", - "!python --version\n", - "!pip show flash-attn" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Current Directory:\n", - "/home/inflaton/code/projects/courses/llm-finetuning/llama-factory\n", - "07/04/2024 11:09:05 - INFO - llamafactory.hparams.parser - Process rank: 0, device: cuda:0, n_gpu: 1, distributed training: False, compute dtype: torch.bfloat16\n", - "[INFO|tokenization_utils_base.py:2161] 2024-07-04 11:09:06,545 >> loading file vocab.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/vocab.json\n", - "[INFO|tokenization_utils_base.py:2161] 2024-07-04 11:09:06,545 >> loading file merges.txt from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/merges.txt\n", - "[INFO|tokenization_utils_base.py:2161] 2024-07-04 11:09:06,545 >> loading file tokenizer.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/tokenizer.json\n", - "[INFO|tokenization_utils_base.py:2161] 2024-07-04 11:09:06,545 >> loading file added_tokens.json from cache at None\n", - "[INFO|tokenization_utils_base.py:2161] 2024-07-04 11:09:06,545 >> loading file special_tokens_map.json from cache at None\n", - "[INFO|tokenization_utils_base.py:2161] 2024-07-04 11:09:06,545 >> loading file tokenizer_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/tokenizer_config.json\n", - "[WARNING|logging.py:313] 2024-07-04 11:09:06,662 >> Special tokens have been added in the 
vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n", - "07/04/2024 11:09:06 - INFO - llamafactory.data.template - Replace eos token: <|im_end|>\n", - "07/04/2024 11:09:06 - INFO - llamafactory.data.template - Add <|im_start|> to stop words.\n", - "07/04/2024 11:09:06 - INFO - llamafactory.data.loader - Loading dataset alpaca_mac.json...\n", - "Converting format of dataset (num_proc=16): 100%|โ–ˆ| 4528/4528 [00:00<00:00, 1685\n", - "Running tokenizer on dataset (num_proc=16): 100%|โ–ˆ| 4528/4528 [00:01<00:00, 3476\n", - "input_ids:\n", - "[151644, 872, 198, 5501, 14683, 279, 2701, 8453, 1467, 1119, 6364, 323, 3410, 1172, 279, 24531, 2213, 11, 4302, 770, 624, 35987, 102895, 99164, 100324, 100717, 100095, 99509, 1773, 151645, 198, 151644, 77091, 198, 17949, 358, 572, 2617, 553, 264, 38835, 44486, 13, 151645]\n", - "inputs:\n", - "<|im_start|>user\n", - "Please translate the following Chinese text into English and provide only the translated content, nothing else.\n", - "ๅ…จไป—็€็‹ไป™ๆญๆ•‘ใ€‚<|im_end|>\n", - "<|im_start|>assistant\n", - "Because I was protected by a fox fairy.<|im_end|>\n", - "label_ids:\n", - "[-100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 17949, 358, 572, 2617, 553, 264, 38835, 44486, 13, 151645]\n", - "labels:\n", - "Because I was protected by a fox fairy.<|im_end|>\n", - "[INFO|configuration_utils.py:733] 2024-07-04 11:09:09,749 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/config.json\n", - "[INFO|configuration_utils.py:800] 2024-07-04 11:09:09,750 >> Model config Qwen2Config {\n", - " \"_name_or_path\": \"Qwen/Qwen2-0.5B-Instruct\",\n", - " \"architectures\": [\n", - " \"Qwen2ForCausalLM\"\n", - " ],\n", - " \"attention_dropout\": 0.0,\n", - " \"bos_token_id\": 151643,\n", - " \"eos_token_id\": 151645,\n", - " \"hidden_act\": \"silu\",\n", - " \"hidden_size\": 896,\n", - " \"initializer_range\": 0.02,\n", - " \"intermediate_size\": 4864,\n", - " \"max_position_embeddings\": 32768,\n", - " \"max_window_layers\": 24,\n", - " \"model_type\": \"qwen2\",\n", - " \"num_attention_heads\": 14,\n", - " \"num_hidden_layers\": 24,\n", - " \"num_key_value_heads\": 2,\n", - " \"rms_norm_eps\": 1e-06,\n", - " \"rope_theta\": 1000000.0,\n", - " \"sliding_window\": 32768,\n", - " \"tie_word_embeddings\": true,\n", - " \"torch_dtype\": \"bfloat16\",\n", - " \"transformers_version\": \"4.42.3\",\n", - " \"use_cache\": true,\n", - " \"use_sliding_window\": false,\n", - " \"vocab_size\": 151936\n", - "}\n", - "\n", - "[INFO|modeling_utils.py:3556] 2024-07-04 11:09:09,841 >> loading weights file model.safetensors from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/model.safetensors\n", - "[INFO|modeling_utils.py:1531] 2024-07-04 11:09:13,066 >> Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16.\n", - "[INFO|configuration_utils.py:1000] 2024-07-04 11:09:13,069 >> Generate config GenerationConfig {\n", - " \"bos_token_id\": 151643,\n", - " \"eos_token_id\": 151645\n", - "}\n", - "\n", - "[INFO|modeling_utils.py:4364] 2024-07-04 11:10:03,269 >> All model checkpoint weights were used when initializing Qwen2ForCausalLM.\n", - "\n", - "[INFO|modeling_utils.py:4372] 
2024-07-04 11:10:03,270 >> All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at Qwen/Qwen2-0.5B-Instruct.\n", - "If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training.\n", - "[INFO|configuration_utils.py:955] 2024-07-04 11:10:03,578 >> loading configuration file generation_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/generation_config.json\n", - "[INFO|configuration_utils.py:1000] 2024-07-04 11:10:03,578 >> Generate config GenerationConfig {\n", - " \"bos_token_id\": 151643,\n", - " \"do_sample\": true,\n", - " \"eos_token_id\": [\n", - " 151645,\n", - " 151643\n", - " ],\n", - " \"pad_token_id\": 151643,\n", - " \"repetition_penalty\": 1.1,\n", - " \"temperature\": 0.7,\n", - " \"top_k\": 20,\n", - " \"top_p\": 0.8\n", - "}\n", - "\n", - "07/04/2024 11:10:03 - INFO - llamafactory.model.model_utils.checkpointing - Gradient checkpointing enabled.\n", - "07/04/2024 11:10:03 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.\n", - "07/04/2024 11:10:03 - INFO - llamafactory.model.adapter - Upcasting trainable params to float32.\n", - "07/04/2024 11:10:03 - INFO - llamafactory.model.adapter - Fine-tuning method: LoRA\n", - "07/04/2024 11:10:03 - INFO - llamafactory.model.model_utils.misc - Found linear modules: up_proj,down_proj,k_proj,q_proj,v_proj,o_proj,gate_proj\n", - "07/04/2024 11:10:04 - INFO - llamafactory.model.loader - trainable params: 4,399,104 || all params: 498,431,872 || trainable%: 0.8826\n", - "[INFO|trainer.py:642] 2024-07-04 11:10:04,049 >> Using auto half precision backend\n", - "07/04/2024 11:10:04 - WARNING - llamafactory.train.callbacks - Previous trainer log in this folder will be deleted.\n", - "[INFO|trainer.py:2128] 2024-07-04 11:10:04,194 >> ***** Running training *****\n", - "[INFO|trainer.py:2129] 2024-07-04 11:10:04,194 >> Num examples = 4,482\n", - "[INFO|trainer.py:2130] 2024-07-04 11:10:04,194 >> Num Epochs = 6\n", - "[INFO|trainer.py:2131] 2024-07-04 11:10:04,194 >> Instantaneous batch size per device = 1\n", - "[INFO|trainer.py:2134] 2024-07-04 11:10:04,194 >> Total train batch size (w. parallel, distributed & accumulation) = 8\n", - "[INFO|trainer.py:2135] 2024-07-04 11:10:04,194 >> Gradient Accumulation steps = 8\n", - "[INFO|trainer.py:2136] 2024-07-04 11:10:04,195 >> Total optimization steps = 3,360\n", - "[INFO|trainer.py:2137] 2024-07-04 11:10:04,196 >> Number of trainable parameters = 4,399,104\n", - "[INFO|integration_utils.py:750] 2024-07-04 11:10:04,198 >> Automatic Weights & Biases logging enabled, to disable set os.environ[\"WANDB_DISABLED\"] = \"true\"\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33minflaton-sg\u001b[0m (\u001b[33minflaton-ai\u001b[0m). 
Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: Tracking run with wandb version 0.17.4\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: Run data is saved locally in \u001b[35m\u001b[1m/home/inflaton/code/projects/courses/llm-finetuning/llama-factory/wandb/run-20240704_111005-u8sqhi0x\u001b[0m\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: Run \u001b[1m`wandb offline`\u001b[0m to turn off syncing.\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: Syncing run \u001b[33mqwen2_0.5b_lora_sft\u001b[0m\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: โญ๏ธ View project at \u001b[34m\u001b[4mhttps://wandb.ai/inflaton-ai/huggingface\u001b[0m\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: ๐Ÿš€ View run at \u001b[34m\u001b[4mhttps://wandb.ai/inflaton-ai/huggingface/runs/u8sqhi0x\u001b[0m\n", - "{'loss': 2.581, 'grad_norm': 2.9743993282318115, 'learning_rate': 2.9761904761904763e-06, 'epoch': 0.02}\n", - "{'loss': 2.704, 'grad_norm': 3.803558826446533, 'learning_rate': 5.9523809523809525e-06, 'epoch': 0.04}\n", - "{'loss': 2.5764, 'grad_norm': 2.419433116912842, 'learning_rate': 8.92857142857143e-06, 'epoch': 0.05}\n", - "{'loss': 2.4994, 'grad_norm': 4.8528876304626465, 'learning_rate': 1.1904761904761905e-05, 'epoch': 0.07}\n", - "{'loss': 2.6881, 'grad_norm': 2.5375239849090576, 'learning_rate': 1.4880952380952381e-05, 'epoch': 0.09}\n", - "{'loss': 2.3869, 'grad_norm': 2.810744524002075, 'learning_rate': 1.785714285714286e-05, 'epoch': 0.11}\n", - "{'loss': 2.5728, 'grad_norm': 2.6387815475463867, 'learning_rate': 2.0833333333333336e-05, 'epoch': 0.12}\n", - "{'loss': 2.3077, 'grad_norm': 2.4742910861968994, 'learning_rate': 2.380952380952381e-05, 'epoch': 0.14}\n", - "{'loss': 2.4318, 'grad_norm': 3.0079479217529297, 'learning_rate': 2.6785714285714288e-05, 'epoch': 0.16}\n", - "{'loss': 2.29, 'grad_norm': 2.584622859954834, 'learning_rate': 2.9761904761904762e-05, 'epoch': 0.18}\n", - "{'loss': 2.3407, 'grad_norm': 3.3264784812927246, 'learning_rate': 3.273809523809524e-05, 'epoch': 0.2}\n", - "{'loss': 2.3577, 'grad_norm': 2.667269468307495, 'learning_rate': 3.571428571428572e-05, 'epoch': 0.21}\n", - "{'loss': 2.2612, 'grad_norm': 2.8811182975769043, 'learning_rate': 3.8690476190476195e-05, 'epoch': 0.23}\n", - "{'loss': 2.3096, 'grad_norm': 3.249279499053955, 'learning_rate': 4.166666666666667e-05, 'epoch': 0.25}\n", - "{'loss': 2.183, 'grad_norm': 2.5008630752563477, 'learning_rate': 4.464285714285715e-05, 'epoch': 0.27}\n", - "{'loss': 2.23, 'grad_norm': 2.457791328430176, 'learning_rate': 4.761904761904762e-05, 'epoch': 0.29}\n", - "{'loss': 2.3025, 'grad_norm': 2.4453022480010986, 'learning_rate': 5.05952380952381e-05, 'epoch': 0.3}\n", - "{'loss': 2.0884, 'grad_norm': 2.7773451805114746, 'learning_rate': 5.3571428571428575e-05, 'epoch': 0.32}\n", - "{'loss': 2.2048, 'grad_norm': 3.600346565246582, 'learning_rate': 5.6547619047619046e-05, 'epoch': 0.34}\n", - "{'loss': 2.3676, 'grad_norm': 2.939140796661377, 'learning_rate': 5.9523809523809524e-05, 'epoch': 0.36}\n", - "{'loss': 2.2684, 'grad_norm': 2.7832212448120117, 'learning_rate': 6.25e-05, 'epoch': 0.37}\n", - "{'loss': 2.2021, 'grad_norm': 3.7691140174865723, 'learning_rate': 6.547619047619048e-05, 'epoch': 0.39}\n", - "{'loss': 2.1625, 'grad_norm': 3.3338756561279297, 'learning_rate': 6.845238095238096e-05, 'epoch': 0.41}\n", - "{'loss': 2.3564, 'grad_norm': 4.061848163604736, 'learning_rate': 7.142857142857143e-05, 'epoch': 0.43}\n", - "{'loss': 2.2266, 'grad_norm': 3.3382863998413086, 'learning_rate': 
7.440476190476191e-05, 'epoch': 0.45}\n", - "{'loss': 2.1837, 'grad_norm': 3.208007335662842, 'learning_rate': 7.738095238095239e-05, 'epoch': 0.46}\n", - "{'loss': 2.1765, 'grad_norm': 4.045449733734131, 'learning_rate': 8.035714285714287e-05, 'epoch': 0.48}\n", - "{'loss': 2.2863, 'grad_norm': 4.37124490737915, 'learning_rate': 8.333333333333334e-05, 'epoch': 0.5}\n", - "{'loss': 2.0807, 'grad_norm': 2.6629326343536377, 'learning_rate': 8.630952380952382e-05, 'epoch': 0.52}\n", - "{'loss': 2.2086, 'grad_norm': 3.6005942821502686, 'learning_rate': 8.92857142857143e-05, 'epoch': 0.54}\n", - "{'loss': 2.2231, 'grad_norm': 4.065690040588379, 'learning_rate': 9.226190476190478e-05, 'epoch': 0.55}\n", - "{'loss': 1.9875, 'grad_norm': 6.6260294914245605, 'learning_rate': 9.523809523809524e-05, 'epoch': 0.57}\n", - "{'loss': 2.0721, 'grad_norm': 4.6804656982421875, 'learning_rate': 9.821428571428572e-05, 'epoch': 0.59}\n", - "{'loss': 2.1194, 'grad_norm': 4.226340293884277, 'learning_rate': 9.999956828659095e-05, 'epoch': 0.61}\n", - "{'loss': 2.1256, 'grad_norm': 4.530922889709473, 'learning_rate': 9.999471159635539e-05, 'epoch': 0.62}\n", - "{'loss': 2.0243, 'grad_norm': 3.235328197479248, 'learning_rate': 9.998445910004082e-05, 'epoch': 0.64}\n", - "{'loss': 2.2819, 'grad_norm': 4.247537136077881, 'learning_rate': 9.996881190417393e-05, 'epoch': 0.66}\n", - "{'loss': 2.1964, 'grad_norm': 3.339164972305298, 'learning_rate': 9.994777169751806e-05, 'epoch': 0.68}\n", - "{'loss': 1.9102, 'grad_norm': 2.744009494781494, 'learning_rate': 9.992134075089084e-05, 'epoch': 0.7}\n", - "{'loss': 2.0751, 'grad_norm': 3.513111114501953, 'learning_rate': 9.988952191691925e-05, 'epoch': 0.71}\n", - "{'loss': 2.1697, 'grad_norm': 3.301513433456421, 'learning_rate': 9.985231862973168e-05, 'epoch': 0.73}\n", - "{'loss': 2.1742, 'grad_norm': 2.8456363677978516, 'learning_rate': 9.980973490458728e-05, 'epoch': 0.75}\n", - "{'loss': 2.1497, 'grad_norm': 3.499181032180786, 'learning_rate': 9.976177533744261e-05, 'epoch': 0.77}\n", - "{'loss': 2.0643, 'grad_norm': 4.2905964851379395, 'learning_rate': 9.97084451044556e-05, 'epoch': 0.79}\n", - "{'loss': 1.9934, 'grad_norm': 2.706711769104004, 'learning_rate': 9.964974996142698e-05, 'epoch': 0.8}\n", - "{'loss': 2.0795, 'grad_norm': 3.038059949874878, 'learning_rate': 9.958569624317893e-05, 'epoch': 0.82}\n", - "{'loss': 2.0908, 'grad_norm': 4.291042804718018, 'learning_rate': 9.951629086287151e-05, 'epoch': 0.84}\n", - "{'loss': 2.105, 'grad_norm': 3.027702808380127, 'learning_rate': 9.944154131125642e-05, 'epoch': 0.86}\n", - "{'loss': 2.112, 'grad_norm': 2.6875832080841064, 'learning_rate': 9.936145565586871e-05, 'epoch': 0.87}\n", - "{'loss': 2.2824, 'grad_norm': 2.8110086917877197, 'learning_rate': 9.927604254015585e-05, 'epoch': 0.89}\n", - "{'loss': 2.2181, 'grad_norm': 3.3072471618652344, 'learning_rate': 9.918531118254507e-05, 'epoch': 0.91}\n", - "{'loss': 2.1132, 'grad_norm': 3.8883237838745117, 'learning_rate': 9.90892713754483e-05, 'epoch': 0.93}\n", - "{'loss': 2.1513, 'grad_norm': 3.775455951690674, 'learning_rate': 9.898793348420536e-05, 'epoch': 0.95}\n", - "{'loss': 2.1119, 'grad_norm': 3.0280404090881348, 'learning_rate': 9.888130844596524e-05, 'epoch': 0.96}\n", - "{'loss': 2.1126, 'grad_norm': 3.2323291301727295, 'learning_rate': 9.876940776850569e-05, 'epoch': 0.98}\n", - "{'loss': 2.1328, 'grad_norm': 2.91339373588562, 'learning_rate': 9.865224352899119e-05, 'epoch': 1.0}\n", - " 17%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–Ž | 560/3360 [12:30<1:02:53, 
1.35s/it][INFO|trainer.py:3788] 2024-07-04 11:22:39,524 >> \n", - "***** Running Evaluation *****\n", - "[INFO|trainer.py:3790] 2024-07-04 11:22:39,524 >> Num examples = 46\n", - "[INFO|trainer.py:3793] 2024-07-04 11:22:39,524 >> Batch size = 1\n", - "\n", - " 0%| | 0/46 [00:00> Saving model checkpoint to saves/qwen2-0.5b/lora/sft/checkpoint-560\n", - "[INFO|configuration_utils.py:733] 2024-07-04 11:22:42,026 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/config.json\n", - "[INFO|configuration_utils.py:800] 2024-07-04 11:22:42,027 >> Model config Qwen2Config {\n", - " \"architectures\": [\n", - " \"Qwen2ForCausalLM\"\n", - " ],\n", - " \"attention_dropout\": 0.0,\n", - " \"bos_token_id\": 151643,\n", - " \"eos_token_id\": 151645,\n", - " \"hidden_act\": \"silu\",\n", - " \"hidden_size\": 896,\n", - " \"initializer_range\": 0.02,\n", - " \"intermediate_size\": 4864,\n", - " \"max_position_embeddings\": 32768,\n", - " \"max_window_layers\": 24,\n", - " \"model_type\": \"qwen2\",\n", - " \"num_attention_heads\": 14,\n", - " \"num_hidden_layers\": 24,\n", - " \"num_key_value_heads\": 2,\n", - " \"rms_norm_eps\": 1e-06,\n", - " \"rope_theta\": 1000000.0,\n", - " \"sliding_window\": 32768,\n", - " \"tie_word_embeddings\": true,\n", - " \"torch_dtype\": \"bfloat16\",\n", - " \"transformers_version\": \"4.42.3\",\n", - " \"use_cache\": true,\n", - " \"use_sliding_window\": false,\n", - " \"vocab_size\": 151936\n", - "}\n", - "\n", - "[INFO|tokenization_utils_base.py:2574] 2024-07-04 11:22:42,060 >> tokenizer config file saved in saves/qwen2-0.5b/lora/sft/checkpoint-560/tokenizer_config.json\n", - "[INFO|tokenization_utils_base.py:2583] 2024-07-04 11:22:42,060 >> Special tokens file saved in saves/qwen2-0.5b/lora/sft/checkpoint-560/special_tokens_map.json\n", - "{'loss': 1.996, 'grad_norm': 2.9073429107666016, 'learning_rate': 9.852982837266955e-05, 'epoch': 1.02}\n", - "{'loss': 1.7941, 'grad_norm': 3.4045894145965576, 'learning_rate': 9.840217551150706e-05, 'epoch': 1.04}\n", - "{'loss': 1.9779, 'grad_norm': 2.8464860916137695, 'learning_rate': 9.826929872276255e-05, 'epoch': 1.05}\n", - "{'loss': 1.92, 'grad_norm': 3.770984411239624, 'learning_rate': 9.81312123475006e-05, 'epoch': 1.07}\n", - "{'loss': 1.8683, 'grad_norm': 3.4236226081848145, 'learning_rate': 9.798793128904356e-05, 'epoch': 1.09}\n", - "{'loss': 1.9201, 'grad_norm': 4.08709716796875, 'learning_rate': 9.78394710113631e-05, 'epoch': 1.11}\n", - "{'loss': 1.8563, 'grad_norm': 3.362687349319458, 'learning_rate': 9.768584753741134e-05, 'epoch': 1.12}\n", - "{'loss': 1.913, 'grad_norm': 5.210264682769775, 'learning_rate': 9.752707744739145e-05, 'epoch': 1.14}\n", - "{'loss': 1.9273, 'grad_norm': 3.515490770339966, 'learning_rate': 9.736317787696816e-05, 'epoch': 1.16}\n", - "{'loss': 1.8016, 'grad_norm': 3.4942610263824463, 'learning_rate': 9.719416651541839e-05, 'epoch': 1.18}\n", - "{'loss': 1.7993, 'grad_norm': 2.7268266677856445, 'learning_rate': 9.702006160372209e-05, 'epoch': 1.2}\n", - "{'loss': 1.9155, 'grad_norm': 3.6193785667419434, 'learning_rate': 9.684088193259355e-05, 'epoch': 1.21}\n", - "{'loss': 1.8261, 'grad_norm': 4.29509973526001, 'learning_rate': 9.665664684045333e-05, 'epoch': 1.23}\n", - "{'loss': 1.9301, 'grad_norm': 4.692563056945801, 'learning_rate': 9.646737621134112e-05, 'epoch': 1.25}\n", - "{'loss': 1.8418, 'grad_norm': 4.545106410980225, 'learning_rate': 
9.627309047276974e-05, 'epoch': 1.27}\n", - "{'loss': 2.0611, 'grad_norm': 4.3200860023498535, 'learning_rate': 9.607381059352038e-05, 'epoch': 1.29}\n", - "{'loss': 1.9531, 'grad_norm': 3.2151238918304443, 'learning_rate': 9.586955808137958e-05, 'epoch': 1.3}\n", - "{'loss': 1.9447, 'grad_norm': 3.385021209716797, 'learning_rate': 9.566035498081784e-05, 'epoch': 1.32}\n", - "{'loss': 1.9424, 'grad_norm': 8.94682502746582, 'learning_rate': 9.544622387061055e-05, 'epoch': 1.34}\n", - "{'loss': 1.706, 'grad_norm': 4.064428806304932, 'learning_rate': 9.522718786140097e-05, 'epoch': 1.36}\n", - "{'loss': 1.9165, 'grad_norm': 4.604166507720947, 'learning_rate': 9.500327059320606e-05, 'epoch': 1.37}\n", - "{'loss': 1.7816, 'grad_norm': 5.32956600189209, 'learning_rate': 9.477449623286505e-05, 'epoch': 1.39}\n", - "{'loss': 1.6637, 'grad_norm': 3.613009214401245, 'learning_rate': 9.454088947143116e-05, 'epoch': 1.41}\n", - "{'loss': 1.9416, 'grad_norm': 4.8296799659729, 'learning_rate': 9.430247552150673e-05, 'epoch': 1.43}\n", - "{'loss': 1.8371, 'grad_norm': 4.565757751464844, 'learning_rate': 9.405928011452211e-05, 'epoch': 1.45}\n", - "{'loss': 1.846, 'grad_norm': 3.5512914657592773, 'learning_rate': 9.381132949795861e-05, 'epoch': 1.46}\n", - "{'loss': 2.0069, 'grad_norm': 3.9040660858154297, 'learning_rate': 9.35586504325155e-05, 'epoch': 1.48}\n", - "{'loss': 1.8083, 'grad_norm': 3.609498977661133, 'learning_rate': 9.330127018922194e-05, 'epoch': 1.5}\n", - "{'loss': 1.7487, 'grad_norm': 3.3245325088500977, 'learning_rate': 9.303921654649362e-05, 'epoch': 1.52}\n", - "{'loss': 1.8764, 'grad_norm': 4.417221546173096, 'learning_rate': 9.277251778713474e-05, 'epoch': 1.54}\n", - "{'loss': 1.8843, 'grad_norm': 4.959105014801025, 'learning_rate': 9.250120269528546e-05, 'epoch': 1.55}\n", - "{'loss': 1.793, 'grad_norm': 3.7974698543548584, 'learning_rate': 9.22253005533154e-05, 'epoch': 1.57}\n", - "{'loss': 1.9039, 'grad_norm': 3.882502555847168, 'learning_rate': 9.194484113866313e-05, 'epoch': 1.59}\n", - "{'loss': 1.9854, 'grad_norm': 3.416905164718628, 'learning_rate': 9.165985472062246e-05, 'epoch': 1.61}\n", - "{'loss': 1.7529, 'grad_norm': 3.456245183944702, 'learning_rate': 9.137037205707552e-05, 'epoch': 1.62}\n", - "{'loss': 1.8017, 'grad_norm': 3.490054130554199, 'learning_rate': 9.107642439117321e-05, 'epoch': 1.64}\n", - "{'loss': 1.8225, 'grad_norm': 3.2115142345428467, 'learning_rate': 9.077804344796302e-05, 'epoch': 1.66}\n", - "{'loss': 1.8333, 'grad_norm': 3.5726113319396973, 'learning_rate': 9.04752614309652e-05, 'epoch': 1.68}\n", - "{'loss': 1.7861, 'grad_norm': 3.9323503971099854, 'learning_rate': 9.01681110186971e-05, 'epoch': 1.7}\n", - "{'loss': 1.8067, 'grad_norm': 4.4842352867126465, 'learning_rate': 8.985662536114613e-05, 'epoch': 1.71}\n", - "{'loss': 1.8397, 'grad_norm': 3.1608762741088867, 'learning_rate': 8.954083807619208e-05, 'epoch': 1.73}\n", - "{'loss': 1.9411, 'grad_norm': 3.920475959777832, 'learning_rate': 8.922078324597879e-05, 'epoch': 1.75}\n", - "{'loss': 1.8974, 'grad_norm': 3.438220739364624, 'learning_rate': 8.889649541323574e-05, 'epoch': 1.77}\n", - "{'loss': 1.8202, 'grad_norm': 4.780834674835205, 'learning_rate': 8.856800957755e-05, 'epoch': 1.78}\n", - "{'loss': 1.8528, 'grad_norm': 3.768432378768921, 'learning_rate': 8.823536119158864e-05, 'epoch': 1.8}\n", - "{'loss': 1.753, 'grad_norm': 4.07826042175293, 'learning_rate': 8.789858615727265e-05, 'epoch': 1.82}\n", - "{'loss': 1.7389, 'grad_norm': 3.5676631927490234, 'learning_rate': 
8.755772082190194e-05, 'epoch': 1.84}\n", - "{'loss': 1.9198, 'grad_norm': 3.463003635406494, 'learning_rate': 8.721280197423258e-05, 'epoch': 1.86}\n", - "{'loss': 1.7722, 'grad_norm': 4.634316921234131, 'learning_rate': 8.68638668405062e-05, 'epoch': 1.87}\n", - "{'loss': 1.8237, 'grad_norm': 4.284477710723877, 'learning_rate': 8.651095308043232e-05, 'epoch': 1.89}\n", - "{'loss': 2.0051, 'grad_norm': 4.610734462738037, 'learning_rate': 8.61540987831238e-05, 'epoch': 1.91}\n", - "{'loss': 1.9493, 'grad_norm': 4.1395392417907715, 'learning_rate': 8.579334246298593e-05, 'epoch': 1.93}\n", - "{'loss': 1.8477, 'grad_norm': 3.6301958560943604, 'learning_rate': 8.542872305555978e-05, 'epoch': 1.95}\n", - "{'loss': 1.7, 'grad_norm': 3.8048858642578125, 'learning_rate': 8.50602799133199e-05, 'epoch': 1.96}\n", - "{'loss': 1.8371, 'grad_norm': 3.2337429523468018, 'learning_rate': 8.468805280142709e-05, 'epoch': 1.98}\n", - "{'loss': 1.8531, 'grad_norm': 4.216500282287598, 'learning_rate': 8.43120818934367e-05, 'epoch': 2.0}\n", - " 33%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ | 1120/3360 [25:00<49:13, 1.32s/it][INFO|trainer.py:3788] 2024-07-04 11:35:10,200 >> \n", - "***** Running Evaluation *****\n", - "[INFO|trainer.py:3790] 2024-07-04 11:35:10,200 >> Num examples = 46\n", - "[INFO|trainer.py:3793] 2024-07-04 11:35:10,200 >> Batch size = 1\n", - "\n", - " 0%| | 0/46 [00:00> Saving model checkpoint to saves/qwen2-0.5b/lora/sft/checkpoint-1120\n", - "[INFO|configuration_utils.py:733] 2024-07-04 11:35:13,176 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/config.json\n", - "[INFO|configuration_utils.py:800] 2024-07-04 11:35:13,177 >> Model config Qwen2Config {\n", - " \"architectures\": [\n", - " \"Qwen2ForCausalLM\"\n", - " ],\n", - " \"attention_dropout\": 0.0,\n", - " \"bos_token_id\": 151643,\n", - " \"eos_token_id\": 151645,\n", - " \"hidden_act\": \"silu\",\n", - " \"hidden_size\": 896,\n", - " \"initializer_range\": 0.02,\n", - " \"intermediate_size\": 4864,\n", - " \"max_position_embeddings\": 32768,\n", - " \"max_window_layers\": 24,\n", - " \"model_type\": \"qwen2\",\n", - " \"num_attention_heads\": 14,\n", - " \"num_hidden_layers\": 24,\n", - " \"num_key_value_heads\": 2,\n", - " \"rms_norm_eps\": 1e-06,\n", - " \"rope_theta\": 1000000.0,\n", - " \"sliding_window\": 32768,\n", - " \"tie_word_embeddings\": true,\n", - " \"torch_dtype\": \"bfloat16\",\n", - " \"transformers_version\": \"4.42.3\",\n", - " \"use_cache\": true,\n", - " \"use_sliding_window\": false,\n", - " \"vocab_size\": 151936\n", - "}\n", - "\n", - "[INFO|tokenization_utils_base.py:2574] 2024-07-04 11:35:13,210 >> tokenizer config file saved in saves/qwen2-0.5b/lora/sft/checkpoint-1120/tokenizer_config.json\n", - "[INFO|tokenization_utils_base.py:2583] 2024-07-04 11:35:13,211 >> Special tokens file saved in saves/qwen2-0.5b/lora/sft/checkpoint-1120/special_tokens_map.json\n", - "{'loss': 1.5674, 'grad_norm': 4.559268474578857, 'learning_rate': 8.393240776696274e-05, 'epoch': 2.02}\n", - "{'loss': 1.4393, 'grad_norm': 3.3662822246551514, 'learning_rate': 8.354907139929851e-05, 'epoch': 2.03}\n", - "{'loss': 1.5166, 'grad_norm': 4.587384223937988, 'learning_rate': 8.316211416299397e-05, 'epoch': 2.05}\n", - "{'loss': 1.4818, 'grad_norm': 5.713983535766602, 'learning_rate': 8.27715778213905e-05, 'epoch': 2.07}\n", - "{'loss': 1.3679, 'grad_norm': 3.7478792667388916, 'learning_rate': 
8.237750452411353e-05, 'epoch': 2.09}\n", - "{'loss': 1.4682, 'grad_norm': 3.7805116176605225, 'learning_rate': 8.197993680252334e-05, 'epoch': 2.11}\n", - "{'loss': 1.6848, 'grad_norm': 4.318390846252441, 'learning_rate': 8.157891756512488e-05, 'epoch': 2.12}\n", - "{'loss': 1.447, 'grad_norm': 4.625955581665039, 'learning_rate': 8.117449009293668e-05, 'epoch': 2.14}\n", - "{'loss': 1.4888, 'grad_norm': 4.70202112197876, 'learning_rate': 8.076669803481965e-05, 'epoch': 2.16}\n", - "{'loss': 1.5405, 'grad_norm': 6.126914978027344, 'learning_rate': 8.035558540276618e-05, 'epoch': 2.18}\n", - "{'loss': 1.4751, 'grad_norm': 3.867528200149536, 'learning_rate': 7.994119656715002e-05, 'epoch': 2.2}\n", - "{'loss': 1.5175, 'grad_norm': 4.935867786407471, 'learning_rate': 7.952357625193749e-05, 'epoch': 2.21}\n", - "{'loss': 1.5586, 'grad_norm': 5.28302001953125, 'learning_rate': 7.91027695298606e-05, 'epoch': 2.23}\n", - "{'loss': 1.5798, 'grad_norm': 4.9564738273620605, 'learning_rate': 7.86788218175523e-05, 'epoch': 2.25}\n", - "{'loss': 1.4184, 'grad_norm': 4.7498779296875, 'learning_rate': 7.8251778870645e-05, 'epoch': 2.27}\n", - "{'loss': 1.4736, 'grad_norm': 5.780045032501221, 'learning_rate': 7.782168677883206e-05, 'epoch': 2.28}\n", - "{'loss': 1.5192, 'grad_norm': 3.647230625152588, 'learning_rate': 7.738859196089358e-05, 'epoch': 2.3}\n", - "{'loss': 1.5836, 'grad_norm': 4.818410396575928, 'learning_rate': 7.695254115968648e-05, 'epoch': 2.32}\n", - "{'loss': 1.6111, 'grad_norm': 4.5074286460876465, 'learning_rate': 7.651358143709972e-05, 'epoch': 2.34}\n", - "{'loss': 1.6122, 'grad_norm': 4.6216816902160645, 'learning_rate': 7.60717601689749e-05, 'epoch': 2.36}\n", - "{'loss': 1.5633, 'grad_norm': 9.873260498046875, 'learning_rate': 7.562712503999327e-05, 'epoch': 2.37}\n", - "{'loss': 1.7444, 'grad_norm': 4.795359134674072, 'learning_rate': 7.517972403852905e-05, 'epoch': 2.39}\n", - "{'loss': 1.5804, 'grad_norm': 4.818080902099609, 'learning_rate': 7.472960545147038e-05, 'epoch': 2.41}\n", - "{'loss': 1.4748, 'grad_norm': 5.576250076293945, 'learning_rate': 7.427681785900761e-05, 'epoch': 2.43}\n", - "{'loss': 1.5531, 'grad_norm': 4.261260509490967, 'learning_rate': 7.382141012939034e-05, 'epoch': 2.45}\n", - "{'loss': 1.4554, 'grad_norm': 4.23293399810791, 'learning_rate': 7.33634314136531e-05, 'epoch': 2.46}\n", - "{'loss': 1.5272, 'grad_norm': 4.627878665924072, 'learning_rate': 7.290293114031061e-05, 'epoch': 2.48}\n", - "{'loss': 1.6616, 'grad_norm': 4.36018705368042, 'learning_rate': 7.243995901002312e-05, 'epoch': 2.5}\n", - "{'loss': 1.5503, 'grad_norm': 5.698966026306152, 'learning_rate': 7.197456499023225e-05, 'epoch': 2.52}\n", - "{'loss': 1.5043, 'grad_norm': 4.486359119415283, 'learning_rate': 7.150679930976825e-05, 'epoch': 2.53}\n", - "{'loss': 1.5796, 'grad_norm': 8.031678199768066, 'learning_rate': 7.103671245342887e-05, 'epoch': 2.55}\n", - "{'loss': 1.4317, 'grad_norm': 5.806405544281006, 'learning_rate': 7.056435515653059e-05, 'epoch': 2.57}\n", - "{'loss': 1.696, 'grad_norm': 6.584068298339844, 'learning_rate': 7.008977839943299e-05, 'epoch': 2.59}\n", - "{'loss': 1.4768, 'grad_norm': 4.871330261230469, 'learning_rate': 6.961303340203653e-05, 'epoch': 2.61}\n", - "{'loss': 1.587, 'grad_norm': 3.9512643814086914, 'learning_rate': 6.91341716182545e-05, 'epoch': 2.62}\n", - "{'loss': 1.4991, 'grad_norm': 3.4907033443450928, 'learning_rate': 6.86532447304597e-05, 'epoch': 2.64}\n", - "{'loss': 1.4822, 'grad_norm': 4.603860855102539, 'learning_rate': 
6.817030464390656e-05, 'epoch': 2.66}\n", - "{'loss': 1.6408, 'grad_norm': 5.737949371337891, 'learning_rate': 6.768540348112907e-05, 'epoch': 2.68}\n", - "{'loss': 1.4316, 'grad_norm': 5.838085174560547, 'learning_rate': 6.719859357631535e-05, 'epoch': 2.7}\n", - "{'loss': 1.414, 'grad_norm': 5.460419654846191, 'learning_rate': 6.670992746965938e-05, 'epoch': 2.71}\n", - "{'loss': 1.6858, 'grad_norm': 5.311679363250732, 'learning_rate': 6.621945790169036e-05, 'epoch': 2.73}\n", - "{'loss': 1.5802, 'grad_norm': 4.987999439239502, 'learning_rate': 6.572723780758069e-05, 'epoch': 2.75}\n", - "{'loss': 1.5672, 'grad_norm': 5.01920223236084, 'learning_rate': 6.523332031143272e-05, 'epoch': 2.77}\n", - "{'loss': 1.5914, 'grad_norm': 4.382671356201172, 'learning_rate': 6.473775872054521e-05, 'epoch': 2.78}\n", - "{'loss': 1.4284, 'grad_norm': 3.818115711212158, 'learning_rate': 6.424060651966007e-05, 'epoch': 2.8}\n", - "{'loss': 1.499, 'grad_norm': 4.427730560302734, 'learning_rate': 6.374191736518974e-05, 'epoch': 2.82}\n", - "{'loss': 1.4914, 'grad_norm': 4.508190631866455, 'learning_rate': 6.324174507942637e-05, 'epoch': 2.84}\n", - "{'loss': 1.4629, 'grad_norm': 6.055968284606934, 'learning_rate': 6.274014364473274e-05, 'epoch': 2.86}\n", - "{'loss': 1.717, 'grad_norm': 4.5250678062438965, 'learning_rate': 6.22371671977162e-05, 'epoch': 2.87}\n", - "{'loss': 1.5103, 'grad_norm': 4.378949165344238, 'learning_rate': 6.173287002338577e-05, 'epoch': 2.89}\n", - "{'loss': 1.511, 'grad_norm': 5.3176751136779785, 'learning_rate': 6.122730654929334e-05, 'epoch': 2.91}\n", - "{'loss': 1.4656, 'grad_norm': 4.5037994384765625, 'learning_rate': 6.072053133965938e-05, 'epoch': 2.93}\n", - "{'loss': 1.6443, 'grad_norm': 4.189935684204102, 'learning_rate': 6.021259908948402e-05, 'epoch': 2.95}\n", - "{'loss': 1.6633, 'grad_norm': 4.525129795074463, 'learning_rate': 5.970356461864391e-05, 'epoch': 2.96}\n", - "{'loss': 1.4935, 'grad_norm': 5.440227508544922, 'learning_rate': 5.919348286597569e-05, 'epoch': 2.98}\n", - "{'loss': 1.6304, 'grad_norm': 4.765013694763184, 'learning_rate': 5.868240888334653e-05, 'epoch': 3.0}\n", - " 50%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–Œ | 1680/3360 [37:13<35:24, 1.26s/it][INFO|trainer.py:3788] 2024-07-04 11:47:23,337 >> \n", - "***** Running Evaluation *****\n", - "[INFO|trainer.py:3790] 2024-07-04 11:47:23,337 >> Num examples = 46\n", - "[INFO|trainer.py:3793] 2024-07-04 11:47:23,337 >> Batch size = 1\n", - "\n", - " 0%| | 0/46 [00:00> Saving model checkpoint to saves/qwen2-0.5b/lora/sft/checkpoint-1680\n", - "[INFO|configuration_utils.py:733] 2024-07-04 11:47:25,920 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/config.json\n", - "[INFO|configuration_utils.py:800] 2024-07-04 11:47:25,920 >> Model config Qwen2Config {\n", - " \"architectures\": [\n", - " \"Qwen2ForCausalLM\"\n", - " ],\n", - " \"attention_dropout\": 0.0,\n", - " \"bos_token_id\": 151643,\n", - " \"eos_token_id\": 151645,\n", - " \"hidden_act\": \"silu\",\n", - " \"hidden_size\": 896,\n", - " \"initializer_range\": 0.02,\n", - " \"intermediate_size\": 4864,\n", - " \"max_position_embeddings\": 32768,\n", - " \"max_window_layers\": 24,\n", - " \"model_type\": \"qwen2\",\n", - " \"num_attention_heads\": 14,\n", - " \"num_hidden_layers\": 24,\n", - " \"num_key_value_heads\": 2,\n", - " \"rms_norm_eps\": 1e-06,\n", - " \"rope_theta\": 1000000.0,\n", - " 
\"sliding_window\": 32768,\n", - " \"tie_word_embeddings\": true,\n", - " \"torch_dtype\": \"bfloat16\",\n", - " \"transformers_version\": \"4.42.3\",\n", - " \"use_cache\": true,\n", - " \"use_sliding_window\": false,\n", - " \"vocab_size\": 151936\n", - "}\n", - "\n", - "[INFO|tokenization_utils_base.py:2574] 2024-07-04 11:47:25,956 >> tokenizer config file saved in saves/qwen2-0.5b/lora/sft/checkpoint-1680/tokenizer_config.json\n", - "[INFO|tokenization_utils_base.py:2583] 2024-07-04 11:47:25,956 >> Special tokens file saved in saves/qwen2-0.5b/lora/sft/checkpoint-1680/special_tokens_map.json\n", - "{'loss': 1.4346, 'grad_norm': 4.2551727294921875, 'learning_rate': 5.8170397829712485e-05, 'epoch': 3.02}\n", - "{'loss': 1.1148, 'grad_norm': 4.442202568054199, 'learning_rate': 5.765750496516547e-05, 'epoch': 3.03}\n", - "{'loss': 1.2852, 'grad_norm': 5.140079021453857, 'learning_rate': 5.714378564496901e-05, 'epoch': 3.05}\n", - "{'loss': 1.2086, 'grad_norm': 5.270480632781982, 'learning_rate': 5.6629295313583974e-05, 'epoch': 3.07}\n", - "{'loss': 1.1824, 'grad_norm': 5.192230224609375, 'learning_rate': 5.611408949868457e-05, 'epoch': 3.09}\n", - "{'loss': 1.2875, 'grad_norm': 5.830446720123291, 'learning_rate': 5.559822380516539e-05, 'epoch': 3.11}\n", - "{'loss': 1.2162, 'grad_norm': 4.606627464294434, 'learning_rate': 5.5081753909140096e-05, 'epoch': 3.12}\n", - "{'loss': 1.3341, 'grad_norm': 5.547798156738281, 'learning_rate': 5.456473555193242e-05, 'epoch': 3.14}\n", - "{'loss': 1.2143, 'grad_norm': 5.579686641693115, 'learning_rate': 5.404722453406017e-05, 'epoch': 3.16}\n", - "{'loss': 1.2823, 'grad_norm': 6.129615783691406, 'learning_rate': 5.3529276709212816e-05, 'epoch': 3.18}\n", - "{'loss': 1.2582, 'grad_norm': 5.295398712158203, 'learning_rate': 5.30109479782233e-05, 'epoch': 3.2}\n", - "{'loss': 1.2438, 'grad_norm': 6.145551681518555, 'learning_rate': 5.249229428303486e-05, 'epoch': 3.21}\n", - "{'loss': 1.4372, 'grad_norm': 4.52131986618042, 'learning_rate': 5.197337160066331e-05, 'epoch': 3.23}\n", - "{'loss': 1.2346, 'grad_norm': 4.7772955894470215, 'learning_rate': 5.145423593715557e-05, 'epoch': 3.25}\n", - "{'loss': 1.1929, 'grad_norm': 5.328940391540527, 'learning_rate': 5.0934943321545115e-05, 'epoch': 3.27}\n", - "{'loss': 1.1731, 'grad_norm': 5.733246803283691, 'learning_rate': 5.041554979980486e-05, 'epoch': 3.28}\n", - "{'loss': 1.3325, 'grad_norm': 6.418582439422607, 'learning_rate': 4.9896111428798254e-05, 'epoch': 3.3}\n", - "{'loss': 1.3305, 'grad_norm': 4.787232398986816, 'learning_rate': 4.9376684270229254e-05, 'epoch': 3.32}\n", - "{'loss': 1.2982, 'grad_norm': 4.655210971832275, 'learning_rate': 4.8857324384591653e-05, 'epoch': 3.34}\n", - "{'loss': 1.2833, 'grad_norm': 4.85659122467041, 'learning_rate': 4.8338087825118675e-05, 'epoch': 3.36}\n", - "{'loss': 1.2835, 'grad_norm': 5.313413143157959, 'learning_rate': 4.781903063173321e-05, 'epoch': 3.37}\n", - "{'loss': 1.2001, 'grad_norm': 4.640489101409912, 'learning_rate': 4.730020882499964e-05, 'epoch': 3.39}\n", - "{'loss': 1.2597, 'grad_norm': 6.197988033294678, 'learning_rate': 4.678167840007767e-05, 'epoch': 3.41}\n", - "{'loss': 1.3514, 'grad_norm': 4.942805290222168, 'learning_rate': 4.626349532067879e-05, 'epoch': 3.43}\n", - "{'loss': 1.3118, 'grad_norm': 5.112833499908447, 'learning_rate': 4.574571551302647e-05, 'epoch': 3.44}\n", - "{'loss': 1.3232, 'grad_norm': 4.470940113067627, 'learning_rate': 4.522839485981994e-05, 'epoch': 3.46}\n", - "{'loss': 1.2533, 'grad_norm': 5.801645755767822, 
'learning_rate': 4.471158919420312e-05, 'epoch': 3.48}\n", - "{'loss': 1.2343, 'grad_norm': 6.3296709060668945, 'learning_rate': 4.4195354293738484e-05, 'epoch': 3.5}\n", - "{'loss': 1.1995, 'grad_norm': 6.262467384338379, 'learning_rate': 4.367974587438733e-05, 'epoch': 3.52}\n", - "{'loss': 1.2744, 'grad_norm': 5.313882827758789, 'learning_rate': 4.316481958449634e-05, 'epoch': 3.53}\n", - "{'loss': 1.2366, 'grad_norm': 7.450092792510986, 'learning_rate': 4.2650630998791615e-05, 'epoch': 3.55}\n", - "{'loss': 1.3738, 'grad_norm': 4.7678680419921875, 'learning_rate': 4.213723561238074e-05, 'epoch': 3.57}\n", - "{'loss': 1.1538, 'grad_norm': 4.40903377532959, 'learning_rate': 4.162468883476319e-05, 'epoch': 3.59}\n", - "{'loss': 1.2502, 'grad_norm': 5.227618217468262, 'learning_rate': 4.111304598385018e-05, 'epoch': 3.61}\n", - "{'loss': 1.3061, 'grad_norm': 6.307828903198242, 'learning_rate': 4.060236227999441e-05, 'epoch': 3.62}\n", - "{'loss': 1.2667, 'grad_norm': 5.422544002532959, 'learning_rate': 4.0092692840030134e-05, 'epoch': 3.64}\n", - "{'loss': 1.2039, 'grad_norm': 7.9964141845703125, 'learning_rate': 3.9584092671324606e-05, 'epoch': 3.66}\n", - "{'loss': 1.3509, 'grad_norm': 7.364163875579834, 'learning_rate': 3.907661666584131e-05, 'epoch': 3.68}\n", - "{'loss': 1.3427, 'grad_norm': 6.175056457519531, 'learning_rate': 3.857031959421553e-05, 'epoch': 3.69}\n", - "{'loss': 1.345, 'grad_norm': 5.0636725425720215, 'learning_rate': 3.806525609984312e-05, 'epoch': 3.71}\n", - "{'loss': 1.1779, 'grad_norm': 5.742904186248779, 'learning_rate': 3.7561480692983006e-05, 'epoch': 3.73}\n", - "{'loss': 1.29, 'grad_norm': 5.6552276611328125, 'learning_rate': 3.705904774487396e-05, 'epoch': 3.75}\n", - "{'loss': 1.2704, 'grad_norm': 5.890940189361572, 'learning_rate': 3.655801148186655e-05, 'epoch': 3.77}\n", - "{'loss': 1.1811, 'grad_norm': 5.2217583656311035, 'learning_rate': 3.6058425979570485e-05, 'epoch': 3.78}\n", - "{'loss': 1.2768, 'grad_norm': 5.42200231552124, 'learning_rate': 3.556034515701852e-05, 'epoch': 3.8}\n", - "{'loss': 1.2891, 'grad_norm': 5.615239143371582, 'learning_rate': 3.506382277084696e-05, 'epoch': 3.82}\n", - "{'loss': 1.2401, 'grad_norm': 5.646175861358643, 'learning_rate': 3.4568912409493945e-05, 'epoch': 3.84}\n", - "{'loss': 1.0597, 'grad_norm': 5.7333197593688965, 'learning_rate': 3.4075667487415785e-05, 'epoch': 3.86}\n", - "{'loss': 1.1621, 'grad_norm': 5.321319580078125, 'learning_rate': 3.358414123932195e-05, 'epoch': 3.87}\n", - "{'loss': 1.2736, 'grad_norm': 4.852396011352539, 'learning_rate': 3.3094386714429724e-05, 'epoch': 3.89}\n", - "{'loss': 1.2597, 'grad_norm': 7.163392066955566, 'learning_rate': 3.2606456770738636e-05, 'epoch': 3.91}\n", - "{'loss': 1.1871, 'grad_norm': 5.611868381500244, 'learning_rate': 3.212040406932569e-05, 'epoch': 3.93}\n", - "{'loss': 1.0307, 'grad_norm': 4.783786296844482, 'learning_rate': 3.163628106866172e-05, 'epoch': 3.94}\n", - "{'loss': 1.4526, 'grad_norm': 5.691711902618408, 'learning_rate': 3.115414001894974e-05, 'epoch': 3.96}\n", - "{'loss': 1.3101, 'grad_norm': 5.280589580535889, 'learning_rate': 3.067403295648566e-05, 'epoch': 3.98}\n", - "{'loss': 1.0932, 'grad_norm': 4.22761869430542, 'learning_rate': 3.019601169804216e-05, 'epoch': 4.0}\n", - " 67%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ | 2240/3360 [49:01<23:24, 1.25s/it][INFO|trainer.py:3788] 2024-07-04 11:59:10,533 >> \n", - "***** Running Evaluation *****\n", - "[INFO|trainer.py:3790] 2024-07-04 11:59:10,533 >> 
Num examples = 46\n", - "[INFO|trainer.py:3793] 2024-07-04 11:59:10,533 >> Batch size = 1\n", - "\n", - " 0%| | 0/46 [00:00> Saving model checkpoint to saves/qwen2-0.5b/lora/sft/checkpoint-2240\n", - "[INFO|configuration_utils.py:733] 2024-07-04 11:59:13,447 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/config.json\n", - "[INFO|configuration_utils.py:800] 2024-07-04 11:59:13,448 >> Model config Qwen2Config {\n", - " \"architectures\": [\n", - " \"Qwen2ForCausalLM\"\n", - " ],\n", - " \"attention_dropout\": 0.0,\n", - " \"bos_token_id\": 151643,\n", - " \"eos_token_id\": 151645,\n", - " \"hidden_act\": \"silu\",\n", - " \"hidden_size\": 896,\n", - " \"initializer_range\": 0.02,\n", - " \"intermediate_size\": 4864,\n", - " \"max_position_embeddings\": 32768,\n", - " \"max_window_layers\": 24,\n", - " \"model_type\": \"qwen2\",\n", - " \"num_attention_heads\": 14,\n", - " \"num_hidden_layers\": 24,\n", - " \"num_key_value_heads\": 2,\n", - " \"rms_norm_eps\": 1e-06,\n", - " \"rope_theta\": 1000000.0,\n", - " \"sliding_window\": 32768,\n", - " \"tie_word_embeddings\": true,\n", - " \"torch_dtype\": \"bfloat16\",\n", - " \"transformers_version\": \"4.42.3\",\n", - " \"use_cache\": true,\n", - " \"use_sliding_window\": false,\n", - " \"vocab_size\": 151936\n", - "}\n", - "\n", - "[INFO|tokenization_utils_base.py:2574] 2024-07-04 11:59:13,487 >> tokenizer config file saved in saves/qwen2-0.5b/lora/sft/checkpoint-2240/tokenizer_config.json\n", - "[INFO|tokenization_utils_base.py:2583] 2024-07-04 11:59:13,487 >> Special tokens file saved in saves/qwen2-0.5b/lora/sft/checkpoint-2240/special_tokens_map.json\n", - "{'loss': 1.1326, 'grad_norm': 3.8860654830932617, 'learning_rate': 2.9720127835276256e-05, 'epoch': 4.02}\n", - "{'loss': 1.0202, 'grad_norm': 5.21559476852417, 'learning_rate': 2.9246432729161055e-05, 'epoch': 4.03}\n", - "{'loss': 1.0934, 'grad_norm': 5.658751964569092, 'learning_rate': 2.8774977504442647e-05, 'epoch': 4.05}\n", - "{'loss': 0.9934, 'grad_norm': 5.090124130249023, 'learning_rate': 2.8305813044122097e-05, 'epoch': 4.07}\n", - "{'loss': 1.0309, 'grad_norm': 5.851395606994629, 'learning_rate': 2.7838989983964065e-05, 'epoch': 4.09}\n", - "{'loss': 1.187, 'grad_norm': 4.703646659851074, 'learning_rate': 2.737455870703155e-05, 'epoch': 4.11}\n", - "{'loss': 0.9171, 'grad_norm': 4.95070219039917, 'learning_rate': 2.6912569338248315e-05, 'epoch': 4.12}\n", - "{'loss': 1.0232, 'grad_norm': 4.933461666107178, 'learning_rate': 2.645307173898901e-05, 'epoch': 4.14}\n", - "{'loss': 1.1005, 'grad_norm': 5.395535469055176, 'learning_rate': 2.5996115501697694e-05, 'epoch': 4.16}\n", - "{'loss': 0.9827, 'grad_norm': 4.670980453491211, 'learning_rate': 2.5541749944535554e-05, 'epoch': 4.18}\n", - "{'loss': 0.9969, 'grad_norm': 9.713501930236816, 'learning_rate': 2.5090024106057962e-05, 'epoch': 4.19}\n", - "{'loss': 1.0557, 'grad_norm': 5.423773288726807, 'learning_rate': 2.464098673992205e-05, 'epoch': 4.21}\n", - "{'loss': 0.9903, 'grad_norm': 5.628043174743652, 'learning_rate': 2.4194686309624663e-05, 'epoch': 4.23}\n", - "{'loss': 1.1397, 'grad_norm': 5.057712554931641, 'learning_rate': 2.3751170983272e-05, 'epoch': 4.25}\n", - "{'loss': 0.962, 'grad_norm': 4.844544410705566, 'learning_rate': 2.3310488628380757e-05, 'epoch': 4.27}\n", - "{'loss': 1.0187, 'grad_norm': 7.445083141326904, 'learning_rate': 2.2872686806712035e-05, 'epoch': 4.28}\n", - 
"{'loss': 1.0618, 'grad_norm': 4.87847900390625, 'learning_rate': 2.243781276913811e-05, 'epoch': 4.3}\n", - "{'loss': 0.9125, 'grad_norm': 5.181140899658203, 'learning_rate': 2.200591345054267e-05, 'epoch': 4.32}\n", - "{'loss': 1.125, 'grad_norm': 8.97202205657959, 'learning_rate': 2.157703546475539e-05, 'epoch': 4.34}\n", - "{'loss': 0.9747, 'grad_norm': 6.134432792663574, 'learning_rate': 2.115122509952085e-05, 'epoch': 4.36}\n", - "{'loss': 0.9803, 'grad_norm': 9.630309104919434, 'learning_rate': 2.0728528311502976e-05, 'epoch': 4.37}\n", - "{'loss': 1.0843, 'grad_norm': 5.363273620605469, 'learning_rate': 2.0308990721324927e-05, 'epoch': 4.39}\n", - "{'loss': 1.0764, 'grad_norm': 7.712973117828369, 'learning_rate': 1.989265760864542e-05, 'epoch': 4.41}\n", - "{'loss': 1.1397, 'grad_norm': 5.690403938293457, 'learning_rate': 1.947957390727185e-05, 'epoch': 4.43}\n", - "{'loss': 1.1258, 'grad_norm': 5.744186878204346, 'learning_rate': 1.906978420031059e-05, 'epoch': 4.44}\n", - "{'loss': 0.9438, 'grad_norm': 8.820874214172363, 'learning_rate': 1.8663332715355396e-05, 'epoch': 4.46}\n", - "{'loss': 1.132, 'grad_norm': 4.420164108276367, 'learning_rate': 1.8260263319713844e-05, 'epoch': 4.48}\n", - "{'loss': 1.0819, 'grad_norm': 5.586333751678467, 'learning_rate': 1.7860619515673033e-05, 'epoch': 4.5}\n", - "{'loss': 1.0571, 'grad_norm': 5.625140190124512, 'learning_rate': 1.746444443580433e-05, 'epoch': 4.52}\n", - "{'loss': 1.1021, 'grad_norm': 5.7560577392578125, 'learning_rate': 1.7071780838308288e-05, 'epoch': 4.53}\n", - "{'loss': 1.0531, 'grad_norm': 5.3450727462768555, 'learning_rate': 1.6682671102399805e-05, 'epoch': 4.55}\n", - "{'loss': 1.017, 'grad_norm': 6.27817440032959, 'learning_rate': 1.629715722373423e-05, 'epoch': 4.57}\n", - "{'loss': 1.0471, 'grad_norm': 5.72844934463501, 'learning_rate': 1.5915280809874932e-05, 'epoch': 4.59}\n", - "{'loss': 0.9309, 'grad_norm': 5.988643169403076, 'learning_rate': 1.553708307580265e-05, 'epoch': 4.61}\n", - "{'loss': 1.0538, 'grad_norm': 5.950584411621094, 'learning_rate': 1.5162604839467265e-05, 'epoch': 4.62}\n", - "{'loss': 1.0554, 'grad_norm': 4.944731712341309, 'learning_rate': 1.4791886517382413e-05, 'epoch': 4.64}\n", - "{'loss': 1.0857, 'grad_norm': 6.031637191772461, 'learning_rate': 1.4424968120263504e-05, 'epoch': 4.66}\n", - "{'loss': 1.0667, 'grad_norm': 5.933581352233887, 'learning_rate': 1.4061889248709343e-05, 'epoch': 4.68}\n", - "{'loss': 0.9942, 'grad_norm': 6.697149276733398, 'learning_rate': 1.370268908892825e-05, 'epoch': 4.69}\n", - "{'loss': 1.0146, 'grad_norm': 7.122743129730225, 'learning_rate': 1.3347406408508695e-05, 'epoch': 4.71}\n", - "{'loss': 0.9921, 'grad_norm': 4.69237756729126, 'learning_rate': 1.2996079552235263e-05, 'epoch': 4.73}\n", - "{'loss': 1.0017, 'grad_norm': 5.421998977661133, 'learning_rate': 1.264874643795021e-05, 'epoch': 4.75}\n", - "{'loss': 1.0102, 'grad_norm': 6.121133804321289, 'learning_rate': 1.230544455246101e-05, 'epoch': 4.77}\n", - "{'loss': 1.062, 'grad_norm': 5.060891151428223, 'learning_rate': 1.1966210947494583e-05, 'epoch': 4.78}\n", - "{'loss': 1.125, 'grad_norm': 4.1661529541015625, 'learning_rate': 1.1631082235698316e-05, 'epoch': 4.8}\n", - "{'loss': 0.9848, 'grad_norm': 6.484502792358398, 'learning_rate': 1.130009458668863e-05, 'epoch': 4.82}\n", - "{'loss': 0.9632, 'grad_norm': 5.1096086502075195, 'learning_rate': 1.097328372314721e-05, 'epoch': 4.84}\n", - "{'loss': 1.0435, 'grad_norm': 7.69472074508667, 'learning_rate': 1.0650684916965559e-05, 'epoch': 
4.85}\n", - "{'loss': 1.1122, 'grad_norm': 6.654355525970459, 'learning_rate': 1.0332332985438248e-05, 'epoch': 4.87}\n", - "{'loss': 1.0846, 'grad_norm': 6.456166744232178, 'learning_rate': 1.0018262287505086e-05, 'epoch': 4.89}\n", - "{'loss': 1.1491, 'grad_norm': 5.556300163269043, 'learning_rate': 9.708506720042932e-06, 'epoch': 4.91}\n", - "{'loss': 1.0227, 'grad_norm': 5.921450614929199, 'learning_rate': 9.403099714207175e-06, 'epoch': 4.93}\n", - "{'loss': 1.13, 'grad_norm': 5.2472052574157715, 'learning_rate': 9.102074231823727e-06, 'epoch': 4.94}\n", - "{'loss': 1.1184, 'grad_norm': 6.798206806182861, 'learning_rate': 8.805462761831418e-06, 'epoch': 4.96}\n", - "{'loss': 1.1483, 'grad_norm': 6.1544647216796875, 'learning_rate': 8.513297316775625e-06, 'epoch': 4.98}\n", - "{'loss': 1.0966, 'grad_norm': 5.619192600250244, 'learning_rate': 8.225609429353187e-06, 'epoch': 5.0}\n", - " 83%|██████████████████████████████▊ | 2800/3360 [1:00:45<11:43, 1.26s/it][INFO|trainer.py:3788] 2024-07-04 12:10:55,158 >> \n", - "***** Running Evaluation *****\n", - "[INFO|trainer.py:3790] 2024-07-04 12:10:55,158 >> Num examples = 46\n", - "[INFO|trainer.py:3793] 2024-07-04 12:10:55,158 >> Batch size = 1\n", - "\n", - " 0%| | 0/46 [00:00<?, ?it/s] ... >> Saving model checkpoint to saves/qwen2-0.5b/lora/sft/checkpoint-2800\n", - "[INFO|configuration_utils.py:733] 2024-07-04 12:10:57,881 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/config.json\n", - "[INFO|configuration_utils.py:800] 2024-07-04 12:10:57,882 >> Model config Qwen2Config {\n", - " \"architectures\": [\n", - " \"Qwen2ForCausalLM\"\n", - " ],\n", - " \"attention_dropout\": 0.0,\n", - " \"bos_token_id\": 151643,\n", - " \"eos_token_id\": 151645,\n", - " \"hidden_act\": \"silu\",\n", - " \"hidden_size\": 896,\n", - " \"initializer_range\": 0.02,\n", - " \"intermediate_size\": 4864,\n", - " \"max_position_embeddings\": 32768,\n", - " \"max_window_layers\": 24,\n", - " \"model_type\": \"qwen2\",\n", - " \"num_attention_heads\": 14,\n", - " \"num_hidden_layers\": 24,\n", - " \"num_key_value_heads\": 2,\n", - " \"rms_norm_eps\": 1e-06,\n", - " \"rope_theta\": 1000000.0,\n", - " \"sliding_window\": 32768,\n", - " \"tie_word_embeddings\": true,\n", - " \"torch_dtype\": \"bfloat16\",\n", - " \"transformers_version\": \"4.42.3\",\n", - " \"use_cache\": true,\n", - " \"use_sliding_window\": false,\n", - " \"vocab_size\": 151936\n", - "}\n", - "\n", - "[INFO|tokenization_utils_base.py:2574] 2024-07-04 12:10:57,908 >> tokenizer config file saved in saves/qwen2-0.5b/lora/sft/checkpoint-2800/tokenizer_config.json\n", - "[INFO|tokenization_utils_base.py:2583] 2024-07-04 12:10:57,908 >> Special tokens file saved in saves/qwen2-0.5b/lora/sft/checkpoint-2800/special_tokens_map.json\n", - "{'loss': 0.9401, 'grad_norm': 5.465145587921143, 'learning_rate': 7.942430149009161e-06, 'epoch': 5.02}\n", - "{'loss': 0.9053, 'grad_norm': 5.05084228515625, 'learning_rate': 7.663790038585793e-06, 'epoch': 5.03}\n", - "{'loss': 0.9659, 'grad_norm': 6.576834678649902, 'learning_rate': 7.389719171023857e-06, 'epoch': 5.05}\n", - "{'loss': 0.9701, 'grad_norm': 6.316474437713623, 'learning_rate': 7.1202471261170245e-06, 'epoch': 5.07}\n", - "{'loss': 0.9986, 'grad_norm': 5.003658771514893, 'learning_rate': 6.855402987319348e-06, 'epoch': 5.09}\n", - "{'loss': 1.0909, 'grad_norm': 5.66398286819458, 
'learning_rate': 6.595215338606397e-06, 'epoch': 5.1}\n", - "{'loss': 0.8373, 'grad_norm': 5.239981174468994, 'learning_rate': 6.339712261390213e-06, 'epoch': 5.12}\n", - "{'loss': 1.0119, 'grad_norm': 5.830501079559326, 'learning_rate': 6.088921331488568e-06, 'epoch': 5.14}\n", - "{'loss': 0.89, 'grad_norm': 5.683416366577148, 'learning_rate': 5.8428696161488215e-06, 'epoch': 5.16}\n", - "{'loss': 0.8321, 'grad_norm': 5.024005889892578, 'learning_rate': 5.601583671126531e-06, 'epoch': 5.18}\n", - "{'loss': 0.9924, 'grad_norm': 5.65994930267334, 'learning_rate': 5.365089537819434e-06, 'epoch': 5.19}\n", - "{'loss': 0.982, 'grad_norm': 5.285236835479736, 'learning_rate': 5.133412740456806e-06, 'epoch': 5.21}\n", - "{'loss': 0.988, 'grad_norm': 6.087540149688721, 'learning_rate': 4.906578283344759e-06, 'epoch': 5.23}\n", - "{'loss': 1.0628, 'grad_norm': 5.564962863922119, 'learning_rate': 4.684610648167503e-06, 'epoch': 5.25}\n", - "{'loss': 0.9339, 'grad_norm': 5.311854362487793, 'learning_rate': 4.467533791345191e-06, 'epoch': 5.27}\n", - "{'loss': 0.9112, 'grad_norm': 6.383027076721191, 'learning_rate': 4.255371141448272e-06, 'epoch': 5.28}\n", - "{'loss': 0.8851, 'grad_norm': 5.323634147644043, 'learning_rate': 4.048145596668967e-06, 'epoch': 5.3}\n", - "{'loss': 0.9129, 'grad_norm': 5.474393844604492, 'learning_rate': 3.84587952234991e-06, 'epoch': 5.32}\n", - "{'loss': 0.8769, 'grad_norm': 6.140456676483154, 'learning_rate': 3.6485947485702832e-06, 'epoch': 5.34}\n", - "{'loss': 0.9177, 'grad_norm': 5.710687637329102, 'learning_rate': 3.4563125677897932e-06, 'epoch': 5.35}\n", - "{'loss': 0.9235, 'grad_norm': 6.829979419708252, 'learning_rate': 3.269053732550581e-06, 'epoch': 5.37}\n", - "{'loss': 0.9744, 'grad_norm': 6.83032751083374, 'learning_rate': 3.086838453237506e-06, 'epoch': 5.39}\n", - "{'loss': 0.7769, 'grad_norm': 5.491135597229004, 'learning_rate': 2.9096863958968268e-06, 'epoch': 5.41}\n", - "{'loss': 0.8412, 'grad_norm': 6.708963394165039, 'learning_rate': 2.737616680113758e-06, 'epoch': 5.43}\n", - "{'loss': 0.8511, 'grad_norm': 6.676459312438965, 'learning_rate': 2.570647876948895e-06, 'epoch': 5.44}\n", - "{'loss': 0.9549, 'grad_norm': 7.339512825012207, 'learning_rate': 2.408798006933882e-06, 'epoch': 5.46}\n", - "{'loss': 1.1274, 'grad_norm': 6.678201198577881, 'learning_rate': 2.252084538126542e-06, 'epoch': 5.48}\n", - "{'loss': 0.9263, 'grad_norm': 6.124770641326904, 'learning_rate': 2.100524384225555e-06, 'epoch': 5.5}\n", - "{'loss': 0.943, 'grad_norm': 7.016269207000732, 'learning_rate': 1.9541339027450256e-06, 'epoch': 5.52}\n", - "{'loss': 0.9571, 'grad_norm': 5.896731853485107, 'learning_rate': 1.8129288932490274e-06, 'epoch': 5.53}\n", - "{'loss': 0.8802, 'grad_norm': 5.532138347625732, 'learning_rate': 1.6769245956464396e-06, 'epoch': 5.55}\n", - "{'loss': 1.0613, 'grad_norm': 6.437876224517822, 'learning_rate': 1.5461356885461075e-06, 'epoch': 5.57}\n", - "{'loss': 0.957, 'grad_norm': 5.419349670410156, 'learning_rate': 1.4205762876726092e-06, 'epoch': 5.59}\n", - "{'loss': 1.0672, 'grad_norm': 6.222854137420654, 'learning_rate': 1.3002599443428243e-06, 'epoch': 5.6}\n", - "{'loss': 1.0228, 'grad_norm': 6.305788993835449, 'learning_rate': 1.1851996440033319e-06, 'epoch': 5.62}\n", - "{'loss': 0.8494, 'grad_norm': 6.640852928161621, 'learning_rate': 1.0754078048289374e-06, 'epoch': 5.64}\n", - "{'loss': 0.9589, 'grad_norm': 5.630051612854004, 'learning_rate': 9.708962763824048e-07, 'epoch': 5.66}\n", - "{'loss': 0.9514, 'grad_norm': 5.754588603973389, 
'learning_rate': 8.716763383355864e-07, 'epoch': 5.68}\n", - "{'loss': 0.9896, 'grad_norm': 6.073591232299805, 'learning_rate': 7.777586992519959e-07, 'epoch': 5.69}\n", - "{'loss': 0.8798, 'grad_norm': 6.883085250854492, 'learning_rate': 6.891534954310885e-07, 'epoch': 5.71}\n", - "{'loss': 0.9749, 'grad_norm': 5.874994277954102, 'learning_rate': 6.058702898142643e-07, 'epoch': 5.73}\n", - "{'loss': 0.862, 'grad_norm': 5.205725193023682, 'learning_rate': 5.279180709527765e-07, 'epoch': 5.75}\n", - "{'loss': 1.0397, 'grad_norm': 6.112522602081299, 'learning_rate': 4.553052520375911e-07, 'epoch': 5.77}\n", - "{'loss': 0.8691, 'grad_norm': 6.450985431671143, 'learning_rate': 3.8803966999139684e-07, 'epoch': 5.78}\n", - "{'loss': 0.884, 'grad_norm': 5.139239311218262, 'learning_rate': 3.261285846227868e-07, 'epoch': 5.8}\n", - "{'loss': 0.8508, 'grad_norm': 6.213397979736328, 'learning_rate': 2.6957867784270787e-07, 'epoch': 5.82}\n", - "{'loss': 0.8554, 'grad_norm': 27.320371627807617, 'learning_rate': 2.1839605294330933e-07, 'epoch': 5.84}\n", - "{'loss': 1.036, 'grad_norm': 7.248013973236084, 'learning_rate': 1.725862339392259e-07, 'epoch': 5.85}\n", - "{'loss': 0.8262, 'grad_norm': 7.534704685211182, 'learning_rate': 1.3215416497138754e-07, 'epoch': 5.87}\n", - "{'loss': 1.0454, 'grad_norm': 5.765580654144287, 'learning_rate': 9.710420977340762e-08, 'epoch': 5.89}\n", - "{'loss': 0.8484, 'grad_norm': 5.267190456390381, 'learning_rate': 6.744015120061509e-08, 'epoch': 5.91}\n", - "{'loss': 0.9818, 'grad_norm': 6.66579008102417, 'learning_rate': 4.316519082179227e-08, 'epoch': 5.93}\n", - "{'loss': 0.8825, 'grad_norm': 4.743204593658447, 'learning_rate': 2.4281948573617874e-08, 'epoch': 5.94}\n", - "{'loss': 0.9975, 'grad_norm': 6.015940189361572, 'learning_rate': 1.0792462477909882e-08, 'epoch': 5.96}\n", - "{'loss': 0.9418, 'grad_norm': 5.236660957336426, 'learning_rate': 2.6981884216847884e-09, 'epoch': 5.98}\n", - "{'loss': 0.9678, 'grad_norm': 5.222324371337891, 'learning_rate': 0.0, 'epoch': 6.0}\n", - "100%|█████████████████████████████████████| 3360/3360 [1:12:30<00:00, 1.25s/it][INFO|trainer.py:3788] 2024-07-04 12:22:39,963 >> \n", - "***** Running Evaluation *****\n", - "[INFO|trainer.py:3790] 2024-07-04 12:22:39,963 >> Num examples = 46\n", - "[INFO|trainer.py:3793] 2024-07-04 12:22:39,964 >> Batch size = 1\n", - "\n", - " 0%| | 0/46 [00:00<?, ?it/s] ... >> Saving model checkpoint to saves/qwen2-0.5b/lora/sft/checkpoint-3360\n", - "[INFO|configuration_utils.py:733] 2024-07-04 12:22:42,459 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/config.json\n", - "[INFO|configuration_utils.py:800] 2024-07-04 12:22:42,460 >> Model config Qwen2Config {\n", - " \"architectures\": [\n", - " \"Qwen2ForCausalLM\"\n", - " ],\n", - " \"attention_dropout\": 0.0,\n", - " \"bos_token_id\": 151643,\n", - " \"eos_token_id\": 151645,\n", - " \"hidden_act\": \"silu\",\n", - " \"hidden_size\": 896,\n", - " \"initializer_range\": 0.02,\n", - " \"intermediate_size\": 4864,\n", - " \"max_position_embeddings\": 32768,\n", - " \"max_window_layers\": 24,\n", - " \"model_type\": \"qwen2\",\n", - " \"num_attention_heads\": 14,\n", - " \"num_hidden_layers\": 24,\n", - " \"num_key_value_heads\": 2,\n", - " \"rms_norm_eps\": 1e-06,\n", - " \"rope_theta\": 1000000.0,\n", - " \"sliding_window\": 32768,\n", - " 
\"tie_word_embeddings\": true,\n", - " \"torch_dtype\": \"bfloat16\",\n", - " \"transformers_version\": \"4.42.3\",\n", - " \"use_cache\": true,\n", - " \"use_sliding_window\": false,\n", - " \"vocab_size\": 151936\n", - "}\n", - "\n", - "[INFO|tokenization_utils_base.py:2574] 2024-07-04 12:22:42,487 >> tokenizer config file saved in saves/qwen2-0.5b/lora/sft/checkpoint-3360/tokenizer_config.json\n", - "[INFO|tokenization_utils_base.py:2583] 2024-07-04 12:22:42,487 >> Special tokens file saved in saves/qwen2-0.5b/lora/sft/checkpoint-3360/special_tokens_map.json\n", - "[INFO|trainer.py:2383] 2024-07-04 12:22:42,628 >> \n", - "\n", - "Training completed. Do not forget to share your model on huggingface.co/models =)\n", - "\n", - "\n", - "{'train_runtime': 4358.4327, 'train_samples_per_second': 6.17, 'train_steps_per_second': 0.771, 'train_loss': 1.4797242326395852, 'epoch': 6.0}\n", - "100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 3360/3360 [1:12:33<00:00, 1.30s/it]\n", - "[INFO|trainer.py:3478] 2024-07-04 12:22:42,631 >> Saving model checkpoint to saves/qwen2-0.5b/lora/sft\n", - "[INFO|configuration_utils.py:733] 2024-07-04 12:22:43,255 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/config.json\n", - "[INFO|configuration_utils.py:800] 2024-07-04 12:22:43,256 >> Model config Qwen2Config {\n", - " \"architectures\": [\n", - " \"Qwen2ForCausalLM\"\n", - " ],\n", - " \"attention_dropout\": 0.0,\n", - " \"bos_token_id\": 151643,\n", - " \"eos_token_id\": 151645,\n", - " \"hidden_act\": \"silu\",\n", - " \"hidden_size\": 896,\n", - " \"initializer_range\": 0.02,\n", - " \"intermediate_size\": 4864,\n", - " \"max_position_embeddings\": 32768,\n", - " \"max_window_layers\": 24,\n", - " \"model_type\": \"qwen2\",\n", - " \"num_attention_heads\": 14,\n", - " \"num_hidden_layers\": 24,\n", - " \"num_key_value_heads\": 2,\n", - " \"rms_norm_eps\": 1e-06,\n", - " \"rope_theta\": 1000000.0,\n", - " \"sliding_window\": 32768,\n", - " \"tie_word_embeddings\": true,\n", - " \"torch_dtype\": \"bfloat16\",\n", - " \"transformers_version\": \"4.42.3\",\n", - " \"use_cache\": true,\n", - " \"use_sliding_window\": false,\n", - " \"vocab_size\": 151936\n", - "}\n", - "\n", - "[INFO|tokenization_utils_base.py:2574] 2024-07-04 12:22:43,285 >> tokenizer config file saved in saves/qwen2-0.5b/lora/sft/tokenizer_config.json\n", - "[INFO|tokenization_utils_base.py:2583] 2024-07-04 12:22:43,285 >> Special tokens file saved in saves/qwen2-0.5b/lora/sft/special_tokens_map.json\n", - "***** train metrics *****\n", - " epoch = 5.9973\n", - " total_flos = 4594110GF\n", - " train_loss = 1.4797\n", - " train_runtime = 1:12:38.43\n", - " train_samples_per_second = 6.17\n", - " train_steps_per_second = 0.771\n", - "Figure saved at: saves/qwen2-0.5b/lora/sft/training_loss.png\n", - "Figure saved at: saves/qwen2-0.5b/lora/sft/training_eval_loss.png\n", - "[INFO|trainer.py:3788] 2024-07-04 12:22:43,568 >> \n", - "***** Running Evaluation *****\n", - "[INFO|trainer.py:3790] 2024-07-04 12:22:43,568 >> Num examples = 46\n", - "[INFO|trainer.py:3793] 2024-07-04 12:22:43,568 >> Batch size = 1\n", - "100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 46/46 [00:01<00:00, 25.60it/s]\n", - "***** eval metrics *****\n", - " epoch = 5.9973\n", - " eval_loss = 
2.5474\n", - " eval_runtime = 0:00:01.84\n", - " eval_samples_per_second = 24.959\n", - " eval_steps_per_second = 24.959\n", - "[INFO|modelcard.py:449] 2024-07-04 12:22:45,413 >> Dropping the following result as it does not have all the necessary fields:\n", - "{'task': {'name': 'Causal Language Modeling', 'type': 'text-generation'}}\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: | 0.085 MB of 0.085 MB uploaded\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: Run history:\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: eval/loss โ–‚โ–โ–‚โ–„โ–‡โ–ˆโ–ˆ\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: eval/runtime โ–‡โ–ˆโ–ƒโ–โ–‡โ–„โ–…\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: eval/samples_per_second โ–‚โ–โ–†โ–ˆโ–‚โ–„โ–„\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: eval/steps_per_second โ–‚โ–โ–†โ–ˆโ–‚โ–„โ–„\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: train/epoch โ–โ–โ–โ–‚โ–‚โ–‚โ–‚โ–‚โ–‚โ–ƒโ–ƒโ–ƒโ–ƒโ–ƒโ–ƒโ–„โ–„โ–„โ–„โ–„โ–…โ–…โ–…โ–…โ–…โ–†โ–†โ–†โ–†โ–†โ–†โ–‡โ–‡โ–‡โ–‡โ–‡โ–‡โ–ˆโ–ˆโ–ˆ\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: train/global_step โ–โ–โ–โ–‚โ–‚โ–‚โ–‚โ–‚โ–‚โ–ƒโ–ƒโ–ƒโ–ƒโ–ƒโ–ƒโ–„โ–„โ–„โ–„โ–„โ–…โ–…โ–…โ–…โ–…โ–†โ–†โ–†โ–†โ–†โ–†โ–‡โ–‡โ–‡โ–‡โ–‡โ–‡โ–ˆโ–ˆโ–ˆ\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: train/grad_norm โ–„โ–โ–โ–ƒโ–‚โ–ƒโ–ƒโ–ƒโ–ƒโ–„โ–ƒโ–„โ–‚โ–ƒโ–„โ–†โ–ƒโ–‚โ–„โ–„โ–…โ–†โ–„โ–†โ–…โ–…โ–ƒโ–„โ–ˆโ–…โ–†โ–ˆโ–†โ–†โ–…โ–…โ–†โ–‡โ–‡โ–…\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: train/learning_rate โ–‚โ–„โ–…โ–‡โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–‡โ–‡โ–‡โ–‡โ–‡โ–†โ–†โ–†โ–†โ–…โ–…โ–…โ–„โ–„โ–„โ–ƒโ–ƒโ–ƒโ–ƒโ–‚โ–‚โ–‚โ–‚โ–โ–โ–โ–โ–โ–โ–\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: train/loss โ–ˆโ–‡โ–‡โ–‡โ–†โ–†โ–†โ–†โ–†โ–†โ–…โ–…โ–†โ–…โ–„โ–„โ–„โ–„โ–„โ–„โ–ƒโ–ƒโ–ƒโ–ƒโ–ƒโ–‚โ–‚โ–‚โ–‚โ–‚โ–‚โ–‚โ–‚โ–‚โ–‚โ–โ–โ–โ–โ–‚\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: \n", - "\u001b[34m\u001b[1mwandb\u001b[0m: Run summary:\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: eval/loss 2.5474\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: eval/runtime 1.843\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: eval/samples_per_second 24.959\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: eval/steps_per_second 24.959\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: total_flos 4932888177414144.0\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: train/epoch 5.99732\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: train/global_step 3360\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: train/grad_norm 5.22232\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: train/learning_rate 0.0\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: train/loss 0.9678\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: train_loss 1.47972\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: train_runtime 4358.4327\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: train_samples_per_second 6.17\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: train_steps_per_second 0.771\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: \n", - "\u001b[34m\u001b[1mwandb\u001b[0m: ๐Ÿš€ View run \u001b[33mqwen2_0.5b_lora_sft\u001b[0m at: \u001b[34m\u001b[4mhttps://wandb.ai/inflaton-ai/huggingface/runs/u8sqhi0x\u001b[0m\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: โญ๏ธ View project at: \u001b[34m\u001b[4mhttps://wandb.ai/inflaton-ai/huggingface\u001b[0m\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: Synced 6 W&B file(s), 0 media file(s), 1 artifact file(s) and 0 other file(s)\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: Find logs at: \u001b[35m\u001b[1m./wandb/run-20240704_111005-u8sqhi0x/logs\u001b[0m\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m The new W&B backend becomes opt-out in version 0.18.0; try it out with `wandb.require(\"core\")`! 
See https://wandb.me/wandb-core for more information.\n", - "CPU times: user 59.8 s, sys: 18.1 s, total: 1min 17s\n", - "Wall time: 1h 13min 51s\n" - ] - } - ], - "source": [ - "%%time\n", - "\n", - "!./scripts/tune-lf.sh config/qwen2_0.5b_lora_sft.yaml" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "loading /home/inflaton/code/projects/courses/llm-finetuning/llm_toolkit/translation_engine.py\n", - "Current Directory:\n", - "/home/inflaton/code/projects/courses/llm-finetuning/llama-factory\n", - "07/04/2024 12:22:59 - INFO - llamafactory.hparams.parser - Process rank: 0, device: cuda:0, n_gpu: 1, distributed training: False, compute dtype: torch.bfloat16\n", - "[INFO|tokenization_utils_base.py:2161] 2024-07-04 12:23:00,122 >> loading file vocab.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/vocab.json\n", - "[INFO|tokenization_utils_base.py:2161] 2024-07-04 12:23:00,122 >> loading file merges.txt from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/merges.txt\n", - "[INFO|tokenization_utils_base.py:2161] 2024-07-04 12:23:00,122 >> loading file tokenizer.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/tokenizer.json\n", - "[INFO|tokenization_utils_base.py:2161] 2024-07-04 12:23:00,122 >> loading file added_tokens.json from cache at None\n", - "[INFO|tokenization_utils_base.py:2161] 2024-07-04 12:23:00,122 >> loading file special_tokens_map.json from cache at None\n", - "[INFO|tokenization_utils_base.py:2161] 2024-07-04 12:23:00,122 >> loading file tokenizer_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/tokenizer_config.json\n", - "[WARNING|logging.py:313] 2024-07-04 12:23:00,234 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n", - "07/04/2024 12:23:00 - INFO - llamafactory.data.template - Replace eos token: <|im_end|>\n", - "07/04/2024 12:23:00 - INFO - llamafactory.data.template - Add <|im_start|> to stop words.\n", - "07/04/2024 12:23:00 - INFO - llamafactory.data.loader - Loading dataset alpaca_mac.json...\n", - "Converting format of dataset (num_proc=16): 100%|โ–ˆ| 4528/4528 [00:00<00:00, 1573\n", - "Running tokenizer on dataset (num_proc=16): 100%|โ–ˆ| 4528/4528 [00:01<00:00, 3491\n", - "input_ids:\n", - "[151644, 872, 198, 5501, 14683, 279, 2701, 8453, 1467, 1119, 6364, 323, 3410, 1172, 279, 24531, 2213, 11, 4302, 770, 624, 35987, 102895, 99164, 100324, 100717, 100095, 99509, 1773, 151645, 198, 151644, 77091, 198, 17949, 358, 572, 2617, 553, 264, 38835, 44486, 13, 151645]\n", - "inputs:\n", - "<|im_start|>user\n", - "Please translate the following Chinese text into English and provide only the translated content, nothing else.\n", - "ๅ…จไป—็€็‹ไป™ๆญๆ•‘ใ€‚<|im_end|>\n", - "<|im_start|>assistant\n", - "Because I was protected by a fox fairy.<|im_end|>\n", - "label_ids:\n", - "[-100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 17949, 358, 572, 2617, 553, 264, 38835, 44486, 13, 
151645]\n", - "labels:\n", - "Because I was protected by a fox fairy.<|im_end|>\n", - "[INFO|configuration_utils.py:733] 2024-07-04 12:23:03,981 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/config.json\n", - "[INFO|configuration_utils.py:800] 2024-07-04 12:23:03,982 >> Model config Qwen2Config {\n", - " \"_name_or_path\": \"Qwen/Qwen2-1.5B-Instruct\",\n", - " \"architectures\": [\n", - " \"Qwen2ForCausalLM\"\n", - " ],\n", - " \"attention_dropout\": 0.0,\n", - " \"bos_token_id\": 151643,\n", - " \"eos_token_id\": 151645,\n", - " \"hidden_act\": \"silu\",\n", - " \"hidden_size\": 1536,\n", - " \"initializer_range\": 0.02,\n", - " \"intermediate_size\": 8960,\n", - " \"max_position_embeddings\": 32768,\n", - " \"max_window_layers\": 28,\n", - " \"model_type\": \"qwen2\",\n", - " \"num_attention_heads\": 12,\n", - " \"num_hidden_layers\": 28,\n", - " \"num_key_value_heads\": 2,\n", - " \"rms_norm_eps\": 1e-06,\n", - " \"rope_theta\": 1000000.0,\n", - " \"sliding_window\": 32768,\n", - " \"tie_word_embeddings\": true,\n", - " \"torch_dtype\": \"bfloat16\",\n", - " \"transformers_version\": \"4.42.3\",\n", - " \"use_cache\": true,\n", - " \"use_sliding_window\": false,\n", - " \"vocab_size\": 151936\n", - "}\n", - "\n", - "[INFO|modeling_utils.py:3556] 2024-07-04 12:23:04,016 >> loading weights file model.safetensors from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/model.safetensors\n", - "[INFO|modeling_utils.py:1531] 2024-07-04 12:23:06,701 >> Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16.\n", - "[INFO|configuration_utils.py:1000] 2024-07-04 12:23:06,704 >> Generate config GenerationConfig {\n", - " \"bos_token_id\": 151643,\n", - " \"eos_token_id\": 151645\n", - "}\n", - "\n", - "[INFO|modeling_utils.py:4364] 2024-07-04 12:26:42,040 >> All model checkpoint weights were used when initializing Qwen2ForCausalLM.\n", - "\n", - "[INFO|modeling_utils.py:4372] 2024-07-04 12:26:42,040 >> All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at Qwen/Qwen2-1.5B-Instruct.\n", - "If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training.\n", - "[INFO|configuration_utils.py:955] 2024-07-04 12:26:42,765 >> loading configuration file generation_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/generation_config.json\n", - "[INFO|configuration_utils.py:1000] 2024-07-04 12:26:42,766 >> Generate config GenerationConfig {\n", - " \"bos_token_id\": 151643,\n", - " \"do_sample\": true,\n", - " \"eos_token_id\": [\n", - " 151645,\n", - " 151643\n", - " ],\n", - " \"pad_token_id\": 151643,\n", - " \"repetition_penalty\": 1.1,\n", - " \"temperature\": 0.7,\n", - " \"top_k\": 20,\n", - " \"top_p\": 0.8\n", - "}\n", - "\n", - "07/04/2024 12:26:43 - INFO - llamafactory.model.model_utils.checkpointing - Gradient checkpointing enabled.\n", - "07/04/2024 12:26:43 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.\n", - "07/04/2024 12:26:43 - INFO - llamafactory.model.adapter - Upcasting trainable params to float32.\n", - "07/04/2024 12:26:43 - INFO - llamafactory.model.adapter - Fine-tuning method: 
LoRA\n", - "07/04/2024 12:26:43 - INFO - llamafactory.model.model_utils.misc - Found linear modules: q_proj,gate_proj,down_proj,k_proj,v_proj,up_proj,o_proj\n", - "07/04/2024 12:26:43 - INFO - llamafactory.model.loader - trainable params: 9,232,384 || all params: 1,552,946,688 || trainable%: 0.5945\n", - "[INFO|trainer.py:642] 2024-07-04 12:26:43,511 >> Using auto half precision backend\n", - "[INFO|trainer.py:2128] 2024-07-04 12:26:43,666 >> ***** Running training *****\n", - "[INFO|trainer.py:2129] 2024-07-04 12:26:43,666 >> Num examples = 4,482\n", - "[INFO|trainer.py:2130] 2024-07-04 12:26:43,666 >> Num Epochs = 6\n", - "[INFO|trainer.py:2131] 2024-07-04 12:26:43,666 >> Instantaneous batch size per device = 1\n", - "[INFO|trainer.py:2134] 2024-07-04 12:26:43,666 >> Total train batch size (w. parallel, distributed & accumulation) = 8\n", - "[INFO|trainer.py:2135] 2024-07-04 12:26:43,666 >> Gradient Accumulation steps = 8\n", - "[INFO|trainer.py:2136] 2024-07-04 12:26:43,666 >> Total optimization steps = 3,360\n", - "[INFO|trainer.py:2137] 2024-07-04 12:26:43,668 >> Number of trainable parameters = 9,232,384\n", - "[INFO|integration_utils.py:750] 2024-07-04 12:26:43,670 >> Automatic Weights & Biases logging enabled, to disable set os.environ[\"WANDB_DISABLED\"] = \"true\"\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33minflaton-sg\u001b[0m (\u001b[33minflaton-ai\u001b[0m). Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: Tracking run with wandb version 0.17.4\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: Run data is saved locally in \u001b[35m\u001b[1m/home/inflaton/code/projects/courses/llm-finetuning/llama-factory/wandb/run-20240704_122645-mpc5sxtf\u001b[0m\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: Run \u001b[1m`wandb offline`\u001b[0m to turn off syncing.\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: Syncing run \u001b[33mqwen2_1.5b_lora_sft\u001b[0m\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: โญ๏ธ View project at \u001b[34m\u001b[4mhttps://wandb.ai/inflaton-ai/huggingface\u001b[0m\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: ๐Ÿš€ View run at \u001b[34m\u001b[4mhttps://wandb.ai/inflaton-ai/huggingface/runs/mpc5sxtf\u001b[0m\n", - "{'loss': 2.1612, 'grad_norm': 1.7288845777511597, 'learning_rate': 2.9761904761904763e-06, 'epoch': 0.02}\n", - "{'loss': 2.2871, 'grad_norm': 1.9337925910949707, 'learning_rate': 5.9523809523809525e-06, 'epoch': 0.04}\n", - "{'loss': 2.1455, 'grad_norm': 1.5129448175430298, 'learning_rate': 8.92857142857143e-06, 'epoch': 0.05}\n", - "{'loss': 2.1376, 'grad_norm': 2.9766852855682373, 'learning_rate': 1.1904761904761905e-05, 'epoch': 0.07}\n", - "{'loss': 2.2937, 'grad_norm': 1.413576602935791, 'learning_rate': 1.4880952380952381e-05, 'epoch': 0.09}\n", - "{'loss': 2.0076, 'grad_norm': 1.7012724876403809, 'learning_rate': 1.785714285714286e-05, 'epoch': 0.11}\n", - "{'loss': 2.1399, 'grad_norm': 1.679208517074585, 'learning_rate': 2.0833333333333336e-05, 'epoch': 0.12}\n", - "{'loss': 1.9036, 'grad_norm': 1.6296344995498657, 'learning_rate': 2.380952380952381e-05, 'epoch': 0.14}\n", - "{'loss': 2.0186, 'grad_norm': 2.1293675899505615, 'learning_rate': 2.6785714285714288e-05, 'epoch': 0.16}\n", - "{'loss': 1.9517, 'grad_norm': 1.4419277906417847, 'learning_rate': 2.9761904761904762e-05, 'epoch': 0.18}\n", - "{'loss': 1.979, 'grad_norm': 1.8672434091567993, 'learning_rate': 3.273809523809524e-05, 'epoch': 0.2}\n", - "{'loss': 1.9362, 'grad_norm': 1.3589439392089844, 'learning_rate': 
3.571428571428572e-05, 'epoch': 0.21}\n", - "{'loss': 1.9264, 'grad_norm': 1.71873140335083, 'learning_rate': 3.8690476190476195e-05, 'epoch': 0.23}\n", - "{'loss': 1.9515, 'grad_norm': 2.2398152351379395, 'learning_rate': 4.166666666666667e-05, 'epoch': 0.25}\n", - "{'loss': 1.8163, 'grad_norm': 1.5651923418045044, 'learning_rate': 4.464285714285715e-05, 'epoch': 0.27}\n", - "{'loss': 1.792, 'grad_norm': 1.5333657264709473, 'learning_rate': 4.761904761904762e-05, 'epoch': 0.29}\n", - "{'loss': 1.9457, 'grad_norm': 1.448676347732544, 'learning_rate': 5.05952380952381e-05, 'epoch': 0.3}\n", - "{'loss': 1.7585, 'grad_norm': 1.8496005535125732, 'learning_rate': 5.3571428571428575e-05, 'epoch': 0.32}\n", - "{'loss': 1.8682, 'grad_norm': 2.332167387008667, 'learning_rate': 5.6547619047619046e-05, 'epoch': 0.34}\n", - "{'loss': 1.9775, 'grad_norm': 1.9075323343276978, 'learning_rate': 5.9523809523809524e-05, 'epoch': 0.36}\n", - "{'loss': 1.9233, 'grad_norm': 1.8132203817367554, 'learning_rate': 6.25e-05, 'epoch': 0.37}\n", - "{'loss': 1.8469, 'grad_norm': 2.514983892440796, 'learning_rate': 6.547619047619048e-05, 'epoch': 0.39}\n", - "{'loss': 1.8242, 'grad_norm': 2.0344440937042236, 'learning_rate': 6.845238095238096e-05, 'epoch': 0.41}\n", - "{'loss': 1.9965, 'grad_norm': 2.310185194015503, 'learning_rate': 7.142857142857143e-05, 'epoch': 0.43}\n", - "{'loss': 1.9004, 'grad_norm': 2.3513343334198, 'learning_rate': 7.440476190476191e-05, 'epoch': 0.45}\n", - "{'loss': 1.8188, 'grad_norm': 2.2934393882751465, 'learning_rate': 7.738095238095239e-05, 'epoch': 0.46}\n", - "{'loss': 1.8803, 'grad_norm': 2.8724184036254883, 'learning_rate': 8.035714285714287e-05, 'epoch': 0.48}\n", - "{'loss': 1.9181, 'grad_norm': 2.4238462448120117, 'learning_rate': 8.333333333333334e-05, 'epoch': 0.5}\n", - "{'loss': 1.6932, 'grad_norm': 1.5286414623260498, 'learning_rate': 8.630952380952382e-05, 'epoch': 0.52}\n", - "{'loss': 1.8331, 'grad_norm': 2.563647985458374, 'learning_rate': 8.92857142857143e-05, 'epoch': 0.54}\n", - "{'loss': 1.8539, 'grad_norm': 2.127699613571167, 'learning_rate': 9.226190476190478e-05, 'epoch': 0.55}\n", - "{'loss': 1.6796, 'grad_norm': 3.4179396629333496, 'learning_rate': 9.523809523809524e-05, 'epoch': 0.57}\n", - "{'loss': 1.7209, 'grad_norm': 2.492151975631714, 'learning_rate': 9.821428571428572e-05, 'epoch': 0.59}\n", - "{'loss': 1.7723, 'grad_norm': 2.3568859100341797, 'learning_rate': 9.999956828659095e-05, 'epoch': 0.61}\n", - "{'loss': 1.7839, 'grad_norm': 3.5560832023620605, 'learning_rate': 9.999471159635539e-05, 'epoch': 0.62}\n", - "{'loss': 1.7146, 'grad_norm': 1.712493658065796, 'learning_rate': 9.998445910004082e-05, 'epoch': 0.64}\n", - "{'loss': 1.8911, 'grad_norm': 2.824240207672119, 'learning_rate': 9.996881190417393e-05, 'epoch': 0.66}\n", - "{'loss': 1.8631, 'grad_norm': 2.2122113704681396, 'learning_rate': 9.994777169751806e-05, 'epoch': 0.68}\n", - "{'loss': 1.5738, 'grad_norm': 1.7466025352478027, 'learning_rate': 9.992134075089084e-05, 'epoch': 0.7}\n", - "{'loss': 1.7552, 'grad_norm': 2.581709623336792, 'learning_rate': 9.988952191691925e-05, 'epoch': 0.71}\n", - "{'loss': 1.808, 'grad_norm': 2.1387972831726074, 'learning_rate': 9.985231862973168e-05, 'epoch': 0.73}\n", - "{'loss': 1.8303, 'grad_norm': 1.7675608396530151, 'learning_rate': 9.980973490458728e-05, 'epoch': 0.75}\n", - "{'loss': 1.8013, 'grad_norm': 2.706218719482422, 'learning_rate': 9.976177533744261e-05, 'epoch': 0.77}\n", - "{'loss': 1.7443, 'grad_norm': 2.9387295246124268, 'learning_rate': 
9.97084451044556e-05, 'epoch': 0.79}\n", - "{'loss': 1.6509, 'grad_norm': 1.6503076553344727, 'learning_rate': 9.964974996142698e-05, 'epoch': 0.8}\n", - "{'loss': 1.722, 'grad_norm': 2.0305140018463135, 'learning_rate': 9.958569624317893e-05, 'epoch': 0.82}\n", - "{'loss': 1.7625, 'grad_norm': 2.8122429847717285, 'learning_rate': 9.951629086287151e-05, 'epoch': 0.84}\n", - "{'loss': 1.7194, 'grad_norm': 2.0110862255096436, 'learning_rate': 9.944154131125642e-05, 'epoch': 0.86}\n", - "{'loss': 1.7894, 'grad_norm': 1.7363322973251343, 'learning_rate': 9.936145565586871e-05, 'epoch': 0.87}\n", - "{'loss': 1.9447, 'grad_norm': 1.8065259456634521, 'learning_rate': 9.927604254015585e-05, 'epoch': 0.89}\n", - "{'loss': 1.8639, 'grad_norm': 1.8963510990142822, 'learning_rate': 9.918531118254507e-05, 'epoch': 0.91}\n", - "{'loss': 1.7336, 'grad_norm': 2.30542254447937, 'learning_rate': 9.90892713754483e-05, 'epoch': 0.93}\n", - "{'loss': 1.7705, 'grad_norm': 2.9846692085266113, 'learning_rate': 9.898793348420536e-05, 'epoch': 0.95}\n", - "{'loss': 1.7884, 'grad_norm': 2.1550045013427734, 'learning_rate': 9.888130844596524e-05, 'epoch': 0.96}\n", - "{'loss': 1.7428, 'grad_norm': 2.1323790550231934, 'learning_rate': 9.876940776850569e-05, 'epoch': 0.98}\n", - "{'loss': 1.7183, 'grad_norm': 1.8198726177215576, 'learning_rate': 9.865224352899119e-05, 'epoch': 1.0}\n", - " 17%|██████▎ | 560/3360 [15:31<1:20:24, 1.72s/it][INFO|trainer.py:3788] 2024-07-04 12:42:20,584 >> \n", - "***** Running Evaluation *****\n", - "[INFO|trainer.py:3790] 2024-07-04 12:42:20,584 >> Num examples = 46\n", - "[INFO|trainer.py:3793] 2024-07-04 12:42:20,585 >> Batch size = 1\n", - "\n", - " 0%| | 0/46 [00:00<?, ?it/s] ... >> Saving model checkpoint to saves/qwen2-1.5b/lora/sft/checkpoint-560\n", - "[INFO|configuration_utils.py:733] 2024-07-04 12:42:23,808 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/config.json\n", - "[INFO|configuration_utils.py:800] 2024-07-04 12:42:23,809 >> Model config Qwen2Config {\n", - " \"architectures\": [\n", - " \"Qwen2ForCausalLM\"\n", - " ],\n", - " \"attention_dropout\": 0.0,\n", - " \"bos_token_id\": 151643,\n", - " \"eos_token_id\": 151645,\n", - " \"hidden_act\": \"silu\",\n", - " \"hidden_size\": 1536,\n", - " \"initializer_range\": 0.02,\n", - " \"intermediate_size\": 8960,\n", - " \"max_position_embeddings\": 32768,\n", - " \"max_window_layers\": 28,\n", - " \"model_type\": \"qwen2\",\n", - " \"num_attention_heads\": 12,\n", - " \"num_hidden_layers\": 28,\n", - " \"num_key_value_heads\": 2,\n", - " \"rms_norm_eps\": 1e-06,\n", - " \"rope_theta\": 1000000.0,\n", - " \"sliding_window\": 32768,\n", - " \"tie_word_embeddings\": true,\n", - " \"torch_dtype\": \"bfloat16\",\n", - " \"transformers_version\": \"4.42.3\",\n", - " \"use_cache\": true,\n", - " \"use_sliding_window\": false,\n", - " \"vocab_size\": 151936\n", - "}\n", - "\n", - "[INFO|tokenization_utils_base.py:2574] 2024-07-04 12:42:23,882 >> tokenizer config file saved in saves/qwen2-1.5b/lora/sft/checkpoint-560/tokenizer_config.json\n", - "[INFO|tokenization_utils_base.py:2583] 2024-07-04 12:42:23,883 >> Special tokens file saved in saves/qwen2-1.5b/lora/sft/checkpoint-560/special_tokens_map.json\n", - "{'loss': 1.6305, 'grad_norm': 1.8726240396499634, 'learning_rate': 9.852982837266955e-05, 'epoch': 1.02}\n", - "{'loss': 1.4509, 'grad_norm': 2.8097503185272217, 'learning_rate': 9.840217551150706e-05, 
'epoch': 1.04}\n", - "{'loss': 1.6345, 'grad_norm': 2.0100064277648926, 'learning_rate': 9.826929872276255e-05, 'epoch': 1.05}\n", - "{'loss': 1.5736, 'grad_norm': 2.456465482711792, 'learning_rate': 9.81312123475006e-05, 'epoch': 1.07}\n", - "{'loss': 1.5363, 'grad_norm': 2.7739548683166504, 'learning_rate': 9.798793128904356e-05, 'epoch': 1.09}\n", - "{'loss': 1.5754, 'grad_norm': 2.8599655628204346, 'learning_rate': 9.78394710113631e-05, 'epoch': 1.11}\n", - "{'loss': 1.5728, 'grad_norm': 2.2901456356048584, 'learning_rate': 9.768584753741134e-05, 'epoch': 1.12}\n", - "{'loss': 1.5632, 'grad_norm': 3.6802914142608643, 'learning_rate': 9.752707744739145e-05, 'epoch': 1.14}\n", - "{'loss': 1.5927, 'grad_norm': 2.5885791778564453, 'learning_rate': 9.736317787696816e-05, 'epoch': 1.16}\n", - "{'loss': 1.4571, 'grad_norm': 2.383814573287964, 'learning_rate': 9.719416651541839e-05, 'epoch': 1.18}\n", - "{'loss': 1.424, 'grad_norm': 2.032453775405884, 'learning_rate': 9.702006160372209e-05, 'epoch': 1.2}\n", - "{'loss': 1.5577, 'grad_norm': 2.8879408836364746, 'learning_rate': 9.684088193259355e-05, 'epoch': 1.21}\n", - "{'loss': 1.5083, 'grad_norm': 2.9004592895507812, 'learning_rate': 9.665664684045333e-05, 'epoch': 1.23}\n", - "{'loss': 1.5696, 'grad_norm': 3.4651644229888916, 'learning_rate': 9.646737621134112e-05, 'epoch': 1.25}\n", - "{'loss': 1.542, 'grad_norm': 3.6657605171203613, 'learning_rate': 9.627309047276974e-05, 'epoch': 1.27}\n", - "{'loss': 1.6975, 'grad_norm': 3.4882619380950928, 'learning_rate': 9.607381059352038e-05, 'epoch': 1.29}\n", - "{'loss': 1.6179, 'grad_norm': 2.73240327835083, 'learning_rate': 9.586955808137958e-05, 'epoch': 1.3}\n", - "{'loss': 1.6236, 'grad_norm': 2.60489559173584, 'learning_rate': 9.566035498081784e-05, 'epoch': 1.32}\n", - "{'loss': 1.5901, 'grad_norm': 3.45670223236084, 'learning_rate': 9.544622387061055e-05, 'epoch': 1.34}\n", - "{'loss': 1.3816, 'grad_norm': 3.3906328678131104, 'learning_rate': 9.522718786140097e-05, 'epoch': 1.36}\n", - "{'loss': 1.6149, 'grad_norm': 3.6723110675811768, 'learning_rate': 9.500327059320606e-05, 'epoch': 1.37}\n", - "{'loss': 1.4588, 'grad_norm': 4.5224103927612305, 'learning_rate': 9.477449623286505e-05, 'epoch': 1.39}\n", - "{'loss': 1.3431, 'grad_norm': 2.5576796531677246, 'learning_rate': 9.454088947143116e-05, 'epoch': 1.41}\n", - "{'loss': 1.6278, 'grad_norm': 3.344188690185547, 'learning_rate': 9.430247552150673e-05, 'epoch': 1.43}\n", - "{'loss': 1.5137, 'grad_norm': 3.4474005699157715, 'learning_rate': 9.405928011452211e-05, 'epoch': 1.45}\n", - "{'loss': 1.4911, 'grad_norm': 2.6104114055633545, 'learning_rate': 9.381132949795861e-05, 'epoch': 1.46}\n", - "{'loss': 1.6567, 'grad_norm': 3.090139150619507, 'learning_rate': 9.35586504325155e-05, 'epoch': 1.48}\n", - "{'loss': 1.5008, 'grad_norm': 3.6463866233825684, 'learning_rate': 9.330127018922194e-05, 'epoch': 1.5}\n", - "{'loss': 1.4248, 'grad_norm': 2.3963379859924316, 'learning_rate': 9.303921654649362e-05, 'epoch': 1.52}\n", - "{'loss': 1.6043, 'grad_norm': 3.4818763732910156, 'learning_rate': 9.277251778713474e-05, 'epoch': 1.54}\n", - "{'loss': 1.5517, 'grad_norm': 3.180640697479248, 'learning_rate': 9.250120269528546e-05, 'epoch': 1.55}\n", - "{'loss': 1.4711, 'grad_norm': 2.7267000675201416, 'learning_rate': 9.22253005533154e-05, 'epoch': 1.57}\n", - "{'loss': 1.5511, 'grad_norm': 3.386282444000244, 'learning_rate': 9.194484113866313e-05, 'epoch': 1.59}\n", - "{'loss': 1.6975, 'grad_norm': 2.707632064819336, 'learning_rate': 
9.165985472062246e-05, 'epoch': 1.61}\n", - "{'loss': 1.4396, 'grad_norm': 2.970285177230835, 'learning_rate': 9.137037205707552e-05, 'epoch': 1.62}\n", - "{'loss': 1.5347, 'grad_norm': 2.7082931995391846, 'learning_rate': 9.107642439117321e-05, 'epoch': 1.64}\n", - "{'loss': 1.5446, 'grad_norm': 2.947016716003418, 'learning_rate': 9.077804344796302e-05, 'epoch': 1.66}\n", - "{'loss': 1.5401, 'grad_norm': 2.4926042556762695, 'learning_rate': 9.04752614309652e-05, 'epoch': 1.68}\n", - "{'loss': 1.479, 'grad_norm': 3.50626802444458, 'learning_rate': 9.01681110186971e-05, 'epoch': 1.7}\n", - "{'loss': 1.5107, 'grad_norm': 4.556169509887695, 'learning_rate': 8.985662536114613e-05, 'epoch': 1.71}\n", - "{'loss': 1.473, 'grad_norm': 2.4575538635253906, 'learning_rate': 8.954083807619208e-05, 'epoch': 1.73}\n", - "{'loss': 1.6125, 'grad_norm': 3.063415765762329, 'learning_rate': 8.922078324597879e-05, 'epoch': 1.75}\n", - "{'loss': 1.5893, 'grad_norm': 2.45483660697937, 'learning_rate': 8.889649541323574e-05, 'epoch': 1.77}\n", - "{'loss': 1.4993, 'grad_norm': 3.031142473220825, 'learning_rate': 8.856800957755e-05, 'epoch': 1.78}\n", - "{'loss': 1.5025, 'grad_norm': 2.9005496501922607, 'learning_rate': 8.823536119158864e-05, 'epoch': 1.8}\n", - "{'loss': 1.4725, 'grad_norm': 2.9155054092407227, 'learning_rate': 8.789858615727265e-05, 'epoch': 1.82}\n", - "{'loss': 1.4313, 'grad_norm': 2.5998966693878174, 'learning_rate': 8.755772082190194e-05, 'epoch': 1.84}\n", - "{'loss': 1.5647, 'grad_norm': 2.5580039024353027, 'learning_rate': 8.721280197423258e-05, 'epoch': 1.86}\n", - "{'loss': 1.4349, 'grad_norm': 3.395029067993164, 'learning_rate': 8.68638668405062e-05, 'epoch': 1.87}\n", - "{'loss': 1.5214, 'grad_norm': 2.8961341381073, 'learning_rate': 8.651095308043232e-05, 'epoch': 1.89}\n", - "{'loss': 1.6206, 'grad_norm': 3.4450645446777344, 'learning_rate': 8.61540987831238e-05, 'epoch': 1.91}\n", - "{'loss': 1.6429, 'grad_norm': 3.4198362827301025, 'learning_rate': 8.579334246298593e-05, 'epoch': 1.93}\n", - "{'loss': 1.5473, 'grad_norm': 2.9955196380615234, 'learning_rate': 8.542872305555978e-05, 'epoch': 1.95}\n", - "{'loss': 1.4405, 'grad_norm': 2.7997260093688965, 'learning_rate': 8.50602799133199e-05, 'epoch': 1.96}\n", - "{'loss': 1.5382, 'grad_norm': 2.4689786434173584, 'learning_rate': 8.468805280142709e-05, 'epoch': 1.98}\n", - "{'loss': 1.5378, 'grad_norm': 3.09759783744812, 'learning_rate': 8.43120818934367e-05, 'epoch': 2.0}\n", - " 33%|████████████▎ | 1120/3360 [32:07<1:05:51, 1.76s/it][INFO|trainer.py:3788] 2024-07-04 12:58:56,606 >> \n", - "***** Running Evaluation *****\n", - "[INFO|trainer.py:3790] 2024-07-04 12:58:56,606 >> Num examples = 46\n", - "[INFO|trainer.py:3793] 2024-07-04 12:58:56,606 >> Batch size = 1\n", - "\n", - " 0%| | 0/46 [00:00<?, ?it/s] ... >> Saving model checkpoint to saves/qwen2-1.5b/lora/sft/checkpoint-1120\n", - "[INFO|configuration_utils.py:733] 2024-07-04 12:58:59,895 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/config.json\n", - "[INFO|configuration_utils.py:800] 2024-07-04 12:58:59,896 >> Model config Qwen2Config {\n", - " \"architectures\": [\n", - " \"Qwen2ForCausalLM\"\n", - " ],\n", - " \"attention_dropout\": 0.0,\n", - " \"bos_token_id\": 151643,\n", - " \"eos_token_id\": 151645,\n", - " \"hidden_act\": \"silu\",\n", - " \"hidden_size\": 1536,\n", - " \"initializer_range\": 0.02,\n", - " 
\"intermediate_size\": 8960,\n", - " \"max_position_embeddings\": 32768,\n", - " \"max_window_layers\": 28,\n", - " \"model_type\": \"qwen2\",\n", - " \"num_attention_heads\": 12,\n", - " \"num_hidden_layers\": 28,\n", - " \"num_key_value_heads\": 2,\n", - " \"rms_norm_eps\": 1e-06,\n", - " \"rope_theta\": 1000000.0,\n", - " \"sliding_window\": 32768,\n", - " \"tie_word_embeddings\": true,\n", - " \"torch_dtype\": \"bfloat16\",\n", - " \"transformers_version\": \"4.42.3\",\n", - " \"use_cache\": true,\n", - " \"use_sliding_window\": false,\n", - " \"vocab_size\": 151936\n", - "}\n", - "\n", - "[INFO|tokenization_utils_base.py:2574] 2024-07-04 12:58:59,945 >> tokenizer config file saved in saves/qwen2-1.5b/lora/sft/checkpoint-1120/tokenizer_config.json\n", - "[INFO|tokenization_utils_base.py:2583] 2024-07-04 12:58:59,945 >> Special tokens file saved in saves/qwen2-1.5b/lora/sft/checkpoint-1120/special_tokens_map.json\n", - "{'loss': 1.2363, 'grad_norm': 3.1393024921417236, 'learning_rate': 8.393240776696274e-05, 'epoch': 2.02}\n", - "{'loss': 1.1161, 'grad_norm': 2.708930253982544, 'learning_rate': 8.354907139929851e-05, 'epoch': 2.03}\n", - "{'loss': 1.1975, 'grad_norm': 4.3620429039001465, 'learning_rate': 8.316211416299397e-05, 'epoch': 2.05}\n", - "{'loss': 1.1225, 'grad_norm': 3.3463101387023926, 'learning_rate': 8.27715778213905e-05, 'epoch': 2.07}\n", - "{'loss': 1.0548, 'grad_norm': 2.8970718383789062, 'learning_rate': 8.237750452411353e-05, 'epoch': 2.09}\n", - "{'loss': 1.1526, 'grad_norm': 2.99774432182312, 'learning_rate': 8.197993680252334e-05, 'epoch': 2.11}\n", - "{'loss': 1.3093, 'grad_norm': 4.249015808105469, 'learning_rate': 8.157891756512488e-05, 'epoch': 2.12}\n", - "{'loss': 1.1306, 'grad_norm': 3.889763593673706, 'learning_rate': 8.117449009293668e-05, 'epoch': 2.14}\n", - "{'loss': 1.1286, 'grad_norm': 4.101832866668701, 'learning_rate': 8.076669803481965e-05, 'epoch': 2.16}\n", - "{'loss': 1.1271, 'grad_norm': 4.3527703285217285, 'learning_rate': 8.035558540276618e-05, 'epoch': 2.18}\n", - "{'loss': 1.1593, 'grad_norm': 3.5413858890533447, 'learning_rate': 7.994119656715002e-05, 'epoch': 2.2}\n", - "{'loss': 1.1824, 'grad_norm': 5.094357490539551, 'learning_rate': 7.952357625193749e-05, 'epoch': 2.21}\n", - "{'loss': 1.2347, 'grad_norm': 5.2239089012146, 'learning_rate': 7.91027695298606e-05, 'epoch': 2.23}\n", - "{'loss': 1.2285, 'grad_norm': 5.532718658447266, 'learning_rate': 7.86788218175523e-05, 'epoch': 2.25}\n", - "{'loss': 1.1147, 'grad_norm': 3.8143270015716553, 'learning_rate': 7.8251778870645e-05, 'epoch': 2.27}\n", - "{'loss': 1.1478, 'grad_norm': 4.406189441680908, 'learning_rate': 7.782168677883206e-05, 'epoch': 2.28}\n", - "{'loss': 1.1846, 'grad_norm': 3.269481658935547, 'learning_rate': 7.738859196089358e-05, 'epoch': 2.3}\n", - "{'loss': 1.2015, 'grad_norm': 4.366032123565674, 'learning_rate': 7.695254115968648e-05, 'epoch': 2.32}\n", - "{'loss': 1.3038, 'grad_norm': 3.7871077060699463, 'learning_rate': 7.651358143709972e-05, 'epoch': 2.34}\n", - "{'loss': 1.2532, 'grad_norm': 3.805539846420288, 'learning_rate': 7.60717601689749e-05, 'epoch': 2.36}\n", - "{'loss': 1.2044, 'grad_norm': 4.302929401397705, 'learning_rate': 7.562712503999327e-05, 'epoch': 2.37}\n", - "{'loss': 1.3852, 'grad_norm': 4.319093227386475, 'learning_rate': 7.517972403852905e-05, 'epoch': 2.39}\n", - "{'loss': 1.2647, 'grad_norm': 3.8114326000213623, 'learning_rate': 7.472960545147038e-05, 'epoch': 2.41}\n", - "{'loss': 1.1138, 'grad_norm': 4.816274166107178, 'learning_rate': 
7.427681785900761e-05, 'epoch': 2.43}\n", - "{'loss': 1.1797, 'grad_norm': 3.7659311294555664, 'learning_rate': 7.382141012939034e-05, 'epoch': 2.45}\n", - "{'loss': 1.1566, 'grad_norm': 3.777496337890625, 'learning_rate': 7.33634314136531e-05, 'epoch': 2.46}\n", - "{'loss': 1.2235, 'grad_norm': 3.779813051223755, 'learning_rate': 7.290293114031061e-05, 'epoch': 2.48}\n", - "{'loss': 1.3044, 'grad_norm': 4.243238925933838, 'learning_rate': 7.243995901002312e-05, 'epoch': 2.5}\n", - "{'loss': 1.1993, 'grad_norm': 3.7302756309509277, 'learning_rate': 7.197456499023225e-05, 'epoch': 2.52}\n", - "{'loss': 1.1955, 'grad_norm': 3.837207555770874, 'learning_rate': 7.150679930976825e-05, 'epoch': 2.53}\n", - "{'loss': 1.2282, 'grad_norm': 4.182308673858643, 'learning_rate': 7.103671245342887e-05, 'epoch': 2.55}\n", - "{'loss': 1.1068, 'grad_norm': 4.697420120239258, 'learning_rate': 7.056435515653059e-05, 'epoch': 2.57}\n", - "{'loss': 1.3001, 'grad_norm': 5.241019248962402, 'learning_rate': 7.008977839943299e-05, 'epoch': 2.59}\n", - "{'loss': 1.1734, 'grad_norm': 5.618649959564209, 'learning_rate': 6.961303340203653e-05, 'epoch': 2.61}\n", - "{'loss': 1.2205, 'grad_norm': 3.501143455505371, 'learning_rate': 6.91341716182545e-05, 'epoch': 2.62}\n", - "{'loss': 1.2196, 'grad_norm': 2.823162317276001, 'learning_rate': 6.86532447304597e-05, 'epoch': 2.64}\n", - "{'loss': 1.1884, 'grad_norm': 3.8134286403656006, 'learning_rate': 6.817030464390656e-05, 'epoch': 2.66}\n", - "{'loss': 1.296, 'grad_norm': 3.9806973934173584, 'learning_rate': 6.768540348112907e-05, 'epoch': 2.68}\n", - "{'loss': 1.0861, 'grad_norm': 5.336892604827881, 'learning_rate': 6.719859357631535e-05, 'epoch': 2.7}\n", - "{'loss': 1.1123, 'grad_norm': 5.413362503051758, 'learning_rate': 6.670992746965938e-05, 'epoch': 2.71}\n", - "{'loss': 1.3405, 'grad_norm': 3.942927122116089, 'learning_rate': 6.621945790169036e-05, 'epoch': 2.73}\n", - "{'loss': 1.2739, 'grad_norm': 3.9731507301330566, 'learning_rate': 6.572723780758069e-05, 'epoch': 2.75}\n", - "{'loss': 1.2215, 'grad_norm': 3.9058139324188232, 'learning_rate': 6.523332031143272e-05, 'epoch': 2.77}\n", - "{'loss': 1.231, 'grad_norm': 3.7157390117645264, 'learning_rate': 6.473775872054521e-05, 'epoch': 2.78}\n", - "{'loss': 1.0667, 'grad_norm': 3.2383055686950684, 'learning_rate': 6.424060651966007e-05, 'epoch': 2.8}\n", - "{'loss': 1.1742, 'grad_norm': 3.6972646713256836, 'learning_rate': 6.374191736518974e-05, 'epoch': 2.82}\n", - "{'loss': 1.2108, 'grad_norm': 3.783498764038086, 'learning_rate': 6.324174507942637e-05, 'epoch': 2.84}\n", - "{'loss': 1.1861, 'grad_norm': 4.8546037673950195, 'learning_rate': 6.274014364473274e-05, 'epoch': 2.86}\n", - "{'loss': 1.364, 'grad_norm': 3.1121954917907715, 'learning_rate': 6.22371671977162e-05, 'epoch': 2.87}\n", - "{'loss': 1.2202, 'grad_norm': 4.141942024230957, 'learning_rate': 6.173287002338577e-05, 'epoch': 2.89}\n", - "{'loss': 1.2125, 'grad_norm': 4.195278167724609, 'learning_rate': 6.122730654929334e-05, 'epoch': 2.91}\n", - "{'loss': 1.1392, 'grad_norm': 3.6065282821655273, 'learning_rate': 6.072053133965938e-05, 'epoch': 2.93}\n", - "{'loss': 1.3093, 'grad_norm': 3.8997342586517334, 'learning_rate': 6.021259908948402e-05, 'epoch': 2.95}\n", - "{'loss': 1.258, 'grad_norm': 4.212363243103027, 'learning_rate': 5.970356461864391e-05, 'epoch': 2.96}\n", - "{'loss': 1.1774, 'grad_norm': 4.735218524932861, 'learning_rate': 5.919348286597569e-05, 'epoch': 2.98}\n", - "{'loss': 1.2808, 'grad_norm': 3.88008713722229, 'learning_rate': 
5.868240888334653e-05, 'epoch': 3.0}\n", - " 50%|███████████████████▌ | 1680/3360 [48:42<49:15, 1.76s/it][INFO|trainer.py:3788] 2024-07-04 13:15:31,424 >> \n", - "***** Running Evaluation *****\n", - "[INFO|trainer.py:3790] 2024-07-04 13:15:31,425 >> Num examples = 46\n", - "[INFO|trainer.py:3793] 2024-07-04 13:15:31,425 >> Batch size = 1\n", - "\n", - " 0%| | 0/46 [00:00<?, ?it/s] ... >> Saving model checkpoint to saves/qwen2-1.5b/lora/sft/checkpoint-1680\n", - "[INFO|configuration_utils.py:733] 2024-07-04 13:15:34,788 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/config.json\n", - "[INFO|configuration_utils.py:800] 2024-07-04 13:15:34,789 >> Model config Qwen2Config {\n", - " \"architectures\": [\n", - " \"Qwen2ForCausalLM\"\n", - " ],\n", - " \"attention_dropout\": 0.0,\n", - " \"bos_token_id\": 151643,\n", - " \"eos_token_id\": 151645,\n", - " \"hidden_act\": \"silu\",\n", - " \"hidden_size\": 1536,\n", - " \"initializer_range\": 0.02,\n", - " \"intermediate_size\": 8960,\n", - " \"max_position_embeddings\": 32768,\n", - " \"max_window_layers\": 28,\n", - " \"model_type\": \"qwen2\",\n", - " \"num_attention_heads\": 12,\n", - " \"num_hidden_layers\": 28,\n", - " \"num_key_value_heads\": 2,\n", - " \"rms_norm_eps\": 1e-06,\n", - " \"rope_theta\": 1000000.0,\n", - " \"sliding_window\": 32768,\n", - " \"tie_word_embeddings\": true,\n", - " \"torch_dtype\": \"bfloat16\",\n", - " \"transformers_version\": \"4.42.3\",\n", - " \"use_cache\": true,\n", - " \"use_sliding_window\": false,\n", - " \"vocab_size\": 151936\n", - "}\n", - "\n", - "[INFO|tokenization_utils_base.py:2574] 2024-07-04 13:15:34,839 >> tokenizer config file saved in saves/qwen2-1.5b/lora/sft/checkpoint-1680/tokenizer_config.json\n", - "[INFO|tokenization_utils_base.py:2583] 2024-07-04 13:15:34,839 >> Special tokens file saved in saves/qwen2-1.5b/lora/sft/checkpoint-1680/special_tokens_map.json\n", - "{'loss': 1.1006, 'grad_norm': 3.581298589706421, 'learning_rate': 5.8170397829712485e-05, 'epoch': 3.02}\n", - "{'loss': 0.7853, 'grad_norm': 4.149472713470459, 'learning_rate': 5.765750496516547e-05, 'epoch': 3.03}\n", - "{'loss': 0.9606, 'grad_norm': 4.768033027648926, 'learning_rate': 5.714378564496901e-05, 'epoch': 3.05}\n", - "{'loss': 0.8799, 'grad_norm': 3.7473530769348145, 'learning_rate': 5.6629295313583974e-05, 'epoch': 3.07}\n", - "{'loss': 0.8164, 'grad_norm': 3.66397762298584, 'learning_rate': 5.611408949868457e-05, 'epoch': 3.09}\n", - "{'loss': 0.8902, 'grad_norm': 5.061825275421143, 'learning_rate': 5.559822380516539e-05, 'epoch': 3.11}\n", - "{'loss': 0.8534, 'grad_norm': 4.06561803817749, 'learning_rate': 5.5081753909140096e-05, 'epoch': 3.12}\n", - "{'loss': 0.9668, 'grad_norm': 4.875536918640137, 'learning_rate': 5.456473555193242e-05, 'epoch': 3.14}\n", - "{'loss': 0.8607, 'grad_norm': 6.051300048828125, 'learning_rate': 5.404722453406017e-05, 'epoch': 3.16}\n", - "{'loss': 0.9096, 'grad_norm': 5.530092716217041, 'learning_rate': 5.3529276709212816e-05, 'epoch': 3.18}\n", - "{'loss': 0.9201, 'grad_norm': 6.524964809417725, 'learning_rate': 5.30109479782233e-05, 'epoch': 3.2}\n", - "{'loss': 0.856, 'grad_norm': 4.842297554016113, 'learning_rate': 5.249229428303486e-05, 'epoch': 3.21}\n", - "{'loss': 1.0534, 'grad_norm': 3.963986396789551, 'learning_rate': 5.197337160066331e-05, 'epoch': 3.23}\n", - "{'loss': 0.8642, 'grad_norm': 4.481607437133789, 
'learning_rate': 5.145423593715557e-05, 'epoch': 3.25}\n", - "{'loss': 0.8856, 'grad_norm': 3.9990179538726807, 'learning_rate': 5.0934943321545115e-05, 'epoch': 3.27}\n", - "{'loss': 0.7925, 'grad_norm': 4.209486484527588, 'learning_rate': 5.041554979980486e-05, 'epoch': 3.28}\n", - "{'loss': 0.9874, 'grad_norm': 4.624832630157471, 'learning_rate': 4.9896111428798254e-05, 'epoch': 3.3}\n", - "{'loss': 0.9581, 'grad_norm': 5.0234785079956055, 'learning_rate': 4.9376684270229254e-05, 'epoch': 3.32}\n", - "{'loss': 0.9273, 'grad_norm': 4.156904220581055, 'learning_rate': 4.8857324384591653e-05, 'epoch': 3.34}\n", - "{'loss': 0.8929, 'grad_norm': 4.292726516723633, 'learning_rate': 4.8338087825118675e-05, 'epoch': 3.36}\n", - "{'loss': 0.9584, 'grad_norm': 5.206954002380371, 'learning_rate': 4.781903063173321e-05, 'epoch': 3.37}\n", - "{'loss': 0.8548, 'grad_norm': 4.075423717498779, 'learning_rate': 4.730020882499964e-05, 'epoch': 3.39}\n", - "{'loss': 0.9083, 'grad_norm': 5.6302008628845215, 'learning_rate': 4.678167840007767e-05, 'epoch': 3.41}\n", - "{'loss': 0.9967, 'grad_norm': 4.765602111816406, 'learning_rate': 4.626349532067879e-05, 'epoch': 3.43}\n", - "{'loss': 0.9439, 'grad_norm': 4.012918949127197, 'learning_rate': 4.574571551302647e-05, 'epoch': 3.44}\n", - "{'loss': 0.9758, 'grad_norm': 3.995499849319458, 'learning_rate': 4.522839485981994e-05, 'epoch': 3.46}\n", - "{'loss': 0.9056, 'grad_norm': 4.168616771697998, 'learning_rate': 4.471158919420312e-05, 'epoch': 3.48}\n", - "{'loss': 0.8866, 'grad_norm': 5.359450817108154, 'learning_rate': 4.4195354293738484e-05, 'epoch': 3.5}\n", - "{'loss': 0.8474, 'grad_norm': 5.709634304046631, 'learning_rate': 4.367974587438733e-05, 'epoch': 3.52}\n", - "{'loss': 0.9325, 'grad_norm': 6.928687572479248, 'learning_rate': 4.316481958449634e-05, 'epoch': 3.53}\n", - "{'loss': 0.8947, 'grad_norm': 9.245586395263672, 'learning_rate': 4.2650630998791615e-05, 'epoch': 3.55}\n", - "{'loss': 1.0068, 'grad_norm': 7.456272602081299, 'learning_rate': 4.213723561238074e-05, 'epoch': 3.57}\n", - "{'loss': 0.8202, 'grad_norm': 3.894721746444702, 'learning_rate': 4.162468883476319e-05, 'epoch': 3.59}\n", - "{'loss': 0.8858, 'grad_norm': 4.249356269836426, 'learning_rate': 4.111304598385018e-05, 'epoch': 3.61}\n", - "{'loss': 0.9275, 'grad_norm': 6.780489921569824, 'learning_rate': 4.060236227999441e-05, 'epoch': 3.62}\n", - "{'loss': 0.8648, 'grad_norm': 5.042501449584961, 'learning_rate': 4.0092692840030134e-05, 'epoch': 3.64}\n", - "{'loss': 0.8905, 'grad_norm': 4.697298526763916, 'learning_rate': 3.9584092671324606e-05, 'epoch': 3.66}\n", - "{'loss': 0.9487, 'grad_norm': 8.913374900817871, 'learning_rate': 3.907661666584131e-05, 'epoch': 3.68}\n", - "{'loss': 0.9711, 'grad_norm': 6.1330885887146, 'learning_rate': 3.857031959421553e-05, 'epoch': 3.69}\n", - "{'loss': 0.9569, 'grad_norm': 4.3145599365234375, 'learning_rate': 3.806525609984312e-05, 'epoch': 3.71}\n", - "{'loss': 0.8455, 'grad_norm': 5.199124813079834, 'learning_rate': 3.7561480692983006e-05, 'epoch': 3.73}\n", - "{'loss': 0.9282, 'grad_norm': 4.841589450836182, 'learning_rate': 3.705904774487396e-05, 'epoch': 3.75}\n", - "{'loss': 0.9613, 'grad_norm': 5.256564140319824, 'learning_rate': 3.655801148186655e-05, 'epoch': 3.77}\n", - "{'loss': 0.832, 'grad_norm': 4.737905025482178, 'learning_rate': 3.6058425979570485e-05, 'epoch': 3.78}\n", - "{'loss': 0.9194, 'grad_norm': 4.6155524253845215, 'learning_rate': 3.556034515701852e-05, 'epoch': 3.8}\n", - "{'loss': 0.9149, 'grad_norm': 
5.484898090362549, 'learning_rate': 3.506382277084696e-05, 'epoch': 3.82}\n", - "{'loss': 0.9029, 'grad_norm': 4.691559314727783, 'learning_rate': 3.4568912409493945e-05, 'epoch': 3.84}\n", - "{'loss': 0.7487, 'grad_norm': 4.6990132331848145, 'learning_rate': 3.4075667487415785e-05, 'epoch': 3.86}\n", - "{'loss': 0.8389, 'grad_norm': 3.9886608123779297, 'learning_rate': 3.358414123932195e-05, 'epoch': 3.87}\n", - "{'loss': 0.9443, 'grad_norm': 3.797034978866577, 'learning_rate': 3.3094386714429724e-05, 'epoch': 3.89}\n", - "{'loss': 0.9102, 'grad_norm': 9.836748123168945, 'learning_rate': 3.2606456770738636e-05, 'epoch': 3.91}\n", - "{'loss': 0.8031, 'grad_norm': 6.517895221710205, 'learning_rate': 3.212040406932569e-05, 'epoch': 3.93}\n", - "{'loss': 0.7276, 'grad_norm': 3.757455825805664, 'learning_rate': 3.163628106866172e-05, 'epoch': 3.94}\n", - "{'loss': 1.0437, 'grad_norm': 5.128631591796875, 'learning_rate': 3.115414001894974e-05, 'epoch': 3.96}\n", - "{'loss': 0.9261, 'grad_norm': 4.2124457359313965, 'learning_rate': 3.067403295648566e-05, 'epoch': 3.98}\n", - "{'loss': 0.7864, 'grad_norm': 3.609720230102539, 'learning_rate': 3.019601169804216e-05, 'epoch': 4.0}\n", - " 67%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–‹ | 2240/3360 [1:05:16<32:59, 1.77s/it][INFO|trainer.py:3788] 2024-07-04 13:32:05,670 >> \n", - "***** Running Evaluation *****\n", - "[INFO|trainer.py:3790] 2024-07-04 13:32:05,670 >> Num examples = 46\n", - "[INFO|trainer.py:3793] 2024-07-04 13:32:05,670 >> Batch size = 1\n", - "\n", - " 0%| | 0/46 [00:00> Saving model checkpoint to saves/qwen2-1.5b/lora/sft/checkpoint-2240\n", - "[INFO|configuration_utils.py:733] 2024-07-04 13:32:08,839 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/config.json\n", - "[INFO|configuration_utils.py:800] 2024-07-04 13:32:08,839 >> Model config Qwen2Config {\n", - " \"architectures\": [\n", - " \"Qwen2ForCausalLM\"\n", - " ],\n", - " \"attention_dropout\": 0.0,\n", - " \"bos_token_id\": 151643,\n", - " \"eos_token_id\": 151645,\n", - " \"hidden_act\": \"silu\",\n", - " \"hidden_size\": 1536,\n", - " \"initializer_range\": 0.02,\n", - " \"intermediate_size\": 8960,\n", - " \"max_position_embeddings\": 32768,\n", - " \"max_window_layers\": 28,\n", - " \"model_type\": \"qwen2\",\n", - " \"num_attention_heads\": 12,\n", - " \"num_hidden_layers\": 28,\n", - " \"num_key_value_heads\": 2,\n", - " \"rms_norm_eps\": 1e-06,\n", - " \"rope_theta\": 1000000.0,\n", - " \"sliding_window\": 32768,\n", - " \"tie_word_embeddings\": true,\n", - " \"torch_dtype\": \"bfloat16\",\n", - " \"transformers_version\": \"4.42.3\",\n", - " \"use_cache\": true,\n", - " \"use_sliding_window\": false,\n", - " \"vocab_size\": 151936\n", - "}\n", - "\n", - "[INFO|tokenization_utils_base.py:2574] 2024-07-04 13:32:08,917 >> tokenizer config file saved in saves/qwen2-1.5b/lora/sft/checkpoint-2240/tokenizer_config.json\n", - "[INFO|tokenization_utils_base.py:2583] 2024-07-04 13:32:08,917 >> Special tokens file saved in saves/qwen2-1.5b/lora/sft/checkpoint-2240/special_tokens_map.json\n", - "{'loss': 0.7877, 'grad_norm': 3.5848188400268555, 'learning_rate': 2.9720127835276256e-05, 'epoch': 4.02}\n", - "{'loss': 0.6372, 'grad_norm': 4.3321661949157715, 'learning_rate': 2.9246432729161055e-05, 'epoch': 4.03}\n", - "{'loss': 0.7208, 'grad_norm': 5.079366207122803, 'learning_rate': 2.8774977504442647e-05, 'epoch': 
4.05}\n", - "{'loss': 0.6386, 'grad_norm': 4.389534950256348, 'learning_rate': 2.8305813044122097e-05, 'epoch': 4.07}\n", - "{'loss': 0.6416, 'grad_norm': 3.866262197494507, 'learning_rate': 2.7838989983964065e-05, 'epoch': 4.09}\n", - "{'loss': 0.7771, 'grad_norm': 4.4248528480529785, 'learning_rate': 2.737455870703155e-05, 'epoch': 4.11}\n", - "{'loss': 0.5944, 'grad_norm': 5.1537370681762695, 'learning_rate': 2.6912569338248315e-05, 'epoch': 4.12}\n", - "{'loss': 0.6846, 'grad_norm': 4.410754203796387, 'learning_rate': 2.645307173898901e-05, 'epoch': 4.14}\n", - "{'loss': 0.7499, 'grad_norm': 3.9317386150360107, 'learning_rate': 2.5996115501697694e-05, 'epoch': 4.16}\n", - "{'loss': 0.6378, 'grad_norm': 3.794434070587158, 'learning_rate': 2.5541749944535554e-05, 'epoch': 4.18}\n", - "{'loss': 0.6153, 'grad_norm': 4.012321472167969, 'learning_rate': 2.5090024106057962e-05, 'epoch': 4.19}\n", - "{'loss': 0.6922, 'grad_norm': 4.712143898010254, 'learning_rate': 2.464098673992205e-05, 'epoch': 4.21}\n", - "{'loss': 0.6079, 'grad_norm': 5.002867698669434, 'learning_rate': 2.4194686309624663e-05, 'epoch': 4.23}\n", - "{'loss': 0.7554, 'grad_norm': 6.034168720245361, 'learning_rate': 2.3751170983272e-05, 'epoch': 4.25}\n", - "{'loss': 0.6634, 'grad_norm': 5.4491376876831055, 'learning_rate': 2.3310488628380757e-05, 'epoch': 4.27}\n", - "{'loss': 0.6635, 'grad_norm': 6.335705280303955, 'learning_rate': 2.2872686806712035e-05, 'epoch': 4.28}\n", - "{'loss': 0.6732, 'grad_norm': 4.363458633422852, 'learning_rate': 2.243781276913811e-05, 'epoch': 4.3}\n", - "{'loss': 0.5751, 'grad_norm': 4.058308124542236, 'learning_rate': 2.200591345054267e-05, 'epoch': 4.32}\n", - "{'loss': 0.7378, 'grad_norm': 5.493106365203857, 'learning_rate': 2.157703546475539e-05, 'epoch': 4.34}\n", - "{'loss': 0.6231, 'grad_norm': 4.587257385253906, 'learning_rate': 2.115122509952085e-05, 'epoch': 4.36}\n", - "{'loss': 0.6361, 'grad_norm': 4.070307731628418, 'learning_rate': 2.0728528311502976e-05, 'epoch': 4.37}\n", - "{'loss': 0.7245, 'grad_norm': 5.507742404937744, 'learning_rate': 2.0308990721324927e-05, 'epoch': 4.39}\n", - "{'loss': 0.6516, 'grad_norm': 4.98870849609375, 'learning_rate': 1.989265760864542e-05, 'epoch': 4.41}\n", - "{'loss': 0.7311, 'grad_norm': 4.5378618240356445, 'learning_rate': 1.947957390727185e-05, 'epoch': 4.43}\n", - "{'loss': 0.713, 'grad_norm': 6.595687389373779, 'learning_rate': 1.906978420031059e-05, 'epoch': 4.44}\n", - "{'loss': 0.5884, 'grad_norm': 3.995753765106201, 'learning_rate': 1.8663332715355396e-05, 'epoch': 4.46}\n", - "{'loss': 0.7598, 'grad_norm': 3.745181083679199, 'learning_rate': 1.8260263319713844e-05, 'epoch': 4.48}\n", - "{'loss': 0.673, 'grad_norm': 5.82590389251709, 'learning_rate': 1.7860619515673033e-05, 'epoch': 4.5}\n", - "{'loss': 0.6552, 'grad_norm': 5.151037216186523, 'learning_rate': 1.746444443580433e-05, 'epoch': 4.52}\n", - "{'loss': 0.7091, 'grad_norm': 5.6730499267578125, 'learning_rate': 1.7071780838308288e-05, 'epoch': 4.53}\n", - "{'loss': 0.7061, 'grad_norm': 5.2298502922058105, 'learning_rate': 1.6682671102399805e-05, 'epoch': 4.55}\n", - "{'loss': 0.654, 'grad_norm': 5.273619651794434, 'learning_rate': 1.629715722373423e-05, 'epoch': 4.57}\n", - "{'loss': 0.6822, 'grad_norm': 6.466513633728027, 'learning_rate': 1.5915280809874932e-05, 'epoch': 4.59}\n", - "{'loss': 0.5785, 'grad_norm': 6.050833702087402, 'learning_rate': 1.553708307580265e-05, 'epoch': 4.61}\n", - "{'loss': 0.6877, 'grad_norm': 6.502690315246582, 'learning_rate': 
1.5162604839467265e-05, 'epoch': 4.62}\n", - "{'loss': 0.6984, 'grad_norm': 4.606260299682617, 'learning_rate': 1.4791886517382413e-05, 'epoch': 4.64}\n", - "{'loss': 0.6909, 'grad_norm': 4.80437707901001, 'learning_rate': 1.4424968120263504e-05, 'epoch': 4.66}\n", - "{'loss': 0.6827, 'grad_norm': 5.20365047454834, 'learning_rate': 1.4061889248709343e-05, 'epoch': 4.68}\n", - "{'loss': 0.6361, 'grad_norm': 6.29000997543335, 'learning_rate': 1.370268908892825e-05, 'epoch': 4.69}\n", - "{'loss': 0.6747, 'grad_norm': 4.9368438720703125, 'learning_rate': 1.3347406408508695e-05, 'epoch': 4.71}\n", - "{'loss': 0.6435, 'grad_norm': 5.528055667877197, 'learning_rate': 1.2996079552235263e-05, 'epoch': 4.73}\n", - "{'loss': 0.6501, 'grad_norm': 4.367548942565918, 'learning_rate': 1.264874643795021e-05, 'epoch': 4.75}\n", - "{'loss': 0.6376, 'grad_norm': 4.568158149719238, 'learning_rate': 1.230544455246101e-05, 'epoch': 4.77}\n", - "{'loss': 0.7034, 'grad_norm': 5.3214287757873535, 'learning_rate': 1.1966210947494583e-05, 'epoch': 4.78}\n", - "{'loss': 0.7303, 'grad_norm': 3.9356067180633545, 'learning_rate': 1.1631082235698316e-05, 'epoch': 4.8}\n", - "{'loss': 0.6436, 'grad_norm': 5.198613166809082, 'learning_rate': 1.130009458668863e-05, 'epoch': 4.82}\n", - "{'loss': 0.595, 'grad_norm': 4.129484176635742, 'learning_rate': 1.097328372314721e-05, 'epoch': 4.84}\n", - "{'loss': 0.6302, 'grad_norm': 8.324830055236816, 'learning_rate': 1.0650684916965559e-05, 'epoch': 4.85}\n", - "{'loss': 0.7296, 'grad_norm': 5.789163589477539, 'learning_rate': 1.0332332985438248e-05, 'epoch': 4.87}\n", - "{'loss': 0.7077, 'grad_norm': 5.632966995239258, 'learning_rate': 1.0018262287505086e-05, 'epoch': 4.89}\n", - "{'loss': 0.7339, 'grad_norm': 4.699968338012695, 'learning_rate': 9.708506720042932e-06, 'epoch': 4.91}\n", - "{'loss': 0.6334, 'grad_norm': 3.969327926635742, 'learning_rate': 9.403099714207175e-06, 'epoch': 4.93}\n", - "{'loss': 0.7298, 'grad_norm': 4.980201244354248, 'learning_rate': 9.102074231823727e-06, 'epoch': 4.94}\n", - "{'loss': 0.7236, 'grad_norm': 6.4100565910339355, 'learning_rate': 8.805462761831418e-06, 'epoch': 4.96}\n", - "{'loss': 0.7751, 'grad_norm': 5.446720600128174, 'learning_rate': 8.513297316775625e-06, 'epoch': 4.98}\n", - "{'loss': 0.7407, 'grad_norm': 4.180345058441162, 'learning_rate': 8.225609429353187e-06, 'epoch': 5.0}\n", - " 83%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–Š | 2800/3360 [1:21:52<16:57, 1.82s/it][INFO|trainer.py:3788] 2024-07-04 13:48:40,919 >> \n", - "***** Running Evaluation *****\n", - "[INFO|trainer.py:3790] 2024-07-04 13:48:40,919 >> Num examples = 46\n", - "[INFO|trainer.py:3793] 2024-07-04 13:48:40,919 >> Batch size = 1\n", - "\n", - " 0%| | 0/46 [00:00> Saving model checkpoint to saves/qwen2-1.5b/lora/sft/checkpoint-2800\n", - "[INFO|configuration_utils.py:733] 2024-07-04 13:48:44,254 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/config.json\n", - "[INFO|configuration_utils.py:800] 2024-07-04 13:48:44,254 >> Model config Qwen2Config {\n", - " \"architectures\": [\n", - " \"Qwen2ForCausalLM\"\n", - " ],\n", - " \"attention_dropout\": 0.0,\n", - " \"bos_token_id\": 151643,\n", - " \"eos_token_id\": 151645,\n", - " \"hidden_act\": \"silu\",\n", - " \"hidden_size\": 1536,\n", - " \"initializer_range\": 0.02,\n", - " \"intermediate_size\": 8960,\n", - " 
\"max_position_embeddings\": 32768,\n", - " \"max_window_layers\": 28,\n", - " \"model_type\": \"qwen2\",\n", - " \"num_attention_heads\": 12,\n", - " \"num_hidden_layers\": 28,\n", - " \"num_key_value_heads\": 2,\n", - " \"rms_norm_eps\": 1e-06,\n", - " \"rope_theta\": 1000000.0,\n", - " \"sliding_window\": 32768,\n", - " \"tie_word_embeddings\": true,\n", - " \"torch_dtype\": \"bfloat16\",\n", - " \"transformers_version\": \"4.42.3\",\n", - " \"use_cache\": true,\n", - " \"use_sliding_window\": false,\n", - " \"vocab_size\": 151936\n", - "}\n", - "\n", - "[INFO|tokenization_utils_base.py:2574] 2024-07-04 13:48:44,307 >> tokenizer config file saved in saves/qwen2-1.5b/lora/sft/checkpoint-2800/tokenizer_config.json\n", - "[INFO|tokenization_utils_base.py:2583] 2024-07-04 13:48:44,307 >> Special tokens file saved in saves/qwen2-1.5b/lora/sft/checkpoint-2800/special_tokens_map.json\n", - "{'loss': 0.5816, 'grad_norm': 3.6714768409729004, 'learning_rate': 7.942430149009161e-06, 'epoch': 5.02}\n", - "{'loss': 0.516, 'grad_norm': 3.9655864238739014, 'learning_rate': 7.663790038585793e-06, 'epoch': 5.03}\n", - "{'loss': 0.5876, 'grad_norm': 4.2771453857421875, 'learning_rate': 7.389719171023857e-06, 'epoch': 5.05}\n", - "{'loss': 0.5746, 'grad_norm': 5.545507431030273, 'learning_rate': 7.1202471261170245e-06, 'epoch': 5.07}\n", - "{'loss': 0.5789, 'grad_norm': 4.685436248779297, 'learning_rate': 6.855402987319348e-06, 'epoch': 5.09}\n", - "{'loss': 0.6558, 'grad_norm': 5.384147644042969, 'learning_rate': 6.595215338606397e-06, 'epoch': 5.1}\n", - "{'loss': 0.4955, 'grad_norm': 5.300227642059326, 'learning_rate': 6.339712261390213e-06, 'epoch': 5.12}\n", - "{'loss': 0.6284, 'grad_norm': 5.341045379638672, 'learning_rate': 6.088921331488568e-06, 'epoch': 5.14}\n", - "{'loss': 0.5285, 'grad_norm': 4.509070873260498, 'learning_rate': 5.8428696161488215e-06, 'epoch': 5.16}\n", - "{'loss': 0.5073, 'grad_norm': 4.6753339767456055, 'learning_rate': 5.601583671126531e-06, 'epoch': 5.18}\n", - "{'loss': 0.5849, 'grad_norm': 3.836711883544922, 'learning_rate': 5.365089537819434e-06, 'epoch': 5.19}\n", - "{'loss': 0.5714, 'grad_norm': 4.124776840209961, 'learning_rate': 5.133412740456806e-06, 'epoch': 5.21}\n", - "{'loss': 0.5811, 'grad_norm': 4.734057903289795, 'learning_rate': 4.906578283344759e-06, 'epoch': 5.23}\n", - "{'loss': 0.6351, 'grad_norm': 5.501781463623047, 'learning_rate': 4.684610648167503e-06, 'epoch': 5.25}\n", - "{'loss': 0.5622, 'grad_norm': 4.912986755371094, 'learning_rate': 4.467533791345191e-06, 'epoch': 5.27}\n", - "{'loss': 0.5305, 'grad_norm': 6.3503899574279785, 'learning_rate': 4.255371141448272e-06, 'epoch': 5.28}\n", - "{'loss': 0.5406, 'grad_norm': 4.923576354980469, 'learning_rate': 4.048145596668967e-06, 'epoch': 5.3}\n", - "{'loss': 0.5534, 'grad_norm': 4.20800256729126, 'learning_rate': 3.84587952234991e-06, 'epoch': 5.32}\n", - "{'loss': 0.4831, 'grad_norm': 4.633558750152588, 'learning_rate': 3.6485947485702832e-06, 'epoch': 5.34}\n", - "{'loss': 0.5587, 'grad_norm': 5.120583534240723, 'learning_rate': 3.4563125677897932e-06, 'epoch': 5.35}\n", - "{'loss': 0.5696, 'grad_norm': 5.966647148132324, 'learning_rate': 3.269053732550581e-06, 'epoch': 5.37}\n", - "{'loss': 0.5767, 'grad_norm': 5.047117233276367, 'learning_rate': 3.086838453237506e-06, 'epoch': 5.39}\n", - "{'loss': 0.4262, 'grad_norm': 4.478403091430664, 'learning_rate': 2.9096863958968268e-06, 'epoch': 5.41}\n", - "{'loss': 0.4798, 'grad_norm': 4.455025672912598, 'learning_rate': 2.737616680113758e-06, 
'epoch': 5.43}\n", - "{'loss': 0.4574, 'grad_norm': 3.7917206287384033, 'learning_rate': 2.570647876948895e-06, 'epoch': 5.44}\n", - "{'loss': 0.5635, 'grad_norm': 7.098059177398682, 'learning_rate': 2.408798006933882e-06, 'epoch': 5.46}\n", - "{'loss': 0.7231, 'grad_norm': 4.642895698547363, 'learning_rate': 2.252084538126542e-06, 'epoch': 5.48}\n", - "{'loss': 0.5122, 'grad_norm': 5.233055591583252, 'learning_rate': 2.100524384225555e-06, 'epoch': 5.5}\n", - "{'loss': 0.524, 'grad_norm': 4.6845173835754395, 'learning_rate': 1.9541339027450256e-06, 'epoch': 5.52}\n", - "{'loss': 0.5816, 'grad_norm': 5.447011470794678, 'learning_rate': 1.8129288932490274e-06, 'epoch': 5.53}\n", - "{'loss': 0.5329, 'grad_norm': 3.755023717880249, 'learning_rate': 1.6769245956464396e-06, 'epoch': 5.55}\n", - "{'loss': 0.6767, 'grad_norm': 5.255481719970703, 'learning_rate': 1.5461356885461075e-06, 'epoch': 5.57}\n", - "{'loss': 0.5529, 'grad_norm': 4.8336567878723145, 'learning_rate': 1.4205762876726092e-06, 'epoch': 5.59}\n", - "{'loss': 0.6372, 'grad_norm': 5.332770824432373, 'learning_rate': 1.3002599443428243e-06, 'epoch': 5.6}\n", - "{'loss': 0.634, 'grad_norm': 5.157808780670166, 'learning_rate': 1.1851996440033319e-06, 'epoch': 5.62}\n", - "{'loss': 0.5033, 'grad_norm': 4.826900005340576, 'learning_rate': 1.0754078048289374e-06, 'epoch': 5.64}\n", - "{'loss': 0.5681, 'grad_norm': 3.9047048091888428, 'learning_rate': 9.708962763824048e-07, 'epoch': 5.66}\n", - "{'loss': 0.5432, 'grad_norm': 6.038053512573242, 'learning_rate': 8.716763383355864e-07, 'epoch': 5.68}\n", - "{'loss': 0.6018, 'grad_norm': 5.233924388885498, 'learning_rate': 7.777586992519959e-07, 'epoch': 5.69}\n", - "{'loss': 0.5367, 'grad_norm': 6.929383277893066, 'learning_rate': 6.891534954310885e-07, 'epoch': 5.71}\n", - "{'loss': 0.6039, 'grad_norm': 4.509579181671143, 'learning_rate': 6.058702898142643e-07, 'epoch': 5.73}\n", - "{'loss': 0.5292, 'grad_norm': 4.131773948669434, 'learning_rate': 5.279180709527765e-07, 'epoch': 5.75}\n", - "{'loss': 0.6327, 'grad_norm': 4.368628025054932, 'learning_rate': 4.553052520375911e-07, 'epoch': 5.77}\n", - "{'loss': 0.4859, 'grad_norm': 4.966446399688721, 'learning_rate': 3.8803966999139684e-07, 'epoch': 5.78}\n", - "{'loss': 0.5397, 'grad_norm': 5.083605766296387, 'learning_rate': 3.261285846227868e-07, 'epoch': 5.8}\n", - "{'loss': 0.4758, 'grad_norm': 4.257706165313721, 'learning_rate': 2.6957867784270787e-07, 'epoch': 5.82}\n", - "{'loss': 0.492, 'grad_norm': 5.183888912200928, 'learning_rate': 2.1839605294330933e-07, 'epoch': 5.84}\n", - "{'loss': 0.6466, 'grad_norm': 7.4429707527160645, 'learning_rate': 1.725862339392259e-07, 'epoch': 5.85}\n", - "{'loss': 0.4461, 'grad_norm': 6.51588249206543, 'learning_rate': 1.3215416497138754e-07, 'epoch': 5.87}\n", - "{'loss': 0.6614, 'grad_norm': 4.2303786277771, 'learning_rate': 9.710420977340762e-08, 'epoch': 5.89}\n", - "{'loss': 0.4817, 'grad_norm': 6.3713908195495605, 'learning_rate': 6.744015120061509e-08, 'epoch': 5.91}\n", - "{'loss': 0.6231, 'grad_norm': 10.188394546508789, 'learning_rate': 4.316519082179227e-08, 'epoch': 5.93}\n", - "{'loss': 0.5204, 'grad_norm': 4.387541770935059, 'learning_rate': 2.4281948573617874e-08, 'epoch': 5.94}\n", - "{'loss': 0.5938, 'grad_norm': 5.084804534912109, 'learning_rate': 1.0792462477909882e-08, 'epoch': 5.96}\n", - "{'loss': 0.576, 'grad_norm': 4.955512523651123, 'learning_rate': 2.6981884216847884e-09, 'epoch': 5.98}\n", - "{'loss': 0.5752, 'grad_norm': 4.1065449714660645, 'learning_rate': 0.0, 
'epoch': 6.0}\n", - "100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 3360/3360 [1:38:23<00:00, 1.77s/it][INFO|trainer.py:3788] 2024-07-04 14:05:12,056 >> \n", - "***** Running Evaluation *****\n", - "[INFO|trainer.py:3790] 2024-07-04 14:05:12,056 >> Num examples = 46\n", - "[INFO|trainer.py:3793] 2024-07-04 14:05:12,056 >> Batch size = 1\n", - "\n", - " 0%| | 0/46 [00:00> Saving model checkpoint to saves/qwen2-1.5b/lora/sft/checkpoint-3360\n", - "[INFO|configuration_utils.py:733] 2024-07-04 14:05:15,110 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/config.json\n", - "[INFO|configuration_utils.py:800] 2024-07-04 14:05:15,111 >> Model config Qwen2Config {\n", - " \"architectures\": [\n", - " \"Qwen2ForCausalLM\"\n", - " ],\n", - " \"attention_dropout\": 0.0,\n", - " \"bos_token_id\": 151643,\n", - " \"eos_token_id\": 151645,\n", - " \"hidden_act\": \"silu\",\n", - " \"hidden_size\": 1536,\n", - " \"initializer_range\": 0.02,\n", - " \"intermediate_size\": 8960,\n", - " \"max_position_embeddings\": 32768,\n", - " \"max_window_layers\": 28,\n", - " \"model_type\": \"qwen2\",\n", - " \"num_attention_heads\": 12,\n", - " \"num_hidden_layers\": 28,\n", - " \"num_key_value_heads\": 2,\n", - " \"rms_norm_eps\": 1e-06,\n", - " \"rope_theta\": 1000000.0,\n", - " \"sliding_window\": 32768,\n", - " \"tie_word_embeddings\": true,\n", - " \"torch_dtype\": \"bfloat16\",\n", - " \"transformers_version\": \"4.42.3\",\n", - " \"use_cache\": true,\n", - " \"use_sliding_window\": false,\n", - " \"vocab_size\": 151936\n", - "}\n", - "\n", - "[INFO|tokenization_utils_base.py:2574] 2024-07-04 14:05:15,155 >> tokenizer config file saved in saves/qwen2-1.5b/lora/sft/checkpoint-3360/tokenizer_config.json\n", - "[INFO|tokenization_utils_base.py:2583] 2024-07-04 14:05:15,155 >> Special tokens file saved in saves/qwen2-1.5b/lora/sft/checkpoint-3360/special_tokens_map.json\n", - "[INFO|trainer.py:2383] 2024-07-04 14:05:15,382 >> \n", - "\n", - "Training completed. 
Do not forget to share your model on huggingface.co/models =)\n", - "\n", - "\n", - "{'train_runtime': 5911.7152, 'train_samples_per_second': 4.549, 'train_steps_per_second': 0.568, 'train_loss': 1.1251599807114827, 'epoch': 6.0}\n", - "100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 3360/3360 [1:38:26<00:00, 1.76s/it]\n", - "[INFO|trainer.py:3478] 2024-07-04 14:05:15,386 >> Saving model checkpoint to saves/qwen2-1.5b/lora/sft\n", - "[INFO|configuration_utils.py:733] 2024-07-04 14:05:16,251 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/config.json\n", - "[INFO|configuration_utils.py:800] 2024-07-04 14:05:16,251 >> Model config Qwen2Config {\n", - " \"architectures\": [\n", - " \"Qwen2ForCausalLM\"\n", - " ],\n", - " \"attention_dropout\": 0.0,\n", - " \"bos_token_id\": 151643,\n", - " \"eos_token_id\": 151645,\n", - " \"hidden_act\": \"silu\",\n", - " \"hidden_size\": 1536,\n", - " \"initializer_range\": 0.02,\n", - " \"intermediate_size\": 8960,\n", - " \"max_position_embeddings\": 32768,\n", - " \"max_window_layers\": 28,\n", - " \"model_type\": \"qwen2\",\n", - " \"num_attention_heads\": 12,\n", - " \"num_hidden_layers\": 28,\n", - " \"num_key_value_heads\": 2,\n", - " \"rms_norm_eps\": 1e-06,\n", - " \"rope_theta\": 1000000.0,\n", - " \"sliding_window\": 32768,\n", - " \"tie_word_embeddings\": true,\n", - " \"torch_dtype\": \"bfloat16\",\n", - " \"transformers_version\": \"4.42.3\",\n", - " \"use_cache\": true,\n", - " \"use_sliding_window\": false,\n", - " \"vocab_size\": 151936\n", - "}\n", - "\n", - "[INFO|tokenization_utils_base.py:2574] 2024-07-04 14:05:16,306 >> tokenizer config file saved in saves/qwen2-1.5b/lora/sft/tokenizer_config.json\n", - "[INFO|tokenization_utils_base.py:2583] 2024-07-04 14:05:16,306 >> Special tokens file saved in saves/qwen2-1.5b/lora/sft/special_tokens_map.json\n", - "***** train metrics *****\n", - " epoch = 5.9973\n", - " total_flos = 16732846GF\n", - " train_loss = 1.1252\n", - " train_runtime = 1:38:31.71\n", - " train_samples_per_second = 4.549\n", - " train_steps_per_second = 0.568\n", - "Figure saved at: saves/qwen2-1.5b/lora/sft/training_loss.png\n", - "Figure saved at: saves/qwen2-1.5b/lora/sft/training_eval_loss.png\n", - "[INFO|trainer.py:3788] 2024-07-04 14:05:16,625 >> \n", - "***** Running Evaluation *****\n", - "[INFO|trainer.py:3790] 2024-07-04 14:05:16,625 >> Num examples = 46\n", - "[INFO|trainer.py:3793] 2024-07-04 14:05:16,625 >> Batch size = 1\n", - "100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 46/46 [00:02<00:00, 18.96it/s]\n", - "***** eval metrics *****\n", - " epoch = 5.9973\n", - " eval_loss = 2.4373\n", - " eval_runtime = 0:00:02.50\n", - " eval_samples_per_second = 18.363\n", - " eval_steps_per_second = 18.363\n", - "[INFO|modelcard.py:449] 2024-07-04 14:05:19,133 >> Dropping the following result as it does not have all the necessary fields:\n", - "{'task': {'name': 'Causal Language Modeling', 'type': 'text-generation'}}\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: \\ 0.086 MB of 0.086 MB uploaded\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: Run history:\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: eval/loss โ–โ–โ–‚โ–„โ–†โ–ˆโ–ˆ\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: eval/runtime โ–ƒโ–„โ–โ–…โ–ˆโ–โ–…\n", - 
"\u001b[34m\u001b[1mwandb\u001b[0m: eval/samples_per_second โ–†โ–…โ–ˆโ–„โ–โ–ˆโ–„\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: eval/steps_per_second โ–†โ–…โ–ˆโ–„โ–โ–ˆโ–„\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: train/epoch โ–โ–โ–โ–‚โ–‚โ–‚โ–‚โ–‚โ–‚โ–ƒโ–ƒโ–ƒโ–ƒโ–ƒโ–ƒโ–„โ–„โ–„โ–„โ–„โ–…โ–…โ–…โ–…โ–…โ–†โ–†โ–†โ–†โ–†โ–†โ–‡โ–‡โ–‡โ–‡โ–‡โ–‡โ–ˆโ–ˆโ–ˆ\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: train/global_step โ–โ–โ–โ–‚โ–‚โ–‚โ–‚โ–‚โ–‚โ–ƒโ–ƒโ–ƒโ–ƒโ–ƒโ–ƒโ–„โ–„โ–„โ–„โ–„โ–…โ–…โ–…โ–…โ–…โ–†โ–†โ–†โ–†โ–†โ–†โ–‡โ–‡โ–‡โ–‡โ–‡โ–‡โ–ˆโ–ˆโ–ˆ\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: train/grad_norm โ–ƒโ–โ–‚โ–‚โ–โ–ƒโ–‚โ–ƒโ–„โ–„โ–ƒโ–…โ–ƒโ–ƒโ–…โ–…โ–…โ–ƒโ–„โ–„โ–„โ–†โ–…โ–‡โ–…โ–†โ–„โ–…โ–ˆโ–…โ–‡โ–†โ–…โ–‡โ–…โ–†โ–†โ–†โ–†โ–…\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: train/learning_rate โ–‚โ–„โ–…โ–‡โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–‡โ–‡โ–‡โ–‡โ–‡โ–†โ–†โ–†โ–†โ–…โ–…โ–…โ–„โ–„โ–„โ–ƒโ–ƒโ–ƒโ–ƒโ–‚โ–‚โ–‚โ–‚โ–โ–โ–โ–โ–โ–โ–\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: train/loss โ–ˆโ–‡โ–‡โ–‡โ–†โ–†โ–†โ–†โ–†โ–†โ–…โ–…โ–†โ–…โ–„โ–„โ–„โ–„โ–„โ–„โ–ƒโ–ƒโ–ƒโ–ƒโ–ƒโ–‚โ–‚โ–‚โ–‚โ–‚โ–‚โ–‚โ–‚โ–โ–โ–โ–โ–โ–โ–\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: \n", - "\u001b[34m\u001b[1mwandb\u001b[0m: Run summary:\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: eval/loss 2.43734\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: eval/runtime 2.5051\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: eval/samples_per_second 18.363\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: eval/steps_per_second 18.363\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: total_flos 1.7966756916707328e+16\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: train/epoch 5.99732\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: train/global_step 3360\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: train/grad_norm 4.10654\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: train/learning_rate 0.0\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: train/loss 0.5752\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: train_loss 1.12516\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: train_runtime 5911.7152\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: train_samples_per_second 4.549\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: train_steps_per_second 0.568\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: \n", - "\u001b[34m\u001b[1mwandb\u001b[0m: ๐Ÿš€ View run \u001b[33mqwen2_1.5b_lora_sft\u001b[0m at: \u001b[34m\u001b[4mhttps://wandb.ai/inflaton-ai/huggingface/runs/mpc5sxtf\u001b[0m\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: โญ๏ธ View project at: \u001b[34m\u001b[4mhttps://wandb.ai/inflaton-ai/huggingface\u001b[0m\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: Synced 6 W&B file(s), 0 media file(s), 1 artifact file(s) and 0 other file(s)\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: Find logs at: \u001b[35m\u001b[1m./wandb/run-20240704_122645-mpc5sxtf/logs\u001b[0m\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m The new W&B backend becomes opt-out in version 0.18.0; try it out with `wandb.require(\"core\")`! 
See https://wandb.me/wandb-core for more information.\n", - "CPU times: user 1min 28s, sys: 26.5 s, total: 1min 54s\n", - "Wall time: 1h 42min 32s\n" - ] - } - ], - "source": [ - "%%time\n", - "\n", - "!./scripts/tune-lf.sh config/qwen2_1.5b_lora_sft.yaml" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Current Directory:\n", - "/home/inflaton/code/projects/courses/llm-finetuning/llama-factory\n", - "07/04/2024 14:50:13 - WARNING - llamafactory.hparams.parser - We recommend enable `upcast_layernorm` in quantized training.\n", - "07/04/2024 14:50:13 - INFO - llamafactory.hparams.parser - Process rank: 0, device: cuda:0, n_gpu: 1, distributed training: False, compute dtype: torch.bfloat16\n", - "[INFO|tokenization_utils_base.py:2161] 2024-07-04 14:50:14,466 >> loading file vocab.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-7B-Instruct/snapshots/41c66b0be1c3081f13defc6bdf946c2ef240d6a6/vocab.json\n", - "[INFO|tokenization_utils_base.py:2161] 2024-07-04 14:50:14,466 >> loading file merges.txt from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-7B-Instruct/snapshots/41c66b0be1c3081f13defc6bdf946c2ef240d6a6/merges.txt\n", - "[INFO|tokenization_utils_base.py:2161] 2024-07-04 14:50:14,466 >> loading file tokenizer.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-7B-Instruct/snapshots/41c66b0be1c3081f13defc6bdf946c2ef240d6a6/tokenizer.json\n", - "[INFO|tokenization_utils_base.py:2161] 2024-07-04 14:50:14,466 >> loading file added_tokens.json from cache at None\n", - "[INFO|tokenization_utils_base.py:2161] 2024-07-04 14:50:14,466 >> loading file special_tokens_map.json from cache at None\n", - "[INFO|tokenization_utils_base.py:2161] 2024-07-04 14:50:14,467 >> loading file tokenizer_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-7B-Instruct/snapshots/41c66b0be1c3081f13defc6bdf946c2ef240d6a6/tokenizer_config.json\n", - "[WARNING|logging.py:313] 2024-07-04 14:50:14,635 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n", - "07/04/2024 14:50:14 - INFO - llamafactory.data.template - Replace eos token: <|im_end|>\n", - "07/04/2024 14:50:14 - INFO - llamafactory.data.template - Add <|im_start|> to stop words.\n", - "07/04/2024 14:50:14 - INFO - llamafactory.data.loader - Loading dataset alpaca_mac.json...\n", - "Converting format of dataset (num_proc=16): 100%|โ–ˆ| 4528/4528 [00:00<00:00, 1650\n", - "Running tokenizer on dataset (num_proc=16): 100%|โ–ˆ| 4528/4528 [00:01<00:00, 3163\n", - "input_ids:\n", - "[151644, 872, 198, 5501, 14683, 279, 2701, 8453, 1467, 1119, 6364, 323, 3410, 1172, 279, 24531, 2213, 11, 4302, 770, 624, 35987, 102895, 99164, 100324, 100717, 100095, 99509, 1773, 151645, 198, 151644, 77091, 198, 17949, 358, 572, 2617, 553, 264, 38835, 44486, 13, 151645]\n", - "inputs:\n", - "<|im_start|>user\n", - "Please translate the following Chinese text into English and provide only the translated content, nothing else.\n", - "ๅ…จไป—็€็‹ไป™ๆญๆ•‘ใ€‚<|im_end|>\n", - "<|im_start|>assistant\n", - "Because I was protected by a fox fairy.<|im_end|>\n", - "label_ids:\n", - "[-100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 17949, 358, 572, 2617, 553, 
264, 38835, 44486, 13, 151645]\n", - "labels:\n", - "Because I was protected by a fox fairy.<|im_end|>\n", - "[INFO|configuration_utils.py:733] 2024-07-04 14:50:17,794 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-7B-Instruct/snapshots/41c66b0be1c3081f13defc6bdf946c2ef240d6a6/config.json\n", - "[INFO|configuration_utils.py:800] 2024-07-04 14:50:17,795 >> Model config Qwen2Config {\n", - " \"_name_or_path\": \"Qwen/Qwen2-7B-Instruct\",\n", - " \"architectures\": [\n", - " \"Qwen2ForCausalLM\"\n", - " ],\n", - " \"attention_dropout\": 0.0,\n", - " \"bos_token_id\": 151643,\n", - " \"eos_token_id\": 151645,\n", - " \"hidden_act\": \"silu\",\n", - " \"hidden_size\": 3584,\n", - " \"initializer_range\": 0.02,\n", - " \"intermediate_size\": 18944,\n", - " \"max_position_embeddings\": 32768,\n", - " \"max_window_layers\": 28,\n", - " \"model_type\": \"qwen2\",\n", - " \"num_attention_heads\": 28,\n", - " \"num_hidden_layers\": 28,\n", - " \"num_key_value_heads\": 4,\n", - " \"rms_norm_eps\": 1e-06,\n", - " \"rope_theta\": 1000000.0,\n", - " \"sliding_window\": 131072,\n", - " \"tie_word_embeddings\": false,\n", - " \"torch_dtype\": \"bfloat16\",\n", - " \"transformers_version\": \"4.42.3\",\n", - " \"use_cache\": true,\n", - " \"use_sliding_window\": false,\n", - " \"vocab_size\": 152064\n", - "}\n", - "\n", - "07/04/2024 14:50:17 - INFO - llamafactory.model.model_utils.quantization - Quantizing model to 4 bit with bitsandbytes.\n", - "๐Ÿฆฅ Unsloth: Will patch your computer to enable 2x faster free finetuning.\n", - "config.json: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 1.19k/1.19k [00:00<00:00, 12.3MB/s]\n", - "[INFO|configuration_utils.py:733] 2024-07-04 14:50:19,202 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-7b-instruct-bnb-4bit/snapshots/8d8ce83e5c9fc23482eeae78027d1fc87bc2edad/config.json\n", - "[INFO|configuration_utils.py:800] 2024-07-04 14:50:19,203 >> Model config Qwen2Config {\n", - " \"_name_or_path\": \"unsloth/qwen2-7b-instruct-bnb-4bit\",\n", - " \"architectures\": [\n", - " \"Qwen2ForCausalLM\"\n", - " ],\n", - " \"attention_dropout\": 0.0,\n", - " \"bos_token_id\": 151643,\n", - " \"eos_token_id\": 151645,\n", - " \"hidden_act\": \"silu\",\n", - " \"hidden_size\": 3584,\n", - " \"initializer_range\": 0.02,\n", - " \"intermediate_size\": 18944,\n", - " \"max_position_embeddings\": 32768,\n", - " \"max_window_layers\": 28,\n", - " \"model_type\": \"qwen2\",\n", - " \"num_attention_heads\": 28,\n", - " \"num_hidden_layers\": 28,\n", - " \"num_key_value_heads\": 4,\n", - " \"quantization_config\": {\n", - " \"_load_in_4bit\": true,\n", - " \"_load_in_8bit\": false,\n", - " \"bnb_4bit_compute_dtype\": \"bfloat16\",\n", - " \"bnb_4bit_quant_storage\": \"uint8\",\n", - " \"bnb_4bit_quant_type\": \"nf4\",\n", - " \"bnb_4bit_use_double_quant\": true,\n", - " \"llm_int8_enable_fp32_cpu_offload\": false,\n", - " \"llm_int8_has_fp16_weight\": false,\n", - " \"llm_int8_skip_modules\": null,\n", - " \"llm_int8_threshold\": 6.0,\n", - " \"load_in_4bit\": true,\n", - " \"load_in_8bit\": false,\n", - " \"quant_method\": \"bitsandbytes\"\n", - " },\n", - " \"rms_norm_eps\": 1e-06,\n", - " \"rope_theta\": 1000000.0,\n", - " \"sliding_window\": 131072,\n", - " \"tie_word_embeddings\": false,\n", - " \"torch_dtype\": \"bfloat16\",\n", - " \"transformers_version\": \"4.42.3\",\n", - " \"use_cache\": true,\n", - " 
\"use_sliding_window\": false,\n", - " \"vocab_size\": 152064\n", - "}\n", - "\n", - "==((====))== Unsloth: Fast Qwen2 patching release 2024.6\n", - " \\\\ /| GPU: NVIDIA GeForce RTX 4080 Laptop GPU. Max memory: 11.994 GB. Platform = Linux.\n", - "O^O/ \\_/ \\ Pytorch: 2.3.0+cu121. CUDA = 8.9. CUDA Toolkit = 12.1.\n", - "\\ / Bfloat16 = TRUE. Xformers = 0.0.26.post1. FA = False.\n", - " \"-____-\" Free Apache license: http://github.com/unslothai/unsloth\n", - "[INFO|configuration_utils.py:733] 2024-07-04 14:50:20,339 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-7b-instruct-bnb-4bit/snapshots/8d8ce83e5c9fc23482eeae78027d1fc87bc2edad/config.json\n", - "[INFO|configuration_utils.py:800] 2024-07-04 14:50:20,340 >> Model config Qwen2Config {\n", - " \"_name_or_path\": \"unsloth/qwen2-7b-instruct-bnb-4bit\",\n", - " \"architectures\": [\n", - " \"Qwen2ForCausalLM\"\n", - " ],\n", - " \"attention_dropout\": 0.0,\n", - " \"bos_token_id\": 151643,\n", - " \"eos_token_id\": 151645,\n", - " \"hidden_act\": \"silu\",\n", - " \"hidden_size\": 3584,\n", - " \"initializer_range\": 0.02,\n", - " \"intermediate_size\": 18944,\n", - " \"max_position_embeddings\": 32768,\n", - " \"max_window_layers\": 28,\n", - " \"model_type\": \"qwen2\",\n", - " \"num_attention_heads\": 28,\n", - " \"num_hidden_layers\": 28,\n", - " \"num_key_value_heads\": 4,\n", - " \"quantization_config\": {\n", - " \"_load_in_4bit\": true,\n", - " \"_load_in_8bit\": false,\n", - " \"bnb_4bit_compute_dtype\": \"bfloat16\",\n", - " \"bnb_4bit_quant_storage\": \"uint8\",\n", - " \"bnb_4bit_quant_type\": \"nf4\",\n", - " \"bnb_4bit_use_double_quant\": true,\n", - " \"llm_int8_enable_fp32_cpu_offload\": false,\n", - " \"llm_int8_has_fp16_weight\": false,\n", - " \"llm_int8_skip_modules\": null,\n", - " \"llm_int8_threshold\": 6.0,\n", - " \"load_in_4bit\": true,\n", - " \"load_in_8bit\": false,\n", - " \"quant_method\": \"bitsandbytes\"\n", - " },\n", - " \"rms_norm_eps\": 1e-06,\n", - " \"rope_theta\": 1000000.0,\n", - " \"sliding_window\": 131072,\n", - " \"tie_word_embeddings\": false,\n", - " \"torch_dtype\": \"bfloat16\",\n", - " \"transformers_version\": \"4.42.3\",\n", - " \"use_cache\": true,\n", - " \"use_sliding_window\": false,\n", - " \"vocab_size\": 152064\n", - "}\n", - "\n", - "[INFO|configuration_utils.py:733] 2024-07-04 14:50:20,992 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-7b-instruct-bnb-4bit/snapshots/8d8ce83e5c9fc23482eeae78027d1fc87bc2edad/config.json\n", - "[INFO|configuration_utils.py:800] 2024-07-04 14:50:20,993 >> Model config Qwen2Config {\n", - " \"_name_or_path\": \"unsloth/qwen2-7b-instruct-bnb-4bit\",\n", - " \"architectures\": [\n", - " \"Qwen2ForCausalLM\"\n", - " ],\n", - " \"attention_dropout\": 0.0,\n", - " \"bos_token_id\": 151643,\n", - " \"eos_token_id\": 151645,\n", - " \"hidden_act\": \"silu\",\n", - " \"hidden_size\": 3584,\n", - " \"initializer_range\": 0.02,\n", - " \"intermediate_size\": 18944,\n", - " \"max_position_embeddings\": 32768,\n", - " \"max_window_layers\": 28,\n", - " \"model_type\": \"qwen2\",\n", - " \"num_attention_heads\": 28,\n", - " \"num_hidden_layers\": 28,\n", - " \"num_key_value_heads\": 4,\n", - " \"quantization_config\": {\n", - " \"_load_in_4bit\": true,\n", - " \"_load_in_8bit\": false,\n", - " \"bnb_4bit_compute_dtype\": \"bfloat16\",\n", - " \"bnb_4bit_quant_storage\": \"uint8\",\n", - " \"bnb_4bit_quant_type\": \"nf4\",\n", 
- " \"bnb_4bit_use_double_quant\": true,\n", - " \"llm_int8_enable_fp32_cpu_offload\": false,\n", - " \"llm_int8_has_fp16_weight\": false,\n", - " \"llm_int8_skip_modules\": null,\n", - " \"llm_int8_threshold\": 6.0,\n", - " \"load_in_4bit\": true,\n", - " \"load_in_8bit\": false,\n", - " \"quant_method\": \"bitsandbytes\"\n", - " },\n", - " \"rms_norm_eps\": 1e-06,\n", - " \"rope_theta\": 1000000.0,\n", - " \"sliding_window\": 131072,\n", - " \"tie_word_embeddings\": false,\n", - " \"torch_dtype\": \"bfloat16\",\n", - " \"transformers_version\": \"4.42.3\",\n", - " \"use_cache\": true,\n", - " \"use_sliding_window\": false,\n", - " \"vocab_size\": 152064\n", - "}\n", - "\n", - "model.safetensors: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 5.55G/5.55G [31:00<00:00, 2.98MB/s]\n", - "[INFO|modeling_utils.py:3556] 2024-07-04 15:21:22,487 >> loading weights file model.safetensors from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-7b-instruct-bnb-4bit/snapshots/8d8ce83e5c9fc23482eeae78027d1fc87bc2edad/model.safetensors\n", - "[INFO|modeling_utils.py:1531] 2024-07-04 15:21:26,212 >> Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16.\n", - "[INFO|configuration_utils.py:1000] 2024-07-04 15:21:26,219 >> Generate config GenerationConfig {\n", - " \"bos_token_id\": 151643,\n", - " \"eos_token_id\": 151645\n", - "}\n", - "\n", - "[INFO|modeling_utils.py:4364] 2024-07-04 15:26:00,017 >> All model checkpoint weights were used when initializing Qwen2ForCausalLM.\n", - "\n", - "[INFO|modeling_utils.py:4372] 2024-07-04 15:26:00,018 >> All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at unsloth/qwen2-7b-instruct-bnb-4bit.\n", - "If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training.\n", - "generation_config.json: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 243/243 [00:00<00:00, 3.75MB/s]\n", - "[INFO|configuration_utils.py:955] 2024-07-04 15:26:01,541 >> loading configuration file generation_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-7b-instruct-bnb-4bit/snapshots/8d8ce83e5c9fc23482eeae78027d1fc87bc2edad/generation_config.json\n", - "[INFO|configuration_utils.py:1000] 2024-07-04 15:26:01,542 >> Generate config GenerationConfig {\n", - " \"bos_token_id\": 151643,\n", - " \"do_sample\": true,\n", - " \"eos_token_id\": [\n", - " 151645,\n", - " 151643\n", - " ],\n", - " \"pad_token_id\": 151643,\n", - " \"repetition_penalty\": 1.05,\n", - " \"temperature\": 0.7,\n", - " \"top_k\": 20,\n", - " \"top_p\": 0.8\n", - "}\n", - "\n", - "tokenizer_config.json: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 1.33k/1.33k [00:00<00:00, 19.0MB/s]\n", - "vocab.json: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 2.78M/2.78M [00:01<00:00, 1.75MB/s]\n", - "merges.txt: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 1.67M/1.67M [00:00<00:00, 1.89MB/s]\n", - "added_tokens.json: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 80.0/80.0 [00:00<00:00, 1.29MB/s]\n", - "special_tokens_map.json: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 367/367 [00:00<00:00, 6.11MB/s]\n", - "tokenizer.json: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 7.03M/7.03M [00:02<00:00, 3.09MB/s]\n", - "[INFO|tokenization_utils_base.py:2161] 2024-07-04 
15:26:12,737 >> loading file vocab.json from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-7b-instruct-bnb-4bit/snapshots/8d8ce83e5c9fc23482eeae78027d1fc87bc2edad/vocab.json\n", - "[INFO|tokenization_utils_base.py:2161] 2024-07-04 15:26:12,737 >> loading file merges.txt from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-7b-instruct-bnb-4bit/snapshots/8d8ce83e5c9fc23482eeae78027d1fc87bc2edad/merges.txt\n", - "[INFO|tokenization_utils_base.py:2161] 2024-07-04 15:26:12,737 >> loading file added_tokens.json from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-7b-instruct-bnb-4bit/snapshots/8d8ce83e5c9fc23482eeae78027d1fc87bc2edad/added_tokens.json\n", - "[INFO|tokenization_utils_base.py:2161] 2024-07-04 15:26:12,737 >> loading file special_tokens_map.json from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-7b-instruct-bnb-4bit/snapshots/8d8ce83e5c9fc23482eeae78027d1fc87bc2edad/special_tokens_map.json\n", - "[INFO|tokenization_utils_base.py:2161] 2024-07-04 15:26:12,737 >> loading file tokenizer_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-7b-instruct-bnb-4bit/snapshots/8d8ce83e5c9fc23482eeae78027d1fc87bc2edad/tokenizer_config.json\n", - "[INFO|tokenization_utils_base.py:2161] 2024-07-04 15:26:12,737 >> loading file tokenizer.json from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-7b-instruct-bnb-4bit/snapshots/8d8ce83e5c9fc23482eeae78027d1fc87bc2edad/tokenizer.json\n", - "[WARNING|logging.py:313] 2024-07-04 15:26:12,946 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n", - "[INFO|tokenization_utils_base.py:2161] 2024-07-04 15:26:13,696 >> loading file vocab.json from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-7b-instruct-bnb-4bit/snapshots/8d8ce83e5c9fc23482eeae78027d1fc87bc2edad/vocab.json\n", - "[INFO|tokenization_utils_base.py:2161] 2024-07-04 15:26:13,696 >> loading file merges.txt from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-7b-instruct-bnb-4bit/snapshots/8d8ce83e5c9fc23482eeae78027d1fc87bc2edad/merges.txt\n", - "[INFO|tokenization_utils_base.py:2161] 2024-07-04 15:26:13,696 >> loading file tokenizer.json from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-7b-instruct-bnb-4bit/snapshots/8d8ce83e5c9fc23482eeae78027d1fc87bc2edad/tokenizer.json\n", - "[INFO|tokenization_utils_base.py:2161] 2024-07-04 15:26:13,696 >> loading file added_tokens.json from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-7b-instruct-bnb-4bit/snapshots/8d8ce83e5c9fc23482eeae78027d1fc87bc2edad/added_tokens.json\n", - "[INFO|tokenization_utils_base.py:2161] 2024-07-04 15:26:13,696 >> loading file special_tokens_map.json from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-7b-instruct-bnb-4bit/snapshots/8d8ce83e5c9fc23482eeae78027d1fc87bc2edad/special_tokens_map.json\n", - "[INFO|tokenization_utils_base.py:2161] 2024-07-04 15:26:13,696 >> loading file tokenizer_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-7b-instruct-bnb-4bit/snapshots/8d8ce83e5c9fc23482eeae78027d1fc87bc2edad/tokenizer_config.json\n", - "[WARNING|logging.py:313] 2024-07-04 15:26:13,877 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n", - "07/04/2024 15:26:14 - INFO - 
llamafactory.model.model_utils.checkpointing - Gradient checkpointing enabled.\n", - "07/04/2024 15:26:14 - INFO - llamafactory.model.adapter - Upcasting trainable params to float32.\n", - "07/04/2024 15:26:14 - INFO - llamafactory.model.adapter - Fine-tuning method: LoRA\n", - "07/04/2024 15:26:14 - INFO - llamafactory.model.model_utils.misc - Found linear modules: gate_proj,o_proj,v_proj,k_proj,up_proj,q_proj,down_proj\n", - "[WARNING|logging.py:328] 2024-07-04 15:26:15,372 >> Unsloth 2024.6 patched 28 layers with 0 QKV layers, 28 O layers and 28 MLP layers.\n", - "07/04/2024 15:26:16 - INFO - llamafactory.model.loader - trainable params: 20,185,088 || all params: 7,635,801,600 || trainable%: 0.2643\n", - "[INFO|trainer.py:642] 2024-07-04 15:26:16,270 >> Using auto half precision backend\n", - "07/04/2024 15:26:16 - INFO - llamafactory.train.trainer_utils - Using LoRA+ optimizer with loraplus lr ratio 16.00.\n", - "[WARNING|:223] 2024-07-04 15:26:16,423 >> ==((====))== Unsloth - 2x faster free finetuning | Num GPUs = 1\n", - " \\\\ /| Num examples = 4,482 | Num Epochs = 6\n", - "O^O/ \\_/ \\ Batch size per device = 1 | Gradient Accumulation steps = 8\n", - "\\ / Total batch size = 8 | Total steps = 3,360\n", - " \"-____-\" Number of trainable parameters = 20,185,088\n", - "[INFO|integration_utils.py:750] 2024-07-04 15:26:16,929 >> Automatic Weights & Biases logging enabled, to disable set os.environ[\"WANDB_DISABLED\"] = \"true\"\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33minflaton-sg\u001b[0m (\u001b[33minflaton-ai\u001b[0m). Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: Tracking run with wandb version 0.17.4\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: Run data is saved locally in \u001b[35m\u001b[1m/home/inflaton/code/projects/courses/llm-finetuning/llama-factory/wandb/run-20240704_152618-o710838e\u001b[0m\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: Run \u001b[1m`wandb offline`\u001b[0m to turn off syncing.\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: Syncing run \u001b[33mqwen2_7b_lora_sft\u001b[0m\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: โญ๏ธ View project at \u001b[34m\u001b[4mhttps://wandb.ai/inflaton-ai/huggingface\u001b[0m\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: ๐Ÿš€ View run at \u001b[34m\u001b[4mhttps://wandb.ai/inflaton-ai/huggingface/runs/o710838e\u001b[0m\n", - "{'loss': 2.1957, 'grad_norm': 2.977725028991699, 'learning_rate': 2.9761904761904763e-06, 'epoch': 0.02}\n", - "{'loss': 1.9984, 'grad_norm': 1.17664635181427, 'learning_rate': 5.9523809523809525e-06, 'epoch': 0.04}\n", - "{'loss': 1.7375, 'grad_norm': 0.7683635354042053, 'learning_rate': 8.92857142857143e-06, 'epoch': 0.05}\n", - "{'loss': 1.7268, 'grad_norm': 1.5277972221374512, 'learning_rate': 1.1904761904761905e-05, 'epoch': 0.07}\n", - "{'loss': 1.7873, 'grad_norm': 0.7151318788528442, 'learning_rate': 1.4880952380952381e-05, 'epoch': 0.09}\n", - "{'loss': 1.6224, 'grad_norm': 0.7458081841468811, 'learning_rate': 1.785714285714286e-05, 'epoch': 0.11}\n", - "{'loss': 1.7345, 'grad_norm': 0.7242929339408875, 'learning_rate': 2.0833333333333336e-05, 'epoch': 0.12}\n", - "{'loss': 1.57, 'grad_norm': 0.8281179666519165, 'learning_rate': 2.380952380952381e-05, 'epoch': 0.14}\n", - "{'loss': 1.6718, 'grad_norm': 1.0110186338424683, 'learning_rate': 2.6785714285714288e-05, 'epoch': 0.16}\n", - "{'loss': 1.6219, 'grad_norm': 0.8258731961250305, 'learning_rate': 2.9761904761904762e-05, 'epoch': 0.18}\n", - "{'loss': 1.6115, 
'grad_norm': 0.9346244931221008, 'learning_rate': 3.273809523809524e-05, 'epoch': 0.2}\n", - "{'loss': 1.6081, 'grad_norm': 1.0503712892532349, 'learning_rate': 3.571428571428572e-05, 'epoch': 0.21}\n", - "{'loss': 1.5874, 'grad_norm': 1.1157383918762207, 'learning_rate': 3.8690476190476195e-05, 'epoch': 0.23}\n", - "{'loss': 1.5825, 'grad_norm': 1.212875485420227, 'learning_rate': 4.166666666666667e-05, 'epoch': 0.25}\n", - "{'loss': 1.512, 'grad_norm': 1.073009967803955, 'learning_rate': 4.464285714285715e-05, 'epoch': 0.27}\n", - "{'loss': 1.5074, 'grad_norm': 0.8304378390312195, 'learning_rate': 4.761904761904762e-05, 'epoch': 0.29}\n", - "{'loss': 1.6019, 'grad_norm': 0.8581129908561707, 'learning_rate': 5.05952380952381e-05, 'epoch': 0.3}\n", - "{'loss': 1.4319, 'grad_norm': 1.027076244354248, 'learning_rate': 5.3571428571428575e-05, 'epoch': 0.32}\n", - "{'loss': 1.555, 'grad_norm': 1.3464545011520386, 'learning_rate': 5.6547619047619046e-05, 'epoch': 0.34}\n", - "{'loss': 1.6637, 'grad_norm': 1.2714892625808716, 'learning_rate': 5.9523809523809524e-05, 'epoch': 0.36}\n", - "{'loss': 1.6159, 'grad_norm': 1.0014649629592896, 'learning_rate': 6.25e-05, 'epoch': 0.37}\n", - "{'loss': 1.5019, 'grad_norm': 1.4355653524398804, 'learning_rate': 6.547619047619048e-05, 'epoch': 0.39}\n", - "{'loss': 1.5167, 'grad_norm': 1.2876572608947754, 'learning_rate': 6.845238095238096e-05, 'epoch': 0.41}\n", - "{'loss': 1.6807, 'grad_norm': 1.4459688663482666, 'learning_rate': 7.142857142857143e-05, 'epoch': 0.43}\n", - "{'loss': 1.6053, 'grad_norm': 1.7381216287612915, 'learning_rate': 7.440476190476191e-05, 'epoch': 0.45}\n", - "{'loss': 1.4993, 'grad_norm': 1.516874074935913, 'learning_rate': 7.738095238095239e-05, 'epoch': 0.46}\n", - "{'loss': 1.58, 'grad_norm': 1.7755393981933594, 'learning_rate': 8.035714285714287e-05, 'epoch': 0.48}\n", - "{'loss': 1.5699, 'grad_norm': 1.7302135229110718, 'learning_rate': 8.333333333333334e-05, 'epoch': 0.5}\n", - "{'loss': 1.419, 'grad_norm': 1.172330617904663, 'learning_rate': 8.630952380952382e-05, 'epoch': 0.52}\n", - "{'loss': 1.5505, 'grad_norm': 1.676744818687439, 'learning_rate': 8.92857142857143e-05, 'epoch': 0.54}\n", - "{'loss': 1.5749, 'grad_norm': 1.8019312620162964, 'learning_rate': 9.226190476190478e-05, 'epoch': 0.55}\n", - "{'loss': 1.4185, 'grad_norm': 2.2339751720428467, 'learning_rate': 9.523809523809524e-05, 'epoch': 0.57}\n", - "{'loss': 1.4871, 'grad_norm': 1.8845446109771729, 'learning_rate': 9.821428571428572e-05, 'epoch': 0.59}\n", - "{'loss': 1.4547, 'grad_norm': 1.5382771492004395, 'learning_rate': 9.999956828659095e-05, 'epoch': 0.61}\n", - "{'loss': 1.5409, 'grad_norm': 2.5924744606018066, 'learning_rate': 9.999471159635539e-05, 'epoch': 0.62}\n", - "{'loss': 1.4544, 'grad_norm': 1.6850535869598389, 'learning_rate': 9.998445910004082e-05, 'epoch': 0.64}\n", - "{'loss': 1.6716, 'grad_norm': 2.093435287475586, 'learning_rate': 9.996881190417393e-05, 'epoch': 0.66}\n", - "{'loss': 1.5389, 'grad_norm': 1.9192240238189697, 'learning_rate': 9.994777169751806e-05, 'epoch': 0.68}\n", - "{'loss': 1.3255, 'grad_norm': 1.820000410079956, 'learning_rate': 9.992134075089084e-05, 'epoch': 0.7}\n", - "{'loss': 1.4784, 'grad_norm': 1.8777908086776733, 'learning_rate': 9.988952191691925e-05, 'epoch': 0.71}\n", - "{'loss': 1.5354, 'grad_norm': 1.7081478834152222, 'learning_rate': 9.985231862973168e-05, 'epoch': 0.73}\n", - "{'loss': 1.5822, 'grad_norm': 1.6461598873138428, 'learning_rate': 9.980973490458728e-05, 'epoch': 0.75}\n", - "{'loss': 
1.5233, 'grad_norm': 2.1327311992645264, 'learning_rate': 9.976177533744261e-05, 'epoch': 0.77}\n", - "{'loss': 1.4739, 'grad_norm': 2.4746365547180176, 'learning_rate': 9.97084451044556e-05, 'epoch': 0.79}\n", - "{'loss': 1.4276, 'grad_norm': 1.7821303606033325, 'learning_rate': 9.964974996142698e-05, 'epoch': 0.8}\n", - "{'loss': 1.4803, 'grad_norm': 1.551522970199585, 'learning_rate': 9.958569624317893e-05, 'epoch': 0.82}\n", - "{'loss': 1.5314, 'grad_norm': 2.6767489910125732, 'learning_rate': 9.951629086287151e-05, 'epoch': 0.84}\n", - "{'loss': 1.4844, 'grad_norm': 1.7266111373901367, 'learning_rate': 9.944154131125642e-05, 'epoch': 0.86}\n", - "{'loss': 1.5248, 'grad_norm': 1.7948070764541626, 'learning_rate': 9.936145565586871e-05, 'epoch': 0.87}\n", - "{'loss': 1.6563, 'grad_norm': 1.6244261264801025, 'learning_rate': 9.927604254015585e-05, 'epoch': 0.89}\n", - "{'loss': 1.5928, 'grad_norm': 1.7924832105636597, 'learning_rate': 9.918531118254507e-05, 'epoch': 0.91}\n", - "{'loss': 1.4955, 'grad_norm': 2.337216377258301, 'learning_rate': 9.90892713754483e-05, 'epoch': 0.93}\n", - "{'loss': 1.558, 'grad_norm': 2.165968179702759, 'learning_rate': 9.898793348420536e-05, 'epoch': 0.95}\n", - "{'loss': 1.5148, 'grad_norm': 1.7740817070007324, 'learning_rate': 9.888130844596524e-05, 'epoch': 0.96}\n", - "{'loss': 1.5339, 'grad_norm': 2.276500940322876, 'learning_rate': 9.876940776850569e-05, 'epoch': 0.98}\n", - "{'loss': 1.4748, 'grad_norm': 1.852982521057129, 'learning_rate': 9.865224352899119e-05, 'epoch': 1.0}\n", - " 17%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ | 560/3360 [1:04:27<5:48:51, 7.48s/it][INFO|trainer.py:3788] 2024-07-04 16:30:50,001 >> \n", - "***** Running Evaluation *****\n", - "[INFO|trainer.py:3790] 2024-07-04 16:30:50,003 >> Num examples = 46\n", - "[INFO|trainer.py:3793] 2024-07-04 16:30:50,003 >> Batch size = 1\n", - "\n", - " 0%| | 0/46 [00:00> Saving model checkpoint to saves/qwen2-7b/lora/sft/checkpoint-560\n", - "[INFO|configuration_utils.py:733] 2024-07-04 16:31:06,164 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-7b-instruct-bnb-4bit/snapshots/8d8ce83e5c9fc23482eeae78027d1fc87bc2edad/config.json\n", - "[INFO|configuration_utils.py:800] 2024-07-04 16:31:06,165 >> Model config Qwen2Config {\n", - " \"_name_or_path\": \"Qwen/Qwen2-7B-Instruct\",\n", - " \"architectures\": [\n", - " \"Qwen2ForCausalLM\"\n", - " ],\n", - " \"attention_dropout\": 0.0,\n", - " \"bos_token_id\": 151643,\n", - " \"eos_token_id\": 151645,\n", - " \"hidden_act\": \"silu\",\n", - " \"hidden_size\": 3584,\n", - " \"initializer_range\": 0.02,\n", - " \"intermediate_size\": 18944,\n", - " \"max_position_embeddings\": 32768,\n", - " \"max_window_layers\": 28,\n", - " \"model_type\": \"qwen2\",\n", - " \"num_attention_heads\": 28,\n", - " \"num_hidden_layers\": 28,\n", - " \"num_key_value_heads\": 4,\n", - " \"quantization_config\": {\n", - " \"_load_in_4bit\": true,\n", - " \"_load_in_8bit\": false,\n", - " \"bnb_4bit_compute_dtype\": \"bfloat16\",\n", - " \"bnb_4bit_quant_storage\": \"uint8\",\n", - " \"bnb_4bit_quant_type\": \"nf4\",\n", - " \"bnb_4bit_use_double_quant\": true,\n", - " \"llm_int8_enable_fp32_cpu_offload\": false,\n", - " \"llm_int8_has_fp16_weight\": false,\n", - " \"llm_int8_skip_modules\": null,\n", - " \"llm_int8_threshold\": 6.0,\n", - " \"load_in_4bit\": true,\n", - " \"load_in_8bit\": false,\n", - " \"quant_method\": \"bitsandbytes\"\n", - " },\n", - " \"rms_norm_eps\": 1e-06,\n", - " \"rope_theta\": 1000000.0,\n", - 
" \"sliding_window\": 131072,\n", - " \"tie_word_embeddings\": false,\n", - " \"torch_dtype\": \"bfloat16\",\n", - " \"transformers_version\": \"4.42.3\",\n", - " \"use_cache\": true,\n", - " \"use_sliding_window\": false,\n", - " \"vocab_size\": 152064\n", - "}\n", - "\n", - "[INFO|tokenization_utils_base.py:2574] 2024-07-04 16:31:06,584 >> tokenizer config file saved in saves/qwen2-7b/lora/sft/checkpoint-560/tokenizer_config.json\n", - "[INFO|tokenization_utils_base.py:2583] 2024-07-04 16:31:06,585 >> Special tokens file saved in saves/qwen2-7b/lora/sft/checkpoint-560/special_tokens_map.json\n", - "{'loss': 1.0693, 'grad_norm': 2.4884133338928223, 'learning_rate': 9.852982837266955e-05, 'epoch': 1.02}\n", - "{'loss': 0.9156, 'grad_norm': 2.0713613033294678, 'learning_rate': 9.840217551150706e-05, 'epoch': 1.04}\n", - "{'loss': 1.0533, 'grad_norm': 2.160870313644409, 'learning_rate': 9.826929872276255e-05, 'epoch': 1.05}\n", - "{'loss': 0.9734, 'grad_norm': 2.251491069793701, 'learning_rate': 9.81312123475006e-05, 'epoch': 1.07}\n", - "{'loss': 0.925, 'grad_norm': 2.160745859146118, 'learning_rate': 9.798793128904356e-05, 'epoch': 1.09}\n", - "{'loss': 1.0312, 'grad_norm': 2.9455161094665527, 'learning_rate': 9.78394710113631e-05, 'epoch': 1.11}\n", - "{'loss': 0.9867, 'grad_norm': 1.834627628326416, 'learning_rate': 9.768584753741134e-05, 'epoch': 1.12}\n", - "{'loss': 0.9962, 'grad_norm': 2.891728401184082, 'learning_rate': 9.752707744739145e-05, 'epoch': 1.14}\n", - "{'loss': 1.0046, 'grad_norm': 2.459664821624756, 'learning_rate': 9.736317787696816e-05, 'epoch': 1.16}\n", - "{'loss': 0.872, 'grad_norm': 2.503146171569824, 'learning_rate': 9.719416651541839e-05, 'epoch': 1.18}\n", - "{'loss': 0.9536, 'grad_norm': 1.9054204225540161, 'learning_rate': 9.702006160372209e-05, 'epoch': 1.2}\n", - "{'loss': 0.9768, 'grad_norm': 2.08803129196167, 'learning_rate': 9.684088193259355e-05, 'epoch': 1.21}\n", - "{'loss': 0.9448, 'grad_norm': 2.4227285385131836, 'learning_rate': 9.665664684045333e-05, 'epoch': 1.23}\n", - "{'loss': 1.0078, 'grad_norm': 2.396881103515625, 'learning_rate': 9.646737621134112e-05, 'epoch': 1.25}\n", - "{'loss': 0.9285, 'grad_norm': 4.0550384521484375, 'learning_rate': 9.627309047276974e-05, 'epoch': 1.27}\n", - "{'loss': 1.0518, 'grad_norm': 3.4381208419799805, 'learning_rate': 9.607381059352038e-05, 'epoch': 1.29}\n", - "{'loss': 1.0221, 'grad_norm': 2.341543674468994, 'learning_rate': 9.586955808137958e-05, 'epoch': 1.3}\n", - "{'loss': 1.0084, 'grad_norm': 2.660717725753784, 'learning_rate': 9.566035498081784e-05, 'epoch': 1.32}\n", - "{'loss': 1.0374, 'grad_norm': 2.4253923892974854, 'learning_rate': 9.544622387061055e-05, 'epoch': 1.34}\n", - "{'loss': 0.8872, 'grad_norm': 3.2932205200195312, 'learning_rate': 9.522718786140097e-05, 'epoch': 1.36}\n", - "{'loss': 1.0013, 'grad_norm': 3.3068909645080566, 'learning_rate': 9.500327059320606e-05, 'epoch': 1.37}\n", - "{'loss': 0.9135, 'grad_norm': 3.9048690795898438, 'learning_rate': 9.477449623286505e-05, 'epoch': 1.39}\n", - "{'loss': 0.8808, 'grad_norm': 2.9740893840789795, 'learning_rate': 9.454088947143116e-05, 'epoch': 1.41}\n", - "{'loss': 1.0511, 'grad_norm': 3.2612483501434326, 'learning_rate': 9.430247552150673e-05, 'epoch': 1.43}\n", - "{'loss': 0.9457, 'grad_norm': 2.8854198455810547, 'learning_rate': 9.405928011452211e-05, 'epoch': 1.45}\n", - "{'loss': 0.9401, 'grad_norm': 2.1029069423675537, 'learning_rate': 9.381132949795861e-05, 'epoch': 1.46}\n", - "{'loss': 1.0803, 'grad_norm': 3.1445486545562744, 
'learning_rate': 9.35586504325155e-05, 'epoch': 1.48}\n", - "{'loss': 0.9944, 'grad_norm': 3.0867714881896973, 'learning_rate': 9.330127018922194e-05, 'epoch': 1.5}\n", - "{'loss': 0.8916, 'grad_norm': 2.589761257171631, 'learning_rate': 9.303921654649362e-05, 'epoch': 1.52}\n", - "{'loss': 0.988, 'grad_norm': 2.9633171558380127, 'learning_rate': 9.277251778713474e-05, 'epoch': 1.54}\n", - "{'loss': 0.9376, 'grad_norm': 3.082129716873169, 'learning_rate': 9.250120269528546e-05, 'epoch': 1.55}\n", - "{'loss': 0.9333, 'grad_norm': 2.1602373123168945, 'learning_rate': 9.22253005533154e-05, 'epoch': 1.57}\n", - "{'loss': 1.0027, 'grad_norm': 2.900174617767334, 'learning_rate': 9.194484113866313e-05, 'epoch': 1.59}\n", - "{'loss': 1.1305, 'grad_norm': 3.4030845165252686, 'learning_rate': 9.165985472062246e-05, 'epoch': 1.61}\n", - "{'loss': 0.8973, 'grad_norm': 2.5629944801330566, 'learning_rate': 9.137037205707552e-05, 'epoch': 1.62}\n", - "{'loss': 0.9483, 'grad_norm': 3.2390940189361572, 'learning_rate': 9.107642439117321e-05, 'epoch': 1.64}\n", - "{'loss': 0.9879, 'grad_norm': 2.5794193744659424, 'learning_rate': 9.077804344796302e-05, 'epoch': 1.66}\n", - "{'loss': 0.9668, 'grad_norm': 2.389864444732666, 'learning_rate': 9.04752614309652e-05, 'epoch': 1.68}\n", - "{'loss': 0.8852, 'grad_norm': 3.5650432109832764, 'learning_rate': 9.01681110186971e-05, 'epoch': 1.7}\n", - "{'loss': 0.9984, 'grad_norm': 3.166510581970215, 'learning_rate': 8.985662536114613e-05, 'epoch': 1.71}\n", - "{'loss': 0.9526, 'grad_norm': 2.176490306854248, 'learning_rate': 8.954083807619208e-05, 'epoch': 1.73}\n", - "{'loss': 1.0377, 'grad_norm': 3.0157470703125, 'learning_rate': 8.922078324597879e-05, 'epoch': 1.75}\n", - "{'loss': 1.1106, 'grad_norm': 2.781142234802246, 'learning_rate': 8.889649541323574e-05, 'epoch': 1.77}\n", - "{'loss': 1.0373, 'grad_norm': 3.456441879272461, 'learning_rate': 8.856800957755e-05, 'epoch': 1.78}\n", - "{'loss': 1.0307, 'grad_norm': 3.646578311920166, 'learning_rate': 8.823536119158864e-05, 'epoch': 1.8}\n", - "{'loss': 0.9769, 'grad_norm': 3.4664463996887207, 'learning_rate': 8.789858615727265e-05, 'epoch': 1.82}\n", - "{'loss': 0.9524, 'grad_norm': 2.52860951423645, 'learning_rate': 8.755772082190194e-05, 'epoch': 1.84}\n", - "{'loss': 1.0686, 'grad_norm': 3.0946435928344727, 'learning_rate': 8.721280197423258e-05, 'epoch': 1.86}\n", - "{'loss': 0.9359, 'grad_norm': 3.146989583969116, 'learning_rate': 8.68638668405062e-05, 'epoch': 1.87}\n", - "{'loss': 1.0035, 'grad_norm': 3.2309892177581787, 'learning_rate': 8.651095308043232e-05, 'epoch': 1.89}\n", - "{'loss': 1.0669, 'grad_norm': 3.8748905658721924, 'learning_rate': 8.61540987831238e-05, 'epoch': 1.91}\n", - "{'loss': 1.0676, 'grad_norm': 3.329939603805542, 'learning_rate': 8.579334246298593e-05, 'epoch': 1.93}\n", - "{'loss': 0.9976, 'grad_norm': 3.7491514682769775, 'learning_rate': 8.542872305555978e-05, 'epoch': 1.95}\n", - "{'loss': 0.9471, 'grad_norm': 3.245119571685791, 'learning_rate': 8.50602799133199e-05, 'epoch': 1.96}\n", - "{'loss': 0.9998, 'grad_norm': 2.7840590476989746, 'learning_rate': 8.468805280142709e-05, 'epoch': 1.98}\n", - "{'loss': 1.0361, 'grad_norm': 3.2855234146118164, 'learning_rate': 8.43120818934367e-05, 'epoch': 2.0}\n", - " 33%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–‹ | 1120/3360 [2:09:53<4:22:41, 7.04s/it][INFO|trainer.py:3788] 2024-07-04 17:36:15,576 >> \n", - "***** Running Evaluation *****\n", - "[INFO|trainer.py:3790] 2024-07-04 17:36:15,578 >> Num examples = 46\n", - "[INFO|trainer.py:3793] 
2024-07-04 17:36:15,580 >> Batch size = 1\n", - "\n", - " 0%| | 0/46 [00:00> Saving model checkpoint to saves/qwen2-7b/lora/sft/checkpoint-1120\n", - "[INFO|configuration_utils.py:733] 2024-07-04 17:36:31,166 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-7b-instruct-bnb-4bit/snapshots/8d8ce83e5c9fc23482eeae78027d1fc87bc2edad/config.json\n", - "[INFO|configuration_utils.py:800] 2024-07-04 17:36:31,166 >> Model config Qwen2Config {\n", - " \"_name_or_path\": \"Qwen/Qwen2-7B-Instruct\",\n", - " \"architectures\": [\n", - " \"Qwen2ForCausalLM\"\n", - " ],\n", - " \"attention_dropout\": 0.0,\n", - " \"bos_token_id\": 151643,\n", - " \"eos_token_id\": 151645,\n", - " \"hidden_act\": \"silu\",\n", - " \"hidden_size\": 3584,\n", - " \"initializer_range\": 0.02,\n", - " \"intermediate_size\": 18944,\n", - " \"max_position_embeddings\": 32768,\n", - " \"max_window_layers\": 28,\n", - " \"model_type\": \"qwen2\",\n", - " \"num_attention_heads\": 28,\n", - " \"num_hidden_layers\": 28,\n", - " \"num_key_value_heads\": 4,\n", - " \"quantization_config\": {\n", - " \"_load_in_4bit\": true,\n", - " \"_load_in_8bit\": false,\n", - " \"bnb_4bit_compute_dtype\": \"bfloat16\",\n", - " \"bnb_4bit_quant_storage\": \"uint8\",\n", - " \"bnb_4bit_quant_type\": \"nf4\",\n", - " \"bnb_4bit_use_double_quant\": true,\n", - " \"llm_int8_enable_fp32_cpu_offload\": false,\n", - " \"llm_int8_has_fp16_weight\": false,\n", - " \"llm_int8_skip_modules\": null,\n", - " \"llm_int8_threshold\": 6.0,\n", - " \"load_in_4bit\": true,\n", - " \"load_in_8bit\": false,\n", - " \"quant_method\": \"bitsandbytes\"\n", - " },\n", - " \"rms_norm_eps\": 1e-06,\n", - " \"rope_theta\": 1000000.0,\n", - " \"sliding_window\": 131072,\n", - " \"tie_word_embeddings\": false,\n", - " \"torch_dtype\": \"bfloat16\",\n", - " \"transformers_version\": \"4.42.3\",\n", - " \"use_cache\": true,\n", - " \"use_sliding_window\": false,\n", - " \"vocab_size\": 152064\n", - "}\n", - "\n", - "[INFO|tokenization_utils_base.py:2574] 2024-07-04 17:36:31,345 >> tokenizer config file saved in saves/qwen2-7b/lora/sft/checkpoint-1120/tokenizer_config.json\n", - "[INFO|tokenization_utils_base.py:2583] 2024-07-04 17:36:31,345 >> Special tokens file saved in saves/qwen2-7b/lora/sft/checkpoint-1120/special_tokens_map.json\n", - "{'loss': 0.4204, 'grad_norm': 3.7729196548461914, 'learning_rate': 8.393240776696274e-05, 'epoch': 2.02}\n", - "{'loss': 0.3656, 'grad_norm': 2.241997718811035, 'learning_rate': 8.354907139929851e-05, 'epoch': 2.03}\n", - "{'loss': 0.3554, 'grad_norm': 3.0927772521972656, 'learning_rate': 8.316211416299397e-05, 'epoch': 2.05}\n", - "{'loss': 0.3576, 'grad_norm': 4.350724697113037, 'learning_rate': 8.27715778213905e-05, 'epoch': 2.07}\n", - "{'loss': 0.2409, 'grad_norm': 2.0694334506988525, 'learning_rate': 8.237750452411353e-05, 'epoch': 2.09}\n", - "{'loss': 0.3769, 'grad_norm': 2.3954668045043945, 'learning_rate': 8.197993680252334e-05, 'epoch': 2.11}\n", - "{'loss': 0.4252, 'grad_norm': 2.997573137283325, 'learning_rate': 8.157891756512488e-05, 'epoch': 2.12}\n", - "{'loss': 0.3827, 'grad_norm': 3.1807985305786133, 'learning_rate': 8.117449009293668e-05, 'epoch': 2.14}\n", - "{'loss': 0.3291, 'grad_norm': 4.341946125030518, 'learning_rate': 8.076669803481965e-05, 'epoch': 2.16}\n", - "{'loss': 0.3049, 'grad_norm': 4.446887493133545, 'learning_rate': 8.035558540276618e-05, 'epoch': 2.18}\n", - "{'loss': 0.3638, 'grad_norm': 2.7504091262817383, 'learning_rate': 
7.994119656715002e-05, 'epoch': 2.2}\n", - "{'loss': 0.4044, 'grad_norm': 2.769212484359741, 'learning_rate': 7.952357625193749e-05, 'epoch': 2.21}\n", - "{'loss': 0.3703, 'grad_norm': 5.1489362716674805, 'learning_rate': 7.91027695298606e-05, 'epoch': 2.23}\n", - "{'loss': 0.3866, 'grad_norm': 4.870989799499512, 'learning_rate': 7.86788218175523e-05, 'epoch': 2.25}\n", - "{'loss': 0.383, 'grad_norm': 2.3800389766693115, 'learning_rate': 7.8251778870645e-05, 'epoch': 2.27}\n", - "{'loss': 0.3855, 'grad_norm': 3.800349473953247, 'learning_rate': 7.782168677883206e-05, 'epoch': 2.28}\n", - "{'loss': 0.4051, 'grad_norm': 2.723214864730835, 'learning_rate': 7.738859196089358e-05, 'epoch': 2.3}\n", - "{'loss': 0.4282, 'grad_norm': 3.5306265354156494, 'learning_rate': 7.695254115968648e-05, 'epoch': 2.32}\n", - "{'loss': 0.4128, 'grad_norm': 2.6264665126800537, 'learning_rate': 7.651358143709972e-05, 'epoch': 2.34}\n", - "{'loss': 0.4174, 'grad_norm': 3.427201747894287, 'learning_rate': 7.60717601689749e-05, 'epoch': 2.36}\n", - "{'loss': 0.3553, 'grad_norm': 3.8674330711364746, 'learning_rate': 7.562712503999327e-05, 'epoch': 2.37}\n", - "{'loss': 0.4509, 'grad_norm': 3.253030776977539, 'learning_rate': 7.517972403852905e-05, 'epoch': 2.39}\n", - "{'loss': 0.3599, 'grad_norm': 3.4824795722961426, 'learning_rate': 7.472960545147038e-05, 'epoch': 2.41}\n", - "{'loss': 0.3248, 'grad_norm': 4.311473369598389, 'learning_rate': 7.427681785900761e-05, 'epoch': 2.43}\n", - "{'loss': 0.3835, 'grad_norm': 3.2026665210723877, 'learning_rate': 7.382141012939034e-05, 'epoch': 2.45}\n", - "{'loss': 0.3631, 'grad_norm': 3.4886059761047363, 'learning_rate': 7.33634314136531e-05, 'epoch': 2.46}\n", - "{'loss': 0.4001, 'grad_norm': 2.1931118965148926, 'learning_rate': 7.290293114031061e-05, 'epoch': 2.48}\n", - "{'loss': 0.4094, 'grad_norm': 3.082930564880371, 'learning_rate': 7.243995901002312e-05, 'epoch': 2.5}\n", - "{'loss': 0.3916, 'grad_norm': 2.8144562244415283, 'learning_rate': 7.197456499023225e-05, 'epoch': 2.52}\n", - "{'loss': 0.4212, 'grad_norm': 3.546799898147583, 'learning_rate': 7.150679930976825e-05, 'epoch': 2.53}\n", - "{'loss': 0.3852, 'grad_norm': 3.623589038848877, 'learning_rate': 7.103671245342887e-05, 'epoch': 2.55}\n", - "{'loss': 0.3294, 'grad_norm': 3.896050214767456, 'learning_rate': 7.056435515653059e-05, 'epoch': 2.57}\n", - "{'loss': 0.4378, 'grad_norm': 2.8549437522888184, 'learning_rate': 7.008977839943299e-05, 'epoch': 2.59}\n", - "{'loss': 0.3744, 'grad_norm': 2.963679313659668, 'learning_rate': 6.961303340203653e-05, 'epoch': 2.61}\n", - "{'loss': 0.4083, 'grad_norm': 3.584379196166992, 'learning_rate': 6.91341716182545e-05, 'epoch': 2.62}\n", - "{'loss': 0.3875, 'grad_norm': 3.231067180633545, 'learning_rate': 6.86532447304597e-05, 'epoch': 2.64}\n", - "{'loss': 0.3555, 'grad_norm': 3.2355687618255615, 'learning_rate': 6.817030464390656e-05, 'epoch': 2.66}\n", - "{'loss': 0.3962, 'grad_norm': 4.36820125579834, 'learning_rate': 6.768540348112907e-05, 'epoch': 2.68}\n", - "{'loss': 0.3224, 'grad_norm': 2.6882545948028564, 'learning_rate': 6.719859357631535e-05, 'epoch': 2.7}\n", - "{'loss': 0.3478, 'grad_norm': 3.5584182739257812, 'learning_rate': 6.670992746965938e-05, 'epoch': 2.71}\n", - "{'loss': 0.4298, 'grad_norm': 4.19834041595459, 'learning_rate': 6.621945790169036e-05, 'epoch': 2.73}\n", - "{'loss': 0.4304, 'grad_norm': 4.770883083343506, 'learning_rate': 6.572723780758069e-05, 'epoch': 2.75}\n", - "{'loss': 0.3657, 'grad_norm': 4.010149955749512, 'learning_rate': 
6.523332031143272e-05, 'epoch': 2.77}\n", - "{'loss': 0.3699, 'grad_norm': 3.2105469703674316, 'learning_rate': 6.473775872054521e-05, 'epoch': 2.78}\n", - "{'loss': 0.3342, 'grad_norm': 3.494490146636963, 'learning_rate': 6.424060651966007e-05, 'epoch': 2.8}\n", - "{'loss': 0.327, 'grad_norm': 3.291541814804077, 'learning_rate': 6.374191736518974e-05, 'epoch': 2.82}\n", - "{'loss': 0.3928, 'grad_norm': 3.125520706176758, 'learning_rate': 6.324174507942637e-05, 'epoch': 2.84}\n", - "{'loss': 0.3776, 'grad_norm': 4.660810470581055, 'learning_rate': 6.274014364473274e-05, 'epoch': 2.86}\n", - "{'loss': 0.4623, 'grad_norm': 2.8751118183135986, 'learning_rate': 6.22371671977162e-05, 'epoch': 2.87}\n", - "{'loss': 0.4122, 'grad_norm': 4.0637078285217285, 'learning_rate': 6.173287002338577e-05, 'epoch': 2.89}\n", - "{'loss': 0.4056, 'grad_norm': 3.7399301528930664, 'learning_rate': 6.122730654929334e-05, 'epoch': 2.91}\n", - "{'loss': 0.3351, 'grad_norm': 4.581759452819824, 'learning_rate': 6.072053133965938e-05, 'epoch': 2.93}\n", - "{'loss': 0.3849, 'grad_norm': 3.381431818008423, 'learning_rate': 6.021259908948402e-05, 'epoch': 2.95}\n", - "{'loss': 0.3947, 'grad_norm': 4.740965366363525, 'learning_rate': 5.970356461864391e-05, 'epoch': 2.96}\n", - "{'loss': 0.3945, 'grad_norm': 5.124401569366455, 'learning_rate': 5.919348286597569e-05, 'epoch': 2.98}\n", - "{'loss': 0.4098, 'grad_norm': 3.3869075775146484, 'learning_rate': 5.868240888334653e-05, 'epoch': 3.0}\n", - " 50%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–Œ | 1680/3360 [3:13:07<3:03:37, 6.56s/it][INFO|trainer.py:3788] 2024-07-04 18:39:30,098 >> \n", - "***** Running Evaluation *****\n", - "[INFO|trainer.py:3790] 2024-07-04 18:39:30,098 >> Num examples = 46\n", - "[INFO|trainer.py:3793] 2024-07-04 18:39:30,098 >> Batch size = 1\n", - "\n", - " 0%| | 0/46 [00:00> Saving model checkpoint to saves/qwen2-7b/lora/sft/checkpoint-1680\n", - "[INFO|configuration_utils.py:733] 2024-07-04 18:39:46,491 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-7b-instruct-bnb-4bit/snapshots/8d8ce83e5c9fc23482eeae78027d1fc87bc2edad/config.json\n", - "[INFO|configuration_utils.py:800] 2024-07-04 18:39:46,492 >> Model config Qwen2Config {\n", - " \"_name_or_path\": \"Qwen/Qwen2-7B-Instruct\",\n", - " \"architectures\": [\n", - " \"Qwen2ForCausalLM\"\n", - " ],\n", - " \"attention_dropout\": 0.0,\n", - " \"bos_token_id\": 151643,\n", - " \"eos_token_id\": 151645,\n", - " \"hidden_act\": \"silu\",\n", - " \"hidden_size\": 3584,\n", - " \"initializer_range\": 0.02,\n", - " \"intermediate_size\": 18944,\n", - " \"max_position_embeddings\": 32768,\n", - " \"max_window_layers\": 28,\n", - " \"model_type\": \"qwen2\",\n", - " \"num_attention_heads\": 28,\n", - " \"num_hidden_layers\": 28,\n", - " \"num_key_value_heads\": 4,\n", - " \"quantization_config\": {\n", - " \"_load_in_4bit\": true,\n", - " \"_load_in_8bit\": false,\n", - " \"bnb_4bit_compute_dtype\": \"bfloat16\",\n", - " \"bnb_4bit_quant_storage\": \"uint8\",\n", - " \"bnb_4bit_quant_type\": \"nf4\",\n", - " \"bnb_4bit_use_double_quant\": true,\n", - " \"llm_int8_enable_fp32_cpu_offload\": false,\n", - " \"llm_int8_has_fp16_weight\": false,\n", - " \"llm_int8_skip_modules\": null,\n", - " \"llm_int8_threshold\": 6.0,\n", - " \"load_in_4bit\": true,\n", - " \"load_in_8bit\": false,\n", - " \"quant_method\": \"bitsandbytes\"\n", - " },\n", - " \"rms_norm_eps\": 1e-06,\n", - " \"rope_theta\": 1000000.0,\n", - " \"sliding_window\": 
131072,\n", - " \"tie_word_embeddings\": false,\n", - " \"torch_dtype\": \"bfloat16\",\n", - " \"transformers_version\": \"4.42.3\",\n", - " \"use_cache\": true,\n", - " \"use_sliding_window\": false,\n", - " \"vocab_size\": 152064\n", - "}\n", - "\n", - "[INFO|tokenization_utils_base.py:2574] 2024-07-04 18:39:46,762 >> tokenizer config file saved in saves/qwen2-7b/lora/sft/checkpoint-1680/tokenizer_config.json\n", - "[INFO|tokenization_utils_base.py:2583] 2024-07-04 18:39:46,762 >> Special tokens file saved in saves/qwen2-7b/lora/sft/checkpoint-1680/special_tokens_map.json\n", - "{'loss': 0.1954, 'grad_norm': 2.6332297325134277, 'learning_rate': 5.8170397829712485e-05, 'epoch': 3.02}\n", - "{'loss': 0.0883, 'grad_norm': 3.9817214012145996, 'learning_rate': 5.765750496516547e-05, 'epoch': 3.03}\n", - "{'loss': 0.1392, 'grad_norm': 1.9517065286636353, 'learning_rate': 5.714378564496901e-05, 'epoch': 3.05}\n", - "{'loss': 0.1231, 'grad_norm': 1.8976528644561768, 'learning_rate': 5.6629295313583974e-05, 'epoch': 3.07}\n", - "{'loss': 0.1299, 'grad_norm': 1.4403581619262695, 'learning_rate': 5.611408949868457e-05, 'epoch': 3.09}\n", - "{'loss': 0.1068, 'grad_norm': 3.757260322570801, 'learning_rate': 5.559822380516539e-05, 'epoch': 3.11}\n", - "{'loss': 0.0975, 'grad_norm': 1.9946837425231934, 'learning_rate': 5.5081753909140096e-05, 'epoch': 3.12}\n", - "{'loss': 0.1205, 'grad_norm': 3.075326442718506, 'learning_rate': 5.456473555193242e-05, 'epoch': 3.14}\n", - "{'loss': 0.1226, 'grad_norm': 2.4876396656036377, 'learning_rate': 5.404722453406017e-05, 'epoch': 3.16}\n", - "{'loss': 0.1144, 'grad_norm': 2.430744171142578, 'learning_rate': 5.3529276709212816e-05, 'epoch': 3.18}\n", - "{'loss': 0.1399, 'grad_norm': 3.6195318698883057, 'learning_rate': 5.30109479782233e-05, 'epoch': 3.2}\n", - "{'loss': 0.1156, 'grad_norm': 3.914135217666626, 'learning_rate': 5.249229428303486e-05, 'epoch': 3.21}\n", - "{'loss': 0.1372, 'grad_norm': 1.994607925415039, 'learning_rate': 5.197337160066331e-05, 'epoch': 3.23}\n", - "{'loss': 0.1138, 'grad_norm': 1.6210600137710571, 'learning_rate': 5.145423593715557e-05, 'epoch': 3.25}\n", - "{'loss': 0.14, 'grad_norm': 2.50508713722229, 'learning_rate': 5.0934943321545115e-05, 'epoch': 3.27}\n", - "{'loss': 0.1152, 'grad_norm': 4.362739562988281, 'learning_rate': 5.041554979980486e-05, 'epoch': 3.28}\n", - "{'loss': 0.1549, 'grad_norm': 3.601013422012329, 'learning_rate': 4.9896111428798254e-05, 'epoch': 3.3}\n", - "{'loss': 0.1429, 'grad_norm': 2.076098680496216, 'learning_rate': 4.9376684270229254e-05, 'epoch': 3.32}\n", - "{'loss': 0.1353, 'grad_norm': 1.633200764656067, 'learning_rate': 4.8857324384591653e-05, 'epoch': 3.34}\n", - "{'loss': 0.1284, 'grad_norm': 4.053235054016113, 'learning_rate': 4.8338087825118675e-05, 'epoch': 3.36}\n", - "{'loss': 0.1526, 'grad_norm': 2.4892356395721436, 'learning_rate': 4.781903063173321e-05, 'epoch': 3.37}\n", - "{'loss': 0.1042, 'grad_norm': 1.8938469886779785, 'learning_rate': 4.730020882499964e-05, 'epoch': 3.39}\n", - "{'loss': 0.1569, 'grad_norm': 1.758270502090454, 'learning_rate': 4.678167840007767e-05, 'epoch': 3.41}\n", - "{'loss': 0.117, 'grad_norm': 1.9446786642074585, 'learning_rate': 4.626349532067879e-05, 'epoch': 3.43}\n", - "{'loss': 0.1603, 'grad_norm': 2.5028741359710693, 'learning_rate': 4.574571551302647e-05, 'epoch': 3.44}\n", - "{'loss': 0.1528, 'grad_norm': 3.524077892303467, 'learning_rate': 4.522839485981994e-05, 'epoch': 3.46}\n", - "{'loss': 0.1366, 'grad_norm': 2.425860643386841, 'learning_rate': 
4.471158919420312e-05, 'epoch': 3.48}\n", - "{'loss': 0.1231, 'grad_norm': 2.6059088706970215, 'learning_rate': 4.4195354293738484e-05, 'epoch': 3.5}\n", - "{'loss': 0.1479, 'grad_norm': 3.934004783630371, 'learning_rate': 4.367974587438733e-05, 'epoch': 3.52}\n", - "{'loss': 0.1466, 'grad_norm': 2.3225414752960205, 'learning_rate': 4.316481958449634e-05, 'epoch': 3.53}\n", - "{'loss': 0.1161, 'grad_norm': 3.3421878814697266, 'learning_rate': 4.2650630998791615e-05, 'epoch': 3.55}\n", - "{'loss': 0.1312, 'grad_norm': 2.411162853240967, 'learning_rate': 4.213723561238074e-05, 'epoch': 3.57}\n", - "{'loss': 0.1144, 'grad_norm': 2.74504017829895, 'learning_rate': 4.162468883476319e-05, 'epoch': 3.59}\n", - "{'loss': 0.1303, 'grad_norm': 3.3871073722839355, 'learning_rate': 4.111304598385018e-05, 'epoch': 3.61}\n", - "{'loss': 0.1272, 'grad_norm': 2.4120686054229736, 'learning_rate': 4.060236227999441e-05, 'epoch': 3.62}\n", - "{'loss': 0.1127, 'grad_norm': 2.2959489822387695, 'learning_rate': 4.0092692840030134e-05, 'epoch': 3.64}\n", - "{'loss': 0.131, 'grad_norm': 2.5716683864593506, 'learning_rate': 3.9584092671324606e-05, 'epoch': 3.66}\n", - "{'loss': 0.1512, 'grad_norm': 3.035562753677368, 'learning_rate': 3.907661666584131e-05, 'epoch': 3.68}\n", - "{'loss': 0.1253, 'grad_norm': 2.897613048553467, 'learning_rate': 3.857031959421553e-05, 'epoch': 3.69}\n", - "{'loss': 0.1084, 'grad_norm': 2.2627975940704346, 'learning_rate': 3.806525609984312e-05, 'epoch': 3.71}\n", - "{'loss': 0.105, 'grad_norm': 2.2742927074432373, 'learning_rate': 3.7561480692983006e-05, 'epoch': 3.73}\n", - "{'loss': 0.1489, 'grad_norm': 1.9651683568954468, 'learning_rate': 3.705904774487396e-05, 'epoch': 3.75}\n", - "{'loss': 0.1448, 'grad_norm': 4.107623100280762, 'learning_rate': 3.655801148186655e-05, 'epoch': 3.77}\n", - "{'loss': 0.0998, 'grad_norm': 2.270852565765381, 'learning_rate': 3.6058425979570485e-05, 'epoch': 3.78}\n", - "{'loss': 0.1176, 'grad_norm': 3.770810842514038, 'learning_rate': 3.556034515701852e-05, 'epoch': 3.8}\n", - "{'loss': 0.1175, 'grad_norm': 4.139482498168945, 'learning_rate': 3.506382277084696e-05, 'epoch': 3.82}\n", - "{'loss': 0.152, 'grad_norm': 2.7534141540527344, 'learning_rate': 3.4568912409493945e-05, 'epoch': 3.84}\n", - "{'loss': 0.0974, 'grad_norm': 2.224083423614502, 'learning_rate': 3.4075667487415785e-05, 'epoch': 3.86}\n", - "{'loss': 0.1133, 'grad_norm': 1.7634135484695435, 'learning_rate': 3.358414123932195e-05, 'epoch': 3.87}\n", - "{'loss': 0.1311, 'grad_norm': 2.7758963108062744, 'learning_rate': 3.3094386714429724e-05, 'epoch': 3.89}\n", - "{'loss': 0.1341, 'grad_norm': 2.842358350753784, 'learning_rate': 3.2606456770738636e-05, 'epoch': 3.91}\n", - "{'loss': 0.0884, 'grad_norm': 1.71796452999115, 'learning_rate': 3.212040406932569e-05, 'epoch': 3.93}\n", - "{'loss': 0.0956, 'grad_norm': 2.689420461654663, 'learning_rate': 3.163628106866172e-05, 'epoch': 3.94}\n", - "{'loss': 0.1731, 'grad_norm': 2.630415439605713, 'learning_rate': 3.115414001894974e-05, 'epoch': 3.96}\n", - "{'loss': 0.1458, 'grad_norm': 2.928737163543701, 'learning_rate': 3.067403295648566e-05, 'epoch': 3.98}\n", - "{'loss': 0.1278, 'grad_norm': 2.467090129852295, 'learning_rate': 3.019601169804216e-05, 'epoch': 4.0}\n", - " 67%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–Ž | 2240/3360 [4:14:45<2:03:53, 6.64s/it][INFO|trainer.py:3788] 2024-07-04 19:41:08,043 >> \n", - "***** Running Evaluation *****\n", - "[INFO|trainer.py:3790] 2024-07-04 19:41:08,044 >> Num examples 
= 46\n", - "[INFO|trainer.py:3793] 2024-07-04 19:41:08,044 >> Batch size = 1\n", - "\n", - " 0%| | 0/46 [00:00> Saving model checkpoint to saves/qwen2-7b/lora/sft/checkpoint-2240\n", - "[INFO|configuration_utils.py:733] 2024-07-04 19:41:22,728 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-7b-instruct-bnb-4bit/snapshots/8d8ce83e5c9fc23482eeae78027d1fc87bc2edad/config.json\n", - "[INFO|configuration_utils.py:800] 2024-07-04 19:41:22,729 >> Model config Qwen2Config {\n", - " \"_name_or_path\": \"Qwen/Qwen2-7B-Instruct\",\n", - " \"architectures\": [\n", - " \"Qwen2ForCausalLM\"\n", - " ],\n", - " \"attention_dropout\": 0.0,\n", - " \"bos_token_id\": 151643,\n", - " \"eos_token_id\": 151645,\n", - " \"hidden_act\": \"silu\",\n", - " \"hidden_size\": 3584,\n", - " \"initializer_range\": 0.02,\n", - " \"intermediate_size\": 18944,\n", - " \"max_position_embeddings\": 32768,\n", - " \"max_window_layers\": 28,\n", - " \"model_type\": \"qwen2\",\n", - " \"num_attention_heads\": 28,\n", - " \"num_hidden_layers\": 28,\n", - " \"num_key_value_heads\": 4,\n", - " \"quantization_config\": {\n", - " \"_load_in_4bit\": true,\n", - " \"_load_in_8bit\": false,\n", - " \"bnb_4bit_compute_dtype\": \"bfloat16\",\n", - " \"bnb_4bit_quant_storage\": \"uint8\",\n", - " \"bnb_4bit_quant_type\": \"nf4\",\n", - " \"bnb_4bit_use_double_quant\": true,\n", - " \"llm_int8_enable_fp32_cpu_offload\": false,\n", - " \"llm_int8_has_fp16_weight\": false,\n", - " \"llm_int8_skip_modules\": null,\n", - " \"llm_int8_threshold\": 6.0,\n", - " \"load_in_4bit\": true,\n", - " \"load_in_8bit\": false,\n", - " \"quant_method\": \"bitsandbytes\"\n", - " },\n", - " \"rms_norm_eps\": 1e-06,\n", - " \"rope_theta\": 1000000.0,\n", - " \"sliding_window\": 131072,\n", - " \"tie_word_embeddings\": false,\n", - " \"torch_dtype\": \"bfloat16\",\n", - " \"transformers_version\": \"4.42.3\",\n", - " \"use_cache\": true,\n", - " \"use_sliding_window\": false,\n", - " \"vocab_size\": 152064\n", - "}\n", - "\n", - "[INFO|tokenization_utils_base.py:2574] 2024-07-04 19:41:22,849 >> tokenizer config file saved in saves/qwen2-7b/lora/sft/checkpoint-2240/tokenizer_config.json\n", - "[INFO|tokenization_utils_base.py:2583] 2024-07-04 19:41:22,850 >> Special tokens file saved in saves/qwen2-7b/lora/sft/checkpoint-2240/special_tokens_map.json\n", - "{'loss': 0.0535, 'grad_norm': 0.6558727025985718, 'learning_rate': 2.9720127835276256e-05, 'epoch': 4.02}\n", - "{'loss': 0.0402, 'grad_norm': 2.1889960765838623, 'learning_rate': 2.9246432729161055e-05, 'epoch': 4.03}\n", - "{'loss': 0.0377, 'grad_norm': 0.8100994229316711, 'learning_rate': 2.8774977504442647e-05, 'epoch': 4.05}\n", - "{'loss': 0.0398, 'grad_norm': 2.8209896087646484, 'learning_rate': 2.8305813044122097e-05, 'epoch': 4.07}\n", - "{'loss': 0.0308, 'grad_norm': 1.5516138076782227, 'learning_rate': 2.7838989983964065e-05, 'epoch': 4.09}\n", - "{'loss': 0.0597, 'grad_norm': 4.609562873840332, 'learning_rate': 2.737455870703155e-05, 'epoch': 4.11}\n", - "{'loss': 0.0231, 'grad_norm': 2.7549400329589844, 'learning_rate': 2.6912569338248315e-05, 'epoch': 4.12}\n", - "{'loss': 0.0448, 'grad_norm': 5.040008068084717, 'learning_rate': 2.645307173898901e-05, 'epoch': 4.14}\n", - "{'loss': 0.0253, 'grad_norm': 1.6336179971694946, 'learning_rate': 2.5996115501697694e-05, 'epoch': 4.16}\n", - "{'loss': 0.0289, 'grad_norm': 0.8074469566345215, 'learning_rate': 2.5541749944535554e-05, 'epoch': 4.18}\n", - "{'loss': 0.0338, 'grad_norm': 
1.710808277130127, 'learning_rate': 2.5090024106057962e-05, 'epoch': 4.19}\n", - "{'loss': 0.0379, 'grad_norm': 2.1768016815185547, 'learning_rate': 2.464098673992205e-05, 'epoch': 4.21}\n", - "{'loss': 0.0682, 'grad_norm': 3.6282131671905518, 'learning_rate': 2.4194686309624663e-05, 'epoch': 4.23}\n", - "{'loss': 0.0515, 'grad_norm': 1.100537896156311, 'learning_rate': 2.3751170983272e-05, 'epoch': 4.25}\n", - "{'loss': 0.0354, 'grad_norm': 0.6081830859184265, 'learning_rate': 2.3310488628380757e-05, 'epoch': 4.27}\n", - "{'loss': 0.0334, 'grad_norm': 1.5605361461639404, 'learning_rate': 2.2872686806712035e-05, 'epoch': 4.28}\n", - "{'loss': 0.0492, 'grad_norm': 2.5406620502471924, 'learning_rate': 2.243781276913811e-05, 'epoch': 4.3}\n", - "{'loss': 0.0279, 'grad_norm': 2.160897970199585, 'learning_rate': 2.200591345054267e-05, 'epoch': 4.32}\n", - "{'loss': 0.0342, 'grad_norm': 2.3391342163085938, 'learning_rate': 2.157703546475539e-05, 'epoch': 4.34}\n", - "{'loss': 0.0332, 'grad_norm': 1.3248311281204224, 'learning_rate': 2.115122509952085e-05, 'epoch': 4.36}\n", - "{'loss': 0.0334, 'grad_norm': 2.741152763366699, 'learning_rate': 2.0728528311502976e-05, 'epoch': 4.37}\n", - "{'loss': 0.0542, 'grad_norm': 2.237809419631958, 'learning_rate': 2.0308990721324927e-05, 'epoch': 4.39}\n", - "{'loss': 0.0344, 'grad_norm': 3.8997409343719482, 'learning_rate': 1.989265760864542e-05, 'epoch': 4.41}\n", - "{'loss': 0.0439, 'grad_norm': 0.6022194623947144, 'learning_rate': 1.947957390727185e-05, 'epoch': 4.43}\n", - "{'loss': 0.0346, 'grad_norm': 1.2296243906021118, 'learning_rate': 1.906978420031059e-05, 'epoch': 4.44}\n", - "{'loss': 0.0209, 'grad_norm': 0.28131213784217834, 'learning_rate': 1.8663332715355396e-05, 'epoch': 4.46}\n", - "{'loss': 0.0271, 'grad_norm': 2.75640606880188, 'learning_rate': 1.8260263319713844e-05, 'epoch': 4.48}\n", - "{'loss': 0.0408, 'grad_norm': 3.289303779602051, 'learning_rate': 1.7860619515673033e-05, 'epoch': 4.5}\n", - "{'loss': 0.0344, 'grad_norm': 1.2157098054885864, 'learning_rate': 1.746444443580433e-05, 'epoch': 4.52}\n", - "{'loss': 0.0272, 'grad_norm': 1.5058122873306274, 'learning_rate': 1.7071780838308288e-05, 'epoch': 4.53}\n", - "{'loss': 0.0283, 'grad_norm': 1.8522496223449707, 'learning_rate': 1.6682671102399805e-05, 'epoch': 4.55}\n", - "{'loss': 0.027, 'grad_norm': 2.126176595687866, 'learning_rate': 1.629715722373423e-05, 'epoch': 4.57}\n", - "{'loss': 0.0434, 'grad_norm': 2.065514326095581, 'learning_rate': 1.5915280809874932e-05, 'epoch': 4.59}\n", - "{'loss': 0.0427, 'grad_norm': 2.2047812938690186, 'learning_rate': 1.553708307580265e-05, 'epoch': 4.61}\n", - "{'loss': 0.0266, 'grad_norm': 2.1723501682281494, 'learning_rate': 1.5162604839467265e-05, 'epoch': 4.62}\n", - "{'loss': 0.0201, 'grad_norm': 1.7166253328323364, 'learning_rate': 1.4791886517382413e-05, 'epoch': 4.64}\n", - "{'loss': 0.0306, 'grad_norm': 0.5556966066360474, 'learning_rate': 1.4424968120263504e-05, 'epoch': 4.66}\n", - "{'loss': 0.0249, 'grad_norm': 1.101198434829712, 'learning_rate': 1.4061889248709343e-05, 'epoch': 4.68}\n", - "{'loss': 0.0324, 'grad_norm': 0.6396570801734924, 'learning_rate': 1.370268908892825e-05, 'epoch': 4.69}\n", - "{'loss': 0.0303, 'grad_norm': 2.5093636512756348, 'learning_rate': 1.3347406408508695e-05, 'epoch': 4.71}\n", - "{'loss': 0.0522, 'grad_norm': 1.5739742517471313, 'learning_rate': 1.2996079552235263e-05, 'epoch': 4.73}\n", - "{'loss': 0.0293, 'grad_norm': 0.9539183974266052, 'learning_rate': 1.264874643795021e-05, 'epoch': 4.75}\n", 
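The learning_rate values threaded through these log lines trace the run's scheduler. They are consistent with a 1e-4 peak, linear warmup over roughly the first 10% of the 3360 optimizer steps, and cosine decay to zero; below is a minimal sketch that reproduces the logged numbers (peak and warmup are inferred from the logs themselves, not read from the YAML config):

```python
import math

# Sketch only: PEAK_LR and WARMUP_STEPS are inferred from the logged values,
# not taken from config/qwen2_7b_lora_sft_unsloth.yaml.
PEAK_LR, TOTAL_STEPS, WARMUP_STEPS = 1e-4, 3360, 336

def lr_at(step: int) -> float:
    if step < WARMUP_STEPS:  # linear warmup
        return PEAK_LR * step / WARMUP_STEPS
    # cosine decay from PEAK_LR to 0 over the remaining steps
    progress = (step - WARMUP_STEPS) / (TOTAL_STEPS - WARMUP_STEPS)
    return 0.5 * PEAK_LR * (1.0 + math.cos(math.pi * progress))

print(f"{lr_at(1680):.6e}")  # ~5.868241e-05, matching the value logged at epoch 3.0
print(lr_at(3360))           # 0.0, matching the final logged step
```

Note that epoch boundaries land every 560 steps (3360 steps over 6 epochs), which is why the evaluation-and-checkpoint blocks in this log appear at steps 560, 1120, 1680, and so on.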
- "{'loss': 0.0289, 'grad_norm': 0.5063753724098206, 'learning_rate': 1.230544455246101e-05, 'epoch': 4.77}\n", - "{'loss': 0.0457, 'grad_norm': 1.6972631216049194, 'learning_rate': 1.1966210947494583e-05, 'epoch': 4.78}\n", - "{'loss': 0.0228, 'grad_norm': 0.8949175477027893, 'learning_rate': 1.1631082235698316e-05, 'epoch': 4.8}\n", - "{'loss': 0.0345, 'grad_norm': 1.8337916135787964, 'learning_rate': 1.130009458668863e-05, 'epoch': 4.82}\n", - "{'loss': 0.0221, 'grad_norm': 2.356985569000244, 'learning_rate': 1.097328372314721e-05, 'epoch': 4.84}\n", - "{'loss': 0.0328, 'grad_norm': 2.9775609970092773, 'learning_rate': 1.0650684916965559e-05, 'epoch': 4.85}\n", - "{'loss': 0.0298, 'grad_norm': 2.2749829292297363, 'learning_rate': 1.0332332985438248e-05, 'epoch': 4.87}\n", - "{'loss': 0.0411, 'grad_norm': 1.9781012535095215, 'learning_rate': 1.0018262287505086e-05, 'epoch': 4.89}\n", - "{'loss': 0.0461, 'grad_norm': 1.8106870651245117, 'learning_rate': 9.708506720042932e-06, 'epoch': 4.91}\n", - "{'loss': 0.0354, 'grad_norm': 1.3991378545761108, 'learning_rate': 9.403099714207175e-06, 'epoch': 4.93}\n", - "{'loss': 0.0269, 'grad_norm': 0.6455625891685486, 'learning_rate': 9.102074231823727e-06, 'epoch': 4.94}\n", - "{'loss': 0.0339, 'grad_norm': 1.2710880041122437, 'learning_rate': 8.805462761831418e-06, 'epoch': 4.96}\n", - "{'loss': 0.0334, 'grad_norm': 1.1816545724868774, 'learning_rate': 8.513297316775625e-06, 'epoch': 4.98}\n", - "{'loss': 0.0301, 'grad_norm': 1.668415904045105, 'learning_rate': 8.225609429353187e-06, 'epoch': 5.0}\n", - " 83%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ– | 2800/3360 [5:16:56<1:03:45, 6.83s/it][INFO|trainer.py:3788] 2024-07-04 20:43:18,672 >> \n", - "***** Running Evaluation *****\n", - "[INFO|trainer.py:3790] 2024-07-04 20:43:18,672 >> Num examples = 46\n", - "[INFO|trainer.py:3793] 2024-07-04 20:43:18,673 >> Batch size = 1\n", - "\n", - " 0%| | 0/46 [00:00> Saving model checkpoint to saves/qwen2-7b/lora/sft/checkpoint-2800\n", - "[INFO|configuration_utils.py:733] 2024-07-04 20:43:32,430 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-7b-instruct-bnb-4bit/snapshots/8d8ce83e5c9fc23482eeae78027d1fc87bc2edad/config.json\n", - "[INFO|configuration_utils.py:800] 2024-07-04 20:43:32,431 >> Model config Qwen2Config {\n", - " \"_name_or_path\": \"Qwen/Qwen2-7B-Instruct\",\n", - " \"architectures\": [\n", - " \"Qwen2ForCausalLM\"\n", - " ],\n", - " \"attention_dropout\": 0.0,\n", - " \"bos_token_id\": 151643,\n", - " \"eos_token_id\": 151645,\n", - " \"hidden_act\": \"silu\",\n", - " \"hidden_size\": 3584,\n", - " \"initializer_range\": 0.02,\n", - " \"intermediate_size\": 18944,\n", - " \"max_position_embeddings\": 32768,\n", - " \"max_window_layers\": 28,\n", - " \"model_type\": \"qwen2\",\n", - " \"num_attention_heads\": 28,\n", - " \"num_hidden_layers\": 28,\n", - " \"num_key_value_heads\": 4,\n", - " \"quantization_config\": {\n", - " \"_load_in_4bit\": true,\n", - " \"_load_in_8bit\": false,\n", - " \"bnb_4bit_compute_dtype\": \"bfloat16\",\n", - " \"bnb_4bit_quant_storage\": \"uint8\",\n", - " \"bnb_4bit_quant_type\": \"nf4\",\n", - " \"bnb_4bit_use_double_quant\": true,\n", - " \"llm_int8_enable_fp32_cpu_offload\": false,\n", - " \"llm_int8_has_fp16_weight\": false,\n", - " \"llm_int8_skip_modules\": null,\n", - " \"llm_int8_threshold\": 6.0,\n", - " \"load_in_4bit\": true,\n", - " \"load_in_8bit\": false,\n", - " \"quant_method\": 
\"bitsandbytes\"\n", - " },\n", - " \"rms_norm_eps\": 1e-06,\n", - " \"rope_theta\": 1000000.0,\n", - " \"sliding_window\": 131072,\n", - " \"tie_word_embeddings\": false,\n", - " \"torch_dtype\": \"bfloat16\",\n", - " \"transformers_version\": \"4.42.3\",\n", - " \"use_cache\": true,\n", - " \"use_sliding_window\": false,\n", - " \"vocab_size\": 152064\n", - "}\n", - "\n", - "[INFO|tokenization_utils_base.py:2574] 2024-07-04 20:43:32,551 >> tokenizer config file saved in saves/qwen2-7b/lora/sft/checkpoint-2800/tokenizer_config.json\n", - "[INFO|tokenization_utils_base.py:2583] 2024-07-04 20:43:32,551 >> Special tokens file saved in saves/qwen2-7b/lora/sft/checkpoint-2800/special_tokens_map.json\n", - "{'loss': 0.0078, 'grad_norm': 1.6351463794708252, 'learning_rate': 7.942430149009161e-06, 'epoch': 5.02}\n", - "{'loss': 0.0062, 'grad_norm': 0.11965573579072952, 'learning_rate': 7.663790038585793e-06, 'epoch': 5.03}\n", - "{'loss': 0.0105, 'grad_norm': 0.05803072825074196, 'learning_rate': 7.389719171023857e-06, 'epoch': 5.05}\n", - "{'loss': 0.0058, 'grad_norm': 0.143271803855896, 'learning_rate': 7.1202471261170245e-06, 'epoch': 5.07}\n", - "{'loss': 0.0035, 'grad_norm': 0.17391343414783478, 'learning_rate': 6.855402987319348e-06, 'epoch': 5.09}\n", - "{'loss': 0.0072, 'grad_norm': 0.19679808616638184, 'learning_rate': 6.595215338606397e-06, 'epoch': 5.1}\n", - "{'loss': 0.0123, 'grad_norm': 0.09687481820583344, 'learning_rate': 6.339712261390213e-06, 'epoch': 5.12}\n", - "{'loss': 0.0057, 'grad_norm': 0.069660983979702, 'learning_rate': 6.088921331488568e-06, 'epoch': 5.14}\n", - "{'loss': 0.0072, 'grad_norm': 1.3626017570495605, 'learning_rate': 5.8428696161488215e-06, 'epoch': 5.16}\n", - "{'loss': 0.0047, 'grad_norm': 2.0419363975524902, 'learning_rate': 5.601583671126531e-06, 'epoch': 5.18}\n", - "{'loss': 0.0097, 'grad_norm': 0.2337513566017151, 'learning_rate': 5.365089537819434e-06, 'epoch': 5.19}\n", - "{'loss': 0.0042, 'grad_norm': 0.05815720558166504, 'learning_rate': 5.133412740456806e-06, 'epoch': 5.21}\n", - "{'loss': 0.008, 'grad_norm': 1.3515617847442627, 'learning_rate': 4.906578283344759e-06, 'epoch': 5.23}\n", - "{'loss': 0.0087, 'grad_norm': 0.37659117579460144, 'learning_rate': 4.684610648167503e-06, 'epoch': 5.25}\n", - "{'loss': 0.0031, 'grad_norm': 0.33385252952575684, 'learning_rate': 4.467533791345191e-06, 'epoch': 5.27}\n", - "{'loss': 0.0067, 'grad_norm': 0.15747712552547455, 'learning_rate': 4.255371141448272e-06, 'epoch': 5.28}\n", - "{'loss': 0.007, 'grad_norm': 1.2530337572097778, 'learning_rate': 4.048145596668967e-06, 'epoch': 5.3}\n", - "{'loss': 0.0136, 'grad_norm': 2.182263135910034, 'learning_rate': 3.84587952234991e-06, 'epoch': 5.32}\n", - "{'loss': 0.0035, 'grad_norm': 1.1545133590698242, 'learning_rate': 3.6485947485702832e-06, 'epoch': 5.34}\n", - "{'loss': 0.0061, 'grad_norm': 0.33282843232154846, 'learning_rate': 3.4563125677897932e-06, 'epoch': 5.35}\n", - "{'loss': 0.004, 'grad_norm': 0.2662621736526489, 'learning_rate': 3.269053732550581e-06, 'epoch': 5.37}\n", - "{'loss': 0.0071, 'grad_norm': 1.1687767505645752, 'learning_rate': 3.086838453237506e-06, 'epoch': 5.39}\n", - "{'loss': 0.0082, 'grad_norm': 0.12040398269891739, 'learning_rate': 2.9096863958968268e-06, 'epoch': 5.41}\n", - "{'loss': 0.0042, 'grad_norm': 0.22544123232364655, 'learning_rate': 2.737616680113758e-06, 'epoch': 5.43}\n", - "{'loss': 0.0056, 'grad_norm': 0.3548804521560669, 'learning_rate': 2.570647876948895e-06, 'epoch': 5.44}\n", - "{'loss': 0.0133, 'grad_norm': 
0.7295147180557251, 'learning_rate': 2.408798006933882e-06, 'epoch': 5.46}\n", - "{'loss': 0.0125, 'grad_norm': 0.05939454585313797, 'learning_rate': 2.252084538126542e-06, 'epoch': 5.48}\n", - "{'loss': 0.0064, 'grad_norm': 0.5182624459266663, 'learning_rate': 2.100524384225555e-06, 'epoch': 5.5}\n", - "{'loss': 0.0043, 'grad_norm': 0.13460208475589752, 'learning_rate': 1.9541339027450256e-06, 'epoch': 5.52}\n", - "{'loss': 0.0066, 'grad_norm': 0.8837604522705078, 'learning_rate': 1.8129288932490274e-06, 'epoch': 5.53}\n", - "{'loss': 0.0092, 'grad_norm': 0.332492858171463, 'learning_rate': 1.6769245956464396e-06, 'epoch': 5.55}\n", - "{'loss': 0.0048, 'grad_norm': 0.2933903634548187, 'learning_rate': 1.5461356885461075e-06, 'epoch': 5.57}\n", - "{'loss': 0.0054, 'grad_norm': 0.371267706155777, 'learning_rate': 1.4205762876726092e-06, 'epoch': 5.59}\n", - "{'loss': 0.0083, 'grad_norm': 0.14521144330501556, 'learning_rate': 1.3002599443428243e-06, 'epoch': 5.6}\n", - "{'loss': 0.0073, 'grad_norm': 1.345499038696289, 'learning_rate': 1.1851996440033319e-06, 'epoch': 5.62}\n", - "{'loss': 0.0064, 'grad_norm': 0.025303443893790245, 'learning_rate': 1.0754078048289374e-06, 'epoch': 5.64}\n", - "{'loss': 0.0049, 'grad_norm': 1.9373172521591187, 'learning_rate': 9.708962763824048e-07, 'epoch': 5.66}\n", - "{'loss': 0.0063, 'grad_norm': 0.6459546685218811, 'learning_rate': 8.716763383355864e-07, 'epoch': 5.68}\n", - "{'loss': 0.005, 'grad_norm': 1.4349000453948975, 'learning_rate': 7.777586992519959e-07, 'epoch': 5.69}\n", - "{'loss': 0.0103, 'grad_norm': 0.5553787350654602, 'learning_rate': 6.891534954310885e-07, 'epoch': 5.71}\n", - "{'loss': 0.0054, 'grad_norm': 0.19051159918308258, 'learning_rate': 6.058702898142643e-07, 'epoch': 5.73}\n", - "{'loss': 0.0059, 'grad_norm': 0.36273324489593506, 'learning_rate': 5.279180709527765e-07, 'epoch': 5.75}\n", - "{'loss': 0.0084, 'grad_norm': 0.4064849019050598, 'learning_rate': 4.553052520375911e-07, 'epoch': 5.77}\n", - "{'loss': 0.0033, 'grad_norm': 0.2132396250963211, 'learning_rate': 3.8803966999139684e-07, 'epoch': 5.78}\n", - "{'loss': 0.0176, 'grad_norm': 2.6782572269439697, 'learning_rate': 3.261285846227868e-07, 'epoch': 5.8}\n", - "{'loss': 0.0064, 'grad_norm': 0.27686187624931335, 'learning_rate': 2.6957867784270787e-07, 'epoch': 5.82}\n", - "{'loss': 0.0041, 'grad_norm': 0.86066734790802, 'learning_rate': 2.1839605294330933e-07, 'epoch': 5.84}\n", - "{'loss': 0.0082, 'grad_norm': 0.16934335231781006, 'learning_rate': 1.725862339392259e-07, 'epoch': 5.85}\n", - "{'loss': 0.0047, 'grad_norm': 0.6522320508956909, 'learning_rate': 1.3215416497138754e-07, 'epoch': 5.87}\n", - "{'loss': 0.0063, 'grad_norm': 0.5966488718986511, 'learning_rate': 9.710420977340762e-08, 'epoch': 5.89}\n", - "{'loss': 0.0038, 'grad_norm': 0.1901843547821045, 'learning_rate': 6.744015120061509e-08, 'epoch': 5.91}\n", - "{'loss': 0.0123, 'grad_norm': 2.4536399841308594, 'learning_rate': 4.316519082179227e-08, 'epoch': 5.93}\n", - "{'loss': 0.0048, 'grad_norm': 0.5865656733512878, 'learning_rate': 2.4281948573617874e-08, 'epoch': 5.94}\n", - "{'loss': 0.006, 'grad_norm': 0.9566450715065002, 'learning_rate': 1.0792462477909882e-08, 'epoch': 5.96}\n", - "{'loss': 0.0043, 'grad_norm': 1.3847167491912842, 'learning_rate': 2.6981884216847884e-09, 'epoch': 5.98}\n", - "{'loss': 0.0049, 'grad_norm': 1.5407752990722656, 'learning_rate': 0.0, 'epoch': 6.0}\n", - "100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 
3360/3360 [6:19:44<00:00, 6.09s/it][INFO|trainer.py:3788] 2024-07-04 21:46:06,786 >> \n", - "***** Running Evaluation *****\n", - "[INFO|trainer.py:3790] 2024-07-04 21:46:06,786 >> Num examples = 46\n", - "[INFO|trainer.py:3793] 2024-07-04 21:46:06,786 >> Batch size = 1\n", - "\n", - " 0%| | 0/46 [00:00> Saving model checkpoint to saves/qwen2-7b/lora/sft/checkpoint-3360\n", - "[INFO|configuration_utils.py:733] 2024-07-04 21:46:23,425 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-7b-instruct-bnb-4bit/snapshots/8d8ce83e5c9fc23482eeae78027d1fc87bc2edad/config.json\n", - "[INFO|configuration_utils.py:800] 2024-07-04 21:46:23,426 >> Model config Qwen2Config {\n", - " \"_name_or_path\": \"Qwen/Qwen2-7B-Instruct\",\n", - " \"architectures\": [\n", - " \"Qwen2ForCausalLM\"\n", - " ],\n", - " \"attention_dropout\": 0.0,\n", - " \"bos_token_id\": 151643,\n", - " \"eos_token_id\": 151645,\n", - " \"hidden_act\": \"silu\",\n", - " \"hidden_size\": 3584,\n", - " \"initializer_range\": 0.02,\n", - " \"intermediate_size\": 18944,\n", - " \"max_position_embeddings\": 32768,\n", - " \"max_window_layers\": 28,\n", - " \"model_type\": \"qwen2\",\n", - " \"num_attention_heads\": 28,\n", - " \"num_hidden_layers\": 28,\n", - " \"num_key_value_heads\": 4,\n", - " \"quantization_config\": {\n", - " \"_load_in_4bit\": true,\n", - " \"_load_in_8bit\": false,\n", - " \"bnb_4bit_compute_dtype\": \"bfloat16\",\n", - " \"bnb_4bit_quant_storage\": \"uint8\",\n", - " \"bnb_4bit_quant_type\": \"nf4\",\n", - " \"bnb_4bit_use_double_quant\": true,\n", - " \"llm_int8_enable_fp32_cpu_offload\": false,\n", - " \"llm_int8_has_fp16_weight\": false,\n", - " \"llm_int8_skip_modules\": null,\n", - " \"llm_int8_threshold\": 6.0,\n", - " \"load_in_4bit\": true,\n", - " \"load_in_8bit\": false,\n", - " \"quant_method\": \"bitsandbytes\"\n", - " },\n", - " \"rms_norm_eps\": 1e-06,\n", - " \"rope_theta\": 1000000.0,\n", - " \"sliding_window\": 131072,\n", - " \"tie_word_embeddings\": false,\n", - " \"torch_dtype\": \"bfloat16\",\n", - " \"transformers_version\": \"4.42.3\",\n", - " \"use_cache\": true,\n", - " \"use_sliding_window\": false,\n", - " \"vocab_size\": 152064\n", - "}\n", - "\n", - "[INFO|tokenization_utils_base.py:2574] 2024-07-04 21:46:23,565 >> tokenizer config file saved in saves/qwen2-7b/lora/sft/checkpoint-3360/tokenizer_config.json\n", - "[INFO|tokenization_utils_base.py:2583] 2024-07-04 21:46:23,565 >> Special tokens file saved in saves/qwen2-7b/lora/sft/checkpoint-3360/special_tokens_map.json\n", - "[INFO|:482] 2024-07-04 21:46:23,978 >> \n", - "\n", - "Training completed. 
Do not forget to share your model on huggingface.co/models =)\n", - "\n", - "\n", - "{'train_runtime': 22807.0531, 'train_samples_per_second': 1.179, 'train_steps_per_second': 0.147, 'train_loss': 0.5189488330479002, 'epoch': 6.0}\n", - "100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 3360/3360 [6:20:01<00:00, 6.79s/it]\n", - "[INFO|trainer.py:3478] 2024-07-04 21:46:23,983 >> Saving model checkpoint to saves/qwen2-7b/lora/sft\n", - "[INFO|configuration_utils.py:733] 2024-07-04 21:46:25,525 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-7b-instruct-bnb-4bit/snapshots/8d8ce83e5c9fc23482eeae78027d1fc87bc2edad/config.json\n", - "[INFO|configuration_utils.py:800] 2024-07-04 21:46:25,525 >> Model config Qwen2Config {\n", - " \"_name_or_path\": \"Qwen/Qwen2-7B-Instruct\",\n", - " \"architectures\": [\n", - " \"Qwen2ForCausalLM\"\n", - " ],\n", - " \"attention_dropout\": 0.0,\n", - " \"bos_token_id\": 151643,\n", - " \"eos_token_id\": 151645,\n", - " \"hidden_act\": \"silu\",\n", - " \"hidden_size\": 3584,\n", - " \"initializer_range\": 0.02,\n", - " \"intermediate_size\": 18944,\n", - " \"max_position_embeddings\": 32768,\n", - " \"max_window_layers\": 28,\n", - " \"model_type\": \"qwen2\",\n", - " \"num_attention_heads\": 28,\n", - " \"num_hidden_layers\": 28,\n", - " \"num_key_value_heads\": 4,\n", - " \"quantization_config\": {\n", - " \"_load_in_4bit\": true,\n", - " \"_load_in_8bit\": false,\n", - " \"bnb_4bit_compute_dtype\": \"bfloat16\",\n", - " \"bnb_4bit_quant_storage\": \"uint8\",\n", - " \"bnb_4bit_quant_type\": \"nf4\",\n", - " \"bnb_4bit_use_double_quant\": true,\n", - " \"llm_int8_enable_fp32_cpu_offload\": false,\n", - " \"llm_int8_has_fp16_weight\": false,\n", - " \"llm_int8_skip_modules\": null,\n", - " \"llm_int8_threshold\": 6.0,\n", - " \"load_in_4bit\": true,\n", - " \"load_in_8bit\": false,\n", - " \"quant_method\": \"bitsandbytes\"\n", - " },\n", - " \"rms_norm_eps\": 1e-06,\n", - " \"rope_theta\": 1000000.0,\n", - " \"sliding_window\": 131072,\n", - " \"tie_word_embeddings\": false,\n", - " \"torch_dtype\": \"bfloat16\",\n", - " \"transformers_version\": \"4.42.3\",\n", - " \"use_cache\": true,\n", - " \"use_sliding_window\": false,\n", - " \"vocab_size\": 152064\n", - "}\n", - "\n", - "[INFO|tokenization_utils_base.py:2574] 2024-07-04 21:46:25,650 >> tokenizer config file saved in saves/qwen2-7b/lora/sft/tokenizer_config.json\n", - "[INFO|tokenization_utils_base.py:2583] 2024-07-04 21:46:25,650 >> Special tokens file saved in saves/qwen2-7b/lora/sft/special_tokens_map.json\n", - "***** train metrics *****\n", - " epoch = 5.9973\n", - " total_flos = 89914948GF\n", - " train_loss = 0.5189\n", - " train_runtime = 6:20:07.05\n", - " train_samples_per_second = 1.179\n", - " train_steps_per_second = 0.147\n", - "Figure saved at: saves/qwen2-7b/lora/sft/training_loss.png\n", - "Figure saved at: saves/qwen2-7b/lora/sft/training_eval_loss.png\n", - "[INFO|trainer.py:3788] 2024-07-04 21:46:26,044 >> \n", - "***** Running Evaluation *****\n", - "[INFO|trainer.py:3790] 2024-07-04 21:46:26,044 >> Num examples = 46\n", - "[INFO|trainer.py:3793] 2024-07-04 21:46:26,045 >> Batch size = 1\n", - "100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 46/46 [00:08<00:00, 5.41it/s]\n", - "***** eval metrics *****\n", - " epoch = 5.9973\n", - " eval_loss = 2.9878\n", - " 
eval_runtime = 0:00:08.78\n", - " eval_samples_per_second = 5.234\n", - " eval_steps_per_second = 5.234\n", - "[INFO|modelcard.py:449] 2024-07-04 21:46:34,837 >> Dropping the following result as it does not have all the necessary fields:\n", - "{'task': {'name': 'Causal Language Modeling', 'type': 'text-generation'}}\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: / 0.092 MB of 0.092 MB uploaded\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: Run history:\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: eval/loss โ–โ–‚โ–ƒโ–…โ–‡โ–ˆโ–ˆ\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: eval/runtime โ–ˆโ–‡โ–ˆโ–†โ–…โ–ˆโ–\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: eval/samples_per_second โ–โ–‚โ–โ–‚โ–ƒโ–โ–ˆ\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: eval/steps_per_second โ–โ–‚โ–โ–‚โ–ƒโ–โ–ˆ\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: train/epoch โ–โ–โ–โ–‚โ–‚โ–‚โ–‚โ–‚โ–‚โ–ƒโ–ƒโ–ƒโ–ƒโ–ƒโ–ƒโ–„โ–„โ–„โ–„โ–„โ–…โ–…โ–…โ–…โ–…โ–†โ–†โ–†โ–†โ–†โ–†โ–‡โ–‡โ–‡โ–‡โ–‡โ–‡โ–ˆโ–ˆโ–ˆ\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: train/global_step โ–โ–โ–โ–‚โ–‚โ–‚โ–‚โ–‚โ–‚โ–ƒโ–ƒโ–ƒโ–ƒโ–ƒโ–ƒโ–„โ–„โ–„โ–„โ–„โ–…โ–…โ–…โ–…โ–…โ–†โ–†โ–†โ–†โ–†โ–†โ–‡โ–‡โ–‡โ–‡โ–‡โ–‡โ–ˆโ–ˆโ–ˆ\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: train/grad_norm โ–ƒโ–‚โ–ƒโ–ƒโ–ƒโ–„โ–„โ–„โ–†โ–†โ–„โ–…โ–…โ–†โ–…โ–†โ–…โ–…โ–…โ–‡โ–„โ–†โ–‡โ–…โ–„โ–„โ–„โ–ˆโ–ƒโ–‚โ–„โ–„โ–ƒโ–โ–โ–โ–‚โ–โ–โ–ƒ\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: train/learning_rate โ–‚โ–„โ–…โ–‡โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–‡โ–‡โ–‡โ–‡โ–‡โ–†โ–†โ–†โ–†โ–…โ–…โ–…โ–„โ–„โ–„โ–ƒโ–ƒโ–ƒโ–ƒโ–‚โ–‚โ–‚โ–‚โ–โ–โ–โ–โ–โ–โ–\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: train/loss โ–ˆโ–ˆโ–ˆโ–‡โ–‡โ–‡โ–‡โ–…โ–…โ–…โ–…โ–…โ–…โ–…โ–ƒโ–ƒโ–ƒโ–ƒโ–‚โ–‚โ–โ–โ–‚โ–โ–โ–โ–‚โ–โ–โ–โ–โ–โ–โ–โ–โ–โ–โ–โ–โ–\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: \n", - "\u001b[34m\u001b[1mwandb\u001b[0m: Run summary:\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: eval/loss 2.9878\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: eval/runtime 8.7891\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: eval/samples_per_second 5.234\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: eval/steps_per_second 5.234\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: total_flos 9.654544053942682e+16\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: train/epoch 5.99732\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: train/global_step 3360\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: train/grad_norm 1.54078\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: train/learning_rate 0.0\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: train/loss 0.0049\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: train_loss 0.51895\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: train_runtime 22807.0531\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: train_samples_per_second 1.179\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: train_steps_per_second 0.147\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: \n", - "\u001b[34m\u001b[1mwandb\u001b[0m: ๐Ÿš€ View run \u001b[33mqwen2_7b_lora_sft\u001b[0m at: \u001b[34m\u001b[4mhttps://wandb.ai/inflaton-ai/huggingface/runs/o710838e\u001b[0m\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: โญ๏ธ View project at: \u001b[34m\u001b[4mhttps://wandb.ai/inflaton-ai/huggingface\u001b[0m\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: Synced 6 W&B file(s), 0 media file(s), 1 artifact file(s) and 0 other file(s)\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: Find logs at: \u001b[35m\u001b[1m./wandb/run-20240704_152618-o710838e/logs\u001b[0m\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m The new W&B backend becomes opt-out in version 0.18.0; try it out with `wandb.require(\"core\")`! 
See https://wandb.me/wandb-core for more information.\n", - "CPU times: user 23min 50s, sys: 8min 47s, total: 32min 37s\n", - "Wall time: 6h 56min 32s\n" - ] - } - ], - "source": [ - "%%time\n", - "\n", - "!./scripts/tune-lf.sh config/qwen2_7b_lora_sft_unsloth.yaml" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Current Directory:\n", - "/home/inflaton/code/projects/courses/llm-finetuning/llama-factory\n", - "07/04/2024 21:56:42 - WARNING - llamafactory.hparams.parser - We recommend enable `upcast_layernorm` in quantized training.\n", - "07/04/2024 21:56:42 - INFO - llamafactory.hparams.parser - Process rank: 0, device: cuda:0, n_gpu: 1, distributed training: False, compute dtype: torch.bfloat16\n", - "[INFO|tokenization_utils_base.py:2161] 2024-07-04 21:56:42,789 >> loading file vocab.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/vocab.json\n", - "[INFO|tokenization_utils_base.py:2161] 2024-07-04 21:56:42,789 >> loading file merges.txt from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/merges.txt\n", - "[INFO|tokenization_utils_base.py:2161] 2024-07-04 21:56:42,789 >> loading file tokenizer.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/tokenizer.json\n", - "[INFO|tokenization_utils_base.py:2161] 2024-07-04 21:56:42,789 >> loading file added_tokens.json from cache at None\n", - "[INFO|tokenization_utils_base.py:2161] 2024-07-04 21:56:42,789 >> loading file special_tokens_map.json from cache at None\n", - "[INFO|tokenization_utils_base.py:2161] 2024-07-04 21:56:42,789 >> loading file tokenizer_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/tokenizer_config.json\n", - "[WARNING|logging.py:313] 2024-07-04 21:56:42,918 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n", - "07/04/2024 21:56:42 - INFO - llamafactory.data.template - Replace eos token: <|im_end|>\n", - "07/04/2024 21:56:42 - INFO - llamafactory.data.template - Add <|im_start|> to stop words.\n", - "07/04/2024 21:56:42 - INFO - llamafactory.data.loader - Loading dataset alpaca_mac.json...\n", - "Converting format of dataset (num_proc=16): 100%|โ–ˆ| 4528/4528 [00:00<00:00, 1521\n", - "Running tokenizer on dataset (num_proc=16): 100%|โ–ˆ| 4528/4528 [00:01<00:00, 2757\n", - "input_ids:\n", - "[151644, 872, 198, 5501, 14683, 279, 2701, 8453, 1467, 1119, 6364, 323, 3410, 1172, 279, 24531, 2213, 11, 4302, 770, 624, 35987, 102895, 99164, 100324, 100717, 100095, 99509, 1773, 151645, 198, 151644, 77091, 198, 17949, 358, 572, 2617, 553, 264, 38835, 44486, 13, 151645]\n", - "inputs:\n", - "<|im_start|>user\n", - "Please translate the following Chinese text into English and provide only the translated content, nothing else.\n", - "ๅ…จไป—็€็‹ไป™ๆญๆ•‘ใ€‚<|im_end|>\n", - "<|im_start|>assistant\n", - "Because I was protected by a fox fairy.<|im_end|>\n", - "label_ids:\n", - "[-100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 17949, 358, 
572, 2617, 553, 264, 38835, 44486, 13, 151645]\n", - "labels:\n", - "Because I was protected by a fox fairy.<|im_end|>\n", - "[INFO|configuration_utils.py:733] 2024-07-04 21:56:47,196 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/config.json\n", - "[INFO|configuration_utils.py:800] 2024-07-04 21:56:47,197 >> Model config Qwen2Config {\n", - " \"_name_or_path\": \"Qwen/Qwen2-1.5B-Instruct\",\n", - " \"architectures\": [\n", - " \"Qwen2ForCausalLM\"\n", - " ],\n", - " \"attention_dropout\": 0.0,\n", - " \"bos_token_id\": 151643,\n", - " \"eos_token_id\": 151645,\n", - " \"hidden_act\": \"silu\",\n", - " \"hidden_size\": 1536,\n", - " \"initializer_range\": 0.02,\n", - " \"intermediate_size\": 8960,\n", - " \"max_position_embeddings\": 32768,\n", - " \"max_window_layers\": 28,\n", - " \"model_type\": \"qwen2\",\n", - " \"num_attention_heads\": 12,\n", - " \"num_hidden_layers\": 28,\n", - " \"num_key_value_heads\": 2,\n", - " \"rms_norm_eps\": 1e-06,\n", - " \"rope_theta\": 1000000.0,\n", - " \"sliding_window\": 32768,\n", - " \"tie_word_embeddings\": true,\n", - " \"torch_dtype\": \"bfloat16\",\n", - " \"transformers_version\": \"4.42.3\",\n", - " \"use_cache\": true,\n", - " \"use_sliding_window\": false,\n", - " \"vocab_size\": 151936\n", - "}\n", - "\n", - "07/04/2024 21:56:47 - INFO - llamafactory.model.model_utils.quantization - Quantizing model to 4 bit with bitsandbytes.\n", - "๐Ÿฆฅ Unsloth: Will patch your computer to enable 2x faster free finetuning.\n", - "[INFO|configuration_utils.py:733] 2024-07-04 21:56:48,123 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-1.5b-instruct-bnb-4bit/snapshots/9f10684b3a26fbf25e50921655353e2e3e599d70/config.json\n", - "[INFO|configuration_utils.py:800] 2024-07-04 21:56:48,123 >> Model config Qwen2Config {\n", - " \"_name_or_path\": \"unsloth/qwen2-1.5b-instruct-bnb-4bit\",\n", - " \"architectures\": [\n", - " \"Qwen2ForCausalLM\"\n", - " ],\n", - " \"attention_dropout\": 0.0,\n", - " \"bos_token_id\": 151643,\n", - " \"eos_token_id\": 151645,\n", - " \"hidden_act\": \"silu\",\n", - " \"hidden_size\": 1536,\n", - " \"initializer_range\": 0.02,\n", - " \"intermediate_size\": 8960,\n", - " \"max_position_embeddings\": 32768,\n", - " \"max_window_layers\": 28,\n", - " \"model_type\": \"qwen2\",\n", - " \"num_attention_heads\": 12,\n", - " \"num_hidden_layers\": 28,\n", - " \"num_key_value_heads\": 2,\n", - " \"quantization_config\": {\n", - " \"_load_in_4bit\": true,\n", - " \"_load_in_8bit\": false,\n", - " \"bnb_4bit_compute_dtype\": \"bfloat16\",\n", - " \"bnb_4bit_quant_storage\": \"uint8\",\n", - " \"bnb_4bit_quant_type\": \"nf4\",\n", - " \"bnb_4bit_use_double_quant\": true,\n", - " \"llm_int8_enable_fp32_cpu_offload\": false,\n", - " \"llm_int8_has_fp16_weight\": false,\n", - " \"llm_int8_skip_modules\": null,\n", - " \"llm_int8_threshold\": 6.0,\n", - " \"load_in_4bit\": true,\n", - " \"load_in_8bit\": false,\n", - " \"quant_method\": \"bitsandbytes\"\n", - " },\n", - " \"rms_norm_eps\": 1e-06,\n", - " \"rope_theta\": 1000000.0,\n", - " \"sliding_window\": 32768,\n", - " \"tie_word_embeddings\": true,\n", - " \"torch_dtype\": \"bfloat16\",\n", - " \"transformers_version\": \"4.42.3\",\n", - " \"use_cache\": true,\n", - " \"use_sliding_window\": false,\n", - " \"vocab_size\": 151936\n", - "}\n", - "\n", - "==((====))== Unsloth: Fast Qwen2 patching 
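The input_ids / label_ids dump a few lines up illustrates the standard SFT masking convention: every prompt token's label is replaced with -100, the index PyTorch's cross-entropy loss ignores, so the loss is computed only on the assistant response (here, the English translation plus <|im_end|>). A minimal sketch of that masking follows, using a shortened mix of ids taken from the dump; the helper is illustrative, not LLaMA-Factory's actual implementation:

```python
IGNORE_INDEX = -100  # label value ignored by torch.nn.CrossEntropyLoss

def build_labels(input_ids: list[int], prompt_len: int) -> list[int]:
    # Mask the prompt portion; keep the response tokens (including <|im_end|>)
    # as targets, exactly the pattern shown in the label_ids dump above.
    return [IGNORE_INDEX] * prompt_len + input_ids[prompt_len:]

# <|im_start|>, "user", "\n", "Because", " I", <|im_end|> (ids from the dump)
ids = [151644, 872, 198, 17949, 358, 151645]
print(build_labels(ids, prompt_len=3))  # [-100, -100, -100, 17949, 358, 151645]
```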
release 2024.6\n",
- " \\\\ /| GPU: NVIDIA GeForce RTX 4080 Laptop GPU. Max memory: 11.994 GB. Platform = Linux.\n",
- "O^O/ \\_/ \\ Pytorch: 2.3.0+cu121. CUDA = 8.9. CUDA Toolkit = 12.1.\n",
- "\\ / Bfloat16 = TRUE. Xformers = 0.0.26.post1. FA = False.\n",
- " \"-____-\" Free Apache license: http://github.com/unslothai/unsloth\n",
- "[INFO|configuration_utils.py:733] 2024-07-04 21:56:49,865 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-1.5b-instruct-bnb-4bit/snapshots/9f10684b3a26fbf25e50921655353e2e3e599d70/config.json\n",
- "[INFO|configuration_utils.py:800] 2024-07-04 21:56:49,865 >> Model config Qwen2Config { ... } [identical to the unsloth/qwen2-1.5b-instruct-bnb-4bit config dump above; omitted]\n",
- "[INFO|configuration_utils.py:733] 2024-07-04 21:56:50,495 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-1.5b-instruct-bnb-4bit/snapshots/9f10684b3a26fbf25e50921655353e2e3e599d70/config.json\n",
- "[INFO|configuration_utils.py:800] 2024-07-04 21:56:50,496 >> Model config Qwen2Config { ... } [identical to the dump above; omitted]\n",
- "[INFO|modeling_utils.py:3556] 2024-07-04 21:56:50,707 >> loading weights file model.safetensors from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-1.5b-instruct-bnb-4bit/snapshots/9f10684b3a26fbf25e50921655353e2e3e599d70/model.safetensors\n",
- "[INFO|modeling_utils.py:1531] 2024-07-04 21:56:56,626 >> Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16.\n",
- "[INFO|configuration_utils.py:1000] 2024-07-04 21:56:56,631 >> Generate config GenerationConfig {\n",
- " \"bos_token_id\": 151643,\n",
- " \"eos_token_id\": 151645\n",
- "}\n",
- "\n",
- "[INFO|modeling_utils.py:4364] 2024-07-04 21:58:31,535 >> All model checkpoint weights were used when initializing Qwen2ForCausalLM.\n",
- "\n",
- "[INFO|modeling_utils.py:4372] 2024-07-04 21:58:31,535 >> All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at unsloth/qwen2-1.5b-instruct-bnb-4bit.\n",
- "If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training.\n",
- "[INFO|configuration_utils.py:955] 2024-07-04 21:58:32,073 >> loading configuration file generation_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-1.5b-instruct-bnb-4bit/snapshots/9f10684b3a26fbf25e50921655353e2e3e599d70/generation_config.json\n",
- "[INFO|configuration_utils.py:1000] 2024-07-04 21:58:32,073 >> Generate config GenerationConfig {\n",
- " \"bos_token_id\": 151643,\n",
- " \"do_sample\": true,\n",
- " \"eos_token_id\": [\n",
- " 151645,\n",
- " 151643\n",
- " ],\n",
- " \"pad_token_id\": 151643,\n",
- " \"repetition_penalty\": 1.1,\n",
- " \"temperature\": 0.7,\n",
- " \"top_k\": 20,\n",
- " \"top_p\": 0.8\n",
- "}\n",
- "\n",
- "[INFO|tokenization_utils_base.py:2161] 2024-07-04 21:58:33,489 >> loading file vocab.json from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-1.5b-instruct-bnb-4bit/snapshots/9f10684b3a26fbf25e50921655353e2e3e599d70/vocab.json\n",
- "[INFO|tokenization_utils_base.py:2161] 2024-07-04 21:58:33,489 >> loading file merges.txt from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-1.5b-instruct-bnb-4bit/snapshots/9f10684b3a26fbf25e50921655353e2e3e599d70/merges.txt\n",
- "[INFO|tokenization_utils_base.py:2161] 2024-07-04 21:58:33,489 >> loading file added_tokens.json from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-1.5b-instruct-bnb-4bit/snapshots/9f10684b3a26fbf25e50921655353e2e3e599d70/added_tokens.json\n",
- "[INFO|tokenization_utils_base.py:2161] 2024-07-04 21:58:33,490 >> loading file special_tokens_map.json from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-1.5b-instruct-bnb-4bit/snapshots/9f10684b3a26fbf25e50921655353e2e3e599d70/special_tokens_map.json\n",
- "[INFO|tokenization_utils_base.py:2161] 2024-07-04 21:58:33,490 >> loading file tokenizer_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-1.5b-instruct-bnb-4bit/snapshots/9f10684b3a26fbf25e50921655353e2e3e599d70/tokenizer_config.json\n",
- "[INFO|tokenization_utils_base.py:2161] 2024-07-04 21:58:33,490 >> loading file tokenizer.json from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-1.5b-instruct-bnb-4bit/snapshots/9f10684b3a26fbf25e50921655353e2e3e599d70/tokenizer.json\n",
- "[WARNING|logging.py:313] 2024-07-04 21:58:33,937 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
- "[INFO|tokenization_utils_base.py:2161] 2024-07-04 21:58:34,912 >> [a second, identical round of tokenizer file loading and the repeated special-tokens warning omitted]\n",
- "07/04/2024 21:58:35 - INFO - llamafactory.model.model_utils.checkpointing - Gradient checkpointing enabled.\n",
- "07/04/2024 21:58:35 - INFO - llamafactory.model.adapter - Upcasting trainable params to float32.\n",
- "07/04/2024 21:58:35 - INFO - llamafactory.model.adapter - Fine-tuning method: LoRA\n",
- "07/04/2024 21:58:35 - INFO - llamafactory.model.model_utils.misc - Found linear modules: v_proj,k_proj,down_proj,gate_proj,q_proj,o_proj,up_proj\n",
- "[WARNING|logging.py:328] 2024-07-04 21:58:36,612 >> Unsloth 2024.6 patched 28 layers with 0 QKV layers, 28 O layers and 28 MLP layers.\n",
- "07/04/2024 21:58:37 - INFO - llamafactory.model.loader - trainable params: 9,232,384 || all params: 1,786,320,384 || trainable%: 0.5168\n",
- "[INFO|trainer.py:642] 2024-07-04 21:58:37,463 >> Using auto half precision backend\n",
- "07/04/2024 21:58:37 - WARNING - llamafactory.train.callbacks - Previous trainer log in this folder will be deleted.\n",
- "07/04/2024 21:58:37 - INFO - llamafactory.train.trainer_utils - Using LoRA+ optimizer with loraplus lr ratio 16.00.\n",
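The log above pins down everything about the setup except the LoRA rank: NF4 4-bit quantization through bitsandbytes, LoRA on all seven linear projection types in each of the 28 layers, and 9,232,384 trainable parameters out of 1,786,320,384 (0.5168%). The sketch below restates that configuration in plain transformers/peft terms purely as an illustration (LLaMA-Factory assembles this internally); the rank r=8 does not appear in the log and is inferred from the parameter count, as the check at the end shows.

# Illustrative sketch only -- not a cell from this notebook; r=8 is inferred, not logged.
import torch
from transformers import BitsAndBytesConfig
from peft import LoraConfig

bnb_config = BitsAndBytesConfig(            # mirrors the quantization_config dump above
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
)
lora_config = LoraConfig(                   # modules from the "Found linear modules" line
    r=8,                                    # inferred -- see the check below
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
                    "gate_proj", "up_proj", "down_proj"],
    task_type="CAUSAL_LM",
)

# LoRA adds r * (d_in + d_out) parameters per wrapped Linear. For Qwen2-1.5B
# (hidden 1536, intermediate 8960, 12 heads / 2 KV heads of dim 128, 28 layers):
d, inter, kv = 1536, 8960, 2 * 128
per_layer_per_r = 2 * d + (d + kv) + (d + kv) + 2 * d + 3 * (d + inter)  # q, k, v, o, gate/up/down
print(9_232_384 / (28 * per_layer_per_r))   # -> 8.0, hence rank 8
print(9_232_384 / 1_786_320_384 * 100)      # -> 0.5168..., the logged trainable%

- "[WARNING|:223] 2024-07-04 21:58:37,613 >> ==((====))== Unsloth - 2x faster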
free finetuning | Num GPUs = 1\n", - " \\\\ /| Num examples = 4,482 | Num Epochs = 6\n", - "O^O/ \\_/ \\ Batch size per device = 1 | Gradient Accumulation steps = 8\n", - "\\ / Total batch size = 8 | Total steps = 3,360\n", - " \"-____-\" Number of trainable parameters = 9,232,384\n", - "[INFO|integration_utils.py:750] 2024-07-04 21:58:38,026 >> Automatic Weights & Biases logging enabled, to disable set os.environ[\"WANDB_DISABLED\"] = \"true\"\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33minflaton-sg\u001b[0m (\u001b[33minflaton-ai\u001b[0m). Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: Tracking run with wandb version 0.17.4\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: Run data is saved locally in \u001b[35m\u001b[1m/home/inflaton/code/projects/courses/llm-finetuning/llama-factory/wandb/run-20240704_215839-4fbnqsea\u001b[0m\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: Run \u001b[1m`wandb offline`\u001b[0m to turn off syncing.\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: Syncing run \u001b[33mqwen2_1.5b_lora_sft\u001b[0m\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: โญ๏ธ View project at \u001b[34m\u001b[4mhttps://wandb.ai/inflaton-ai/huggingface\u001b[0m\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: ๐Ÿš€ View run at \u001b[34m\u001b[4mhttps://wandb.ai/inflaton-ai/huggingface/runs/4fbnqsea\u001b[0m\n", - "{'loss': 2.2167, 'grad_norm': 1.7105902433395386, 'learning_rate': 2.9761904761904763e-06, 'epoch': 0.02}\n", - "{'loss': 2.2613, 'grad_norm': 2.196908712387085, 'learning_rate': 5.9523809523809525e-06, 'epoch': 0.04}\n", - "{'loss': 2.0707, 'grad_norm': 0.9740070104598999, 'learning_rate': 8.92857142857143e-06, 'epoch': 0.05}\n", - "{'loss': 1.9514, 'grad_norm': 1.8389497995376587, 'learning_rate': 1.1904761904761905e-05, 'epoch': 0.07}\n", - "{'loss': 2.1174, 'grad_norm': 1.03471839427948, 'learning_rate': 1.4880952380952381e-05, 'epoch': 0.09}\n", - "{'loss': 1.8992, 'grad_norm': 1.198785662651062, 'learning_rate': 1.785714285714286e-05, 'epoch': 0.11}\n", - "{'loss': 2.0404, 'grad_norm': 1.114922285079956, 'learning_rate': 2.0833333333333336e-05, 'epoch': 0.12}\n", - "{'loss': 1.8447, 'grad_norm': 1.1239877939224243, 'learning_rate': 2.380952380952381e-05, 'epoch': 0.14}\n", - "{'loss': 1.9283, 'grad_norm': 1.5919139385223389, 'learning_rate': 2.6785714285714288e-05, 'epoch': 0.16}\n", - "{'loss': 1.9026, 'grad_norm': 0.998127818107605, 'learning_rate': 2.9761904761904762e-05, 'epoch': 0.18}\n", - "{'loss': 1.8932, 'grad_norm': 1.1430412530899048, 'learning_rate': 3.273809523809524e-05, 'epoch': 0.2}\n", - "{'loss': 1.8906, 'grad_norm': 1.0670546293258667, 'learning_rate': 3.571428571428572e-05, 'epoch': 0.21}\n", - "{'loss': 1.8343, 'grad_norm': 1.4356828927993774, 'learning_rate': 3.8690476190476195e-05, 'epoch': 0.23}\n", - "{'loss': 1.8725, 'grad_norm': 1.7043449878692627, 'learning_rate': 4.166666666666667e-05, 'epoch': 0.25}\n", - "{'loss': 1.7689, 'grad_norm': 1.09099543094635, 'learning_rate': 4.464285714285715e-05, 'epoch': 0.27}\n", - "{'loss': 1.7491, 'grad_norm': 0.9564052224159241, 'learning_rate': 4.761904761904762e-05, 'epoch': 0.29}\n", - "{'loss': 1.8979, 'grad_norm': 1.0268529653549194, 'learning_rate': 5.05952380952381e-05, 'epoch': 0.3}\n", - "{'loss': 1.695, 'grad_norm': 1.2585980892181396, 'learning_rate': 5.3571428571428575e-05, 'epoch': 0.32}\n", - "{'loss': 1.803, 'grad_norm': 2.146714448928833, 'learning_rate': 5.6547619047619046e-05, 'epoch': 0.34}\n", - "{'loss': 1.9439, 'grad_norm': 
1.163086175918579, 'learning_rate': 5.9523809523809524e-05, 'epoch': 0.36}\n", - "{'loss': 1.8679, 'grad_norm': 1.2789676189422607, 'learning_rate': 6.25e-05, 'epoch': 0.37}\n", - "{'loss': 1.7942, 'grad_norm': 1.5350133180618286, 'learning_rate': 6.547619047619048e-05, 'epoch': 0.39}\n", - "{'loss': 1.7723, 'grad_norm': 1.333762526512146, 'learning_rate': 6.845238095238096e-05, 'epoch': 0.41}\n", - "{'loss': 1.9781, 'grad_norm': 1.342468500137329, 'learning_rate': 7.142857142857143e-05, 'epoch': 0.43}\n", - "{'loss': 1.8381, 'grad_norm': 1.785408616065979, 'learning_rate': 7.440476190476191e-05, 'epoch': 0.45}\n", - "{'loss': 1.77, 'grad_norm': 1.5936214923858643, 'learning_rate': 7.738095238095239e-05, 'epoch': 0.46}\n", - "{'loss': 1.8368, 'grad_norm': 1.7655868530273438, 'learning_rate': 8.035714285714287e-05, 'epoch': 0.48}\n", - "{'loss': 1.838, 'grad_norm': 1.5333795547485352, 'learning_rate': 8.333333333333334e-05, 'epoch': 0.5}\n", - "{'loss': 1.6551, 'grad_norm': 1.4578733444213867, 'learning_rate': 8.630952380952382e-05, 'epoch': 0.52}\n", - "{'loss': 1.8046, 'grad_norm': 1.649754524230957, 'learning_rate': 8.92857142857143e-05, 'epoch': 0.54}\n", - "{'loss': 1.8364, 'grad_norm': 1.618801236152649, 'learning_rate': 9.226190476190478e-05, 'epoch': 0.55}\n", - "{'loss': 1.6749, 'grad_norm': 2.321563243865967, 'learning_rate': 9.523809523809524e-05, 'epoch': 0.57}\n", - "{'loss': 1.7095, 'grad_norm': 1.7713825702667236, 'learning_rate': 9.821428571428572e-05, 'epoch': 0.59}\n", - "{'loss': 1.7458, 'grad_norm': 2.338412046432495, 'learning_rate': 9.999956828659095e-05, 'epoch': 0.61}\n", - "{'loss': 1.7693, 'grad_norm': 2.676462173461914, 'learning_rate': 9.999471159635539e-05, 'epoch': 0.62}\n", - "{'loss': 1.702, 'grad_norm': 1.777328610420227, 'learning_rate': 9.998445910004082e-05, 'epoch': 0.64}\n", - "{'loss': 1.8997, 'grad_norm': 2.657947063446045, 'learning_rate': 9.996881190417393e-05, 'epoch': 0.66}\n", - "{'loss': 1.8264, 'grad_norm': 1.7980377674102783, 'learning_rate': 9.994777169751806e-05, 'epoch': 0.68}\n", - "{'loss': 1.5464, 'grad_norm': 1.6675528287887573, 'learning_rate': 9.992134075089084e-05, 'epoch': 0.7}\n", - "{'loss': 1.7621, 'grad_norm': 2.088773727416992, 'learning_rate': 9.988952191691925e-05, 'epoch': 0.71}\n", - "{'loss': 1.7907, 'grad_norm': 1.8195936679840088, 'learning_rate': 9.985231862973168e-05, 'epoch': 0.73}\n", - "{'loss': 1.8215, 'grad_norm': 1.8611762523651123, 'learning_rate': 9.980973490458728e-05, 'epoch': 0.75}\n", - "{'loss': 1.7694, 'grad_norm': 2.018522024154663, 'learning_rate': 9.976177533744261e-05, 'epoch': 0.77}\n", - "{'loss': 1.741, 'grad_norm': 2.393932342529297, 'learning_rate': 9.97084451044556e-05, 'epoch': 0.79}\n", - "{'loss': 1.6568, 'grad_norm': 1.8010462522506714, 'learning_rate': 9.964974996142698e-05, 'epoch': 0.8}\n", - "{'loss': 1.7109, 'grad_norm': 1.6937175989151, 'learning_rate': 9.958569624317893e-05, 'epoch': 0.82}\n", - "{'loss': 1.7973, 'grad_norm': 2.7904672622680664, 'learning_rate': 9.951629086287151e-05, 'epoch': 0.84}\n", - "{'loss': 1.7033, 'grad_norm': 1.759727954864502, 'learning_rate': 9.944154131125642e-05, 'epoch': 0.86}\n", - "{'loss': 1.7797, 'grad_norm': 1.7603638172149658, 'learning_rate': 9.936145565586871e-05, 'epoch': 0.87}\n", - "{'loss': 1.9387, 'grad_norm': 1.8501616716384888, 'learning_rate': 9.927604254015585e-05, 'epoch': 0.89}\n", - "{'loss': 1.8734, 'grad_norm': 1.8340226411819458, 'learning_rate': 9.918531118254507e-05, 'epoch': 0.91}\n", - "{'loss': 1.7725, 'grad_norm': 
2.32716703414917, 'learning_rate': 9.90892713754483e-05, 'epoch': 0.93}\n",
- "{'loss': 1.7641, 'grad_norm': 2.673140048980713, 'learning_rate': 9.898793348420536e-05, 'epoch': 0.95}\n",
- "{'loss': 1.7813, 'grad_norm': 1.9481444358825684, 'learning_rate': 9.888130844596524e-05, 'epoch': 0.96}\n",
- "{'loss': 1.7363, 'grad_norm': 2.068895101547241, 'learning_rate': 9.876940776850569e-05, 'epoch': 0.98}\n",
- "{'loss': 1.725, 'grad_norm': 1.8741337060928345, 'learning_rate': 9.865224352899119e-05, 'epoch': 1.0}\n",
- " 17%|██████▎ | 560/3360 [20:10<1:47:08, 2.30s/it][INFO|trainer.py:3788] 2024-07-04 22:18:54,222 >> \n",
- "***** Running Evaluation *****\n",
- "[INFO|trainer.py:3790] 2024-07-04 22:18:54,223 >> Num examples = 46\n",
- "[INFO|trainer.py:3793] 2024-07-04 22:18:54,223 >> Batch size = 1\n",
- "\n",
- " 0%| | 0/46 [00:00> Saving model checkpoint to saves/qwen2-1.5b/lora/sft/checkpoint-560\n",
- "[INFO|configuration_utils.py:733] 2024-07-04 22:18:59,836 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-1.5b-instruct-bnb-4bit/snapshots/9f10684b3a26fbf25e50921655353e2e3e599d70/config.json\n",
- "[INFO|configuration_utils.py:800] 2024-07-04 22:18:59,838 >> Model config Qwen2Config { ... } [identical to the unsloth/qwen2-1.5b-instruct-bnb-4bit dump above except \"_name_or_path\": \"Qwen/Qwen2-1.5B-Instruct\"; omitted]\n",
- "[INFO|tokenization_utils_base.py:2574] 2024-07-04 22:18:59,966 >> tokenizer config file saved in saves/qwen2-1.5b/lora/sft/checkpoint-560/tokenizer_config.json\n",
- "[INFO|tokenization_utils_base.py:2583] 2024-07-04 22:18:59,967 >> Special tokens file saved in saves/qwen2-1.5b/lora/sft/checkpoint-560/special_tokens_map.json\n",
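The evaluation plus checkpoint above lands at step 560, i.e. exactly once per epoch. A quick sketch of where those numbers come from (the floor division mirrors the Trainer's bookkeeping; the 46 eval examples are evidently held out of the training data, 4,482 + 46 = 4,528 rows):

# Sanity check (not a cell from the original run): step counts in the banner above.
examples, epochs = 4482, 6          # "Num examples = 4,482 | Num Epochs = 6"
per_device_bs, grad_accum = 1, 8    # "Batch size per device = 1 | Gradient Accumulation steps = 8"
steps_per_epoch = examples // (per_device_bs * grad_accum)
print(steps_per_epoch)              # 560  -> the eval + checkpoint cadence seen here
print(steps_per_epoch * epochs)     # 3360 -> "Total steps = 3,360"

- "{'loss': 1.3163, 'grad_norm': 2.1074018478393555, 'learning_rate': 9.852982837266955e-05, 'epoch': 1.02}\n",
- "{'loss': 1.1057, 'grad_norm': 2.284273147583008, 'learning_rate': 9.840217551150706e-05, 'epoch': 1.04}\n",
- "{'loss': 1.3071, 'grad_norm': 1.8365180492401123, 'learning_rate': 9.826929872276255e-05, 'epoch': 1.05}\n",
- "{'loss': 1.2093, 'grad_norm': 2.285874843597412, 'learning_rate': 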
9.81312123475006e-05, 'epoch': 1.07}\n", - "{'loss': 1.1653, 'grad_norm': 2.244819402694702, 'learning_rate': 9.798793128904356e-05, 'epoch': 1.09}\n", - "{'loss': 1.276, 'grad_norm': 2.5032386779785156, 'learning_rate': 9.78394710113631e-05, 'epoch': 1.11}\n", - "{'loss': 1.2116, 'grad_norm': 2.2835264205932617, 'learning_rate': 9.768584753741134e-05, 'epoch': 1.12}\n", - "{'loss': 1.1953, 'grad_norm': 3.390573740005493, 'learning_rate': 9.752707744739145e-05, 'epoch': 1.14}\n", - "{'loss': 1.2537, 'grad_norm': 2.312870502471924, 'learning_rate': 9.736317787696816e-05, 'epoch': 1.16}\n", - "{'loss': 1.1042, 'grad_norm': 2.252488851547241, 'learning_rate': 9.719416651541839e-05, 'epoch': 1.18}\n", - "{'loss': 1.1397, 'grad_norm': 1.93602454662323, 'learning_rate': 9.702006160372209e-05, 'epoch': 1.2}\n", - "{'loss': 1.1822, 'grad_norm': 2.3258895874023438, 'learning_rate': 9.684088193259355e-05, 'epoch': 1.21}\n", - "{'loss': 1.1777, 'grad_norm': 2.362091064453125, 'learning_rate': 9.665664684045333e-05, 'epoch': 1.23}\n", - "{'loss': 1.2182, 'grad_norm': 2.36861515045166, 'learning_rate': 9.646737621134112e-05, 'epoch': 1.25}\n", - "{'loss': 1.181, 'grad_norm': 3.928402900695801, 'learning_rate': 9.627309047276974e-05, 'epoch': 1.27}\n", - "{'loss': 1.3375, 'grad_norm': 3.1305952072143555, 'learning_rate': 9.607381059352038e-05, 'epoch': 1.29}\n", - "{'loss': 1.2559, 'grad_norm': 2.16672682762146, 'learning_rate': 9.586955808137958e-05, 'epoch': 1.3}\n", - "{'loss': 1.26, 'grad_norm': 2.531378984451294, 'learning_rate': 9.566035498081784e-05, 'epoch': 1.32}\n", - "{'loss': 1.2656, 'grad_norm': 2.2649450302124023, 'learning_rate': 9.544622387061055e-05, 'epoch': 1.34}\n", - "{'loss': 1.0581, 'grad_norm': 2.7688372135162354, 'learning_rate': 9.522718786140097e-05, 'epoch': 1.36}\n", - "{'loss': 1.2188, 'grad_norm': 3.3669986724853516, 'learning_rate': 9.500327059320606e-05, 'epoch': 1.37}\n", - "{'loss': 1.1538, 'grad_norm': 3.8478970527648926, 'learning_rate': 9.477449623286505e-05, 'epoch': 1.39}\n", - "{'loss': 1.0648, 'grad_norm': 2.5197343826293945, 'learning_rate': 9.454088947143116e-05, 'epoch': 1.41}\n", - "{'loss': 1.2997, 'grad_norm': 3.149819850921631, 'learning_rate': 9.430247552150673e-05, 'epoch': 1.43}\n", - "{'loss': 1.1716, 'grad_norm': 2.626891851425171, 'learning_rate': 9.405928011452211e-05, 'epoch': 1.45}\n", - "{'loss': 1.2123, 'grad_norm': 2.029723644256592, 'learning_rate': 9.381132949795861e-05, 'epoch': 1.46}\n", - "{'loss': 1.3143, 'grad_norm': 2.6693994998931885, 'learning_rate': 9.35586504325155e-05, 'epoch': 1.48}\n", - "{'loss': 1.2098, 'grad_norm': 2.4133574962615967, 'learning_rate': 9.330127018922194e-05, 'epoch': 1.5}\n", - "{'loss': 1.1153, 'grad_norm': 2.2110259532928467, 'learning_rate': 9.303921654649362e-05, 'epoch': 1.52}\n", - "{'loss': 1.2865, 'grad_norm': 2.425077438354492, 'learning_rate': 9.277251778713474e-05, 'epoch': 1.54}\n", - "{'loss': 1.2322, 'grad_norm': 2.287026882171631, 'learning_rate': 9.250120269528546e-05, 'epoch': 1.55}\n", - "{'loss': 1.1479, 'grad_norm': 2.3768105506896973, 'learning_rate': 9.22253005533154e-05, 'epoch': 1.57}\n", - "{'loss': 1.2783, 'grad_norm': 3.2799324989318848, 'learning_rate': 9.194484113866313e-05, 'epoch': 1.59}\n", - "{'loss': 1.3401, 'grad_norm': 2.8332979679107666, 'learning_rate': 9.165985472062246e-05, 'epoch': 1.61}\n", - "{'loss': 1.0948, 'grad_norm': 2.450061321258545, 'learning_rate': 9.137037205707552e-05, 'epoch': 1.62}\n", - "{'loss': 1.1901, 'grad_norm': 2.617992401123047, 'learning_rate': 
9.107642439117321e-05, 'epoch': 1.64}\n", - "{'loss': 1.2412, 'grad_norm': 2.531679391860962, 'learning_rate': 9.077804344796302e-05, 'epoch': 1.66}\n", - "{'loss': 1.1875, 'grad_norm': 2.6147513389587402, 'learning_rate': 9.04752614309652e-05, 'epoch': 1.68}\n", - "{'loss': 1.1308, 'grad_norm': 3.1184866428375244, 'learning_rate': 9.01681110186971e-05, 'epoch': 1.7}\n", - "{'loss': 1.2466, 'grad_norm': 2.7524633407592773, 'learning_rate': 8.985662536114613e-05, 'epoch': 1.71}\n", - "{'loss': 1.1582, 'grad_norm': 2.410403251647949, 'learning_rate': 8.954083807619208e-05, 'epoch': 1.73}\n", - "{'loss': 1.2996, 'grad_norm': 3.132530927658081, 'learning_rate': 8.922078324597879e-05, 'epoch': 1.75}\n", - "{'loss': 1.3292, 'grad_norm': 3.093569755554199, 'learning_rate': 8.889649541323574e-05, 'epoch': 1.77}\n", - "{'loss': 1.2493, 'grad_norm': 2.8685665130615234, 'learning_rate': 8.856800957755e-05, 'epoch': 1.78}\n", - "{'loss': 1.2413, 'grad_norm': 3.4880857467651367, 'learning_rate': 8.823536119158864e-05, 'epoch': 1.8}\n", - "{'loss': 1.2145, 'grad_norm': 3.321408271789551, 'learning_rate': 8.789858615727265e-05, 'epoch': 1.82}\n", - "{'loss': 1.1431, 'grad_norm': 2.608922243118286, 'learning_rate': 8.755772082190194e-05, 'epoch': 1.84}\n", - "{'loss': 1.2395, 'grad_norm': 2.6196181774139404, 'learning_rate': 8.721280197423258e-05, 'epoch': 1.86}\n", - "{'loss': 1.0924, 'grad_norm': 3.0364978313446045, 'learning_rate': 8.68638668405062e-05, 'epoch': 1.87}\n", - "{'loss': 1.2218, 'grad_norm': 3.5102291107177734, 'learning_rate': 8.651095308043232e-05, 'epoch': 1.89}\n", - "{'loss': 1.2639, 'grad_norm': 4.278683662414551, 'learning_rate': 8.61540987831238e-05, 'epoch': 1.91}\n", - "{'loss': 1.2978, 'grad_norm': 3.729332208633423, 'learning_rate': 8.579334246298593e-05, 'epoch': 1.93}\n", - "{'loss': 1.1956, 'grad_norm': 3.6756839752197266, 'learning_rate': 8.542872305555978e-05, 'epoch': 1.95}\n", - "{'loss': 1.1345, 'grad_norm': 2.913640022277832, 'learning_rate': 8.50602799133199e-05, 'epoch': 1.96}\n", - "{'loss': 1.217, 'grad_norm': 2.75384259223938, 'learning_rate': 8.468805280142709e-05, 'epoch': 1.98}\n", - "{'loss': 1.2316, 'grad_norm': 3.1801509857177734, 'learning_rate': 8.43120818934367e-05, 'epoch': 2.0}\n", - " 33%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–Ž | 1120/3360 [41:10<1:24:27, 2.26s/it][INFO|trainer.py:3788] 2024-07-04 22:39:54,830 >> \n", - "***** Running Evaluation *****\n", - "[INFO|trainer.py:3790] 2024-07-04 22:39:54,830 >> Num examples = 46\n", - "[INFO|trainer.py:3793] 2024-07-04 22:39:54,830 >> Batch size = 1\n", - "\n", - " 0%| | 0/46 [00:00> Saving model checkpoint to saves/qwen2-1.5b/lora/sft/checkpoint-1120\n", - "[INFO|configuration_utils.py:733] 2024-07-04 22:39:59,689 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-1.5b-instruct-bnb-4bit/snapshots/9f10684b3a26fbf25e50921655353e2e3e599d70/config.json\n", - "[INFO|configuration_utils.py:800] 2024-07-04 22:39:59,690 >> Model config Qwen2Config {\n", - " \"_name_or_path\": \"Qwen/Qwen2-1.5B-Instruct\",\n", - " \"architectures\": [\n", - " \"Qwen2ForCausalLM\"\n", - " ],\n", - " \"attention_dropout\": 0.0,\n", - " \"bos_token_id\": 151643,\n", - " \"eos_token_id\": 151645,\n", - " \"hidden_act\": \"silu\",\n", - " \"hidden_size\": 1536,\n", - " \"initializer_range\": 0.02,\n", - " \"intermediate_size\": 8960,\n", - " \"max_position_embeddings\": 32768,\n", - " \"max_window_layers\": 28,\n", - " \"model_type\": \"qwen2\",\n", - " 
\"num_attention_heads\": 12,\n", - " \"num_hidden_layers\": 28,\n", - " \"num_key_value_heads\": 2,\n", - " \"quantization_config\": {\n", - " \"_load_in_4bit\": true,\n", - " \"_load_in_8bit\": false,\n", - " \"bnb_4bit_compute_dtype\": \"bfloat16\",\n", - " \"bnb_4bit_quant_storage\": \"uint8\",\n", - " \"bnb_4bit_quant_type\": \"nf4\",\n", - " \"bnb_4bit_use_double_quant\": true,\n", - " \"llm_int8_enable_fp32_cpu_offload\": false,\n", - " \"llm_int8_has_fp16_weight\": false,\n", - " \"llm_int8_skip_modules\": null,\n", - " \"llm_int8_threshold\": 6.0,\n", - " \"load_in_4bit\": true,\n", - " \"load_in_8bit\": false,\n", - " \"quant_method\": \"bitsandbytes\"\n", - " },\n", - " \"rms_norm_eps\": 1e-06,\n", - " \"rope_theta\": 1000000.0,\n", - " \"sliding_window\": 32768,\n", - " \"tie_word_embeddings\": true,\n", - " \"torch_dtype\": \"bfloat16\",\n", - " \"transformers_version\": \"4.42.3\",\n", - " \"use_cache\": true,\n", - " \"use_sliding_window\": false,\n", - " \"vocab_size\": 151936\n", - "}\n", - "\n", - "[INFO|tokenization_utils_base.py:2574] 2024-07-04 22:39:59,739 >> tokenizer config file saved in saves/qwen2-1.5b/lora/sft/checkpoint-1120/tokenizer_config.json\n", - "[INFO|tokenization_utils_base.py:2583] 2024-07-04 22:39:59,740 >> Special tokens file saved in saves/qwen2-1.5b/lora/sft/checkpoint-1120/special_tokens_map.json\n", - "{'loss': 0.6282, 'grad_norm': 2.980609893798828, 'learning_rate': 8.393240776696274e-05, 'epoch': 2.02}\n", - "{'loss': 0.516, 'grad_norm': 2.145615577697754, 'learning_rate': 8.354907139929851e-05, 'epoch': 2.03}\n", - "{'loss': 0.5637, 'grad_norm': 2.9827773571014404, 'learning_rate': 8.316211416299397e-05, 'epoch': 2.05}\n", - "{'loss': 0.5459, 'grad_norm': 6.992089748382568, 'learning_rate': 8.27715778213905e-05, 'epoch': 2.07}\n", - "{'loss': 0.4226, 'grad_norm': 2.0110838413238525, 'learning_rate': 8.237750452411353e-05, 'epoch': 2.09}\n", - "{'loss': 0.5595, 'grad_norm': 1.9566326141357422, 'learning_rate': 8.197993680252334e-05, 'epoch': 2.11}\n", - "{'loss': 0.6434, 'grad_norm': 3.0069830417633057, 'learning_rate': 8.157891756512488e-05, 'epoch': 2.12}\n", - "{'loss': 0.5572, 'grad_norm': 2.4360501766204834, 'learning_rate': 8.117449009293668e-05, 'epoch': 2.14}\n", - "{'loss': 0.5111, 'grad_norm': 3.1125354766845703, 'learning_rate': 8.076669803481965e-05, 'epoch': 2.16}\n", - "{'loss': 0.5006, 'grad_norm': 3.5583136081695557, 'learning_rate': 8.035558540276618e-05, 'epoch': 2.18}\n", - "{'loss': 0.5521, 'grad_norm': 2.597862482070923, 'learning_rate': 7.994119656715002e-05, 'epoch': 2.2}\n", - "{'loss': 0.6284, 'grad_norm': 3.2273318767547607, 'learning_rate': 7.952357625193749e-05, 'epoch': 2.21}\n", - "{'loss': 0.6074, 'grad_norm': 3.255476474761963, 'learning_rate': 7.91027695298606e-05, 'epoch': 2.23}\n", - "{'loss': 0.5721, 'grad_norm': 2.2420713901519775, 'learning_rate': 7.86788218175523e-05, 'epoch': 2.25}\n", - "{'loss': 0.5287, 'grad_norm': 3.241563558578491, 'learning_rate': 7.8251778870645e-05, 'epoch': 2.27}\n", - "{'loss': 0.5853, 'grad_norm': 3.7581467628479004, 'learning_rate': 7.782168677883206e-05, 'epoch': 2.28}\n", - "{'loss': 0.6142, 'grad_norm': 4.938629627227783, 'learning_rate': 7.738859196089358e-05, 'epoch': 2.3}\n", - "{'loss': 0.6064, 'grad_norm': 3.4490935802459717, 'learning_rate': 7.695254115968648e-05, 'epoch': 2.32}\n", - "{'loss': 0.6328, 'grad_norm': 3.473822832107544, 'learning_rate': 7.651358143709972e-05, 'epoch': 2.34}\n", - "{'loss': 0.6386, 'grad_norm': 3.5730648040771484, 'learning_rate': 
7.60717601689749e-05, 'epoch': 2.36}\n", - "{'loss': 0.5591, 'grad_norm': 3.024034023284912, 'learning_rate': 7.562712503999327e-05, 'epoch': 2.37}\n", - "{'loss': 0.7168, 'grad_norm': 3.799771547317505, 'learning_rate': 7.517972403852905e-05, 'epoch': 2.39}\n", - "{'loss': 0.5869, 'grad_norm': 3.3111960887908936, 'learning_rate': 7.472960545147038e-05, 'epoch': 2.41}\n", - "{'loss': 0.5025, 'grad_norm': 3.5263280868530273, 'learning_rate': 7.427681785900761e-05, 'epoch': 2.43}\n", - "{'loss': 0.5964, 'grad_norm': 3.572462797164917, 'learning_rate': 7.382141012939034e-05, 'epoch': 2.45}\n", - "{'loss': 0.5491, 'grad_norm': 3.038294792175293, 'learning_rate': 7.33634314136531e-05, 'epoch': 2.46}\n", - "{'loss': 0.6004, 'grad_norm': 7.641390800476074, 'learning_rate': 7.290293114031061e-05, 'epoch': 2.48}\n", - "{'loss': 0.6356, 'grad_norm': 3.8366777896881104, 'learning_rate': 7.243995901002312e-05, 'epoch': 2.5}\n", - "{'loss': 0.5982, 'grad_norm': 3.146303176879883, 'learning_rate': 7.197456499023225e-05, 'epoch': 2.52}\n", - "{'loss': 0.6127, 'grad_norm': 4.154056072235107, 'learning_rate': 7.150679930976825e-05, 'epoch': 2.53}\n", - "{'loss': 0.5952, 'grad_norm': 2.470127582550049, 'learning_rate': 7.103671245342887e-05, 'epoch': 2.55}\n", - "{'loss': 0.4994, 'grad_norm': 5.2111053466796875, 'learning_rate': 7.056435515653059e-05, 'epoch': 2.57}\n", - "{'loss': 0.6969, 'grad_norm': 3.3999710083007812, 'learning_rate': 7.008977839943299e-05, 'epoch': 2.59}\n", - "{'loss': 0.6066, 'grad_norm': 3.942821979522705, 'learning_rate': 6.961303340203653e-05, 'epoch': 2.61}\n", - "{'loss': 0.6744, 'grad_norm': 3.511596918106079, 'learning_rate': 6.91341716182545e-05, 'epoch': 2.62}\n", - "{'loss': 0.5972, 'grad_norm': 2.605888605117798, 'learning_rate': 6.86532447304597e-05, 'epoch': 2.64}\n", - "{'loss': 0.6058, 'grad_norm': 3.500854730606079, 'learning_rate': 6.817030464390656e-05, 'epoch': 2.66}\n", - "{'loss': 0.6422, 'grad_norm': 2.9531426429748535, 'learning_rate': 6.768540348112907e-05, 'epoch': 2.68}\n", - "{'loss': 0.5311, 'grad_norm': 4.346620559692383, 'learning_rate': 6.719859357631535e-05, 'epoch': 2.7}\n", - "{'loss': 0.4986, 'grad_norm': 4.6267900466918945, 'learning_rate': 6.670992746965938e-05, 'epoch': 2.71}\n", - "{'loss': 0.6576, 'grad_norm': 4.185196876525879, 'learning_rate': 6.621945790169036e-05, 'epoch': 2.73}\n", - "{'loss': 0.6437, 'grad_norm': 3.265991687774658, 'learning_rate': 6.572723780758069e-05, 'epoch': 2.75}\n", - "{'loss': 0.574, 'grad_norm': 4.036723613739014, 'learning_rate': 6.523332031143272e-05, 'epoch': 2.77}\n", - "{'loss': 0.5839, 'grad_norm': 3.2608094215393066, 'learning_rate': 6.473775872054521e-05, 'epoch': 2.78}\n", - "{'loss': 0.5044, 'grad_norm': 3.3588390350341797, 'learning_rate': 6.424060651966007e-05, 'epoch': 2.8}\n", - "{'loss': 0.5707, 'grad_norm': 3.363955020904541, 'learning_rate': 6.374191736518974e-05, 'epoch': 2.82}\n", - "{'loss': 0.5785, 'grad_norm': 3.4573071002960205, 'learning_rate': 6.324174507942637e-05, 'epoch': 2.84}\n", - "{'loss': 0.5755, 'grad_norm': 4.1820855140686035, 'learning_rate': 6.274014364473274e-05, 'epoch': 2.86}\n", - "{'loss': 0.7532, 'grad_norm': 2.9372756481170654, 'learning_rate': 6.22371671977162e-05, 'epoch': 2.87}\n", - "{'loss': 0.6447, 'grad_norm': 4.2755632400512695, 'learning_rate': 6.173287002338577e-05, 'epoch': 2.89}\n", - "{'loss': 0.6018, 'grad_norm': 4.274354934692383, 'learning_rate': 6.122730654929334e-05, 'epoch': 2.91}\n", - "{'loss': 0.5677, 'grad_norm': 4.0272393226623535, 
'learning_rate': 6.072053133965938e-05, 'epoch': 2.93}\n",
- "{'loss': 0.6344, 'grad_norm': 3.0991122722625732, 'learning_rate': 6.021259908948402e-05, 'epoch': 2.95}\n",
- "{'loss': 0.6559, 'grad_norm': 3.816575527191162, 'learning_rate': 5.970356461864391e-05, 'epoch': 2.96}\n",
- "{'loss': 0.5647, 'grad_norm': 3.187918186187744, 'learning_rate': 5.919348286597569e-05, 'epoch': 2.98}\n",
- "{'loss': 0.6381, 'grad_norm': 3.6101670265197754, 'learning_rate': 5.868240888334653e-05, 'epoch': 3.0}\n",
- " 50%|█████████████████▌ | 1680/3360 [1:12:00<2:09:10, 4.61s/it][INFO|trainer.py:3788] 2024-07-04 23:10:44,677 >> \n",
- "***** Running Evaluation *****\n",
- "[INFO|trainer.py:3790] 2024-07-04 23:10:44,677 >> Num examples = 46\n",
- "[INFO|trainer.py:3793] 2024-07-04 23:10:44,677 >> Batch size = 1\n",
- "\n",
- " 0%| | 0/46 [00:00> Saving model checkpoint to saves/qwen2-1.5b/lora/sft/checkpoint-1680\n",
- "[INFO|configuration_utils.py:733] 2024-07-04 23:10:52,385 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-1.5b-instruct-bnb-4bit/snapshots/9f10684b3a26fbf25e50921655353e2e3e599d70/config.json\n",
- "[INFO|configuration_utils.py:800] 2024-07-04 23:10:52,387 >> Model config Qwen2Config { ... } [same checkpoint config dump as at checkpoint-560; omitted]\n",
- "[INFO|tokenization_utils_base.py:2574] 2024-07-04 23:10:52,534 >> tokenizer config file saved in saves/qwen2-1.5b/lora/sft/checkpoint-1680/tokenizer_config.json\n",
- "[INFO|tokenization_utils_base.py:2583] 2024-07-04 23:10:52,535 >> Special tokens file saved in saves/qwen2-1.5b/lora/sft/checkpoint-1680/special_tokens_map.json\n",
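The 'learning_rate' column climbs linearly to its 1e-4 peak over the first 336 steps (a 10% warmup) and then traces a cosine curve down toward zero, reaching the 2.698e-09 value logged just before step 3,360. Neither the peak nor the warmup ratio is echoed in the log as a config value, so the sketch below reconstructs the schedule by inference from the logged numbers:

import math

# Inferred from the logged values, not read from the training config:
peak_lr, total_steps, warmup_steps = 1e-4, 3360, 336   # i.e. warmup_ratio = 0.1

def lr_at(step: int) -> float:
    if step < warmup_steps:
        return peak_lr * step / warmup_steps           # linear warmup
    progress = (step - warmup_steps) / (total_steps - warmup_steps)
    return 0.5 * peak_lr * (1.0 + math.cos(math.pi * progress))  # cosine decay

print(lr_at(200))    # ~5.95e-05, matching 'learning_rate' at epoch 0.36 (step 200)
print(lr_at(3350))   # ~2.70e-09, matching the 2.698e-09 logged just before the end

- "{'loss': 0.3795, 'grad_norm': 3.6095104217529297, 'learning_rate': 5.8170397829712485e-05, 'epoch': 3.02}\n",
- "{'loss': 0.1885, 'grad_norm': 2.648378610610962, 'learning_rate': 5.765750496516547e-05, 'epoch': 3.03}\n",
- "{'loss': 0.2431, 'grad_norm': 3.102599859237671, 'learning_rate': 5.714378564496901e-05, 'epoch': 3.05}\n",
- "{'loss': 0.2407, 'grad_norm': 1.7132669687271118, 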
'learning_rate': 5.6629295313583974e-05, 'epoch': 3.07}\n", - "{'loss': 0.1859, 'grad_norm': 2.363086462020874, 'learning_rate': 5.611408949868457e-05, 'epoch': 3.09}\n", - "{'loss': 0.2814, 'grad_norm': 2.7699074745178223, 'learning_rate': 5.559822380516539e-05, 'epoch': 3.11}\n", - "{'loss': 0.2066, 'grad_norm': 2.309485912322998, 'learning_rate': 5.5081753909140096e-05, 'epoch': 3.12}\n", - "{'loss': 0.2561, 'grad_norm': 3.8177757263183594, 'learning_rate': 5.456473555193242e-05, 'epoch': 3.14}\n", - "{'loss': 0.2839, 'grad_norm': 5.046483039855957, 'learning_rate': 5.404722453406017e-05, 'epoch': 3.16}\n", - "{'loss': 0.2309, 'grad_norm': 3.3046510219573975, 'learning_rate': 5.3529276709212816e-05, 'epoch': 3.18}\n", - "{'loss': 0.2678, 'grad_norm': 3.739877939224243, 'learning_rate': 5.30109479782233e-05, 'epoch': 3.2}\n", - "{'loss': 0.2305, 'grad_norm': 3.0891871452331543, 'learning_rate': 5.249229428303486e-05, 'epoch': 3.21}\n", - "{'loss': 0.3009, 'grad_norm': 2.0775339603424072, 'learning_rate': 5.197337160066331e-05, 'epoch': 3.23}\n", - "{'loss': 0.1974, 'grad_norm': 4.094172477722168, 'learning_rate': 5.145423593715557e-05, 'epoch': 3.25}\n", - "{'loss': 0.2613, 'grad_norm': 3.4857871532440186, 'learning_rate': 5.0934943321545115e-05, 'epoch': 3.27}\n", - "{'loss': 0.1759, 'grad_norm': 5.555017948150635, 'learning_rate': 5.041554979980486e-05, 'epoch': 3.28}\n", - "{'loss': 0.2755, 'grad_norm': 5.37070894241333, 'learning_rate': 4.9896111428798254e-05, 'epoch': 3.3}\n", - "{'loss': 0.3013, 'grad_norm': 3.0473411083221436, 'learning_rate': 4.9376684270229254e-05, 'epoch': 3.32}\n", - "{'loss': 0.2713, 'grad_norm': 2.421534299850464, 'learning_rate': 4.8857324384591653e-05, 'epoch': 3.34}\n", - "{'loss': 0.2342, 'grad_norm': 3.430769205093384, 'learning_rate': 4.8338087825118675e-05, 'epoch': 3.36}\n", - "{'loss': 0.2836, 'grad_norm': 3.117511510848999, 'learning_rate': 4.781903063173321e-05, 'epoch': 3.37}\n", - "{'loss': 0.2305, 'grad_norm': 2.2710249423980713, 'learning_rate': 4.730020882499964e-05, 'epoch': 3.39}\n", - "{'loss': 0.2707, 'grad_norm': 2.8062386512756348, 'learning_rate': 4.678167840007767e-05, 'epoch': 3.41}\n", - "{'loss': 0.2347, 'grad_norm': 3.199958324432373, 'learning_rate': 4.626349532067879e-05, 'epoch': 3.43}\n", - "{'loss': 0.2987, 'grad_norm': 2.9405529499053955, 'learning_rate': 4.574571551302647e-05, 'epoch': 3.44}\n", - "{'loss': 0.2748, 'grad_norm': 2.3248393535614014, 'learning_rate': 4.522839485981994e-05, 'epoch': 3.46}\n", - "{'loss': 0.2595, 'grad_norm': 2.7082927227020264, 'learning_rate': 4.471158919420312e-05, 'epoch': 3.48}\n", - "{'loss': 0.2452, 'grad_norm': 2.636992931365967, 'learning_rate': 4.4195354293738484e-05, 'epoch': 3.5}\n", - "{'loss': 0.2322, 'grad_norm': 2.870598554611206, 'learning_rate': 4.367974587438733e-05, 'epoch': 3.52}\n", - "{'loss': 0.2822, 'grad_norm': 2.3464884757995605, 'learning_rate': 4.316481958449634e-05, 'epoch': 3.53}\n", - "{'loss': 0.2228, 'grad_norm': 4.499746322631836, 'learning_rate': 4.2650630998791615e-05, 'epoch': 3.55}\n", - "{'loss': 0.2826, 'grad_norm': 3.5622456073760986, 'learning_rate': 4.213723561238074e-05, 'epoch': 3.57}\n", - "{'loss': 0.2505, 'grad_norm': 2.92927622795105, 'learning_rate': 4.162468883476319e-05, 'epoch': 3.59}\n", - "{'loss': 0.2715, 'grad_norm': 4.32992696762085, 'learning_rate': 4.111304598385018e-05, 'epoch': 3.61}\n", - "{'loss': 0.2382, 'grad_norm': 3.33722186088562, 'learning_rate': 4.060236227999441e-05, 'epoch': 3.62}\n", - "{'loss': 0.2219, 'grad_norm': 
3.15584135055542, 'learning_rate': 4.0092692840030134e-05, 'epoch': 3.64}\n", - "{'loss': 0.2593, 'grad_norm': 2.6653778553009033, 'learning_rate': 3.9584092671324606e-05, 'epoch': 3.66}\n", - "{'loss': 0.2825, 'grad_norm': 2.261251449584961, 'learning_rate': 3.907661666584131e-05, 'epoch': 3.68}\n", - "{'loss': 0.2472, 'grad_norm': 2.40474796295166, 'learning_rate': 3.857031959421553e-05, 'epoch': 3.69}\n", - "{'loss': 0.2667, 'grad_norm': 3.5820109844207764, 'learning_rate': 3.806525609984312e-05, 'epoch': 3.71}\n", - "{'loss': 0.2426, 'grad_norm': 4.061399459838867, 'learning_rate': 3.7561480692983006e-05, 'epoch': 3.73}\n", - "{'loss': 0.3113, 'grad_norm': 3.3326733112335205, 'learning_rate': 3.705904774487396e-05, 'epoch': 3.75}\n", - "{'loss': 0.2658, 'grad_norm': 3.946682929992676, 'learning_rate': 3.655801148186655e-05, 'epoch': 3.77}\n", - "{'loss': 0.2051, 'grad_norm': 1.952369213104248, 'learning_rate': 3.6058425979570485e-05, 'epoch': 3.78}\n", - "{'loss': 0.249, 'grad_norm': 3.139533519744873, 'learning_rate': 3.556034515701852e-05, 'epoch': 3.8}\n", - "{'loss': 0.2398, 'grad_norm': 2.4751150608062744, 'learning_rate': 3.506382277084696e-05, 'epoch': 3.82}\n", - "{'loss': 0.2659, 'grad_norm': 1.9120585918426514, 'learning_rate': 3.4568912409493945e-05, 'epoch': 3.84}\n", - "{'loss': 0.1847, 'grad_norm': 2.8865163326263428, 'learning_rate': 3.4075667487415785e-05, 'epoch': 3.86}\n", - "{'loss': 0.2245, 'grad_norm': 3.2274515628814697, 'learning_rate': 3.358414123932195e-05, 'epoch': 3.87}\n", - "{'loss': 0.2643, 'grad_norm': 2.924294948577881, 'learning_rate': 3.3094386714429724e-05, 'epoch': 3.89}\n", - "{'loss': 0.252, 'grad_norm': 3.187256336212158, 'learning_rate': 3.2606456770738636e-05, 'epoch': 3.91}\n", - "{'loss': 0.1969, 'grad_norm': 2.353398084640503, 'learning_rate': 3.212040406932569e-05, 'epoch': 3.93}\n", - "{'loss': 0.2, 'grad_norm': 2.357897996902466, 'learning_rate': 3.163628106866172e-05, 'epoch': 3.94}\n", - "{'loss': 0.2773, 'grad_norm': 3.165809392929077, 'learning_rate': 3.115414001894974e-05, 'epoch': 3.96}\n", - "{'loss': 0.2495, 'grad_norm': 3.546583414077759, 'learning_rate': 3.067403295648566e-05, 'epoch': 3.98}\n", - "{'loss': 0.2513, 'grad_norm': 3.0604918003082275, 'learning_rate': 3.019601169804216e-05, 'epoch': 4.0}\n", - " 67%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–Ž | 2240/3360 [1:48:51<1:17:16, 4.14s/it][INFO|trainer.py:3788] 2024-07-04 23:47:35,277 >> \n", - "***** Running Evaluation *****\n", - "[INFO|trainer.py:3790] 2024-07-04 23:47:35,278 >> Num examples = 46\n", - "[INFO|trainer.py:3793] 2024-07-04 23:47:35,278 >> Batch size = 1\n", - "\n", - " 0%| | 0/46 [00:00> Saving model checkpoint to saves/qwen2-1.5b/lora/sft/checkpoint-2240\n", - "[INFO|configuration_utils.py:733] 2024-07-04 23:47:44,213 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-1.5b-instruct-bnb-4bit/snapshots/9f10684b3a26fbf25e50921655353e2e3e599d70/config.json\n", - "[INFO|configuration_utils.py:800] 2024-07-04 23:47:44,213 >> Model config Qwen2Config {\n", - " \"_name_or_path\": \"Qwen/Qwen2-1.5B-Instruct\",\n", - " \"architectures\": [\n", - " \"Qwen2ForCausalLM\"\n", - " ],\n", - " \"attention_dropout\": 0.0,\n", - " \"bos_token_id\": 151643,\n", - " \"eos_token_id\": 151645,\n", - " \"hidden_act\": \"silu\",\n", - " \"hidden_size\": 1536,\n", - " \"initializer_range\": 0.02,\n", - " \"intermediate_size\": 8960,\n", - " \"max_position_embeddings\": 32768,\n", - " 
\"max_window_layers\": 28,\n", - " \"model_type\": \"qwen2\",\n", - " \"num_attention_heads\": 12,\n", - " \"num_hidden_layers\": 28,\n", - " \"num_key_value_heads\": 2,\n", - " \"quantization_config\": {\n", - " \"_load_in_4bit\": true,\n", - " \"_load_in_8bit\": false,\n", - " \"bnb_4bit_compute_dtype\": \"bfloat16\",\n", - " \"bnb_4bit_quant_storage\": \"uint8\",\n", - " \"bnb_4bit_quant_type\": \"nf4\",\n", - " \"bnb_4bit_use_double_quant\": true,\n", - " \"llm_int8_enable_fp32_cpu_offload\": false,\n", - " \"llm_int8_has_fp16_weight\": false,\n", - " \"llm_int8_skip_modules\": null,\n", - " \"llm_int8_threshold\": 6.0,\n", - " \"load_in_4bit\": true,\n", - " \"load_in_8bit\": false,\n", - " \"quant_method\": \"bitsandbytes\"\n", - " },\n", - " \"rms_norm_eps\": 1e-06,\n", - " \"rope_theta\": 1000000.0,\n", - " \"sliding_window\": 32768,\n", - " \"tie_word_embeddings\": true,\n", - " \"torch_dtype\": \"bfloat16\",\n", - " \"transformers_version\": \"4.42.3\",\n", - " \"use_cache\": true,\n", - " \"use_sliding_window\": false,\n", - " \"vocab_size\": 151936\n", - "}\n", - "\n", - "[INFO|tokenization_utils_base.py:2574] 2024-07-04 23:47:44,277 >> tokenizer config file saved in saves/qwen2-1.5b/lora/sft/checkpoint-2240/tokenizer_config.json\n", - "[INFO|tokenization_utils_base.py:2583] 2024-07-04 23:47:44,277 >> Special tokens file saved in saves/qwen2-1.5b/lora/sft/checkpoint-2240/special_tokens_map.json\n", - "{'loss': 0.1188, 'grad_norm': 1.1784201860427856, 'learning_rate': 2.9720127835276256e-05, 'epoch': 4.02}\n", - "{'loss': 0.0602, 'grad_norm': 1.9491609334945679, 'learning_rate': 2.9246432729161055e-05, 'epoch': 4.03}\n", - "{'loss': 0.1191, 'grad_norm': 8.893132209777832, 'learning_rate': 2.8774977504442647e-05, 'epoch': 4.05}\n", - "{'loss': 0.0814, 'grad_norm': 2.4567410945892334, 'learning_rate': 2.8305813044122097e-05, 'epoch': 4.07}\n", - "{'loss': 0.0717, 'grad_norm': 1.0190716981887817, 'learning_rate': 2.7838989983964065e-05, 'epoch': 4.09}\n", - "{'loss': 0.1036, 'grad_norm': 2.9603097438812256, 'learning_rate': 2.737455870703155e-05, 'epoch': 4.11}\n", - "{'loss': 0.0639, 'grad_norm': 0.43291687965393066, 'learning_rate': 2.6912569338248315e-05, 'epoch': 4.12}\n", - "{'loss': 0.1147, 'grad_norm': 16.320343017578125, 'learning_rate': 2.645307173898901e-05, 'epoch': 4.14}\n", - "{'loss': 0.083, 'grad_norm': 2.4415814876556396, 'learning_rate': 2.5996115501697694e-05, 'epoch': 4.16}\n", - "{'loss': 0.0894, 'grad_norm': 2.5519323348999023, 'learning_rate': 2.5541749944535554e-05, 'epoch': 4.18}\n", - "{'loss': 0.0634, 'grad_norm': 1.0128456354141235, 'learning_rate': 2.5090024106057962e-05, 'epoch': 4.19}\n", - "{'loss': 0.0978, 'grad_norm': 0.7708680629730225, 'learning_rate': 2.464098673992205e-05, 'epoch': 4.21}\n", - "{'loss': 0.0895, 'grad_norm': 2.129037618637085, 'learning_rate': 2.4194686309624663e-05, 'epoch': 4.23}\n", - "{'loss': 0.0986, 'grad_norm': 2.0388691425323486, 'learning_rate': 2.3751170983272e-05, 'epoch': 4.25}\n", - "{'loss': 0.1058, 'grad_norm': 2.9288082122802734, 'learning_rate': 2.3310488628380757e-05, 'epoch': 4.27}\n", - "{'loss': 0.1175, 'grad_norm': 4.13016414642334, 'learning_rate': 2.2872686806712035e-05, 'epoch': 4.28}\n", - "{'loss': 0.1101, 'grad_norm': 2.0640783309936523, 'learning_rate': 2.243781276913811e-05, 'epoch': 4.3}\n", - "{'loss': 0.0602, 'grad_norm': 2.8615546226501465, 'learning_rate': 2.200591345054267e-05, 'epoch': 4.32}\n", - "{'loss': 0.1019, 'grad_norm': 3.2558248043060303, 'learning_rate': 2.157703546475539e-05, 
'epoch': 4.34}\n", - "{'loss': 0.0819, 'grad_norm': 2.1427247524261475, 'learning_rate': 2.115122509952085e-05, 'epoch': 4.36}\n", - "{'loss': 0.0767, 'grad_norm': 7.249903202056885, 'learning_rate': 2.0728528311502976e-05, 'epoch': 4.37}\n", - "{'loss': 0.0823, 'grad_norm': 2.022773027420044, 'learning_rate': 2.0308990721324927e-05, 'epoch': 4.39}\n", - "{'loss': 0.0797, 'grad_norm': 3.4550766944885254, 'learning_rate': 1.989265760864542e-05, 'epoch': 4.41}\n", - "{'loss': 0.0927, 'grad_norm': 1.1615883111953735, 'learning_rate': 1.947957390727185e-05, 'epoch': 4.43}\n", - "{'loss': 0.0782, 'grad_norm': 3.103994607925415, 'learning_rate': 1.906978420031059e-05, 'epoch': 4.44}\n", - "{'loss': 0.0575, 'grad_norm': 1.6370556354522705, 'learning_rate': 1.8663332715355396e-05, 'epoch': 4.46}\n", - "{'loss': 0.1022, 'grad_norm': 1.106717824935913, 'learning_rate': 1.8260263319713844e-05, 'epoch': 4.48}\n", - "{'loss': 0.1071, 'grad_norm': 3.171022415161133, 'learning_rate': 1.7860619515673033e-05, 'epoch': 4.5}\n", - "{'loss': 0.1038, 'grad_norm': 1.9004364013671875, 'learning_rate': 1.746444443580433e-05, 'epoch': 4.52}\n", - "{'loss': 0.0836, 'grad_norm': 1.7966681718826294, 'learning_rate': 1.7071780838308288e-05, 'epoch': 4.53}\n", - "{'loss': 0.0773, 'grad_norm': 2.2593512535095215, 'learning_rate': 1.6682671102399805e-05, 'epoch': 4.55}\n", - "{'loss': 0.0671, 'grad_norm': 2.4209578037261963, 'learning_rate': 1.629715722373423e-05, 'epoch': 4.57}\n", - "{'loss': 0.0869, 'grad_norm': 3.6910362243652344, 'learning_rate': 1.5915280809874932e-05, 'epoch': 4.59}\n", - "{'loss': 0.0713, 'grad_norm': 2.8420000076293945, 'learning_rate': 1.553708307580265e-05, 'epoch': 4.61}\n", - "{'loss': 0.0886, 'grad_norm': 1.897133469581604, 'learning_rate': 1.5162604839467265e-05, 'epoch': 4.62}\n", - "{'loss': 0.0804, 'grad_norm': 2.0078957080841064, 'learning_rate': 1.4791886517382413e-05, 'epoch': 4.64}\n", - "{'loss': 0.0828, 'grad_norm': 2.6949617862701416, 'learning_rate': 1.4424968120263504e-05, 'epoch': 4.66}\n", - "{'loss': 0.0906, 'grad_norm': 2.1701433658599854, 'learning_rate': 1.4061889248709343e-05, 'epoch': 4.68}\n", - "{'loss': 0.0854, 'grad_norm': 2.741436004638672, 'learning_rate': 1.370268908892825e-05, 'epoch': 4.69}\n", - "{'loss': 0.0847, 'grad_norm': 1.9649664163589478, 'learning_rate': 1.3347406408508695e-05, 'epoch': 4.71}\n", - "{'loss': 0.1074, 'grad_norm': 2.995682716369629, 'learning_rate': 1.2996079552235263e-05, 'epoch': 4.73}\n", - "{'loss': 0.0675, 'grad_norm': 1.7899149656295776, 'learning_rate': 1.264874643795021e-05, 'epoch': 4.75}\n", - "{'loss': 0.0736, 'grad_norm': 3.165422201156616, 'learning_rate': 1.230544455246101e-05, 'epoch': 4.77}\n", - "{'loss': 0.0949, 'grad_norm': 3.376789093017578, 'learning_rate': 1.1966210947494583e-05, 'epoch': 4.78}\n", - "{'loss': 0.0774, 'grad_norm': 0.7393803000450134, 'learning_rate': 1.1631082235698316e-05, 'epoch': 4.8}\n", - "{'loss': 0.0685, 'grad_norm': 4.275796890258789, 'learning_rate': 1.130009458668863e-05, 'epoch': 4.82}\n", - "{'loss': 0.0642, 'grad_norm': 1.65122652053833, 'learning_rate': 1.097328372314721e-05, 'epoch': 4.84}\n", - "{'loss': 0.0855, 'grad_norm': 1.4425795078277588, 'learning_rate': 1.0650684916965559e-05, 'epoch': 4.85}\n", - "{'loss': 0.0883, 'grad_norm': 2.1447832584381104, 'learning_rate': 1.0332332985438248e-05, 'epoch': 4.87}\n", - "{'loss': 0.1137, 'grad_norm': 2.644052743911743, 'learning_rate': 1.0018262287505086e-05, 'epoch': 4.89}\n", - "{'loss': 0.1026, 'grad_norm': 0.3625916838645935, 
'learning_rate': 9.708506720042932e-06, 'epoch': 4.91}\n",
- "{'loss': 0.0708, 'grad_norm': 0.9670233130455017, 'learning_rate': 9.403099714207175e-06, 'epoch': 4.93}\n",
- "{'loss': 0.0886, 'grad_norm': 1.222226619720459, 'learning_rate': 9.102074231823727e-06, 'epoch': 4.94}\n",
- "{'loss': 0.0913, 'grad_norm': 1.5419262647628784, 'learning_rate': 8.805462761831418e-06, 'epoch': 4.96}\n",
- "{'loss': 0.105, 'grad_norm': 1.7759844064712524, 'learning_rate': 8.513297316775625e-06, 'epoch': 4.98}\n",
- "{'loss': 0.0818, 'grad_norm': 1.2991019487380981, 'learning_rate': 8.225609429353187e-06, 'epoch': 5.0}\n",
- " 83%|██████████████████████████████▊ | 2800/3360 [2:24:53<36:03, 3.86s/it][INFO|trainer.py:3788] 2024-07-05 00:23:37,381 >> \n",
- "***** Running Evaluation *****\n",
- "[INFO|trainer.py:3790] 2024-07-05 00:23:37,382 >> Num examples = 46\n",
- "[INFO|trainer.py:3793] 2024-07-05 00:23:37,382 >> Batch size = 1\n",
- "\n",
- " 0%| | 0/46 [00:00> Saving model checkpoint to saves/qwen2-1.5b/lora/sft/checkpoint-2800\n",
- "[INFO|configuration_utils.py:733] 2024-07-05 00:23:45,000 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-1.5b-instruct-bnb-4bit/snapshots/9f10684b3a26fbf25e50921655353e2e3e599d70/config.json\n",
- "[INFO|configuration_utils.py:800] 2024-07-05 00:23:45,001 >> Model config Qwen2Config { ... } [same checkpoint config dump as at checkpoint-560; omitted]\n",
- "[INFO|tokenization_utils_base.py:2574] 2024-07-05 00:23:45,087 >> tokenizer config file saved in saves/qwen2-1.5b/lora/sft/checkpoint-2800/tokenizer_config.json\n",
- "[INFO|tokenization_utils_base.py:2583] 2024-07-05 00:23:45,087 >> Special tokens file saved in saves/qwen2-1.5b/lora/sft/checkpoint-2800/special_tokens_map.json\n",
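Each checkpoint-* directory written above holds only the LoRA adapter and tokenizer files, not full model weights, so reloading one means re-attaching the adapter to the 4-bit base model. A rough sketch in standard transformers/peft terms (not a cell from this notebook):

# Illustrative reload of one of the adapter checkpoints saved above.
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

adapter_dir = "saves/qwen2-1.5b/lora/sft/checkpoint-2800"   # any saved checkpoint
base = AutoModelForCausalLM.from_pretrained(
    "unsloth/qwen2-1.5b-instruct-bnb-4bit", device_map="auto"
)
model = PeftModel.from_pretrained(base, adapter_dir)        # attach the LoRA weights
tokenizer = AutoTokenizer.from_pretrained(adapter_dir)

- "{'loss': 0.0391, 'grad_norm': 1.8985695838928223, 'learning_rate': 7.942430149009161e-06, 'epoch': 5.02}\n",
- "{'loss': 0.0262, 'grad_norm': 0.18104498088359833, 'learning_rate': 7.663790038585793e-06, 'epoch': 5.03}\n",
- "{'loss': 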
0.0369, 'grad_norm': 0.4857228696346283, 'learning_rate': 7.389719171023857e-06, 'epoch': 5.05}\n", - "{'loss': 0.0285, 'grad_norm': 0.5048622488975525, 'learning_rate': 7.1202471261170245e-06, 'epoch': 5.07}\n", - "{'loss': 0.0239, 'grad_norm': 1.3091479539871216, 'learning_rate': 6.855402987319348e-06, 'epoch': 5.09}\n", - "{'loss': 0.0315, 'grad_norm': 0.7383649945259094, 'learning_rate': 6.595215338606397e-06, 'epoch': 5.1}\n", - "{'loss': 0.0227, 'grad_norm': 0.46847808361053467, 'learning_rate': 6.339712261390213e-06, 'epoch': 5.12}\n", - "{'loss': 0.0286, 'grad_norm': 2.871511936187744, 'learning_rate': 6.088921331488568e-06, 'epoch': 5.14}\n", - "{'loss': 0.0215, 'grad_norm': 0.5253076553344727, 'learning_rate': 5.8428696161488215e-06, 'epoch': 5.16}\n", - "{'loss': 0.0212, 'grad_norm': 0.7373698949813843, 'learning_rate': 5.601583671126531e-06, 'epoch': 5.18}\n", - "{'loss': 0.0468, 'grad_norm': 1.2003121376037598, 'learning_rate': 5.365089537819434e-06, 'epoch': 5.19}\n", - "{'loss': 0.0269, 'grad_norm': 0.1384514421224594, 'learning_rate': 5.133412740456806e-06, 'epoch': 5.21}\n", - "{'loss': 0.016, 'grad_norm': 0.6597172617912292, 'learning_rate': 4.906578283344759e-06, 'epoch': 5.23}\n", - "{'loss': 0.0273, 'grad_norm': 1.3373147249221802, 'learning_rate': 4.684610648167503e-06, 'epoch': 5.25}\n", - "{'loss': 0.022, 'grad_norm': 1.9218050241470337, 'learning_rate': 4.467533791345191e-06, 'epoch': 5.27}\n", - "{'loss': 0.0266, 'grad_norm': 0.33371880650520325, 'learning_rate': 4.255371141448272e-06, 'epoch': 5.28}\n", - "{'loss': 0.0246, 'grad_norm': 0.3639131486415863, 'learning_rate': 4.048145596668967e-06, 'epoch': 5.3}\n", - "{'loss': 0.04, 'grad_norm': 0.7324997186660767, 'learning_rate': 3.84587952234991e-06, 'epoch': 5.32}\n", - "{'loss': 0.02, 'grad_norm': 1.7712045907974243, 'learning_rate': 3.6485947485702832e-06, 'epoch': 5.34}\n", - "{'loss': 0.0304, 'grad_norm': 1.001847267150879, 'learning_rate': 3.4563125677897932e-06, 'epoch': 5.35}\n", - "{'loss': 0.0251, 'grad_norm': 1.4244178533554077, 'learning_rate': 3.269053732550581e-06, 'epoch': 5.37}\n", - "{'loss': 0.0201, 'grad_norm': 0.938901960849762, 'learning_rate': 3.086838453237506e-06, 'epoch': 5.39}\n", - "{'loss': 0.017, 'grad_norm': 0.722439706325531, 'learning_rate': 2.9096863958968268e-06, 'epoch': 5.41}\n", - "{'loss': 0.0278, 'grad_norm': 0.9856802225112915, 'learning_rate': 2.737616680113758e-06, 'epoch': 5.43}\n", - "{'loss': 0.0275, 'grad_norm': 1.7459590435028076, 'learning_rate': 2.570647876948895e-06, 'epoch': 5.44}\n", - "{'loss': 0.0419, 'grad_norm': 15.734712600708008, 'learning_rate': 2.408798006933882e-06, 'epoch': 5.46}\n", - "{'loss': 0.0498, 'grad_norm': 0.5652347207069397, 'learning_rate': 2.252084538126542e-06, 'epoch': 5.48}\n", - "{'loss': 0.0281, 'grad_norm': 0.6292805075645447, 'learning_rate': 2.100524384225555e-06, 'epoch': 5.5}\n", - "{'loss': 0.025, 'grad_norm': 1.3762198686599731, 'learning_rate': 1.9541339027450256e-06, 'epoch': 5.52}\n", - "{'loss': 0.0228, 'grad_norm': 0.6231855154037476, 'learning_rate': 1.8129288932490274e-06, 'epoch': 5.53}\n", - "{'loss': 0.021, 'grad_norm': 0.2345045506954193, 'learning_rate': 1.6769245956464396e-06, 'epoch': 5.55}\n", - "{'loss': 0.0314, 'grad_norm': 0.8907411694526672, 'learning_rate': 1.5461356885461075e-06, 'epoch': 5.57}\n", - "{'loss': 0.0324, 'grad_norm': 0.8636724948883057, 'learning_rate': 1.4205762876726092e-06, 'epoch': 5.59}\n", - "{'loss': 0.0306, 'grad_norm': 1.4055633544921875, 'learning_rate': 1.3002599443428243e-06, 
'epoch': 5.6}\n", - "{'loss': 0.0276, 'grad_norm': 0.9670897722244263, 'learning_rate': 1.1851996440033319e-06, 'epoch': 5.62}\n", - "{'loss': 0.0328, 'grad_norm': 0.16922369599342346, 'learning_rate': 1.0754078048289374e-06, 'epoch': 5.64}\n", - "{'loss': 0.031, 'grad_norm': 1.8827847242355347, 'learning_rate': 9.708962763824048e-07, 'epoch': 5.66}\n", - "{'loss': 0.0214, 'grad_norm': 0.40066924691200256, 'learning_rate': 8.716763383355864e-07, 'epoch': 5.68}\n", - "{'loss': 0.0272, 'grad_norm': 0.28809547424316406, 'learning_rate': 7.777586992519959e-07, 'epoch': 5.69}\n", - "{'loss': 0.0253, 'grad_norm': 1.053158164024353, 'learning_rate': 6.891534954310885e-07, 'epoch': 5.71}\n", - "{'loss': 0.025, 'grad_norm': 0.2853540778160095, 'learning_rate': 6.058702898142643e-07, 'epoch': 5.73}\n", - "{'loss': 0.0354, 'grad_norm': 1.2035536766052246, 'learning_rate': 5.279180709527765e-07, 'epoch': 5.75}\n", - "{'loss': 0.0276, 'grad_norm': 0.9827560782432556, 'learning_rate': 4.553052520375911e-07, 'epoch': 5.77}\n", - "{'loss': 0.0209, 'grad_norm': 0.42196208238601685, 'learning_rate': 3.8803966999139684e-07, 'epoch': 5.78}\n", - "{'loss': 0.0265, 'grad_norm': 1.0920729637145996, 'learning_rate': 3.261285846227868e-07, 'epoch': 5.8}\n", - "{'loss': 0.0218, 'grad_norm': 0.4562773108482361, 'learning_rate': 2.6957867784270787e-07, 'epoch': 5.82}\n", - "{'loss': 0.0229, 'grad_norm': 1.235041618347168, 'learning_rate': 2.1839605294330933e-07, 'epoch': 5.84}\n", - "{'loss': 0.0371, 'grad_norm': 0.8272603154182434, 'learning_rate': 1.725862339392259e-07, 'epoch': 5.85}\n", - "{'loss': 0.0187, 'grad_norm': 0.5107071399688721, 'learning_rate': 1.3215416497138754e-07, 'epoch': 5.87}\n", - "{'loss': 0.0347, 'grad_norm': 1.0998457670211792, 'learning_rate': 9.710420977340762e-08, 'epoch': 5.89}\n", - "{'loss': 0.027, 'grad_norm': 1.8781795501708984, 'learning_rate': 6.744015120061509e-08, 'epoch': 5.91}\n", - "{'loss': 0.0351, 'grad_norm': 0.9750437140464783, 'learning_rate': 4.316519082179227e-08, 'epoch': 5.93}\n", - "{'loss': 0.0209, 'grad_norm': 1.2990669012069702, 'learning_rate': 2.4281948573617874e-08, 'epoch': 5.94}\n", - "{'loss': 0.0354, 'grad_norm': 1.9354966878890991, 'learning_rate': 1.0792462477909882e-08, 'epoch': 5.96}\n", - "{'loss': 0.0381, 'grad_norm': 1.044374704360962, 'learning_rate': 2.6981884216847884e-09, 'epoch': 5.98}\n", - "{'loss': 0.0228, 'grad_norm': 0.6751245856285095, 'learning_rate': 0.0, 'epoch': 6.0}\n", - "100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 3360/3360 [3:00:43<00:00, 3.75s/it][INFO|trainer.py:3788] 2024-07-05 00:59:27,574 >> \n", - "***** Running Evaluation *****\n", - "[INFO|trainer.py:3790] 2024-07-05 00:59:27,574 >> Num examples = 46\n", - "[INFO|trainer.py:3793] 2024-07-05 00:59:27,574 >> Batch size = 1\n", - "\n", - " 0%| | 0/46 [00:00> Saving model checkpoint to saves/qwen2-1.5b/lora/sft/checkpoint-3360\n", - "[INFO|configuration_utils.py:733] 2024-07-05 00:59:35,314 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-1.5b-instruct-bnb-4bit/snapshots/9f10684b3a26fbf25e50921655353e2e3e599d70/config.json\n", - "[INFO|configuration_utils.py:800] 2024-07-05 00:59:35,316 >> Model config Qwen2Config {\n", - " \"_name_or_path\": \"Qwen/Qwen2-1.5B-Instruct\",\n", - " \"architectures\": [\n", - " \"Qwen2ForCausalLM\"\n", - " ],\n", - " \"attention_dropout\": 0.0,\n", - " \"bos_token_id\": 151643,\n", - " \"eos_token_id\": 
151645,\n", - " \"hidden_act\": \"silu\",\n", - " \"hidden_size\": 1536,\n", - " \"initializer_range\": 0.02,\n", - " \"intermediate_size\": 8960,\n", - " \"max_position_embeddings\": 32768,\n", - " \"max_window_layers\": 28,\n", - " \"model_type\": \"qwen2\",\n", - " \"num_attention_heads\": 12,\n", - " \"num_hidden_layers\": 28,\n", - " \"num_key_value_heads\": 2,\n", - " \"quantization_config\": {\n", - " \"_load_in_4bit\": true,\n", - " \"_load_in_8bit\": false,\n", - " \"bnb_4bit_compute_dtype\": \"bfloat16\",\n", - " \"bnb_4bit_quant_storage\": \"uint8\",\n", - " \"bnb_4bit_quant_type\": \"nf4\",\n", - " \"bnb_4bit_use_double_quant\": true,\n", - " \"llm_int8_enable_fp32_cpu_offload\": false,\n", - " \"llm_int8_has_fp16_weight\": false,\n", - " \"llm_int8_skip_modules\": null,\n", - " \"llm_int8_threshold\": 6.0,\n", - " \"load_in_4bit\": true,\n", - " \"load_in_8bit\": false,\n", - " \"quant_method\": \"bitsandbytes\"\n", - " },\n", - " \"rms_norm_eps\": 1e-06,\n", - " \"rope_theta\": 1000000.0,\n", - " \"sliding_window\": 32768,\n", - " \"tie_word_embeddings\": true,\n", - " \"torch_dtype\": \"bfloat16\",\n", - " \"transformers_version\": \"4.42.3\",\n", - " \"use_cache\": true,\n", - " \"use_sliding_window\": false,\n", - " \"vocab_size\": 151936\n", - "}\n", - "\n", - "[INFO|tokenization_utils_base.py:2574] 2024-07-05 00:59:35,381 >> tokenizer config file saved in saves/qwen2-1.5b/lora/sft/checkpoint-3360/tokenizer_config.json\n", - "[INFO|tokenization_utils_base.py:2583] 2024-07-05 00:59:35,382 >> Special tokens file saved in saves/qwen2-1.5b/lora/sft/checkpoint-3360/special_tokens_map.json\n", - "[INFO|:482] 2024-07-05 00:59:35,695 >> \n", - "\n", - "Training completed. Do not forget to share your model on huggingface.co/models =)\n", - "\n", - "\n", - "{'train_runtime': 10857.6726, 'train_samples_per_second': 2.477, 'train_steps_per_second': 0.309, 'train_loss': 0.6667878782021858, 'epoch': 6.0}\n", - "100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 3360/3360 [3:00:51<00:00, 3.23s/it]\n", - "[INFO|trainer.py:3478] 2024-07-05 00:59:35,700 >> Saving model checkpoint to saves/qwen2-1.5b/lora/sft\n", - "[INFO|configuration_utils.py:733] 2024-07-05 00:59:36,890 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-1.5b-instruct-bnb-4bit/snapshots/9f10684b3a26fbf25e50921655353e2e3e599d70/config.json\n", - "[INFO|configuration_utils.py:800] 2024-07-05 00:59:36,891 >> Model config Qwen2Config {\n", - " \"_name_or_path\": \"Qwen/Qwen2-1.5B-Instruct\",\n", - " \"architectures\": [\n", - " \"Qwen2ForCausalLM\"\n", - " ],\n", - " \"attention_dropout\": 0.0,\n", - " \"bos_token_id\": 151643,\n", - " \"eos_token_id\": 151645,\n", - " \"hidden_act\": \"silu\",\n", - " \"hidden_size\": 1536,\n", - " \"initializer_range\": 0.02,\n", - " \"intermediate_size\": 8960,\n", - " \"max_position_embeddings\": 32768,\n", - " \"max_window_layers\": 28,\n", - " \"model_type\": \"qwen2\",\n", - " \"num_attention_heads\": 12,\n", - " \"num_hidden_layers\": 28,\n", - " \"num_key_value_heads\": 2,\n", - " \"quantization_config\": {\n", - " \"_load_in_4bit\": true,\n", - " \"_load_in_8bit\": false,\n", - " \"bnb_4bit_compute_dtype\": \"bfloat16\",\n", - " \"bnb_4bit_quant_storage\": \"uint8\",\n", - " \"bnb_4bit_quant_type\": \"nf4\",\n", - " \"bnb_4bit_use_double_quant\": true,\n", - " \"llm_int8_enable_fp32_cpu_offload\": false,\n", - " \"llm_int8_has_fp16_weight\": false,\n", 
- " \"llm_int8_skip_modules\": null,\n", - " \"llm_int8_threshold\": 6.0,\n", - " \"load_in_4bit\": true,\n", - " \"load_in_8bit\": false,\n", - " \"quant_method\": \"bitsandbytes\"\n", - " },\n", - " \"rms_norm_eps\": 1e-06,\n", - " \"rope_theta\": 1000000.0,\n", - " \"sliding_window\": 32768,\n", - " \"tie_word_embeddings\": true,\n", - " \"torch_dtype\": \"bfloat16\",\n", - " \"transformers_version\": \"4.42.3\",\n", - " \"use_cache\": true,\n", - " \"use_sliding_window\": false,\n", - " \"vocab_size\": 151936\n", - "}\n", - "\n", - "[INFO|tokenization_utils_base.py:2574] 2024-07-05 00:59:36,947 >> tokenizer config file saved in saves/qwen2-1.5b/lora/sft/tokenizer_config.json\n", - "[INFO|tokenization_utils_base.py:2583] 2024-07-05 00:59:36,947 >> Special tokens file saved in saves/qwen2-1.5b/lora/sft/special_tokens_map.json\n", - "***** train metrics *****\n", - " epoch = 5.9973\n", - " total_flos = 19692141GF\n", - " train_loss = 0.6668\n", - " train_runtime = 3:00:57.67\n", - " train_samples_per_second = 2.477\n", - " train_steps_per_second = 0.309\n", - "Figure saved at: saves/qwen2-1.5b/lora/sft/training_loss.png\n", - "Figure saved at: saves/qwen2-1.5b/lora/sft/training_eval_loss.png\n", - "[INFO|trainer.py:3788] 2024-07-05 00:59:37,341 >> \n", - "***** Running Evaluation *****\n", - "[INFO|trainer.py:3790] 2024-07-05 00:59:37,341 >> Num examples = 46\n", - "[INFO|trainer.py:3793] 2024-07-05 00:59:37,341 >> Batch size = 1\n", - "100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 46/46 [00:05<00:00, 7.96it/s]\n", - "***** eval metrics *****\n", - " epoch = 5.9973\n", - " eval_loss = 3.4014\n", - " eval_runtime = 0:00:05.94\n", - " eval_samples_per_second = 7.742\n", - " eval_steps_per_second = 7.742\n", - "[INFO|modelcard.py:449] 2024-07-05 00:59:43,285 >> Dropping the following result as it does not have all the necessary fields:\n", - "{'task': {'name': 'Causal Language Modeling', 'type': 'text-generation'}}\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: | 0.091 MB of 0.091 MB uploaded\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: Run history:\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: eval/loss โ–โ–โ–ƒโ–„โ–†โ–ˆโ–ˆ\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: eval/runtime โ–‚โ–โ–†โ–ˆโ–†โ–†โ–…\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: eval/samples_per_second โ–†โ–ˆโ–‚โ–โ–‚โ–‚โ–ƒ\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: eval/steps_per_second โ–†โ–ˆโ–‚โ–โ–‚โ–‚โ–ƒ\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: train/epoch โ–โ–โ–โ–‚โ–‚โ–‚โ–‚โ–‚โ–‚โ–ƒโ–ƒโ–ƒโ–ƒโ–ƒโ–ƒโ–„โ–„โ–„โ–„โ–„โ–…โ–…โ–…โ–…โ–…โ–†โ–†โ–†โ–†โ–†โ–†โ–‡โ–‡โ–‡โ–‡โ–‡โ–‡โ–ˆโ–ˆโ–ˆ\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: train/global_step โ–โ–โ–โ–‚โ–‚โ–‚โ–‚โ–‚โ–‚โ–ƒโ–ƒโ–ƒโ–ƒโ–ƒโ–ƒโ–„โ–„โ–„โ–„โ–„โ–…โ–…โ–…โ–…โ–…โ–†โ–†โ–†โ–†โ–†โ–†โ–‡โ–‡โ–‡โ–‡โ–‡โ–‡โ–ˆโ–ˆโ–ˆ\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: train/grad_norm โ–‚โ–โ–โ–‚โ–‚โ–‚โ–‚โ–‚โ–‚โ–‚โ–‚โ–‚โ–‚โ–‚โ–‚โ–ƒโ–ƒโ–‚โ–‚โ–ƒโ–‚โ–‚โ–‚โ–‚โ–‚โ–‚โ–‚โ–ˆโ–ƒโ–โ–‚โ–‚โ–โ–โ–โ–โ–โ–โ–โ–\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: train/learning_rate โ–‚โ–„โ–…โ–‡โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–‡โ–‡โ–‡โ–‡โ–‡โ–†โ–†โ–†โ–†โ–…โ–…โ–…โ–„โ–„โ–„โ–ƒโ–ƒโ–ƒโ–ƒโ–‚โ–‚โ–‚โ–‚โ–โ–โ–โ–โ–โ–โ–\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: train/loss โ–ˆโ–ˆโ–ˆโ–ˆโ–‡โ–‡โ–‡โ–…โ–†โ–†โ–…โ–…โ–…โ–…โ–ƒโ–ƒโ–ƒโ–ƒโ–ƒโ–ƒโ–‚โ–‚โ–‚โ–‚โ–‚โ–‚โ–‚โ–โ–โ–โ–โ–โ–โ–โ–โ–โ–โ–โ–โ–\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: \n", - "\u001b[34m\u001b[1mwandb\u001b[0m: Run summary:\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: eval/loss 3.40137\n", - 
"\u001b[34m\u001b[1mwandb\u001b[0m: eval/runtime 5.9413\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: eval/samples_per_second 7.742\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: eval/steps_per_second 7.742\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: total_flos 2.114427607798579e+16\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: train/epoch 5.99732\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: train/global_step 3360\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: train/grad_norm 0.67512\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: train/learning_rate 0.0\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: train/loss 0.0228\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: train_loss 0.66679\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: train_runtime 10857.6726\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: train_samples_per_second 2.477\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: train_steps_per_second 0.309\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: \n", - "\u001b[34m\u001b[1mwandb\u001b[0m: ๐Ÿš€ View run \u001b[33mqwen2_1.5b_lora_sft\u001b[0m at: \u001b[34m\u001b[4mhttps://wandb.ai/inflaton-ai/huggingface/runs/4fbnqsea\u001b[0m\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: โญ๏ธ View project at: \u001b[34m\u001b[4mhttps://wandb.ai/inflaton-ai/huggingface\u001b[0m\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: Synced 6 W&B file(s), 0 media file(s), 1 artifact file(s) and 0 other file(s)\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: Find logs at: \u001b[35m\u001b[1m./wandb/run-20240704_215839-4fbnqsea/logs\u001b[0m\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m The new W&B backend becomes opt-out in version 0.18.0; try it out with `wandb.require(\"core\")`! See https://wandb.me/wandb-core for more information.\n", - "CPU times: user 3min 32s, sys: 1min 10s, total: 4min 43s\n", - "Wall time: 3h 3min 14s\n" - ] - } - ], - "source": [ - "%%time\n", - "\n", - "!./scripts/tune-lf.sh config/qwen2_1.5b_lora_sft_unsloth.yaml" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Current Directory:\n", - "/home/inflaton/code/projects/courses/llm-finetuning/llama-factory\n", - "07/05/2024 06:15:40 - WARNING - llamafactory.hparams.parser - We recommend enable `upcast_layernorm` in quantized training.\n", - "07/05/2024 06:15:40 - INFO - llamafactory.hparams.parser - Process rank: 0, device: cuda:0, n_gpu: 1, distributed training: False, compute dtype: torch.bfloat16\n", - "[INFO|tokenization_utils_base.py:2161] 2024-07-05 06:15:40,695 >> loading file vocab.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/vocab.json\n", - "[INFO|tokenization_utils_base.py:2161] 2024-07-05 06:15:40,695 >> loading file merges.txt from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/merges.txt\n", - "[INFO|tokenization_utils_base.py:2161] 2024-07-05 06:15:40,695 >> loading file tokenizer.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/tokenizer.json\n", - "[INFO|tokenization_utils_base.py:2161] 2024-07-05 06:15:40,695 >> loading file added_tokens.json from cache at None\n", - "[INFO|tokenization_utils_base.py:2161] 2024-07-05 06:15:40,695 >> loading file special_tokens_map.json from cache at None\n", - "[INFO|tokenization_utils_base.py:2161] 2024-07-05 06:15:40,695 >> loading file 
tokenizer_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/tokenizer_config.json\n", - "[WARNING|logging.py:313] 2024-07-05 06:15:40,871 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n", - "07/05/2024 06:15:40 - INFO - llamafactory.data.template - Replace eos token: <|im_end|>\n", - "07/05/2024 06:15:40 - INFO - llamafactory.data.template - Add <|im_start|> to stop words.\n", - "07/05/2024 06:15:40 - INFO - llamafactory.data.loader - Loading dataset alpaca_mac.json...\n", - "Converting format of dataset (num_proc=16): 100%|โ–ˆ| 4528/4528 [00:00<00:00, 1717\n", - "Running tokenizer on dataset (num_proc=16): 100%|โ–ˆ| 4528/4528 [00:01<00:00, 2570\n", - "input_ids:\n", - "[151644, 872, 198, 5501, 14683, 279, 2701, 8453, 1467, 1119, 6364, 323, 3410, 1172, 279, 24531, 2213, 11, 4302, 770, 624, 35987, 102895, 99164, 100324, 100717, 100095, 99509, 1773, 151645, 198, 151644, 77091, 198, 17949, 358, 572, 2617, 553, 264, 38835, 44486, 13, 151645]\n", - "inputs:\n", - "<|im_start|>user\n", - "Please translate the following Chinese text into English and provide only the translated content, nothing else.\n", - "ๅ…จไป—็€็‹ไป™ๆญๆ•‘ใ€‚<|im_end|>\n", - "<|im_start|>assistant\n", - "Because I was protected by a fox fairy.<|im_end|>\n", - "label_ids:\n", - "[-100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 17949, 358, 572, 2617, 553, 264, 38835, 44486, 13, 151645]\n", - "labels:\n", - "Because I was protected by a fox fairy.<|im_end|>\n", - "[INFO|configuration_utils.py:733] 2024-07-05 06:15:44,437 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/config.json\n", - "[INFO|configuration_utils.py:800] 2024-07-05 06:15:44,438 >> Model config Qwen2Config {\n", - " \"_name_or_path\": \"Qwen/Qwen2-0.5B-Instruct\",\n", - " \"architectures\": [\n", - " \"Qwen2ForCausalLM\"\n", - " ],\n", - " \"attention_dropout\": 0.0,\n", - " \"bos_token_id\": 151643,\n", - " \"eos_token_id\": 151645,\n", - " \"hidden_act\": \"silu\",\n", - " \"hidden_size\": 896,\n", - " \"initializer_range\": 0.02,\n", - " \"intermediate_size\": 4864,\n", - " \"max_position_embeddings\": 32768,\n", - " \"max_window_layers\": 24,\n", - " \"model_type\": \"qwen2\",\n", - " \"num_attention_heads\": 14,\n", - " \"num_hidden_layers\": 24,\n", - " \"num_key_value_heads\": 2,\n", - " \"rms_norm_eps\": 1e-06,\n", - " \"rope_theta\": 1000000.0,\n", - " \"sliding_window\": 32768,\n", - " \"tie_word_embeddings\": true,\n", - " \"torch_dtype\": \"bfloat16\",\n", - " \"transformers_version\": \"4.42.3\",\n", - " \"use_cache\": true,\n", - " \"use_sliding_window\": false,\n", - " \"vocab_size\": 151936\n", - "}\n", - "\n", - "07/05/2024 06:15:44 - INFO - llamafactory.model.model_utils.quantization - Quantizing model to 4 bit with bitsandbytes.\n", - "๐Ÿฆฅ Unsloth: Will patch your computer to enable 2x faster free finetuning.\n", - "[INFO|configuration_utils.py:733] 2024-07-05 06:15:45,429 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-0.5b-instruct-bnb-4bit/snapshots/c3b24ce4827d69f5c3bde9aba00047774069ab72/config.json\n", - 
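The input_ids/label_ids dump above shows how the supervised-fine-tuning labels are built: all 34 prompt tokens (the chat-template header plus the Chinese source sentence) are set to -100, the index that PyTorch's cross-entropy loss ignores, so only the 10 tokens of the assistant reply and its closing <|im_end|> are trained on. A minimal sketch of the idea, assuming the standard ignore-index convention rather than quoting LLaMA-Factory's actual implementation:

    IGNORE_INDEX = -100  # labels with this value are skipped by CrossEntropyLoss

    def mask_prompt_labels(input_ids: list[int], prompt_len: int) -> list[int]:
        """Supervise only the tokens that come after the prompt."""
        return [IGNORE_INDEX] * prompt_len + input_ids[prompt_len:]

    # With the lengths printed above: 44 tokens total, the first 34 of them prompt.
    input_ids = list(range(44))  # stand-in ids for the real token ids
    labels = mask_prompt_labels(input_ids, prompt_len=34)
    assert labels[:34] == [IGNORE_INDEX] * 34
    assert labels[34:] == input_ids[34:]
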
"[INFO|configuration_utils.py:800] 2024-07-05 06:15:45,430 >> Model config Qwen2Config {\n", - " \"_name_or_path\": \"unsloth/qwen2-0.5b-instruct-bnb-4bit\",\n", - " \"architectures\": [\n", - " \"Qwen2ForCausalLM\"\n", - " ],\n", - " \"attention_dropout\": 0.0,\n", - " \"bos_token_id\": 151643,\n", - " \"eos_token_id\": 151645,\n", - " \"hidden_act\": \"silu\",\n", - " \"hidden_size\": 896,\n", - " \"initializer_range\": 0.02,\n", - " \"intermediate_size\": 4864,\n", - " \"max_position_embeddings\": 32768,\n", - " \"max_window_layers\": 24,\n", - " \"model_type\": \"qwen2\",\n", - " \"num_attention_heads\": 14,\n", - " \"num_hidden_layers\": 24,\n", - " \"num_key_value_heads\": 2,\n", - " \"quantization_config\": {\n", - " \"_load_in_4bit\": true,\n", - " \"_load_in_8bit\": false,\n", - " \"bnb_4bit_compute_dtype\": \"bfloat16\",\n", - " \"bnb_4bit_quant_storage\": \"uint8\",\n", - " \"bnb_4bit_quant_type\": \"nf4\",\n", - " \"bnb_4bit_use_double_quant\": true,\n", - " \"llm_int8_enable_fp32_cpu_offload\": false,\n", - " \"llm_int8_has_fp16_weight\": false,\n", - " \"llm_int8_skip_modules\": null,\n", - " \"llm_int8_threshold\": 6.0,\n", - " \"load_in_4bit\": true,\n", - " \"load_in_8bit\": false,\n", - " \"quant_method\": \"bitsandbytes\"\n", - " },\n", - " \"rms_norm_eps\": 1e-06,\n", - " \"rope_theta\": 1000000.0,\n", - " \"sliding_window\": 32768,\n", - " \"tie_word_embeddings\": true,\n", - " \"torch_dtype\": \"bfloat16\",\n", - " \"transformers_version\": \"4.42.3\",\n", - " \"use_cache\": true,\n", - " \"use_sliding_window\": false,\n", - " \"vocab_size\": 151936\n", - "}\n", - "\n", - "==((====))== Unsloth: Fast Qwen2 patching release 2024.7\n", - " \\\\ /| GPU: NVIDIA GeForce RTX 4080 Laptop GPU. Max memory: 11.994 GB. Platform = Linux.\n", - "O^O/ \\_/ \\ Pytorch: 2.3.0+cu121. CUDA = 8.9. CUDA Toolkit = 12.1.\n", - "\\ / Bfloat16 = TRUE. FA [Xformers = 0.0.26.post1. 
FA2 = False]\n", - " \"-____-\" Free Apache license: http://github.com/unslothai/unsloth\n", - "[INFO|configuration_utils.py:733] 2024-07-05 06:15:46,517 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-0.5b-instruct-bnb-4bit/snapshots/c3b24ce4827d69f5c3bde9aba00047774069ab72/config.json\n", - "[INFO|configuration_utils.py:800] 2024-07-05 06:15:46,517 >> Model config Qwen2Config {\n", - " \"_name_or_path\": \"unsloth/qwen2-0.5b-instruct-bnb-4bit\",\n", - " \"architectures\": [\n", - " \"Qwen2ForCausalLM\"\n", - " ],\n", - " \"attention_dropout\": 0.0,\n", - " \"bos_token_id\": 151643,\n", - " \"eos_token_id\": 151645,\n", - " \"hidden_act\": \"silu\",\n", - " \"hidden_size\": 896,\n", - " \"initializer_range\": 0.02,\n", - " \"intermediate_size\": 4864,\n", - " \"max_position_embeddings\": 32768,\n", - " \"max_window_layers\": 24,\n", - " \"model_type\": \"qwen2\",\n", - " \"num_attention_heads\": 14,\n", - " \"num_hidden_layers\": 24,\n", - " \"num_key_value_heads\": 2,\n", - " \"quantization_config\": {\n", - " \"_load_in_4bit\": true,\n", - " \"_load_in_8bit\": false,\n", - " \"bnb_4bit_compute_dtype\": \"bfloat16\",\n", - " \"bnb_4bit_quant_storage\": \"uint8\",\n", - " \"bnb_4bit_quant_type\": \"nf4\",\n", - " \"bnb_4bit_use_double_quant\": true,\n", - " \"llm_int8_enable_fp32_cpu_offload\": false,\n", - " \"llm_int8_has_fp16_weight\": false,\n", - " \"llm_int8_skip_modules\": null,\n", - " \"llm_int8_threshold\": 6.0,\n", - " \"load_in_4bit\": true,\n", - " \"load_in_8bit\": false,\n", - " \"quant_method\": \"bitsandbytes\"\n", - " },\n", - " \"rms_norm_eps\": 1e-06,\n", - " \"rope_theta\": 1000000.0,\n", - " \"sliding_window\": 32768,\n", - " \"tie_word_embeddings\": true,\n", - " \"torch_dtype\": \"bfloat16\",\n", - " \"transformers_version\": \"4.42.3\",\n", - " \"use_cache\": true,\n", - " \"use_sliding_window\": false,\n", - " \"vocab_size\": 151936\n", - "}\n", - "\n", - "[INFO|configuration_utils.py:733] 2024-07-05 06:15:47,071 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-0.5b-instruct-bnb-4bit/snapshots/c3b24ce4827d69f5c3bde9aba00047774069ab72/config.json\n", - "[INFO|configuration_utils.py:800] 2024-07-05 06:15:47,071 >> Model config Qwen2Config {\n", - " \"_name_or_path\": \"unsloth/qwen2-0.5b-instruct-bnb-4bit\",\n", - " \"architectures\": [\n", - " \"Qwen2ForCausalLM\"\n", - " ],\n", - " \"attention_dropout\": 0.0,\n", - " \"bos_token_id\": 151643,\n", - " \"eos_token_id\": 151645,\n", - " \"hidden_act\": \"silu\",\n", - " \"hidden_size\": 896,\n", - " \"initializer_range\": 0.02,\n", - " \"intermediate_size\": 4864,\n", - " \"max_position_embeddings\": 32768,\n", - " \"max_window_layers\": 24,\n", - " \"model_type\": \"qwen2\",\n", - " \"num_attention_heads\": 14,\n", - " \"num_hidden_layers\": 24,\n", - " \"num_key_value_heads\": 2,\n", - " \"quantization_config\": {\n", - " \"_load_in_4bit\": true,\n", - " \"_load_in_8bit\": false,\n", - " \"bnb_4bit_compute_dtype\": \"bfloat16\",\n", - " \"bnb_4bit_quant_storage\": \"uint8\",\n", - " \"bnb_4bit_quant_type\": \"nf4\",\n", - " \"bnb_4bit_use_double_quant\": true,\n", - " \"llm_int8_enable_fp32_cpu_offload\": false,\n", - " \"llm_int8_has_fp16_weight\": false,\n", - " \"llm_int8_skip_modules\": null,\n", - " \"llm_int8_threshold\": 6.0,\n", - " \"load_in_4bit\": true,\n", - " \"load_in_8bit\": false,\n", - " \"quant_method\": \"bitsandbytes\"\n", - " },\n", - " \"rms_norm_eps\": 
1e-06,\n", - " \"rope_theta\": 1000000.0,\n", - " \"sliding_window\": 32768,\n", - " \"tie_word_embeddings\": true,\n", - " \"torch_dtype\": \"bfloat16\",\n", - " \"transformers_version\": \"4.42.3\",\n", - " \"use_cache\": true,\n", - " \"use_sliding_window\": false,\n", - " \"vocab_size\": 151936\n", - "}\n", - "\n", - "[INFO|modeling_utils.py:3556] 2024-07-05 06:15:47,115 >> loading weights file model.safetensors from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-0.5b-instruct-bnb-4bit/snapshots/c3b24ce4827d69f5c3bde9aba00047774069ab72/model.safetensors\n", - "[INFO|modeling_utils.py:1531] 2024-07-05 06:15:48,951 >> Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16.\n", - "[INFO|configuration_utils.py:1000] 2024-07-05 06:15:48,969 >> Generate config GenerationConfig {\n", - " \"bos_token_id\": 151643,\n", - " \"eos_token_id\": 151645\n", - "}\n", - "\n", - "[INFO|modeling_utils.py:4364] 2024-07-05 06:16:14,443 >> All model checkpoint weights were used when initializing Qwen2ForCausalLM.\n", - "\n", - "[INFO|modeling_utils.py:4372] 2024-07-05 06:16:14,443 >> All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at unsloth/qwen2-0.5b-instruct-bnb-4bit.\n", - "If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training.\n", - "[INFO|configuration_utils.py:955] 2024-07-05 06:16:14,971 >> loading configuration file generation_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-0.5b-instruct-bnb-4bit/snapshots/c3b24ce4827d69f5c3bde9aba00047774069ab72/generation_config.json\n", - "[INFO|configuration_utils.py:1000] 2024-07-05 06:16:14,971 >> Generate config GenerationConfig {\n", - " \"bos_token_id\": 151643,\n", - " \"do_sample\": true,\n", - " \"eos_token_id\": [\n", - " 151645,\n", - " 151643\n", - " ],\n", - " \"pad_token_id\": 151643,\n", - " \"repetition_penalty\": 1.1,\n", - " \"temperature\": 0.7,\n", - " \"top_k\": 20,\n", - " \"top_p\": 0.8\n", - "}\n", - "\n", - "07/05/2024 06:16:18 - INFO - llamafactory.model.model_utils.checkpointing - Gradient checkpointing enabled.\n", - "07/05/2024 06:16:18 - INFO - llamafactory.model.adapter - Upcasting trainable params to float32.\n", - "07/05/2024 06:16:18 - INFO - llamafactory.model.adapter - Fine-tuning method: LoRA\n", - "07/05/2024 06:16:18 - INFO - llamafactory.model.model_utils.misc - Found linear modules: gate_proj,q_proj,k_proj,up_proj,down_proj,o_proj,v_proj\n", - "[WARNING|logging.py:328] 2024-07-05 06:16:19,091 >> Unsloth 2024.7 patched 24 layers with 0 QKV layers, 24 O layers and 24 MLP layers.\n", - "07/05/2024 06:16:19 - INFO - llamafactory.model.loader - trainable params: 4,399,104 || all params: 634,566,528 || trainable%: 0.6932\n", - "[INFO|trainer.py:642] 2024-07-05 06:16:19,940 >> Using auto half precision backend\n", - "07/05/2024 06:16:19 - WARNING - llamafactory.train.callbacks - Previous trainer log in this folder will be deleted.\n", - "07/05/2024 06:16:20 - INFO - llamafactory.train.trainer_utils - Using LoRA+ optimizer with loraplus lr ratio 16.00.\n", - "[WARNING|:223] 2024-07-05 06:16:20,129 >> ==((====))== Unsloth - 2x faster free finetuning | Num GPUs = 1\n", - " \\\\ /| Num examples = 4,482 | Num Epochs = 6\n", - "O^O/ \\_/ \\ Batch size per device = 1 | Gradient Accumulation steps = 8\n", - "\\ / Total batch size = 8 | Total steps = 3,360\n", - " \"-____-\" Number of trainable parameters = 4,399,104\n", - 
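The banner numbers are internally consistent and worth a quick sanity check: of the 4,528 training rows loaded earlier, 46 are held out for evaluation (the "Num examples = 46" in every eval pass), leaving the 4,482 shown here, and the 0.6932% trainable ratio and 3,360 total steps follow directly. Plain-Python arithmetic only; the floor division reflects that a trailing partial gradient-accumulation step does not count as an optimizer step, which is consistent with the logged total:

    trainable, total_params = 4_399_104, 634_566_528
    print(f"trainable%: {100 * trainable / total_params:.4f}")  # -> 0.6932

    examples, per_device_bs, grad_accum, epochs = 4_482, 1, 8, 6
    steps_per_epoch = examples // (per_device_bs * grad_accum)  # 560
    print(steps_per_epoch * epochs)                             # -> 3360
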
"[INFO|integration_utils.py:750] 2024-07-05 06:16:20,818 >> Automatic Weights & Biases logging enabled, to disable set os.environ[\"WANDB_DISABLED\"] = \"true\"\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33minflaton-sg\u001b[0m (\u001b[33minflaton-ai\u001b[0m). Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: Tracking run with wandb version 0.17.4\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: Run data is saved locally in \u001b[35m\u001b[1m/home/inflaton/code/projects/courses/llm-finetuning/llama-factory/wandb/run-20240705_061623-3amepb0m\u001b[0m\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: Run \u001b[1m`wandb offline`\u001b[0m to turn off syncing.\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: Syncing run \u001b[33mqwen2_0.5b_lora_sft\u001b[0m\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: โญ๏ธ View project at \u001b[34m\u001b[4mhttps://wandb.ai/inflaton-ai/huggingface\u001b[0m\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: ๐Ÿš€ View run at \u001b[34m\u001b[4mhttps://wandb.ai/inflaton-ai/huggingface/runs/3amepb0m\u001b[0m\n", - "{'loss': 2.6325, 'grad_norm': 2.6052567958831787, 'learning_rate': 2.9761904761904763e-06, 'epoch': 0.02}\n", - "{'loss': 2.6514, 'grad_norm': 2.433773994445801, 'learning_rate': 5.9523809523809525e-06, 'epoch': 0.04}\n", - "{'loss': 2.474, 'grad_norm': 2.1471617221832275, 'learning_rate': 8.92857142857143e-06, 'epoch': 0.05}\n", - "{'loss': 2.3031, 'grad_norm': 4.300695419311523, 'learning_rate': 1.1904761904761905e-05, 'epoch': 0.07}\n", - "{'loss': 2.4774, 'grad_norm': 1.8105831146240234, 'learning_rate': 1.4880952380952381e-05, 'epoch': 0.09}\n", - "{'loss': 2.2519, 'grad_norm': 2.077115297317505, 'learning_rate': 1.785714285714286e-05, 'epoch': 0.11}\n", - "{'loss': 2.4309, 'grad_norm': 1.9538270235061646, 'learning_rate': 2.0833333333333336e-05, 'epoch': 0.12}\n", - "{'loss': 2.22, 'grad_norm': 2.1473119258880615, 'learning_rate': 2.380952380952381e-05, 'epoch': 0.14}\n", - "{'loss': 2.3228, 'grad_norm': 2.819317579269409, 'learning_rate': 2.6785714285714288e-05, 'epoch': 0.16}\n", - "{'loss': 2.238, 'grad_norm': 1.9084508419036865, 'learning_rate': 2.9761904761904762e-05, 'epoch': 0.18}\n", - "{'loss': 2.2707, 'grad_norm': 2.1343274116516113, 'learning_rate': 3.273809523809524e-05, 'epoch': 0.2}\n", - "{'loss': 2.286, 'grad_norm': 2.273739814758301, 'learning_rate': 3.571428571428572e-05, 'epoch': 0.21}\n", - "{'loss': 2.1805, 'grad_norm': 2.505805253982544, 'learning_rate': 3.8690476190476195e-05, 'epoch': 0.23}\n", - "{'loss': 2.2527, 'grad_norm': 2.4992618560791016, 'learning_rate': 4.166666666666667e-05, 'epoch': 0.25}\n", - "{'loss': 2.1387, 'grad_norm': 1.9521129131317139, 'learning_rate': 4.464285714285715e-05, 'epoch': 0.27}\n", - "{'loss': 2.1733, 'grad_norm': 1.7223074436187744, 'learning_rate': 4.761904761904762e-05, 'epoch': 0.29}\n", - "{'loss': 2.2774, 'grad_norm': 1.8748223781585693, 'learning_rate': 5.05952380952381e-05, 'epoch': 0.3}\n", - "{'loss': 2.0726, 'grad_norm': 2.039461135864258, 'learning_rate': 5.3571428571428575e-05, 'epoch': 0.32}\n", - "{'loss': 2.1471, 'grad_norm': 2.512571096420288, 'learning_rate': 5.6547619047619046e-05, 'epoch': 0.34}\n", - "{'loss': 2.3088, 'grad_norm': 2.0730302333831787, 'learning_rate': 5.9523809523809524e-05, 'epoch': 0.36}\n", - "{'loss': 2.2315, 'grad_norm': 1.9101688861846924, 'learning_rate': 6.25e-05, 'epoch': 0.37}\n", - "{'loss': 2.1767, 'grad_norm': 2.6846179962158203, 'learning_rate': 6.547619047619048e-05, 'epoch': 0.39}\n", 
- "{'loss': 2.1396, 'grad_norm': 2.3576760292053223, 'learning_rate': 6.845238095238096e-05, 'epoch': 0.41}\n", - "{'loss': 2.3496, 'grad_norm': 2.5166685581207275, 'learning_rate': 7.142857142857143e-05, 'epoch': 0.43}\n", - "{'loss': 2.1899, 'grad_norm': 2.326274871826172, 'learning_rate': 7.440476190476191e-05, 'epoch': 0.45}\n", - "{'loss': 2.1658, 'grad_norm': 2.342203140258789, 'learning_rate': 7.738095238095239e-05, 'epoch': 0.46}\n", - "{'loss': 2.1561, 'grad_norm': 2.895669937133789, 'learning_rate': 8.035714285714287e-05, 'epoch': 0.48}\n", - "{'loss': 2.264, 'grad_norm': 3.2078170776367188, 'learning_rate': 8.333333333333334e-05, 'epoch': 0.5}\n", - "{'loss': 2.088, 'grad_norm': 2.282803773880005, 'learning_rate': 8.630952380952382e-05, 'epoch': 0.52}\n", - "{'loss': 2.1821, 'grad_norm': 2.5930910110473633, 'learning_rate': 8.92857142857143e-05, 'epoch': 0.54}\n", - "{'loss': 2.2382, 'grad_norm': 2.7073450088500977, 'learning_rate': 9.226190476190478e-05, 'epoch': 0.55}\n", - "{'loss': 2.0117, 'grad_norm': 3.457638740539551, 'learning_rate': 9.523809523809524e-05, 'epoch': 0.57}\n", - "{'loss': 2.0526, 'grad_norm': 3.453278064727783, 'learning_rate': 9.821428571428572e-05, 'epoch': 0.59}\n", - "{'loss': 2.1403, 'grad_norm': 2.7960667610168457, 'learning_rate': 9.999956828659095e-05, 'epoch': 0.61}\n", - "{'loss': 2.161, 'grad_norm': 3.307030439376831, 'learning_rate': 9.999471159635539e-05, 'epoch': 0.62}\n", - "{'loss': 2.0478, 'grad_norm': 2.788396120071411, 'learning_rate': 9.998445910004082e-05, 'epoch': 0.64}\n", - "{'loss': 2.3267, 'grad_norm': 4.489534378051758, 'learning_rate': 9.996881190417393e-05, 'epoch': 0.66}\n", - "{'loss': 2.2085, 'grad_norm': 2.93642520904541, 'learning_rate': 9.994777169751806e-05, 'epoch': 0.68}\n", - "{'loss': 1.8982, 'grad_norm': 2.470207929611206, 'learning_rate': 9.992134075089084e-05, 'epoch': 0.7}\n", - "{'loss': 2.1388, 'grad_norm': 2.992520809173584, 'learning_rate': 9.988952191691925e-05, 'epoch': 0.71}\n", - "{'loss': 2.1675, 'grad_norm': 2.986842155456543, 'learning_rate': 9.985231862973168e-05, 'epoch': 0.73}\n", - "{'loss': 2.1914, 'grad_norm': 2.8504011631011963, 'learning_rate': 9.980973490458728e-05, 'epoch': 0.75}\n", - "{'loss': 2.1588, 'grad_norm': 3.4979565143585205, 'learning_rate': 9.976177533744261e-05, 'epoch': 0.77}\n", - "{'loss': 2.0952, 'grad_norm': 3.6922664642333984, 'learning_rate': 9.97084451044556e-05, 'epoch': 0.79}\n", - "{'loss': 2.0288, 'grad_norm': 2.895118236541748, 'learning_rate': 9.964974996142698e-05, 'epoch': 0.8}\n", - "{'loss': 2.1275, 'grad_norm': 3.1226203441619873, 'learning_rate': 9.958569624317893e-05, 'epoch': 0.82}\n", - "{'loss': 2.1303, 'grad_norm': 4.210818767547607, 'learning_rate': 9.951629086287151e-05, 'epoch': 0.84}\n", - "{'loss': 2.1294, 'grad_norm': 2.9749433994293213, 'learning_rate': 9.944154131125642e-05, 'epoch': 0.86}\n", - "{'loss': 2.1612, 'grad_norm': 2.9232656955718994, 'learning_rate': 9.936145565586871e-05, 'epoch': 0.87}\n", - "{'loss': 2.3294, 'grad_norm': 2.8355772495269775, 'learning_rate': 9.927604254015585e-05, 'epoch': 0.89}\n", - "{'loss': 2.274, 'grad_norm': 3.1120338439941406, 'learning_rate': 9.918531118254507e-05, 'epoch': 0.91}\n", - "{'loss': 2.1442, 'grad_norm': 4.310208797454834, 'learning_rate': 9.90892713754483e-05, 'epoch': 0.93}\n", - "{'loss': 2.1595, 'grad_norm': 3.8621461391448975, 'learning_rate': 9.898793348420536e-05, 'epoch': 0.95}\n", - "{'loss': 2.1399, 'grad_norm': 2.8605706691741943, 'learning_rate': 9.888130844596524e-05, 'epoch': 
0.96}\n", - "{'loss': 2.1673, 'grad_norm': 3.161895275115967, 'learning_rate': 9.876940776850569e-05, 'epoch': 0.98}\n", - "{'loss': 2.1621, 'grad_norm': 3.304511785507202, 'learning_rate': 9.865224352899119e-05, 'epoch': 1.0}\n", - " 17%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–Ž | 560/3360 [15:54<1:17:19, 1.66s/it][INFO|trainer.py:3788] 2024-07-05 06:32:21,677 >> \n", - "***** Running Evaluation *****\n", - "[INFO|trainer.py:3790] 2024-07-05 06:32:21,677 >> Num examples = 46\n", - "[INFO|trainer.py:3793] 2024-07-05 06:32:21,677 >> Batch size = 1\n", - "\n", - " 0%| | 0/46 [00:00> Saving model checkpoint to saves/qwen2-0.5b/lora/sft/checkpoint-560\n", - "[INFO|configuration_utils.py:733] 2024-07-05 06:32:25,696 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-0.5b-instruct-bnb-4bit/snapshots/c3b24ce4827d69f5c3bde9aba00047774069ab72/config.json\n", - "[INFO|configuration_utils.py:800] 2024-07-05 06:32:25,697 >> Model config Qwen2Config {\n", - " \"_name_or_path\": \"Qwen/Qwen2-0.5B-Instruct\",\n", - " \"architectures\": [\n", - " \"Qwen2ForCausalLM\"\n", - " ],\n", - " \"attention_dropout\": 0.0,\n", - " \"bos_token_id\": 151643,\n", - " \"eos_token_id\": 151645,\n", - " \"hidden_act\": \"silu\",\n", - " \"hidden_size\": 896,\n", - " \"initializer_range\": 0.02,\n", - " \"intermediate_size\": 4864,\n", - " \"max_position_embeddings\": 32768,\n", - " \"max_window_layers\": 24,\n", - " \"model_type\": \"qwen2\",\n", - " \"num_attention_heads\": 14,\n", - " \"num_hidden_layers\": 24,\n", - " \"num_key_value_heads\": 2,\n", - " \"quantization_config\": {\n", - " \"_load_in_4bit\": true,\n", - " \"_load_in_8bit\": false,\n", - " \"bnb_4bit_compute_dtype\": \"bfloat16\",\n", - " \"bnb_4bit_quant_storage\": \"uint8\",\n", - " \"bnb_4bit_quant_type\": \"nf4\",\n", - " \"bnb_4bit_use_double_quant\": true,\n", - " \"llm_int8_enable_fp32_cpu_offload\": false,\n", - " \"llm_int8_has_fp16_weight\": false,\n", - " \"llm_int8_skip_modules\": null,\n", - " \"llm_int8_threshold\": 6.0,\n", - " \"load_in_4bit\": true,\n", - " \"load_in_8bit\": false,\n", - " \"quant_method\": \"bitsandbytes\"\n", - " },\n", - " \"rms_norm_eps\": 1e-06,\n", - " \"rope_theta\": 1000000.0,\n", - " \"sliding_window\": 32768,\n", - " \"tie_word_embeddings\": true,\n", - " \"torch_dtype\": \"bfloat16\",\n", - " \"transformers_version\": \"4.42.3\",\n", - " \"use_cache\": true,\n", - " \"use_sliding_window\": false,\n", - " \"vocab_size\": 151936\n", - "}\n", - "\n", - "{'loss': 1.6151, 'grad_norm': 3.5378007888793945, 'learning_rate': 9.852982837266955e-05, 'epoch': 1.02}\n", - "{'loss': 1.3799, 'grad_norm': 3.1997132301330566, 'learning_rate': 9.840217551150706e-05, 'epoch': 1.04}\n", - "{'loss': 1.6132, 'grad_norm': 2.48860502243042, 'learning_rate': 9.826929872276255e-05, 'epoch': 1.05}\n", - "{'loss': 1.4984, 'grad_norm': 3.7188329696655273, 'learning_rate': 9.81312123475006e-05, 'epoch': 1.07}\n", - "{'loss': 1.4967, 'grad_norm': 3.4040935039520264, 'learning_rate': 9.798793128904356e-05, 'epoch': 1.09}\n", - "{'loss': 1.5688, 'grad_norm': 3.603771924972534, 'learning_rate': 9.78394710113631e-05, 'epoch': 1.11}\n", - "{'loss': 1.4902, 'grad_norm': 3.248730421066284, 'learning_rate': 9.768584753741134e-05, 'epoch': 1.12}\n", - "{'loss': 1.4788, 'grad_norm': 4.081541538238525, 'learning_rate': 9.752707744739145e-05, 'epoch': 1.14}\n", - "{'loss': 1.5933, 'grad_norm': 3.348815441131592, 'learning_rate': 9.736317787696816e-05, 'epoch': 1.16}\n", - "{'loss': 1.4597, 'grad_norm': 
5.059058666229248, 'learning_rate': 9.719416651541839e-05, 'epoch': 1.18}\n", - "{'loss': 1.5088, 'grad_norm': 2.929900646209717, 'learning_rate': 9.702006160372209e-05, 'epoch': 1.2}\n", - "{'loss': 1.5122, 'grad_norm': 3.9229655265808105, 'learning_rate': 9.684088193259355e-05, 'epoch': 1.21}\n", - "{'loss': 1.4982, 'grad_norm': 4.456009864807129, 'learning_rate': 9.665664684045333e-05, 'epoch': 1.23}\n", - "{'loss': 1.5631, 'grad_norm': 6.255136966705322, 'learning_rate': 9.646737621134112e-05, 'epoch': 1.25}\n", - "{'loss': 1.5067, 'grad_norm': 4.147162914276123, 'learning_rate': 9.627309047276974e-05, 'epoch': 1.27}\n", - "{'loss': 1.6788, 'grad_norm': 4.083860874176025, 'learning_rate': 9.607381059352038e-05, 'epoch': 1.29}\n", - "{'loss': 1.6006, 'grad_norm': 3.7379791736602783, 'learning_rate': 9.586955808137958e-05, 'epoch': 1.3}\n", - "{'loss': 1.6328, 'grad_norm': 3.6500179767608643, 'learning_rate': 9.566035498081784e-05, 'epoch': 1.32}\n", - "{'loss': 1.6155, 'grad_norm': 3.455841302871704, 'learning_rate': 9.544622387061055e-05, 'epoch': 1.34}\n", - "{'loss': 1.3868, 'grad_norm': 3.636683702468872, 'learning_rate': 9.522718786140097e-05, 'epoch': 1.36}\n", - "{'loss': 1.5776, 'grad_norm': 4.494875431060791, 'learning_rate': 9.500327059320606e-05, 'epoch': 1.37}\n", - "{'loss': 1.4877, 'grad_norm': 4.710891246795654, 'learning_rate': 9.477449623286505e-05, 'epoch': 1.39}\n", - "{'loss': 1.401, 'grad_norm': 3.5016818046569824, 'learning_rate': 9.454088947143116e-05, 'epoch': 1.41}\n", - "{'loss': 1.628, 'grad_norm': 4.40405797958374, 'learning_rate': 9.430247552150673e-05, 'epoch': 1.43}\n", - "{'loss': 1.4999, 'grad_norm': 3.74572491645813, 'learning_rate': 9.405928011452211e-05, 'epoch': 1.45}\n", - "{'loss': 1.5602, 'grad_norm': 4.144255638122559, 'learning_rate': 9.381132949795861e-05, 'epoch': 1.46}\n", - "{'loss': 1.6872, 'grad_norm': 4.109062671661377, 'learning_rate': 9.35586504325155e-05, 'epoch': 1.48}\n", - "{'loss': 1.5494, 'grad_norm': 7.194815635681152, 'learning_rate': 9.330127018922194e-05, 'epoch': 1.5}\n", - "{'loss': 1.4354, 'grad_norm': 3.779526948928833, 'learning_rate': 9.303921654649362e-05, 'epoch': 1.52}\n", - "{'loss': 1.593, 'grad_norm': 3.863893508911133, 'learning_rate': 9.277251778713474e-05, 'epoch': 1.54}\n", - "{'loss': 1.5795, 'grad_norm': 3.684547185897827, 'learning_rate': 9.250120269528546e-05, 'epoch': 1.55}\n", - "{'loss': 1.5245, 'grad_norm': 3.9775428771972656, 'learning_rate': 9.22253005533154e-05, 'epoch': 1.57}\n", - "{'loss': 1.631, 'grad_norm': 4.817204475402832, 'learning_rate': 9.194484113866313e-05, 'epoch': 1.59}\n", - "{'loss': 1.658, 'grad_norm': 3.928107738494873, 'learning_rate': 9.165985472062246e-05, 'epoch': 1.61}\n", - "{'loss': 1.464, 'grad_norm': 4.099756240844727, 'learning_rate': 9.137037205707552e-05, 'epoch': 1.62}\n", - "{'loss': 1.5206, 'grad_norm': 3.9024410247802734, 'learning_rate': 9.107642439117321e-05, 'epoch': 1.64}\n", - "{'loss': 1.6011, 'grad_norm': 3.7552289962768555, 'learning_rate': 9.077804344796302e-05, 'epoch': 1.66}\n", - "{'loss': 1.4891, 'grad_norm': 3.713045835494995, 'learning_rate': 9.04752614309652e-05, 'epoch': 1.68}\n", - "{'loss': 1.5139, 'grad_norm': 3.589451313018799, 'learning_rate': 9.01681110186971e-05, 'epoch': 1.7}\n", - "{'loss': 1.5901, 'grad_norm': 3.9955010414123535, 'learning_rate': 8.985662536114613e-05, 'epoch': 1.71}\n", - "{'loss': 1.5646, 'grad_norm': 3.6160426139831543, 'learning_rate': 8.954083807619208e-05, 'epoch': 1.73}\n", - "{'loss': 1.6884, 'grad_norm': 
4.0372796058654785, 'learning_rate': 8.922078324597879e-05, 'epoch': 1.75}\n", - "{'loss': 1.6813, 'grad_norm': 4.466279983520508, 'learning_rate': 8.889649541323574e-05, 'epoch': 1.77}\n", - "{'loss': 1.5947, 'grad_norm': 5.11010217666626, 'learning_rate': 8.856800957755e-05, 'epoch': 1.78}\n", - "{'loss': 1.6637, 'grad_norm': 5.363622188568115, 'learning_rate': 8.823536119158864e-05, 'epoch': 1.8}\n", - "{'loss': 1.5541, 'grad_norm': 4.0909223556518555, 'learning_rate': 8.789858615727265e-05, 'epoch': 1.82}\n", - "{'loss': 1.523, 'grad_norm': 3.796602249145508, 'learning_rate': 8.755772082190194e-05, 'epoch': 1.84}\n", - "{'loss': 1.6437, 'grad_norm': 4.511483669281006, 'learning_rate': 8.721280197423258e-05, 'epoch': 1.86}\n", - "{'loss': 1.4852, 'grad_norm': 4.5722246170043945, 'learning_rate': 8.68638668405062e-05, 'epoch': 1.87}\n", - "{'loss': 1.5986, 'grad_norm': 4.731987953186035, 'learning_rate': 8.651095308043232e-05, 'epoch': 1.89}\n", - "{'loss': 1.7502, 'grad_norm': 6.07273530960083, 'learning_rate': 8.61540987831238e-05, 'epoch': 1.91}\n", - "{'loss': 1.6979, 'grad_norm': 5.418001651763916, 'learning_rate': 8.579334246298593e-05, 'epoch': 1.93}\n", - "{'loss': 1.5625, 'grad_norm': 4.6554341316223145, 'learning_rate': 8.542872305555978e-05, 'epoch': 1.95}\n", - "{'loss': 1.4509, 'grad_norm': 3.8252899646759033, 'learning_rate': 8.50602799133199e-05, 'epoch': 1.96}\n", - "{'loss': 1.5915, 'grad_norm': 4.251583099365234, 'learning_rate': 8.468805280142709e-05, 'epoch': 1.98}\n", - "{'loss': 1.6074, 'grad_norm': 4.587167739868164, 'learning_rate': 8.43120818934367e-05, 'epoch': 2.0}\n", - " 33%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–Ž | 1120/3360 [31:19<1:00:24, 1.62s/it][INFO|trainer.py:3788] 2024-07-05 06:47:46,319 >> \n", - "***** Running Evaluation *****\n", - "[INFO|trainer.py:3790] 2024-07-05 06:47:46,319 >> Num examples = 46\n", - "[INFO|trainer.py:3793] 2024-07-05 06:47:46,319 >> Batch size = 1\n", - "\n", - " 0%| | 0/46 [00:00> Saving model checkpoint to saves/qwen2-0.5b/lora/sft/checkpoint-1120\n", - "[INFO|configuration_utils.py:733] 2024-07-05 06:47:50,281 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-0.5b-instruct-bnb-4bit/snapshots/c3b24ce4827d69f5c3bde9aba00047774069ab72/config.json\n", - "[INFO|configuration_utils.py:800] 2024-07-05 06:47:50,282 >> Model config Qwen2Config {\n", - " \"_name_or_path\": \"Qwen/Qwen2-0.5B-Instruct\",\n", - " \"architectures\": [\n", - " \"Qwen2ForCausalLM\"\n", - " ],\n", - " \"attention_dropout\": 0.0,\n", - " \"bos_token_id\": 151643,\n", - " \"eos_token_id\": 151645,\n", - " \"hidden_act\": \"silu\",\n", - " \"hidden_size\": 896,\n", - " \"initializer_range\": 0.02,\n", - " \"intermediate_size\": 4864,\n", - " \"max_position_embeddings\": 32768,\n", - " \"max_window_layers\": 24,\n", - " \"model_type\": \"qwen2\",\n", - " \"num_attention_heads\": 14,\n", - " \"num_hidden_layers\": 24,\n", - " \"num_key_value_heads\": 2,\n", - " \"quantization_config\": {\n", - " \"_load_in_4bit\": true,\n", - " \"_load_in_8bit\": false,\n", - " \"bnb_4bit_compute_dtype\": \"bfloat16\",\n", - " \"bnb_4bit_quant_storage\": \"uint8\",\n", - " \"bnb_4bit_quant_type\": \"nf4\",\n", - " \"bnb_4bit_use_double_quant\": true,\n", - " \"llm_int8_enable_fp32_cpu_offload\": false,\n", - " \"llm_int8_has_fp16_weight\": false,\n", - " \"llm_int8_skip_modules\": null,\n", - " \"llm_int8_threshold\": 6.0,\n", - " \"load_in_4bit\": true,\n", - " \"load_in_8bit\": false,\n", - " 
\"quant_method\": \"bitsandbytes\"\n", - " },\n", - " \"rms_norm_eps\": 1e-06,\n", - " \"rope_theta\": 1000000.0,\n", - " \"sliding_window\": 32768,\n", - " \"tie_word_embeddings\": true,\n", - " \"torch_dtype\": \"bfloat16\",\n", - " \"transformers_version\": \"4.42.3\",\n", - " \"use_cache\": true,\n", - " \"use_sliding_window\": false,\n", - " \"vocab_size\": 151936\n", - "}\n", - "\n", - "{'loss': 0.9111, 'grad_norm': 5.008914470672607, 'learning_rate': 8.393240776696274e-05, 'epoch': 2.02}\n", - "{'loss': 0.863, 'grad_norm': 6.299067974090576, 'learning_rate': 8.354907139929851e-05, 'epoch': 2.03}\n", - "{'loss': 0.8515, 'grad_norm': 4.728297233581543, 'learning_rate': 8.316211416299397e-05, 'epoch': 2.05}\n", - "{'loss': 0.8316, 'grad_norm': 4.379367351531982, 'learning_rate': 8.27715778213905e-05, 'epoch': 2.07}\n", - "{'loss': 0.6608, 'grad_norm': 4.311402320861816, 'learning_rate': 8.237750452411353e-05, 'epoch': 2.09}\n", - "{'loss': 0.8713, 'grad_norm': 3.783642530441284, 'learning_rate': 8.197993680252334e-05, 'epoch': 2.11}\n", - "{'loss': 1.0158, 'grad_norm': 4.141658782958984, 'learning_rate': 8.157891756512488e-05, 'epoch': 2.12}\n", - "{'loss': 0.9155, 'grad_norm': 4.355412483215332, 'learning_rate': 8.117449009293668e-05, 'epoch': 2.14}\n", - "{'loss': 0.8407, 'grad_norm': 5.703305721282959, 'learning_rate': 8.076669803481965e-05, 'epoch': 2.16}\n", - "{'loss': 0.8494, 'grad_norm': 5.374706745147705, 'learning_rate': 8.035558540276618e-05, 'epoch': 2.18}\n", - "{'loss': 0.8743, 'grad_norm': 4.037242889404297, 'learning_rate': 7.994119656715002e-05, 'epoch': 2.2}\n", - "{'loss': 0.9841, 'grad_norm': 4.615417957305908, 'learning_rate': 7.952357625193749e-05, 'epoch': 2.21}\n", - "{'loss': 0.9296, 'grad_norm': 4.376211643218994, 'learning_rate': 7.91027695298606e-05, 'epoch': 2.23}\n", - "{'loss': 0.9142, 'grad_norm': 4.084548473358154, 'learning_rate': 7.86788218175523e-05, 'epoch': 2.25}\n", - "{'loss': 0.8517, 'grad_norm': 4.527939796447754, 'learning_rate': 7.8251778870645e-05, 'epoch': 2.27}\n", - "{'loss': 0.9113, 'grad_norm': 5.170512676239014, 'learning_rate': 7.782168677883206e-05, 'epoch': 2.28}\n", - "{'loss': 0.9332, 'grad_norm': 4.342284202575684, 'learning_rate': 7.738859196089358e-05, 'epoch': 2.3}\n", - "{'loss': 0.9759, 'grad_norm': 4.931323051452637, 'learning_rate': 7.695254115968648e-05, 'epoch': 2.32}\n", - "{'loss': 1.0079, 'grad_norm': 3.684819459915161, 'learning_rate': 7.651358143709972e-05, 'epoch': 2.34}\n", - "{'loss': 0.9958, 'grad_norm': 5.162328720092773, 'learning_rate': 7.60717601689749e-05, 'epoch': 2.36}\n", - "{'loss': 0.9528, 'grad_norm': 4.386671543121338, 'learning_rate': 7.562712503999327e-05, 'epoch': 2.37}\n", - "{'loss': 1.1468, 'grad_norm': 5.785244464874268, 'learning_rate': 7.517972403852905e-05, 'epoch': 2.39}\n", - "{'loss': 0.9291, 'grad_norm': 4.308371543884277, 'learning_rate': 7.472960545147038e-05, 'epoch': 2.41}\n", - "{'loss': 0.8408, 'grad_norm': 5.942112922668457, 'learning_rate': 7.427681785900761e-05, 'epoch': 2.43}\n", - "{'loss': 0.9693, 'grad_norm': 4.682136535644531, 'learning_rate': 7.382141012939034e-05, 'epoch': 2.45}\n", - "{'loss': 0.8726, 'grad_norm': 4.883449077606201, 'learning_rate': 7.33634314136531e-05, 'epoch': 2.46}\n", - "{'loss': 0.9426, 'grad_norm': 4.833103656768799, 'learning_rate': 7.290293114031061e-05, 'epoch': 2.48}\n", - "{'loss': 1.0333, 'grad_norm': 4.8503289222717285, 'learning_rate': 7.243995901002312e-05, 'epoch': 2.5}\n", - "{'loss': 0.9984, 'grad_norm': 4.3091230392456055, 
'learning_rate': 7.197456499023225e-05, 'epoch': 2.52}\n", - "{'loss': 1.0019, 'grad_norm': 4.726260662078857, 'learning_rate': 7.150679930976825e-05, 'epoch': 2.53}\n", - "{'loss': 0.9594, 'grad_norm': 3.850511312484741, 'learning_rate': 7.103671245342887e-05, 'epoch': 2.55}\n", - "{'loss': 0.8701, 'grad_norm': 5.5012030601501465, 'learning_rate': 7.056435515653059e-05, 'epoch': 2.57}\n", - "{'loss': 1.0956, 'grad_norm': 5.610720157623291, 'learning_rate': 7.008977839943299e-05, 'epoch': 2.59}\n", - "{'loss': 0.9175, 'grad_norm': 3.8002779483795166, 'learning_rate': 6.961303340203653e-05, 'epoch': 2.61}\n", - "{'loss': 1.0243, 'grad_norm': 5.210932731628418, 'learning_rate': 6.91341716182545e-05, 'epoch': 2.62}\n", - "{'loss': 0.9902, 'grad_norm': 3.9311327934265137, 'learning_rate': 6.86532447304597e-05, 'epoch': 2.64}\n", - "{'loss': 0.9589, 'grad_norm': 4.984393119812012, 'learning_rate': 6.817030464390656e-05, 'epoch': 2.66}\n", - "{'loss': 0.9985, 'grad_norm': 4.881758689880371, 'learning_rate': 6.768540348112907e-05, 'epoch': 2.68}\n", - "{'loss': 0.8961, 'grad_norm': 6.465915203094482, 'learning_rate': 6.719859357631535e-05, 'epoch': 2.7}\n", - "{'loss': 0.8434, 'grad_norm': 5.6094183921813965, 'learning_rate': 6.670992746965938e-05, 'epoch': 2.71}\n", - "{'loss': 1.0485, 'grad_norm': 5.219779968261719, 'learning_rate': 6.621945790169036e-05, 'epoch': 2.73}\n", - "{'loss': 1.0165, 'grad_norm': 5.263071060180664, 'learning_rate': 6.572723780758069e-05, 'epoch': 2.75}\n", - "{'loss': 0.9104, 'grad_norm': 4.919801235198975, 'learning_rate': 6.523332031143272e-05, 'epoch': 2.77}\n", - "{'loss': 0.9633, 'grad_norm': 4.69899320602417, 'learning_rate': 6.473775872054521e-05, 'epoch': 2.78}\n", - "{'loss': 0.8483, 'grad_norm': 4.0923285484313965, 'learning_rate': 6.424060651966007e-05, 'epoch': 2.8}\n", - "{'loss': 0.8888, 'grad_norm': 5.461803436279297, 'learning_rate': 6.374191736518974e-05, 'epoch': 2.82}\n", - "{'loss': 0.9598, 'grad_norm': 4.758564472198486, 'learning_rate': 6.324174507942637e-05, 'epoch': 2.84}\n", - "{'loss': 0.9436, 'grad_norm': 6.395792007446289, 'learning_rate': 6.274014364473274e-05, 'epoch': 2.86}\n", - "{'loss': 1.1634, 'grad_norm': 6.077510356903076, 'learning_rate': 6.22371671977162e-05, 'epoch': 2.87}\n", - "{'loss': 1.0049, 'grad_norm': 5.1858720779418945, 'learning_rate': 6.173287002338577e-05, 'epoch': 2.89}\n", - "{'loss': 0.9795, 'grad_norm': 6.103806972503662, 'learning_rate': 6.122730654929334e-05, 'epoch': 2.91}\n", - "{'loss': 0.9422, 'grad_norm': 5.469768524169922, 'learning_rate': 6.072053133965938e-05, 'epoch': 2.93}\n", - "{'loss': 1.0349, 'grad_norm': 4.436359405517578, 'learning_rate': 6.021259908948402e-05, 'epoch': 2.95}\n", - "{'loss': 1.1161, 'grad_norm': 5.872861862182617, 'learning_rate': 5.970356461864391e-05, 'epoch': 2.96}\n", - "{'loss': 0.9069, 'grad_norm': 5.360676288604736, 'learning_rate': 5.919348286597569e-05, 'epoch': 2.98}\n", - "{'loss': 1.0593, 'grad_norm': 4.815310001373291, 'learning_rate': 5.868240888334653e-05, 'epoch': 3.0}\n", - " 50%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–Œ | 1680/3360 [46:46<45:31, 1.63s/it][INFO|trainer.py:3788] 2024-07-05 07:03:13,485 >> \n", - "***** Running Evaluation *****\n", - "[INFO|trainer.py:3790] 2024-07-05 07:03:13,485 >> Num examples = 46\n", - "[INFO|trainer.py:3793] 2024-07-05 07:03:13,485 >> Batch size = 1\n", - "\n", - " 0%| | 0/46 [00:00> Saving model checkpoint to saves/qwen2-0.5b/lora/sft/checkpoint-1680\n", - "[INFO|configuration_utils.py:733] 2024-07-05 
07:03:17,790 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-0.5b-instruct-bnb-4bit/snapshots/c3b24ce4827d69f5c3bde9aba00047774069ab72/config.json\n", - "[INFO|configuration_utils.py:800] 2024-07-05 07:03:17,790 >> Model config Qwen2Config {\n", - " \"_name_or_path\": \"Qwen/Qwen2-0.5B-Instruct\",\n", - " \"architectures\": [\n", - " \"Qwen2ForCausalLM\"\n", - " ],\n", - " \"attention_dropout\": 0.0,\n", - " \"bos_token_id\": 151643,\n", - " \"eos_token_id\": 151645,\n", - " \"hidden_act\": \"silu\",\n", - " \"hidden_size\": 896,\n", - " \"initializer_range\": 0.02,\n", - " \"intermediate_size\": 4864,\n", - " \"max_position_embeddings\": 32768,\n", - " \"max_window_layers\": 24,\n", - " \"model_type\": \"qwen2\",\n", - " \"num_attention_heads\": 14,\n", - " \"num_hidden_layers\": 24,\n", - " \"num_key_value_heads\": 2,\n", - " \"quantization_config\": {\n", - " \"_load_in_4bit\": true,\n", - " \"_load_in_8bit\": false,\n", - " \"bnb_4bit_compute_dtype\": \"bfloat16\",\n", - " \"bnb_4bit_quant_storage\": \"uint8\",\n", - " \"bnb_4bit_quant_type\": \"nf4\",\n", - " \"bnb_4bit_use_double_quant\": true,\n", - " \"llm_int8_enable_fp32_cpu_offload\": false,\n", - " \"llm_int8_has_fp16_weight\": false,\n", - " \"llm_int8_skip_modules\": null,\n", - " \"llm_int8_threshold\": 6.0,\n", - " \"load_in_4bit\": true,\n", - " \"load_in_8bit\": false,\n", - " \"quant_method\": \"bitsandbytes\"\n", - " },\n", - " \"rms_norm_eps\": 1e-06,\n", - " \"rope_theta\": 1000000.0,\n", - " \"sliding_window\": 32768,\n", - " \"tie_word_embeddings\": true,\n", - " \"torch_dtype\": \"bfloat16\",\n", - " \"transformers_version\": \"4.42.3\",\n", - " \"use_cache\": true,\n", - " \"use_sliding_window\": false,\n", - " \"vocab_size\": 151936\n", - "}\n", - "\n", - "{'loss': 0.6987, 'grad_norm': 4.28726863861084, 'learning_rate': 5.8170397829712485e-05, 'epoch': 3.02}\n", - "{'loss': 0.3462, 'grad_norm': 5.342904567718506, 'learning_rate': 5.765750496516547e-05, 'epoch': 3.03}\n", - "{'loss': 0.4899, 'grad_norm': 3.8532354831695557, 'learning_rate': 5.714378564496901e-05, 'epoch': 3.05}\n", - "{'loss': 0.4609, 'grad_norm': 4.3072590827941895, 'learning_rate': 5.6629295313583974e-05, 'epoch': 3.07}\n", - "{'loss': 0.4106, 'grad_norm': 4.2518463134765625, 'learning_rate': 5.611408949868457e-05, 'epoch': 3.09}\n", - "{'loss': 0.5169, 'grad_norm': 4.579401016235352, 'learning_rate': 5.559822380516539e-05, 'epoch': 3.11}\n", - "{'loss': 0.4794, 'grad_norm': 3.6858370304107666, 'learning_rate': 5.5081753909140096e-05, 'epoch': 3.12}\n", - "{'loss': 0.5473, 'grad_norm': 8.67149543762207, 'learning_rate': 5.456473555193242e-05, 'epoch': 3.14}\n", - "{'loss': 0.4638, 'grad_norm': 6.095928192138672, 'learning_rate': 5.404722453406017e-05, 'epoch': 3.16}\n", - "{'loss': 0.4697, 'grad_norm': 6.712044715881348, 'learning_rate': 5.3529276709212816e-05, 'epoch': 3.18}\n", - "{'loss': 0.4869, 'grad_norm': 4.1765336990356445, 'learning_rate': 5.30109479782233e-05, 'epoch': 3.2}\n", - "{'loss': 0.4821, 'grad_norm': 4.068556308746338, 'learning_rate': 5.249229428303486e-05, 'epoch': 3.21}\n", - "{'loss': 0.6011, 'grad_norm': 3.6553525924682617, 'learning_rate': 5.197337160066331e-05, 'epoch': 3.23}\n", - "{'loss': 0.4558, 'grad_norm': 4.888422012329102, 'learning_rate': 5.145423593715557e-05, 'epoch': 3.25}\n", - "{'loss': 0.5203, 'grad_norm': 4.138525009155273, 'learning_rate': 5.0934943321545115e-05, 'epoch': 3.27}\n", - "{'loss': 0.3826, 'grad_norm': 4.2213358879089355, 
'learning_rate': 5.041554979980486e-05, 'epoch': 3.28}\n", - "{'loss': 0.5895, 'grad_norm': 4.9374260902404785, 'learning_rate': 4.9896111428798254e-05, 'epoch': 3.3}\n", - "{'loss': 0.5609, 'grad_norm': 4.482494831085205, 'learning_rate': 4.9376684270229254e-05, 'epoch': 3.32}\n", - "{'loss': 0.5478, 'grad_norm': 3.9575753211975098, 'learning_rate': 4.8857324384591653e-05, 'epoch': 3.34}\n", - "{'loss': 0.4865, 'grad_norm': 5.01925802230835, 'learning_rate': 4.8338087825118675e-05, 'epoch': 3.36}\n", - "{'loss': 0.5365, 'grad_norm': 4.109598636627197, 'learning_rate': 4.781903063173321e-05, 'epoch': 3.37}\n", - "{'loss': 0.4814, 'grad_norm': 3.7702512741088867, 'learning_rate': 4.730020882499964e-05, 'epoch': 3.39}\n", - "{'loss': 0.5355, 'grad_norm': 6.243114948272705, 'learning_rate': 4.678167840007767e-05, 'epoch': 3.41}\n", - "{'loss': 0.5361, 'grad_norm': 4.488025188446045, 'learning_rate': 4.626349532067879e-05, 'epoch': 3.43}\n", - "{'loss': 0.5952, 'grad_norm': 4.389721870422363, 'learning_rate': 4.574571551302647e-05, 'epoch': 3.44}\n", - "{'loss': 0.6049, 'grad_norm': 4.847557067871094, 'learning_rate': 4.522839485981994e-05, 'epoch': 3.46}\n", - "{'loss': 0.5697, 'grad_norm': 3.9925057888031006, 'learning_rate': 4.471158919420312e-05, 'epoch': 3.48}\n", - "{'loss': 0.5018, 'grad_norm': 5.327306747436523, 'learning_rate': 4.4195354293738484e-05, 'epoch': 3.5}\n", - "{'loss': 0.4745, 'grad_norm': 5.380455493927002, 'learning_rate': 4.367974587438733e-05, 'epoch': 3.52}\n", - "{'loss': 0.5421, 'grad_norm': 3.978426694869995, 'learning_rate': 4.316481958449634e-05, 'epoch': 3.53}\n", - "{'loss': 0.5091, 'grad_norm': 8.685088157653809, 'learning_rate': 4.2650630998791615e-05, 'epoch': 3.55}\n", - "{'loss': 0.6102, 'grad_norm': 4.471510887145996, 'learning_rate': 4.213723561238074e-05, 'epoch': 3.57}\n", - "{'loss': 0.4623, 'grad_norm': 4.236584663391113, 'learning_rate': 4.162468883476319e-05, 'epoch': 3.59}\n", - "{'loss': 0.5203, 'grad_norm': 5.698358535766602, 'learning_rate': 4.111304598385018e-05, 'epoch': 3.61}\n", - "{'loss': 0.5314, 'grad_norm': 5.975699424743652, 'learning_rate': 4.060236227999441e-05, 'epoch': 3.62}\n", - "{'loss': 0.484, 'grad_norm': 5.300996780395508, 'learning_rate': 4.0092692840030134e-05, 'epoch': 3.64}\n", - "{'loss': 0.4564, 'grad_norm': 7.857934474945068, 'learning_rate': 3.9584092671324606e-05, 'epoch': 3.66}\n", - "{'loss': 0.5715, 'grad_norm': 3.796581268310547, 'learning_rate': 3.907661666584131e-05, 'epoch': 3.68}\n", - "{'loss': 0.539, 'grad_norm': 4.170958995819092, 'learning_rate': 3.857031959421553e-05, 'epoch': 3.69}\n", - "{'loss': 0.5249, 'grad_norm': 6.283390045166016, 'learning_rate': 3.806525609984312e-05, 'epoch': 3.71}\n", - "{'loss': 0.4406, 'grad_norm': 6.235040664672852, 'learning_rate': 3.7561480692983006e-05, 'epoch': 3.73}\n", - "{'loss': 0.553, 'grad_norm': 3.715141534805298, 'learning_rate': 3.705904774487396e-05, 'epoch': 3.75}\n", - "{'loss': 0.5154, 'grad_norm': 6.352488040924072, 'learning_rate': 3.655801148186655e-05, 'epoch': 3.77}\n", - "{'loss': 0.4681, 'grad_norm': 4.480152130126953, 'learning_rate': 3.6058425979570485e-05, 'epoch': 3.78}\n", - "{'loss': 0.4915, 'grad_norm': 5.1917219161987305, 'learning_rate': 3.556034515701852e-05, 'epoch': 3.8}\n", - "{'loss': 0.5371, 'grad_norm': 4.501936912536621, 'learning_rate': 3.506382277084696e-05, 'epoch': 3.82}\n", - "{'loss': 0.5273, 'grad_norm': 3.53322434425354, 'learning_rate': 3.4568912409493945e-05, 'epoch': 3.84}\n", - "{'loss': 0.4405, 'grad_norm': 
4.688470840454102, 'learning_rate': 3.4075667487415785e-05, 'epoch': 3.86}\n", - "{'loss': 0.5048, 'grad_norm': 6.739779949188232, 'learning_rate': 3.358414123932195e-05, 'epoch': 3.87}\n", - "{'loss': 0.5572, 'grad_norm': 4.120084762573242, 'learning_rate': 3.3094386714429724e-05, 'epoch': 3.89}\n", - "{'loss': 0.5498, 'grad_norm': 7.938605785369873, 'learning_rate': 3.2606456770738636e-05, 'epoch': 3.91}\n", - "{'loss': 0.3955, 'grad_norm': 4.132835865020752, 'learning_rate': 3.212040406932569e-05, 'epoch': 3.93}\n", - "{'loss': 0.4149, 'grad_norm': 3.777303457260132, 'learning_rate': 3.163628106866172e-05, 'epoch': 3.94}\n", - "{'loss': 0.6278, 'grad_norm': 5.201406955718994, 'learning_rate': 3.115414001894974e-05, 'epoch': 3.96}\n", - "{'loss': 0.5186, 'grad_norm': 7.258588790893555, 'learning_rate': 3.067403295648566e-05, 'epoch': 3.98}\n", - "{'loss': 0.4754, 'grad_norm': 3.8839337825775146, 'learning_rate': 3.019601169804216e-05, 'epoch': 4.0}\n", - " 67%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–‹ | 2240/3360 [1:01:49<29:29, 1.58s/it][INFO|trainer.py:3788] 2024-07-05 07:18:16,440 >> \n", - "***** Running Evaluation *****\n", - "[INFO|trainer.py:3790] 2024-07-05 07:18:16,441 >> Num examples = 46\n", - "[INFO|trainer.py:3793] 2024-07-05 07:18:16,441 >> Batch size = 1\n", - "\n", - " 0%| | 0/46 [00:00> Saving model checkpoint to saves/qwen2-0.5b/lora/sft/checkpoint-2240\n", - "[INFO|configuration_utils.py:733] 2024-07-05 07:18:20,481 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-0.5b-instruct-bnb-4bit/snapshots/c3b24ce4827d69f5c3bde9aba00047774069ab72/config.json\n", - "[INFO|configuration_utils.py:800] 2024-07-05 07:18:20,481 >> Model config Qwen2Config {\n", - " \"_name_or_path\": \"Qwen/Qwen2-0.5B-Instruct\",\n", - " \"architectures\": [\n", - " \"Qwen2ForCausalLM\"\n", - " ],\n", - " \"attention_dropout\": 0.0,\n", - " \"bos_token_id\": 151643,\n", - " \"eos_token_id\": 151645,\n", - " \"hidden_act\": \"silu\",\n", - " \"hidden_size\": 896,\n", - " \"initializer_range\": 0.02,\n", - " \"intermediate_size\": 4864,\n", - " \"max_position_embeddings\": 32768,\n", - " \"max_window_layers\": 24,\n", - " \"model_type\": \"qwen2\",\n", - " \"num_attention_heads\": 14,\n", - " \"num_hidden_layers\": 24,\n", - " \"num_key_value_heads\": 2,\n", - " \"quantization_config\": {\n", - " \"_load_in_4bit\": true,\n", - " \"_load_in_8bit\": false,\n", - " \"bnb_4bit_compute_dtype\": \"bfloat16\",\n", - " \"bnb_4bit_quant_storage\": \"uint8\",\n", - " \"bnb_4bit_quant_type\": \"nf4\",\n", - " \"bnb_4bit_use_double_quant\": true,\n", - " \"llm_int8_enable_fp32_cpu_offload\": false,\n", - " \"llm_int8_has_fp16_weight\": false,\n", - " \"llm_int8_skip_modules\": null,\n", - " \"llm_int8_threshold\": 6.0,\n", - " \"load_in_4bit\": true,\n", - " \"load_in_8bit\": false,\n", - " \"quant_method\": \"bitsandbytes\"\n", - " },\n", - " \"rms_norm_eps\": 1e-06,\n", - " \"rope_theta\": 1000000.0,\n", - " \"sliding_window\": 32768,\n", - " \"tie_word_embeddings\": true,\n", - " \"torch_dtype\": \"bfloat16\",\n", - " \"transformers_version\": \"4.42.3\",\n", - " \"use_cache\": true,\n", - " \"use_sliding_window\": false,\n", - " \"vocab_size\": 151936\n", - "}\n", - "\n", - "{'loss': 0.3004, 'grad_norm': 3.442147731781006, 'learning_rate': 2.9720127835276256e-05, 'epoch': 4.02}\n", - "{'loss': 0.2198, 'grad_norm': 3.406682252883911, 'learning_rate': 2.9246432729161055e-05, 'epoch': 4.03}\n", - "{'loss': 0.2312, 
'grad_norm': 3.0993845462799072, 'learning_rate': 2.8774977504442647e-05, 'epoch': 4.05}\n", - "{'loss': 0.2038, 'grad_norm': 4.1203694343566895, 'learning_rate': 2.8305813044122097e-05, 'epoch': 4.07}\n", - "{'loss': 0.1819, 'grad_norm': 2.116147756576538, 'learning_rate': 2.7838989983964065e-05, 'epoch': 4.09}\n", - "{'loss': 0.2672, 'grad_norm': 3.2091379165649414, 'learning_rate': 2.737455870703155e-05, 'epoch': 4.11}\n", - "{'loss': 0.1779, 'grad_norm': 2.386085033416748, 'learning_rate': 2.6912569338248315e-05, 'epoch': 4.12}\n", - "{'loss': 0.2997, 'grad_norm': 28.640592575073242, 'learning_rate': 2.645307173898901e-05, 'epoch': 4.14}\n", - "{'loss': 0.227, 'grad_norm': 2.2596945762634277, 'learning_rate': 2.5996115501697694e-05, 'epoch': 4.16}\n", - "{'loss': 0.2184, 'grad_norm': 4.521151065826416, 'learning_rate': 2.5541749944535554e-05, 'epoch': 4.18}\n", - "{'loss': 0.2038, 'grad_norm': 5.861654281616211, 'learning_rate': 2.5090024106057962e-05, 'epoch': 4.19}\n", - "{'loss': 0.2423, 'grad_norm': 4.1528639793396, 'learning_rate': 2.464098673992205e-05, 'epoch': 4.21}\n", - "{'loss': 0.2367, 'grad_norm': 2.1180801391601562, 'learning_rate': 2.4194686309624663e-05, 'epoch': 4.23}\n", - "{'loss': 0.2627, 'grad_norm': 4.030113697052002, 'learning_rate': 2.3751170983272e-05, 'epoch': 4.25}\n", - "{'loss': 0.2329, 'grad_norm': 4.907358646392822, 'learning_rate': 2.3310488628380757e-05, 'epoch': 4.27}\n", - "{'loss': 0.2542, 'grad_norm': 4.652915000915527, 'learning_rate': 2.2872686806712035e-05, 'epoch': 4.28}\n", - "{'loss': 0.2495, 'grad_norm': 5.10890531539917, 'learning_rate': 2.243781276913811e-05, 'epoch': 4.3}\n", - "{'loss': 0.1662, 'grad_norm': 3.823878288269043, 'learning_rate': 2.200591345054267e-05, 'epoch': 4.32}\n", - "{'loss': 0.2812, 'grad_norm': 3.004128932952881, 'learning_rate': 2.157703546475539e-05, 'epoch': 4.34}\n", - "{'loss': 0.2202, 'grad_norm': 4.443856716156006, 'learning_rate': 2.115122509952085e-05, 'epoch': 4.36}\n", - "{'loss': 0.2244, 'grad_norm': 2.996962070465088, 'learning_rate': 2.0728528311502976e-05, 'epoch': 4.37}\n", - "{'loss': 0.2474, 'grad_norm': 4.116214752197266, 'learning_rate': 2.0308990721324927e-05, 'epoch': 4.39}\n", - "{'loss': 0.1881, 'grad_norm': 4.773007392883301, 'learning_rate': 1.989265760864542e-05, 'epoch': 4.41}\n", - "{'loss': 0.2721, 'grad_norm': 3.045060873031616, 'learning_rate': 1.947957390727185e-05, 'epoch': 4.43}\n", - "{'loss': 0.2474, 'grad_norm': 5.480595111846924, 'learning_rate': 1.906978420031059e-05, 'epoch': 4.44}\n", - "{'loss': 0.1786, 'grad_norm': 2.452791929244995, 'learning_rate': 1.8663332715355396e-05, 'epoch': 4.46}\n", - "{'loss': 0.2655, 'grad_norm': 1.6951186656951904, 'learning_rate': 1.8260263319713844e-05, 'epoch': 4.48}\n", - "{'loss': 0.2307, 'grad_norm': 4.780274868011475, 'learning_rate': 1.7860619515673033e-05, 'epoch': 4.5}\n", - "{'loss': 0.2661, 'grad_norm': 4.14153528213501, 'learning_rate': 1.746444443580433e-05, 'epoch': 4.52}\n", - "{'loss': 0.2482, 'grad_norm': 4.6406989097595215, 'learning_rate': 1.7071780838308288e-05, 'epoch': 4.53}\n", - "{'loss': 0.2268, 'grad_norm': 3.8813576698303223, 'learning_rate': 1.6682671102399805e-05, 'epoch': 4.55}\n", - "{'loss': 0.2058, 'grad_norm': 3.428504467010498, 'learning_rate': 1.629715722373423e-05, 'epoch': 4.57}\n", - "{'loss': 0.2407, 'grad_norm': 4.3143415451049805, 'learning_rate': 1.5915280809874932e-05, 'epoch': 4.59}\n", - "{'loss': 0.2153, 'grad_norm': 4.420351505279541, 'learning_rate': 1.553708307580265e-05, 'epoch': 4.61}\n", - 
"{'loss': 0.2423, 'grad_norm': 3.758807897567749, 'learning_rate': 1.5162604839467265e-05, 'epoch': 4.62}\n", - "{'loss': 0.2376, 'grad_norm': 3.164726734161377, 'learning_rate': 1.4791886517382413e-05, 'epoch': 4.64}\n", - "{'loss': 0.223, 'grad_norm': 2.6924712657928467, 'learning_rate': 1.4424968120263504e-05, 'epoch': 4.66}\n", - "{'loss': 0.224, 'grad_norm': 3.1326253414154053, 'learning_rate': 1.4061889248709343e-05, 'epoch': 4.68}\n", - "{'loss': 0.2097, 'grad_norm': 3.1166789531707764, 'learning_rate': 1.370268908892825e-05, 'epoch': 4.69}\n", - "{'loss': 0.2607, 'grad_norm': 4.3387651443481445, 'learning_rate': 1.3347406408508695e-05, 'epoch': 4.71}\n", - "{'loss': 0.2217, 'grad_norm': 2.9194934368133545, 'learning_rate': 1.2996079552235263e-05, 'epoch': 4.73}\n", - "{'loss': 0.175, 'grad_norm': 2.6297366619110107, 'learning_rate': 1.264874643795021e-05, 'epoch': 4.75}\n", - "{'loss': 0.2148, 'grad_norm': 3.174553632736206, 'learning_rate': 1.230544455246101e-05, 'epoch': 4.77}\n", - "{'loss': 0.246, 'grad_norm': 3.611652374267578, 'learning_rate': 1.1966210947494583e-05, 'epoch': 4.78}\n", - "{'loss': 0.2477, 'grad_norm': 3.13002610206604, 'learning_rate': 1.1631082235698316e-05, 'epoch': 4.8}\n", - "{'loss': 0.2034, 'grad_norm': 3.1411221027374268, 'learning_rate': 1.130009458668863e-05, 'epoch': 4.82}\n", - "{'loss': 0.1899, 'grad_norm': 3.8253543376922607, 'learning_rate': 1.097328372314721e-05, 'epoch': 4.84}\n", - "{'loss': 0.2432, 'grad_norm': 4.582285404205322, 'learning_rate': 1.0650684916965559e-05, 'epoch': 4.85}\n", - "{'loss': 0.2412, 'grad_norm': 3.9309003353118896, 'learning_rate': 1.0332332985438248e-05, 'epoch': 4.87}\n", - "{'loss': 0.2543, 'grad_norm': 4.181048393249512, 'learning_rate': 1.0018262287505086e-05, 'epoch': 4.89}\n", - "{'loss': 0.2759, 'grad_norm': 1.8343684673309326, 'learning_rate': 9.708506720042932e-06, 'epoch': 4.91}\n", - "{'loss': 0.189, 'grad_norm': 2.335709571838379, 'learning_rate': 9.403099714207175e-06, 'epoch': 4.93}\n", - "{'loss': 0.2495, 'grad_norm': 4.065298080444336, 'learning_rate': 9.102074231823727e-06, 'epoch': 4.94}\n", - "{'loss': 0.2557, 'grad_norm': 2.8378493785858154, 'learning_rate': 8.805462761831418e-06, 'epoch': 4.96}\n", - "{'loss': 0.2784, 'grad_norm': 3.395693063735962, 'learning_rate': 8.513297316775625e-06, 'epoch': 4.98}\n", - "{'loss': 0.2621, 'grad_norm': 4.082712173461914, 'learning_rate': 8.225609429353187e-06, 'epoch': 5.0}\n", - " 83%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–Š | 2800/3360 [1:16:55<15:13, 1.63s/it][INFO|trainer.py:3788] 2024-07-05 07:33:22,309 >> \n", - "***** Running Evaluation *****\n", - "[INFO|trainer.py:3790] 2024-07-05 07:33:22,309 >> Num examples = 46\n", - "[INFO|trainer.py:3793] 2024-07-05 07:33:22,310 >> Batch size = 1\n", - "\n", - " 0%| | 0/46 [00:00> Saving model checkpoint to saves/qwen2-0.5b/lora/sft/checkpoint-2800\n", - "[INFO|configuration_utils.py:733] 2024-07-05 07:33:26,470 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-0.5b-instruct-bnb-4bit/snapshots/c3b24ce4827d69f5c3bde9aba00047774069ab72/config.json\n", - "[INFO|configuration_utils.py:800] 2024-07-05 07:33:26,471 >> Model config Qwen2Config {\n", - " \"_name_or_path\": \"Qwen/Qwen2-0.5B-Instruct\",\n", - " \"architectures\": [\n", - " \"Qwen2ForCausalLM\"\n", - " ],\n", - " \"attention_dropout\": 0.0,\n", - " \"bos_token_id\": 151643,\n", - " \"eos_token_id\": 151645,\n", - " \"hidden_act\": 
\"silu\",\n", - " \"hidden_size\": 896,\n", - " \"initializer_range\": 0.02,\n", - " \"intermediate_size\": 4864,\n", - " \"max_position_embeddings\": 32768,\n", - " \"max_window_layers\": 24,\n", - " \"model_type\": \"qwen2\",\n", - " \"num_attention_heads\": 14,\n", - " \"num_hidden_layers\": 24,\n", - " \"num_key_value_heads\": 2,\n", - " \"quantization_config\": {\n", - " \"_load_in_4bit\": true,\n", - " \"_load_in_8bit\": false,\n", - " \"bnb_4bit_compute_dtype\": \"bfloat16\",\n", - " \"bnb_4bit_quant_storage\": \"uint8\",\n", - " \"bnb_4bit_quant_type\": \"nf4\",\n", - " \"bnb_4bit_use_double_quant\": true,\n", - " \"llm_int8_enable_fp32_cpu_offload\": false,\n", - " \"llm_int8_has_fp16_weight\": false,\n", - " \"llm_int8_skip_modules\": null,\n", - " \"llm_int8_threshold\": 6.0,\n", - " \"load_in_4bit\": true,\n", - " \"load_in_8bit\": false,\n", - " \"quant_method\": \"bitsandbytes\"\n", - " },\n", - " \"rms_norm_eps\": 1e-06,\n", - " \"rope_theta\": 1000000.0,\n", - " \"sliding_window\": 32768,\n", - " \"tie_word_embeddings\": true,\n", - " \"torch_dtype\": \"bfloat16\",\n", - " \"transformers_version\": \"4.42.3\",\n", - " \"use_cache\": true,\n", - " \"use_sliding_window\": false,\n", - " \"vocab_size\": 151936\n", - "}\n", - "\n", - "{'loss': 0.1274, 'grad_norm': 1.5695966482162476, 'learning_rate': 7.942430149009161e-06, 'epoch': 5.02}\n", - "{'loss': 0.1052, 'grad_norm': 0.5931769609451294, 'learning_rate': 7.663790038585793e-06, 'epoch': 5.03}\n", - "{'loss': 0.1188, 'grad_norm': 4.6362762451171875, 'learning_rate': 7.389719171023857e-06, 'epoch': 5.05}\n", - "{'loss': 0.1032, 'grad_norm': 2.54799485206604, 'learning_rate': 7.1202471261170245e-06, 'epoch': 5.07}\n", - "{'loss': 0.0781, 'grad_norm': 2.0741422176361084, 'learning_rate': 6.855402987319348e-06, 'epoch': 5.09}\n", - "{'loss': 0.1428, 'grad_norm': 2.47188401222229, 'learning_rate': 6.595215338606397e-06, 'epoch': 5.1}\n", - "{'loss': 0.0839, 'grad_norm': 1.993886947631836, 'learning_rate': 6.339712261390213e-06, 'epoch': 5.12}\n", - "{'loss': 0.1086, 'grad_norm': 1.442935824394226, 'learning_rate': 6.088921331488568e-06, 'epoch': 5.14}\n", - "{'loss': 0.0851, 'grad_norm': 2.1466658115386963, 'learning_rate': 5.8428696161488215e-06, 'epoch': 5.16}\n", - "{'loss': 0.0964, 'grad_norm': 1.960119366645813, 'learning_rate': 5.601583671126531e-06, 'epoch': 5.18}\n", - "{'loss': 0.1149, 'grad_norm': 1.3245364427566528, 'learning_rate': 5.365089537819434e-06, 'epoch': 5.19}\n", - "{'loss': 0.1074, 'grad_norm': 0.817304253578186, 'learning_rate': 5.133412740456806e-06, 'epoch': 5.21}\n", - "{'loss': 0.0966, 'grad_norm': 1.4587805271148682, 'learning_rate': 4.906578283344759e-06, 'epoch': 5.23}\n", - "{'loss': 0.1326, 'grad_norm': 5.115628719329834, 'learning_rate': 4.684610648167503e-06, 'epoch': 5.25}\n", - "{'loss': 0.1112, 'grad_norm': 2.1370065212249756, 'learning_rate': 4.467533791345191e-06, 'epoch': 5.27}\n", - "{'loss': 0.0918, 'grad_norm': 1.5177031755447388, 'learning_rate': 4.255371141448272e-06, 'epoch': 5.28}\n", - "{'loss': 0.0911, 'grad_norm': 3.415386199951172, 'learning_rate': 4.048145596668967e-06, 'epoch': 5.3}\n", - "{'loss': 0.1295, 'grad_norm': 9.106415748596191, 'learning_rate': 3.84587952234991e-06, 'epoch': 5.32}\n", - "{'loss': 0.0753, 'grad_norm': 1.1960046291351318, 'learning_rate': 3.6485947485702832e-06, 'epoch': 5.34}\n", - "{'loss': 0.1281, 'grad_norm': 3.4662070274353027, 'learning_rate': 3.4563125677897932e-06, 'epoch': 5.35}\n", - "{'loss': 0.1045, 'grad_norm': 1.4903005361557007, 
'learning_rate': 3.269053732550581e-06, 'epoch': 5.37}\n", - "{'loss': 0.094, 'grad_norm': 2.3145623207092285, 'learning_rate': 3.086838453237506e-06, 'epoch': 5.39}\n", - "{'loss': 0.056, 'grad_norm': 1.6177632808685303, 'learning_rate': 2.9096863958968268e-06, 'epoch': 5.41}\n", - "{'loss': 0.093, 'grad_norm': 1.7712160348892212, 'learning_rate': 2.737616680113758e-06, 'epoch': 5.43}\n", - "{'loss': 0.0846, 'grad_norm': 2.1207849979400635, 'learning_rate': 2.570647876948895e-06, 'epoch': 5.44}\n", - "{'loss': 0.1257, 'grad_norm': 1.7891684770584106, 'learning_rate': 2.408798006933882e-06, 'epoch': 5.46}\n", - "{'loss': 0.1472, 'grad_norm': 1.305862545967102, 'learning_rate': 2.252084538126542e-06, 'epoch': 5.48}\n", - "{'loss': 0.0784, 'grad_norm': 2.511289596557617, 'learning_rate': 2.100524384225555e-06, 'epoch': 5.5}\n", - "{'loss': 0.1159, 'grad_norm': 2.205674886703491, 'learning_rate': 1.9541339027450256e-06, 'epoch': 5.52}\n", - "{'loss': 0.1057, 'grad_norm': 2.3121867179870605, 'learning_rate': 1.8129288932490274e-06, 'epoch': 5.53}\n", - "{'loss': 0.1044, 'grad_norm': 0.5653843283653259, 'learning_rate': 1.6769245956464396e-06, 'epoch': 5.55}\n", - "{'loss': 0.1248, 'grad_norm': 2.8058314323425293, 'learning_rate': 1.5461356885461075e-06, 'epoch': 5.57}\n", - "{'loss': 0.1108, 'grad_norm': 1.7656151056289673, 'learning_rate': 1.4205762876726092e-06, 'epoch': 5.59}\n", - "{'loss': 0.103, 'grad_norm': 1.4396343231201172, 'learning_rate': 1.3002599443428243e-06, 'epoch': 5.6}\n", - "{'loss': 0.1239, 'grad_norm': 2.5784292221069336, 'learning_rate': 1.1851996440033319e-06, 'epoch': 5.62}\n", - "{'loss': 0.0961, 'grad_norm': 0.813414990901947, 'learning_rate': 1.0754078048289374e-06, 'epoch': 5.64}\n", - "{'loss': 0.1176, 'grad_norm': 2.7768945693969727, 'learning_rate': 9.708962763824048e-07, 'epoch': 5.66}\n", - "{'loss': 0.0784, 'grad_norm': 1.4548313617706299, 'learning_rate': 8.716763383355864e-07, 'epoch': 5.68}\n", - "{'loss': 0.0995, 'grad_norm': 1.4250032901763916, 'learning_rate': 7.777586992519959e-07, 'epoch': 5.69}\n", - "{'loss': 0.1014, 'grad_norm': 3.0032870769500732, 'learning_rate': 6.891534954310885e-07, 'epoch': 5.71}\n", - "{'loss': 0.0993, 'grad_norm': 3.392124891281128, 'learning_rate': 6.058702898142643e-07, 'epoch': 5.73}\n", - "{'loss': 0.0962, 'grad_norm': 2.9156267642974854, 'learning_rate': 5.279180709527765e-07, 'epoch': 5.75}\n", - "{'loss': 0.1407, 'grad_norm': 2.50022292137146, 'learning_rate': 4.553052520375911e-07, 'epoch': 5.77}\n", - "{'loss': 0.0832, 'grad_norm': 1.8522708415985107, 'learning_rate': 3.8803966999139684e-07, 'epoch': 5.78}\n", - "{'loss': 0.1036, 'grad_norm': 2.559648275375366, 'learning_rate': 3.261285846227868e-07, 'epoch': 5.8}\n", - "{'loss': 0.0851, 'grad_norm': 6.124639987945557, 'learning_rate': 2.6957867784270787e-07, 'epoch': 5.82}\n", - "{'loss': 0.0861, 'grad_norm': 1.8628261089324951, 'learning_rate': 2.1839605294330933e-07, 'epoch': 5.84}\n", - "{'loss': 0.1153, 'grad_norm': 2.0182836055755615, 'learning_rate': 1.725862339392259e-07, 'epoch': 5.85}\n", - "{'loss': 0.0913, 'grad_norm': 2.184485912322998, 'learning_rate': 1.3215416497138754e-07, 'epoch': 5.87}\n", - "{'loss': 0.132, 'grad_norm': 2.652066707611084, 'learning_rate': 9.710420977340762e-08, 'epoch': 5.89}\n", - "{'loss': 0.0822, 'grad_norm': 2.054509401321411, 'learning_rate': 6.744015120061509e-08, 'epoch': 5.91}\n", - "{'loss': 0.1632, 'grad_norm': 2.1160929203033447, 'learning_rate': 4.316519082179227e-08, 'epoch': 5.93}\n", - "{'loss': 0.0715, 
'grad_norm': 3.3849403858184814, 'learning_rate': 2.4281948573617874e-08, 'epoch': 5.94}\n",
- "{'loss': 0.1134, 'grad_norm': 3.3306052684783936, 'learning_rate': 1.0792462477909882e-08, 'epoch': 5.96}\n",
- "{'loss': 0.1273, 'grad_norm': 2.356410026550293, 'learning_rate': 2.6981884216847884e-09, 'epoch': 5.98}\n",
- "{'loss': 0.1189, 'grad_norm': 2.4627721309661865, 'learning_rate': 0.0, 'epoch': 6.0}\n",
- "100%|█████████████████████████████████████| 3360/3360 [1:31:57<00:00, 1.60s/it][INFO|trainer.py:3788] 2024-07-05 07:48:24,113 >> \n",
- "***** Running Evaluation *****\n",
- "[INFO|trainer.py:3790] 2024-07-05 07:48:24,113 >> Num examples = 46\n",
- "[INFO|trainer.py:3793] 2024-07-05 07:48:24,113 >> Batch size = 1\n",
- "\n",
- " 0%| | 0/46 [00:00> Saving model checkpoint to saves/qwen2-0.5b/lora/sft/checkpoint-3360\n",
- "[INFO|configuration_utils.py:733] 2024-07-05 07:48:28,128 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-0.5b-instruct-bnb-4bit/snapshots/c3b24ce4827d69f5c3bde9aba00047774069ab72/config.json\n",
- "[INFO|configuration_utils.py:800] 2024-07-05 07:48:28,128 >> Model config Qwen2Config {\n",
- "  [... identical to the Qwen2Config dump at checkpoint-2240 above ...]\n",
- "}\n",
- "\n",
- "[INFO|:482] 2024-07-05 07:48:28,348 >> \n",
- "\n",
- "Training completed. Do not forget to share your model on huggingface.co/models =)\n",
- "\n",
- "\n",
- "{'train_runtime': 5527.5332, 'train_samples_per_second': 4.865, 'train_steps_per_second': 0.608, 'train_loss': 0.927943646074051, 'epoch': 6.0}\n",
- "100%|█████████████████████████████████████| 3360/3360 [1:32:01<00:00, 1.64s/it]\n",
- "[INFO|trainer.py:3478] 2024-07-05 07:48:28,351 >> Saving model checkpoint to saves/qwen2-0.5b/lora/sft\n",
- "[INFO|configuration_utils.py:733] 2024-07-05 07:48:29,375 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--unsloth--qwen2-0.5b-instruct-bnb-4bit/snapshots/c3b24ce4827d69f5c3bde9aba00047774069ab72/config.json\n",
- "[INFO|configuration_utils.py:800] 2024-07-05 07:48:29,376 >> Model config Qwen2Config {\n",
- "  [... identical to the Qwen2Config dump at checkpoint-2240 above ...]\n",
- "}\n",
- "\n",
- "***** train metrics *****\n",
- " epoch = 5.9973\n",
- " total_flos = 6320365GF\n",
- " train_loss = 0.9279\n",
- " train_runtime = 1:32:07.53\n",
- " train_samples_per_second = 4.865\n",
- " train_steps_per_second = 0.608\n",
- "Figure saved at: saves/qwen2-0.5b/lora/sft/training_loss.png\n",
- "Figure saved at: saves/qwen2-0.5b/lora/sft/training_eval_loss.png\n",
- "[INFO|trainer.py:3788] 2024-07-05 07:48:29,751 >> \n",
- "***** Running Evaluation *****\n",
- "[INFO|trainer.py:3790] 2024-07-05 07:48:29,752 >> Num examples = 46\n",
- "[INFO|trainer.py:3793] 2024-07-05 07:48:29,752 >> Batch size = 1\n",
- "100%|███████████████████████████████████████████| 46/46 [00:03<00:00, 15.10it/s]\n",
- "***** eval metrics *****\n",
- " epoch = 5.9973\n",
- " eval_loss = 3.5429\n",
- " eval_runtime = 0:00:03.16\n",
- " eval_samples_per_second = 14.532\n",
- " eval_steps_per_second = 14.532\n",
- "[INFO|modelcard.py:449] 2024-07-05 07:48:32,920 >> Dropping the following result as it does not have all the necessary fields:\n",
- "{'task': {'name': 'Causal Language Modeling', 'type': 'text-generation'}}\n",
- "\u001b[34m\u001b[1mwandb\u001b[0m: / 0.561 MB of 0.561 MB uploaded\n",
- "\u001b[34m\u001b[1mwandb\u001b[0m: Run history:\n",
- "\u001b[34m\u001b[1mwandb\u001b[0m: eval/loss ▁▁▃▄▆██\n",
- "\u001b[34m\u001b[1mwandb\u001b[0m: eval/runtime ▁▂▃▂▄▂█\n",
- "\u001b[34m\u001b[1mwandb\u001b[0m: eval/samples_per_second █▇▆▇▅▇▁\n",
- "\u001b[34m\u001b[1mwandb\u001b[0m: eval/steps_per_second █▇▆▇▅▇▁\n",
- "\u001b[34m\u001b[1mwandb\u001b[0m: train/epoch ▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇███\n",
- "\u001b[34m\u001b[1mwandb\u001b[0m: train/global_step ▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇███\n",
- "\u001b[34m\u001b[1mwandb\u001b[0m: train/grad_norm ▂▁▁▂▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂█▂▂▂▂▁▁▁▂▁▁▁▁\n",
- "\u001b[34m\u001b[1mwandb\u001b[0m: train/learning_rate ▂▄▅▇██████▇▇▇▇▇▆▆▆▆▅▅▅▄▄▄▃▃▃▃▂▂▂▂▁▁▁▁▁▁▁\n",
- "\u001b[34m\u001b[1mwandb\u001b[0m: train/loss ████▇▇▇▅▆▆▆▆▆▆▄▄▄▄▄▄▂▂▂▂▂▂▂▂▂▂▁▂▁▁▁▁▁▁▁▁\n",
- "\u001b[34m\u001b[1mwandb\u001b[0m: \n",
- "\u001b[34m\u001b[1mwandb\u001b[0m: Run summary:\n",
- "\u001b[34m\u001b[1mwandb\u001b[0m: eval/loss 3.54292\n",
- "\u001b[34m\u001b[1mwandb\u001b[0m: eval/runtime 3.1655\n",
- "\u001b[34m\u001b[1mwandb\u001b[0m: eval/samples_per_second 14.532\n",
- "\u001b[34m\u001b[1mwandb\u001b[0m: eval/steps_per_second 14.532\n",
- "\u001b[34m\u001b[1mwandb\u001b[0m: total_flos 6786441021493248.0\n",
- "\u001b[34m\u001b[1mwandb\u001b[0m: train/epoch 5.99732\n",
- "\u001b[34m\u001b[1mwandb\u001b[0m: train/global_step 3360\n",
- "\u001b[34m\u001b[1mwandb\u001b[0m: train/grad_norm 2.46277\n",
- "\u001b[34m\u001b[1mwandb\u001b[0m: train/learning_rate 0.0\n",
- "\u001b[34m\u001b[1mwandb\u001b[0m: train/loss 0.1189\n",
- "\u001b[34m\u001b[1mwandb\u001b[0m: train_loss 0.92794\n",
- "\u001b[34m\u001b[1mwandb\u001b[0m: train_runtime 5527.5332\n",
- "\u001b[34m\u001b[1mwandb\u001b[0m: train_samples_per_second 4.865\n",
- "\u001b[34m\u001b[1mwandb\u001b[0m: train_steps_per_second 0.608\n",
- "\u001b[34m\u001b[1mwandb\u001b[0m: \n",
- "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run \u001b[33mqwen2_0.5b_lora_sft\u001b[0m at: \u001b[34m\u001b[4mhttps://wandb.ai/inflaton-ai/huggingface/runs/3amepb0m\u001b[0m\n",
- "\u001b[34m\u001b[1mwandb\u001b[0m: ⭐️ View project at: \u001b[34m\u001b[4mhttps://wandb.ai/inflaton-ai/huggingface\u001b[0m\n",
- "\u001b[34m\u001b[1mwandb\u001b[0m: Synced 6 W&B file(s), 0 media file(s), 0 artifact file(s) and 0 other file(s)\n",
- "\u001b[34m\u001b[1mwandb\u001b[0m: Find logs at: \u001b[35m\u001b[1m./wandb/run-20240705_061623-3amepb0m/logs\u001b[0m\n",
- "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m The new W&B backend becomes opt-out in version 0.18.0; try it out with `wandb.require(\"core\")`! See https://wandb.me/wandb-core for more information.\n",
- "CPU times: user 1min 4s, sys: 21.9 s, total: 1min 26s\n",
- "Wall time: 1h 33min 9s\n"
- ]
- }
- ],
- "source": [
- "%%time\n",
- "\n",
- "!./scripts/tune-lf.sh config/qwen2_0.5b_lora_sft_unsloth.yaml"
- ]
- }
- ],
- "metadata": {
- "accelerator": "GPU",
- "application/vnd.databricks.v1+notebook": {
- "dashboards": [],
- "environmentMetadata": null,
- "language": "python",
- "notebookMetadata": {
- "pythonIndentUnit": 4
- },
- "notebookName": "07_MAC_+_Qwen2-7B-Instructi_Unsloth_train",
- "widgets": {}
- },
- "colab": {
- "gpuType": "T4",
- "provenance": []
- },
- "kernelspec": {
- "display_name": "Python 3",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.11.9"
- },
- "widgets": {
- "application/vnd.jupyter.widget-state+json": {
- [... ipywidgets widget-state entries elided: default LayoutModel/StyleModel/ProgressStyleModel blocks and progress-bar snapshots from earlier cells ...]
- "668d5377ca56426a99753867e6e24862": {
- "model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "697f027529b54ee9956bae78a11e0611": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "69ac12aec0714318bf2c83d4f4e745f5": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "6b2012c3f88547af8884a9ea90e3164b": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_938f45f1b3e24118b815d96ae34ba86a", - "placeholder": "โ€‹", - "style": "IPY_MODEL_9367047a800747f79c6b225d92397846", - "value": "โ€‡44.3M/44.3Mโ€‡[00:01<00:00,โ€‡31.0MB/s]" - } - }, - "6b91feeed5464877991ac2c207aebe7c": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_4b2061b8a73c43ffb0c2f83daf0d0183", - "placeholder": "โ€‹", - "style": "IPY_MODEL_69ac12aec0714318bf2c83d4f4e745f5", - "value": "special_tokens_map.json:โ€‡100%" - } - }, - "6d3b9a05db0b4dadb638c686faa0c40a": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "6dbbedeca9314e66ae50e44ffa31a414": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": 
"1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "6e34619b45934040b6092e6fb01ea7fe": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "71ce208e20d6483abb9ed923510c86d7": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_d69dc491b3ab44d7852b21873ed7bb7f", - "placeholder": "โ€‹", - "style": "IPY_MODEL_f401d53bf28e44eb906bce6c05412662", - "value": "โ€‡51760/51760โ€‡[00:01<00:00,โ€‡45512.81โ€‡examples/s]" - } - }, - "7358cdad832342c983e31efb8754ab78": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "73e352a3404f4c7dad0737f57d29e92f": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", 
- "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_988a0e8c1f89446086858da0a891a79c", - "IPY_MODEL_4ccedf0d93094e63b57a0f8a434fba06", - "IPY_MODEL_6b2012c3f88547af8884a9ea90e3164b" - ], - "layout": "IPY_MODEL_7e29cb8dd4df4d5b94407cd8fd3f2011" - } - }, - "74501720ac7e4dbb911a4a99b3633bc6": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "78e5400bff924a92a4cc61c4ff18b182": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_b9b313fd861948f5aba25b24b1518d30", - "placeholder": "โ€‹", - "style": "IPY_MODEL_4c666f4ace3943f8b80ecd20e7503236", - "value": "โ€‡1.18k/1.18kโ€‡[00:00<00:00,โ€‡31.3kB/s]" - } - }, - "7975adbc2ec5489ea7fa0167e620d85c": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_6e34619b45934040b6092e6fb01ea7fe", - "max": 51760, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_271ddaa553a042d09b6db7b450643d8f", - "value": 51760 - } - }, - "7e29cb8dd4df4d5b94407cd8fd3f2011": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - 
"810ff6c0e17d4fa09a30fef27eacff90": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "89965917796a4f81b899fdc7685f33df": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "89b2ef0dbfea47ab8e6f8d659e3351d1": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_b8908fa0df3743ecb9d12983a739104f", - "placeholder": "โ€‹", - "style": "IPY_MODEL_177c78fce95d4b4ab33057c5a048d693", - "value": "โ€‡9.09M/9.09Mโ€‡[00:00<00:00,โ€‡32.6MB/s]" - } - }, - "8b3505352a5a42bf910428c40ce40465": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_49277aeeac16434a865a4d12308b1abc", - "placeholder": "โ€‹", - "style": "IPY_MODEL_2157f01726d748f8a9ae4a00664430da", - "value": "โ€‡5.70G/5.70Gโ€‡[01:02<00:00,โ€‡30.1MB/s]" - } - }, - "8fc142b628fb40568730234de1cafde2": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_4ae7e449e4ea4c729b5f34607c18ebae", - "max": 172, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_3572201bd4d74a58b7a665f9bdfdcdba", - "value": 172 - } - }, - "9367047a800747f79c6b225d92397846": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - 
"938f45f1b3e24118b815d96ae34ba86a": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "95fbe66647904c06a20f640630d6dc0e": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_b0a370dc20654b279b9680692e34418e", - "placeholder": "โ€‹", - "style": "IPY_MODEL_cfeb365ddf7548d58b2557f22737fcf5", - "value": "โ€‡11.6k/11.6kโ€‡[00:00<00:00,โ€‡716kB/s]" - } - }, - "988a0e8c1f89446086858da0a891a79c": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_ad2be500fc164c0f86f33e914ef8e6a0", - "placeholder": "โ€‹", - "style": "IPY_MODEL_5234566b1bfc4655b8d582ea5b46ed9f", - "value": "Downloadingโ€‡data:โ€‡100%" - } - }, - "98c58f23f4d549518832cb2d18f796e8": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_09b76013aa9e45efb6deb23a7a0d0925", - "IPY_MODEL_39b29a75374b45c0a22506010be2b84e", - "IPY_MODEL_78e5400bff924a92a4cc61c4ff18b182" - ], - "layout": "IPY_MODEL_2a58d04b428c46f4b3dbadd3bc6cd529" - } - }, - "99fdbb0300c14c139d1937c646f0cfe7": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", 
- "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_7358cdad832342c983e31efb8754ab78", - "placeholder": "โ€‹", - "style": "IPY_MODEL_e9adf418296e436fb48bb9f78885598b", - "value": "โ€‡51760/51760โ€‡[00:01<00:00,โ€‡38665.95โ€‡examples/s]" - } - }, - "9f679ad3ec7f4fe8ad0510ffb57bc2ab": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_4ea63adfce694725bdba878aef709dd3", - "placeholder": "โ€‹", - "style": "IPY_MODEL_74501720ac7e4dbb911a4a99b3633bc6", - "value": "tokenizer.json:โ€‡100%" - } - }, - "a0037bdccf254159becde630bee3d1db": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "a069d2ab23824f29aa320ac256e2cfe9": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "a0bf9160eb2647409b3200270914b90f": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "a41dc44766444a998bec2d777f249d23": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - 
"_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "a8464a4c711e4e00aafdfc919b60d07e": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_fb995c740590427b882572c81d4e848c", - "placeholder": "โ€‹", - "style": "IPY_MODEL_201b59ccd9f845e197029b57e424aefc", - "value": "โ€‡172/172โ€‡[00:00<00:00,โ€‡12.0kB/s]" - } - }, - "a9f0cc51fc3d4d7b874c32dcf1c5bdf2": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "ad2be500fc164c0f86f33e914ef8e6a0": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - 
"grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "b0240cd9a4554b29ae11f8051984a1c6": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_edaf890370314a218f138015faa0b05d", - "placeholder": "โ€‹", - "style": "IPY_MODEL_697f027529b54ee9956bae78a11e0611", - "value": "Map:โ€‡100%" - } - }, - "b0a370dc20654b279b9680692e34418e": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "b518dcee69074b87be73957cd810e7ed": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_d891f8d0b1fc462f8008d02bb2a15692", - "placeholder": "โ€‹", - "style": "IPY_MODEL_cced8fd7e998472794f3f3e3018956a5", - "value": "tokenizer_config.json:โ€‡100%" - } - }, - "b8908fa0df3743ecb9d12983a739104f": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - 
"_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "b993eaec6b224440bf80c0958c6fb536": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "b9b313fd861948f5aba25b24b1518d30": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "ba90fdb8822d47dab7ba203bee297f37": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - 
"_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_0f8b6bfe16894500838793f2491d403f", - "placeholder": "โ€‹", - "style": "IPY_MODEL_bb19f6c747754682a514373a3a0535ba", - "value": "Downloadingโ€‡readme:โ€‡100%" - } - }, - "bb19f6c747754682a514373a3a0535ba": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "bc883d4cf13e4f8b8a4fe5f410cb6efd": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_e9159e03e61f4f56978ece9c3bca49b2", - "max": 51760, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_810ff6c0e17d4fa09a30fef27eacff90", - "value": 51760 - } - }, - "c161d94df0f04feba9542237e0856c22": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "c22f71b1f85843209d7e5321506b9cb9": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_1f44c9ce1adf470cbb19784493ed209f", - "IPY_MODEL_f1addc4479d849879e743cf9089e6540", - "IPY_MODEL_8b3505352a5a42bf910428c40ce40465" - ], - 
"layout": "IPY_MODEL_4c4c88d4c701450692fa0f6b0c5764b0" - } - }, - "c4f2b06a82fd4987b8b659524a7b503b": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "cca8113c54c0495daedce1327bf9c68b": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_e02f9b7849c64531835eb77b860d1c93", - "max": 464, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_56aee4853b7740e6a977254f5d1fa66d", - "value": 464 - } - }, - "cced8fd7e998472794f3f3e3018956a5": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "cf245afeb1c04f29a24d291608c3d157": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_b518dcee69074b87be73957cd810e7ed", - "IPY_MODEL_e29104486d594b2992d7285e0ef77371", - "IPY_MODEL_6578fd7acdb54c4c93528ea431fd0144" - ], - "layout": "IPY_MODEL_d35db8148a354c56aaac56dbae22536f" - } - }, - "cfe8cae0e22b495bafa221a63d13b283": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, 
- "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "cfeb365ddf7548d58b2557f22737fcf5": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "d1b47d39450d4019ae85c9b2f943eeaf": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_4dcf6ff672d24983a1877a8431709aa9", - "IPY_MODEL_7975adbc2ec5489ea7fa0167e620d85c", - "IPY_MODEL_71ce208e20d6483abb9ed923510c86d7" - ], - "layout": "IPY_MODEL_cfe8cae0e22b495bafa221a63d13b283" - } - }, - "d35db8148a354c56aaac56dbae22536f": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "d69dc491b3ab44d7852b21873ed7bb7f": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": 
null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "d891f8d0b1fc462f8008d02bb2a15692": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "d8e5318cead340c4adbeaccc05d39225": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "daf4cd890b35422683d22fd30bc71e83": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_b0240cd9a4554b29ae11f8051984a1c6", - "IPY_MODEL_bc883d4cf13e4f8b8a4fe5f410cb6efd", - "IPY_MODEL_99fdbb0300c14c139d1937c646f0cfe7" - ], - "layout": "IPY_MODEL_c161d94df0f04feba9542237e0856c22" - } - }, - "db19fc8d37db4e45a5790a876836d8c4": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - 
"max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "de868e26e7154f62aa86223a539ad421": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "dea41c5260884aa6879b5e1d1697b14f": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "e02f9b7849c64531835eb77b860d1c93": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "e29104486d594b2992d7285e0ef77371": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": 
"FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_a9f0cc51fc3d4d7b874c32dcf1c5bdf2", - "max": 50641, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_2f6c70dd266c4816bfad3fd3d192929a", - "value": 50641 - } - }, - "e36a3f9eff0e4cf68834d66b0213ae96": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "e9159e03e61f4f56978ece9c3bca49b2": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "e9adf418296e436fb48bb9f78885598b": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "edaf890370314a218f138015faa0b05d": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { 
- "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "f1addc4479d849879e743cf9089e6540": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_43dec2ede91341f5af60eb522e18e984", - "max": 5702746405, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_d8e5318cead340c4adbeaccc05d39225", - "value": 5702746405 - } - }, - "f2df530d22c74977b249dd9fb5f4829b": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_21db8a77b00d4a4e82fdfa608657531f", - "max": 9085698, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_6dbbedeca9314e66ae50e44ffa31a414", - "value": 9085698 - } - }, - "f401d53bf28e44eb906bce6c05412662": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "fb995c740590427b882572c81d4e848c": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": 
null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "fce7a61c25ec4390af43d92b7c473a45": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_30307300bc4e4baf96560e30969a82b6", - "IPY_MODEL_8fc142b628fb40568730234de1cafde2", - "IPY_MODEL_a8464a4c711e4e00aafdfc919b60d07e" - ], - "layout": "IPY_MODEL_5f40db8173dd4d76b6ef5ed6d9ec8b6e" - } - }, - "fdb1941405ed4e4aa06019933892deb3": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - } - } - } - }, - "nbformat": 4, - "nbformat_minor": 0 -}