diff --git "a/notebooks/08_eval-lf-py3.11.ipynb" "b/notebooks/08_eval-lf-py3.11.ipynb" deleted file mode 100644--- "a/notebooks/08_eval-lf-py3.11.ipynb" +++ /dev/null @@ -1,6437 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "application/vnd.databricks.v1+cell": { - "cellMetadata": {}, - "inputWidgets": {}, - "nuid": "0ea8b46b-839b-445b-8043-ccdf4e920ace", - "showTitle": false, - "title": "" - } - }, - "outputs": [], - "source": [ - "%load_ext autoreload\n", - "%autoreload 2" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "application/vnd.databricks.v1+cell": { - "cellMetadata": {}, - "inputWidgets": {}, - "nuid": "6d394937-6c99-4a7c-9d32-7600a280032f", - "showTitle": false, - "title": "" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "workding dir: /home/inflaton/code/projects/courses/llm-finetuning\n" - ] - } - ], - "source": [ - "import os\n", - "import sys\n", - "from pathlib import Path\n", - "\n", - "workding_dir = str(Path.cwd().parent)\n", - "os.chdir(workding_dir)\n", - "sys.path.append(workding_dir)\n", - "print(\"workding dir:\", workding_dir)" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": { - "application/vnd.databricks.v1+cell": { - "cellMetadata": {}, - "inputWidgets": {}, - "nuid": "9f67ec60-2f24-411c-84eb-0dd664b44775", - "showTitle": false, - "title": "" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "loading env vars from: /home/inflaton/code/projects/courses/llm-finetuning/.env\n" - ] - }, - { - "data": { - "text/plain": [ - "True" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from dotenv import find_dotenv, load_dotenv\n", - "\n", - "found_dotenv = find_dotenv(\".env\")\n", - "\n", - "if len(found_dotenv) == 0:\n", - " found_dotenv = find_dotenv(\".env.example\")\n", - "print(f\"loading env vars 
from: {found_dotenv}\")\n", - "load_dotenv(found_dotenv, override=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "application/vnd.databricks.v1+cell": { - "cellMetadata": {}, - "inputWidgets": {}, - "nuid": "f1597656-8042-4878-9d3b-9ebfb8dd86dc", - "showTitle": false, - "title": "" - } - }, - "outputs": [ - { - "data": { - "text/plain": [ - "('unsloth/Qwen2-0.5B-Instruct',\n", - " True,\n", - " None,\n", - " None,\n", - " 2048,\n", - " 10,\n", - " None,\n", - " 'datasets/mac/mac.tsv',\n", - " 'results/mac-results_lf.csv')" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "import os\n", - "\n", - "model_name = os.getenv(\"MODEL_NAME\")\n", - "token = os.getenv(\"HF_TOKEN\") or None\n", - "load_in_4bit = os.getenv(\"LOAD_IN_4BIT\") == \"true\"\n", - "local_model = os.getenv(\"LOCAL_MODEL\")\n", - "hub_model = os.getenv(\"HUB_MODEL\")\n", - "num_train_epochs = int(os.getenv(\"NUM_TRAIN_EPOCHS\") or 0)\n", - "data_path = os.getenv(\"DATA_PATH\")\n", - "results_path = os.getenv(\"RESULTS_PATH\")\n", - "\n", - "max_seq_length = 2048 # Choose any! We auto support RoPE Scaling internally!\n", - "dtype = (\n", - " None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+\n", - ")\n", - "\n", - "model_name, load_in_4bit, local_model, hub_model, max_seq_length, num_train_epochs, dtype, data_path, results_path" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Sun Jun 30 13:21:10 2024 \n", - "+---------------------------------------------------------------------------------------+\n", - "| NVIDIA-SMI 545.23.07 Driver Version: 546.12 CUDA Version: 12.3 |\n", - "|-----------------------------------------+----------------------+----------------------+\n", - "| GPU Name Persistence-M | Bus-Id Disp.A | Volatile Uncorr. 
ECC |\n", - "| Fan Temp Perf Pwr:Usage/Cap | Memory-Usage | GPU-Util Compute M. |\n", - "| | | MIG M. |\n", - "|=========================================+======================+======================|\n", - "| 0 NVIDIA GeForce RTX 4080 ... On | 00000000:01:00.0 Off | N/A |\n", - "| N/A 49C P8 3W / 150W | 194MiB / 12282MiB | 0% Default |\n", - "| | | N/A |\n", - "+-----------------------------------------+----------------------+----------------------+\n", - " \n", - "+---------------------------------------------------------------------------------------+\n", - "| Processes: |\n", - "| GPU GI CI PID Type Process name GPU Memory |\n", - "| ID ID Usage |\n", - "|=======================================================================================|\n", - "| No running processes found |\n", - "+---------------------------------------------------------------------------------------+\n" - ] - } - ], - "source": [ - "!nvidia-smi" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Python 3.11.9\n", - "\u001b[33mWARNING: Package(s) not found: flash-attn\u001b[0m\u001b[33m\n", - "\u001b[0mCPU times: user 32 ms, sys: 10.6 ms, total: 42.6 ms\n", - "Wall time: 1.23 s\n" - ] - } - ], - "source": [ - "%%time\n", - "!python --version\n", - "!pip show flash-attn" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Epoch 1\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "loading env vars from: /home/inflaton/code/projects/courses/llm-finetuning/.env\n", - "Adding /home/inflaton/code/projects/courses/llm-finetuning to sys.path\n", - "πŸ¦₯ Unsloth: Will patch your computer to enable 2x faster free finetuning.\n", - "[nltk_data] Downloading package wordnet to /home/inflaton/nltk_data...\n", - "[nltk_data] Package wordnet is already 
up-to-date!\n", - "[nltk_data] Downloading package punkt to /home/inflaton/nltk_data...\n", - "[nltk_data] Package punkt is already up-to-date!\n", - "[nltk_data] Downloading package omw-1.4 to /home/inflaton/nltk_data...\n", - "[nltk_data] Package omw-1.4 is already up-to-date!\n", - "loading /home/inflaton/code/projects/courses/llm-finetuning/llm_toolkit/translation_engine.py\n", - "Qwen/Qwen2-0.5B-Instruct llama-factory/saves/qwen2-0.5b/lora/sft/checkpoint-560 True datasets/mac/mac.tsv results/mac-results_lf.csv\n", - "(1) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n", - "0.0 GB of memory reserved.\n", - "loading model: Qwen/Qwen2-0.5B-Instruct\n", - "[INFO|tokenization_utils_base.py:2161] 2024-06-30 13:21:34,519 >> loading file vocab.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/vocab.json\n", - "[INFO|tokenization_utils_base.py:2161] 2024-06-30 13:21:34,519 >> loading file merges.txt from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/merges.txt\n", - "[INFO|tokenization_utils_base.py:2161] 2024-06-30 13:21:34,519 >> loading file tokenizer.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/tokenizer.json\n", - "[INFO|tokenization_utils_base.py:2161] 2024-06-30 13:21:34,519 >> loading file added_tokens.json from cache at None\n", - "[INFO|tokenization_utils_base.py:2161] 2024-06-30 13:21:34,519 >> loading file special_tokens_map.json from cache at None\n", - "[INFO|tokenization_utils_base.py:2161] 2024-06-30 13:21:34,520 >> loading file tokenizer_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/tokenizer_config.json\n", - "[WARNING|logging.py:313] 2024-06-30 13:21:34,863 >> 
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n", - "06/30/2024 13:21:34 - INFO - llamafactory.data.template - Replace eos token: <|im_end|>\n", - "06/30/2024 13:21:34 - INFO - llamafactory.data.template - Add <|im_start|> to stop words.\n", - "[INFO|configuration_utils.py:733] 2024-06-30 13:21:35,179 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/config.json\n", - "[INFO|configuration_utils.py:800] 2024-06-30 13:21:35,181 >> Model config Qwen2Config {\n", - " \"_name_or_path\": \"Qwen/Qwen2-0.5B-Instruct\",\n", - " \"architectures\": [\n", - " \"Qwen2ForCausalLM\"\n", - " ],\n", - " \"attention_dropout\": 0.0,\n", - " \"bos_token_id\": 151643,\n", - " \"eos_token_id\": 151645,\n", - " \"hidden_act\": \"silu\",\n", - " \"hidden_size\": 896,\n", - " \"initializer_range\": 0.02,\n", - " \"intermediate_size\": 4864,\n", - " \"max_position_embeddings\": 32768,\n", - " \"max_window_layers\": 24,\n", - " \"model_type\": \"qwen2\",\n", - " \"num_attention_heads\": 14,\n", - " \"num_hidden_layers\": 24,\n", - " \"num_key_value_heads\": 2,\n", - " \"rms_norm_eps\": 1e-06,\n", - " \"rope_theta\": 1000000.0,\n", - " \"sliding_window\": 32768,\n", - " \"tie_word_embeddings\": true,\n", - " \"torch_dtype\": \"bfloat16\",\n", - " \"transformers_version\": \"4.42.3\",\n", - " \"use_cache\": true,\n", - " \"use_sliding_window\": false,\n", - " \"vocab_size\": 151936\n", - "}\n", - "\n", - "06/30/2024 13:21:35 - INFO - llamafactory.model.model_utils.quantization - Quantizing model to 4 bit with bitsandbytes.\n", - "06/30/2024 13:21:35 - INFO - llamafactory.model.patcher - Using KV cache for faster generation.\n", - "[INFO|modeling_utils.py:3556] 2024-06-30 13:21:35,287 >> loading weights file model.safetensors from cache at 
/home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/model.safetensors\n", - "[INFO|modeling_utils.py:1531] 2024-06-30 13:21:37,852 >> Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16.\n", - "[INFO|configuration_utils.py:1000] 2024-06-30 13:21:37,860 >> Generate config GenerationConfig {\n", - " \"bos_token_id\": 151643,\n", - " \"eos_token_id\": 151645\n", - "}\n", - "\n", - "[INFO|modeling_utils.py:4364] 2024-06-30 13:22:34,747 >> All model checkpoint weights were used when initializing Qwen2ForCausalLM.\n", - "\n", - "[INFO|modeling_utils.py:4372] 2024-06-30 13:22:34,747 >> All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at Qwen/Qwen2-0.5B-Instruct.\n", - "If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training.\n", - "[INFO|configuration_utils.py:955] 2024-06-30 13:22:35,055 >> loading configuration file generation_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/generation_config.json\n", - "[INFO|configuration_utils.py:1000] 2024-06-30 13:22:35,055 >> Generate config GenerationConfig {\n", - " \"bos_token_id\": 151643,\n", - " \"do_sample\": true,\n", - " \"eos_token_id\": [\n", - " 151645,\n", - " 151643\n", - " ],\n", - " \"pad_token_id\": 151643,\n", - " \"repetition_penalty\": 1.1,\n", - " \"temperature\": 0.7,\n", - " \"top_k\": 20,\n", - " \"top_p\": 0.8\n", - "}\n", - "\n", - "06/30/2024 13:22:35 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.\n", - "06/30/2024 13:22:36 - INFO - llamafactory.model.adapter - Loaded adapter(s): llama-factory/saves/qwen2-0.5b/lora/sft/checkpoint-560\n", - "06/30/2024 13:22:36 - INFO - llamafactory.model.loader - all params: 498,431,872\n", - "(2) 
GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n", - "0.666 GB of memory reserved.\n", - "loading train/test data files\n", - "DatasetDict({\n", - " train: Dataset({\n", - " features: ['chinese', 'english', 'text', 'prompt'],\n", - " num_rows: 4528\n", - " })\n", - " test: Dataset({\n", - " features: ['chinese', 'english', 'text', 'prompt'],\n", - " num_rows: 1133\n", - " })\n", - "})\n", - "Evaluating model: Qwen/Qwen2-0.5B-Instruct\n", - " 0%| | 0/1133 [00:00\n", - "--------\n", - "step 2: Old Trinket raised his gun and squinted his triangular eye. The trigger sounded as if a bullet had been shot; the bullets ricocheted off of the branches like hailstones. The sound was so loud that it could be heard from miles away.\n", - "--------\n", - "step 3: Old Trinket raised his gun and squinted his triangular eye. The trigger sounded as if a bullet had been shot; the bullets ricocheted off of the branches like hailstones. The sound was so loud that it could be heard from miles away.\n", - "100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 1133/1133 [1:43:14<00:00, 5.47s/it]\n", - "(3) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n", - "0.9 GB of memory reserved.\n", - " chinese ... Qwen/Qwen2-0.5B-Instruct_checkpoint-560\n", - "0 老耿端衷ζžͺοΌŒηœ―ηΌθ΅·δΈ€εͺδΈ‰θ§’ηœΌοΌŒδΈ€ζ‚ζ‰³ζœΊε“δΊ†ζžͺοΌŒε†°ι›Ήθˆ¬ηš„ι‡‘ιΊ»ι›€εŠˆε“©ε•ͺε•¦εΎ€δΈ‹θ½οΌŒι“η ‚ε­εœ¨ζŸ³ζžι—΄ι£ž... ... 
Old Trinket raised his gun and squinted his tr...\n", - "\n", - "[1 rows x 3 columns]\n", - "{'accuracy': 0.00088261253309797, 'correct_ids': [272], 'meteor': 0.28906766286950575, 'bleu_scores': {'bleu': 0.05350226890847294, 'precisions': [0.34546985517009093, 0.08439261827222748, 0.02716499544211486, 0.011066742726754135], 'brevity_penalty': 0.9833003245834433, 'length_ratio': 0.9834382245776747, 'translation_length': 29690, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.32218455635719456, 'rouge2': 0.09323903991316618, 'rougeL': 0.26091815189986767, 'rougeLsum': 0.2609816275457334}}\n", - "Epoch 2\n", - "loading env vars from: /home/inflaton/code/projects/courses/llm-finetuning/.env\n", - "Adding /home/inflaton/code/projects/courses/llm-finetuning to sys.path\n", - "πŸ¦₯ Unsloth: Will patch your computer to enable 2x faster free finetuning.\n", - "[nltk_data] Downloading package wordnet to /home/inflaton/nltk_data...\n", - "[nltk_data] Package wordnet is already up-to-date!\n", - "[nltk_data] Downloading package punkt to /home/inflaton/nltk_data...\n", - "[nltk_data] Package punkt is already up-to-date!\n", - "[nltk_data] Downloading package omw-1.4 to /home/inflaton/nltk_data...\n", - "[nltk_data] Package omw-1.4 is already up-to-date!\n", - "loading /home/inflaton/code/projects/courses/llm-finetuning/llm_toolkit/translation_engine.py\n", - "Qwen/Qwen2-0.5B-Instruct llama-factory/saves/qwen2-0.5b/lora/sft/checkpoint-1120 True datasets/mac/mac.tsv results/mac-results_lf.csv\n", - "(1) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. 
Max memory = 11.994 GB.\n", - "0.0 GB of memory reserved.\n", - "loading model: Qwen/Qwen2-0.5B-Instruct\n", - "[INFO|tokenization_utils_base.py:2161] 2024-06-30 15:06:25,573 >> loading file vocab.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/vocab.json\n", - "[INFO|tokenization_utils_base.py:2161] 2024-06-30 15:06:25,574 >> loading file merges.txt from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/merges.txt\n", - "[INFO|tokenization_utils_base.py:2161] 2024-06-30 15:06:25,574 >> loading file tokenizer.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/tokenizer.json\n", - "[INFO|tokenization_utils_base.py:2161] 2024-06-30 15:06:25,574 >> loading file added_tokens.json from cache at None\n", - "[INFO|tokenization_utils_base.py:2161] 2024-06-30 15:06:25,574 >> loading file special_tokens_map.json from cache at None\n", - "[INFO|tokenization_utils_base.py:2161] 2024-06-30 15:06:25,574 >> loading file tokenizer_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/tokenizer_config.json\n", - "[WARNING|logging.py:313] 2024-06-30 15:06:25,971 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n", - "06/30/2024 15:06:25 - INFO - llamafactory.data.template - Replace eos token: <|im_end|>\n", - "06/30/2024 15:06:25 - INFO - llamafactory.data.template - Add <|im_start|> to stop words.\n", - "[INFO|configuration_utils.py:733] 2024-06-30 15:06:26,308 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/config.json\n", - 
"[INFO|configuration_utils.py:800] 2024-06-30 15:06:26,309 >> Model config Qwen2Config {\n", - " \"_name_or_path\": \"Qwen/Qwen2-0.5B-Instruct\",\n", - " \"architectures\": [\n", - " \"Qwen2ForCausalLM\"\n", - " ],\n", - " \"attention_dropout\": 0.0,\n", - " \"bos_token_id\": 151643,\n", - " \"eos_token_id\": 151645,\n", - " \"hidden_act\": \"silu\",\n", - " \"hidden_size\": 896,\n", - " \"initializer_range\": 0.02,\n", - " \"intermediate_size\": 4864,\n", - " \"max_position_embeddings\": 32768,\n", - " \"max_window_layers\": 24,\n", - " \"model_type\": \"qwen2\",\n", - " \"num_attention_heads\": 14,\n", - " \"num_hidden_layers\": 24,\n", - " \"num_key_value_heads\": 2,\n", - " \"rms_norm_eps\": 1e-06,\n", - " \"rope_theta\": 1000000.0,\n", - " \"sliding_window\": 32768,\n", - " \"tie_word_embeddings\": true,\n", - " \"torch_dtype\": \"bfloat16\",\n", - " \"transformers_version\": \"4.42.3\",\n", - " \"use_cache\": true,\n", - " \"use_sliding_window\": false,\n", - " \"vocab_size\": 151936\n", - "}\n", - "\n", - "06/30/2024 15:06:26 - INFO - llamafactory.model.model_utils.quantization - Quantizing model to 4 bit with bitsandbytes.\n", - "06/30/2024 15:06:26 - INFO - llamafactory.model.patcher - Using KV cache for faster generation.\n", - "[INFO|modeling_utils.py:3556] 2024-06-30 15:06:26,450 >> loading weights file model.safetensors from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/model.safetensors\n", - "[INFO|modeling_utils.py:1531] 2024-06-30 15:06:28,647 >> Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16.\n", - "[INFO|configuration_utils.py:1000] 2024-06-30 15:06:28,655 >> Generate config GenerationConfig {\n", - " \"bos_token_id\": 151643,\n", - " \"eos_token_id\": 151645\n", - "}\n", - "\n", - "[INFO|modeling_utils.py:4364] 2024-06-30 15:07:24,483 >> All model checkpoint weights were used when initializing Qwen2ForCausalLM.\n", - "\n", - 
"[INFO|modeling_utils.py:4372] 2024-06-30 15:07:24,484 >> All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at Qwen/Qwen2-0.5B-Instruct.\n", - "If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training.\n", - "[INFO|configuration_utils.py:955] 2024-06-30 15:07:24,816 >> loading configuration file generation_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/generation_config.json\n", - "[INFO|configuration_utils.py:1000] 2024-06-30 15:07:24,816 >> Generate config GenerationConfig {\n", - " \"bos_token_id\": 151643,\n", - " \"do_sample\": true,\n", - " \"eos_token_id\": [\n", - " 151645,\n", - " 151643\n", - " ],\n", - " \"pad_token_id\": 151643,\n", - " \"repetition_penalty\": 1.1,\n", - " \"temperature\": 0.7,\n", - " \"top_k\": 20,\n", - " \"top_p\": 0.8\n", - "}\n", - "\n", - "06/30/2024 15:07:25 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.\n", - "06/30/2024 15:07:25 - INFO - llamafactory.model.adapter - Loaded adapter(s): llama-factory/saves/qwen2-0.5b/lora/sft/checkpoint-1120\n", - "06/30/2024 15:07:25 - INFO - llamafactory.model.loader - all params: 498,431,872\n", - "(2) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n", - "0.666 GB of memory reserved.\n", - "loading train/test data files\n", - "DatasetDict({\n", - " train: Dataset({\n", - " features: ['chinese', 'english', 'text', 'prompt'],\n", - " num_rows: 4528\n", - " })\n", - " test: Dataset({\n", - " features: ['chinese', 'english', 'text', 'prompt'],\n", - " num_rows: 1133\n", - " })\n", - "})\n", - "Evaluating model: Qwen/Qwen2-0.5B-Instruct\n", - " 0%| | 0/1133 [00:00\n", - "--------\n", - "step 2: Old Geng raised his gun, his eyes narrowed. 
The shotgun fired; a deafening boom of gunfire followed, crickets chirping in the air, the sound like the cracking of ice chips on branches.\n", - "--------\n", - "step 3: Old Geng raised his gun, his eyes narrowed. The shotgun fired; a deafening boom of gunfire followed, crickets chirping in the air, the sound like the cracking of ice chips on branches.\n", - "100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 1133/1133 [46:43<00:00, 2.47s/it]\n", - "(3) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n", - "0.9 GB of memory reserved.\n", - " chinese ... Qwen/Qwen2-0.5B-Instruct_checkpoint-1120\n", - "0 老耿端衷ζžͺοΌŒηœ―ηΌθ΅·δΈ€εͺδΈ‰θ§’ηœΌοΌŒδΈ€ζ‚ζ‰³ζœΊε“δΊ†ζžͺοΌŒε†°ι›Ήθˆ¬ηš„ι‡‘ιΊ»ι›€εŠˆε“©ε•ͺε•¦εΎ€δΈ‹θ½οΌŒι“η ‚ε­εœ¨ζŸ³ζžι—΄ι£ž... ... Old Geng raised his gun, his eyes narrowed. Th...\n", - "\n", - "[1 rows x 4 columns]\n", - "{'accuracy': 0.00088261253309797, 'correct_ids': [659], 'meteor': 0.3075388134142166, 'bleu_scores': {'bleu': 0.06482340202869877, 'precisions': [0.36907098754416645, 0.10273004537677602, 0.038322655794991264, 0.01656785511248274], 'brevity_penalty': 0.9254426305194808, 'length_ratio': 0.9280887711162636, 'translation_length': 28019, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.34374303845779386, 'rouge2': 0.11899790599832506, 'rougeL': 0.2851818971023854, 'rougeLsum': 0.285674896233578}}\n", - "Epoch 3\n", - "loading env vars from: /home/inflaton/code/projects/courses/llm-finetuning/.env\n", - "Adding /home/inflaton/code/projects/courses/llm-finetuning to sys.path\n", - "πŸ¦₯ Unsloth: Will patch your computer to enable 2x faster free finetuning.\n", - "[nltk_data] Downloading package wordnet to /home/inflaton/nltk_data...\n", - "[nltk_data] Package wordnet is already up-to-date!\n", - "[nltk_data] Downloading package punkt to /home/inflaton/nltk_data...\n", - "[nltk_data] Package punkt is already up-to-date!\n", - "[nltk_data] 
Downloading package omw-1.4 to /home/inflaton/nltk_data...\n", - "[nltk_data] Package omw-1.4 is already up-to-date!\n", - "loading /home/inflaton/code/projects/courses/llm-finetuning/llm_toolkit/translation_engine.py\n", - "Qwen/Qwen2-0.5B-Instruct llama-factory/saves/qwen2-0.5b/lora/sft/checkpoint-1680 True datasets/mac/mac.tsv results/mac-results_lf.csv\n", - "(1) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n", - "0.0 GB of memory reserved.\n", - "loading model: Qwen/Qwen2-0.5B-Instruct\n", - "[INFO|tokenization_utils_base.py:2161] 2024-06-30 15:54:26,677 >> loading file vocab.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/vocab.json\n", - "[INFO|tokenization_utils_base.py:2161] 2024-06-30 15:54:26,678 >> loading file merges.txt from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/merges.txt\n", - "[INFO|tokenization_utils_base.py:2161] 2024-06-30 15:54:26,678 >> loading file tokenizer.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/tokenizer.json\n", - "[INFO|tokenization_utils_base.py:2161] 2024-06-30 15:54:26,678 >> loading file added_tokens.json from cache at None\n", - "[INFO|tokenization_utils_base.py:2161] 2024-06-30 15:54:26,678 >> loading file special_tokens_map.json from cache at None\n", - "[INFO|tokenization_utils_base.py:2161] 2024-06-30 15:54:26,678 >> loading file tokenizer_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/tokenizer_config.json\n", - "[WARNING|logging.py:313] 2024-06-30 15:54:26,803 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n", - "06/30/2024 15:54:26 - INFO - 
llamafactory.data.template - Replace eos token: <|im_end|>\n", - "06/30/2024 15:54:26 - INFO - llamafactory.data.template - Add <|im_start|> to stop words.\n", - "[INFO|configuration_utils.py:733] 2024-06-30 15:54:27,176 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/config.json\n", - "[INFO|configuration_utils.py:800] 2024-06-30 15:54:27,177 >> Model config Qwen2Config {\n", - " \"_name_or_path\": \"Qwen/Qwen2-0.5B-Instruct\",\n", - " \"architectures\": [\n", - " \"Qwen2ForCausalLM\"\n", - " ],\n", - " \"attention_dropout\": 0.0,\n", - " \"bos_token_id\": 151643,\n", - " \"eos_token_id\": 151645,\n", - " \"hidden_act\": \"silu\",\n", - " \"hidden_size\": 896,\n", - " \"initializer_range\": 0.02,\n", - " \"intermediate_size\": 4864,\n", - " \"max_position_embeddings\": 32768,\n", - " \"max_window_layers\": 24,\n", - " \"model_type\": \"qwen2\",\n", - " \"num_attention_heads\": 14,\n", - " \"num_hidden_layers\": 24,\n", - " \"num_key_value_heads\": 2,\n", - " \"rms_norm_eps\": 1e-06,\n", - " \"rope_theta\": 1000000.0,\n", - " \"sliding_window\": 32768,\n", - " \"tie_word_embeddings\": true,\n", - " \"torch_dtype\": \"bfloat16\",\n", - " \"transformers_version\": \"4.42.3\",\n", - " \"use_cache\": true,\n", - " \"use_sliding_window\": false,\n", - " \"vocab_size\": 151936\n", - "}\n", - "\n", - "06/30/2024 15:54:27 - INFO - llamafactory.model.model_utils.quantization - Quantizing model to 4 bit with bitsandbytes.\n", - "06/30/2024 15:54:27 - INFO - llamafactory.model.patcher - Using KV cache for faster generation.\n", - "[INFO|modeling_utils.py:3556] 2024-06-30 15:54:27,212 >> loading weights file model.safetensors from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/model.safetensors\n", - "[INFO|modeling_utils.py:1531] 2024-06-30 15:54:27,943 >> 
Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16.\n", - "[INFO|configuration_utils.py:1000] 2024-06-30 15:54:27,946 >> Generate config GenerationConfig {\n", - " \"bos_token_id\": 151643,\n", - " \"eos_token_id\": 151645\n", - "}\n", - "\n", - "[INFO|modeling_utils.py:4364] 2024-06-30 15:54:50,953 >> All model checkpoint weights were used when initializing Qwen2ForCausalLM.\n", - "\n", - "[INFO|modeling_utils.py:4372] 2024-06-30 15:54:50,954 >> All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at Qwen/Qwen2-0.5B-Instruct.\n", - "If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training.\n", - "[INFO|configuration_utils.py:955] 2024-06-30 15:54:51,228 >> loading configuration file generation_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/generation_config.json\n", - "[INFO|configuration_utils.py:1000] 2024-06-30 15:54:51,228 >> Generate config GenerationConfig {\n", - " \"bos_token_id\": 151643,\n", - " \"do_sample\": true,\n", - " \"eos_token_id\": [\n", - " 151645,\n", - " 151643\n", - " ],\n", - " \"pad_token_id\": 151643,\n", - " \"repetition_penalty\": 1.1,\n", - " \"temperature\": 0.7,\n", - " \"top_k\": 20,\n", - " \"top_p\": 0.8\n", - "}\n", - "\n", - "06/30/2024 15:54:51 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.\n", - "06/30/2024 15:54:51 - INFO - llamafactory.model.adapter - Loaded adapter(s): llama-factory/saves/qwen2-0.5b/lora/sft/checkpoint-1680\n", - "06/30/2024 15:54:51 - INFO - llamafactory.model.loader - all params: 498,431,872\n", - "(2) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. 
Max memory = 11.994 GB.\n", - "0.666 GB of memory reserved.\n", - "loading train/test data files\n", - "DatasetDict({\n", - " train: Dataset({\n", - " features: ['chinese', 'english', 'text', 'prompt'],\n", - " num_rows: 4528\n", - " })\n", - " test: Dataset({\n", - " features: ['chinese', 'english', 'text', 'prompt'],\n", - " num_rows: 1133\n", - " })\n", - "})\n", - "Evaluating model: Qwen/Qwen2-0.5B-Instruct\n", - " 0%| | 0/1133 [00:00\n", - "--------\n", - "step 2: Old Geng held his gun up, half-closed, and cocked it as if he was aiming for a bullet. The barrel cracked, and bullets flew down like ice nuggets; the leaves on the riverbank splashed like cannonballs.\n", - "--------\n", - "step 3: Old Geng held his gun up, half-closed, and cocked it as if he was aiming for a bullet. The barrel cracked, and bullets flew down like ice nuggets; the leaves on the riverbank splashed like cannonballs.\n", - "100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 1133/1133 [44:44<00:00, 2.37s/it]\n", - "(3) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n", - "4.24 GB of memory reserved.\n", - " chinese ... Qwen/Qwen2-0.5B-Instruct_checkpoint-1680\n", - "0 老耿端衷ζžͺοΌŒηœ―ηΌθ΅·δΈ€εͺδΈ‰θ§’ηœΌοΌŒδΈ€ζ‚ζ‰³ζœΊε“δΊ†ζžͺοΌŒε†°ι›Ήθˆ¬ηš„ι‡‘ιΊ»ι›€εŠˆε“©ε•ͺε•¦εΎ€δΈ‹θ½οΌŒι“η ‚ε­εœ¨ζŸ³ζžι—΄ι£ž... ... 
Old Geng held his gun up, half-closed, and coc...\n", - "\n", - "[1 rows x 5 columns]\n", - "{'accuracy': 0.00353045013239188, 'correct_ids': [147, 194, 202, 364], 'meteor': 0.3232125016634757, 'bleu_scores': {'bleu': 0.06687635711488571, 'precisions': [0.33171058236475387, 0.0956102480068068, 0.03666427030913012, 0.017202185050724392], 'brevity_penalty': 1.0, 'length_ratio': 1.0886386220602848, 'translation_length': 32866, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.352664915385991, 'rouge2': 0.1232869942455126, 'rougeL': 0.2909052156293055, 'rougeLsum': 0.2907588163008441}}\n", - "Epoch 4\n", - "loading env vars from: /home/inflaton/code/projects/courses/llm-finetuning/.env\n", - "Adding /home/inflaton/code/projects/courses/llm-finetuning to sys.path\n", - "πŸ¦₯ Unsloth: Will patch your computer to enable 2x faster free finetuning.\n", - "[nltk_data] Downloading package wordnet to /home/inflaton/nltk_data...\n", - "[nltk_data] Package wordnet is already up-to-date!\n", - "[nltk_data] Downloading package punkt to /home/inflaton/nltk_data...\n", - "[nltk_data] Package punkt is already up-to-date!\n", - "[nltk_data] Downloading package omw-1.4 to /home/inflaton/nltk_data...\n", - "[nltk_data] Package omw-1.4 is already up-to-date!\n", - "loading /home/inflaton/code/projects/courses/llm-finetuning/llm_toolkit/translation_engine.py\n", - "Qwen/Qwen2-0.5B-Instruct llama-factory/saves/qwen2-0.5b/lora/sft/checkpoint-2240 True datasets/mac/mac.tsv results/mac-results_lf.csv\n", - "(1) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. 
Max memory = 11.994 GB.\n", - "0.0 GB of memory reserved.\n", - "loading model: Qwen/Qwen2-0.5B-Instruct\n", - "[INFO|tokenization_utils_base.py:2161] 2024-06-30 16:39:54,252 >> loading file vocab.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/vocab.json\n", - "[INFO|tokenization_utils_base.py:2161] 2024-06-30 16:39:54,252 >> loading file merges.txt from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/merges.txt\n", - "[INFO|tokenization_utils_base.py:2161] 2024-06-30 16:39:54,252 >> loading file tokenizer.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/tokenizer.json\n", - "[INFO|tokenization_utils_base.py:2161] 2024-06-30 16:39:54,252 >> loading file added_tokens.json from cache at None\n", - "[INFO|tokenization_utils_base.py:2161] 2024-06-30 16:39:54,252 >> loading file special_tokens_map.json from cache at None\n", - "[INFO|tokenization_utils_base.py:2161] 2024-06-30 16:39:54,252 >> loading file tokenizer_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/tokenizer_config.json\n", - "[WARNING|logging.py:313] 2024-06-30 16:39:54,394 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n", - "06/30/2024 16:39:54 - INFO - llamafactory.data.template - Replace eos token: <|im_end|>\n", - "06/30/2024 16:39:54 - INFO - llamafactory.data.template - Add <|im_start|> to stop words.\n", - "[INFO|configuration_utils.py:733] 2024-06-30 16:39:54,662 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/config.json\n", - 
"[INFO|configuration_utils.py:800] 2024-06-30 16:39:54,663 >> Model config Qwen2Config {\n", - " \"_name_or_path\": \"Qwen/Qwen2-0.5B-Instruct\",\n", - " \"architectures\": [\n", - " \"Qwen2ForCausalLM\"\n", - " ],\n", - " \"attention_dropout\": 0.0,\n", - " \"bos_token_id\": 151643,\n", - " \"eos_token_id\": 151645,\n", - " \"hidden_act\": \"silu\",\n", - " \"hidden_size\": 896,\n", - " \"initializer_range\": 0.02,\n", - " \"intermediate_size\": 4864,\n", - " \"max_position_embeddings\": 32768,\n", - " \"max_window_layers\": 24,\n", - " \"model_type\": \"qwen2\",\n", - " \"num_attention_heads\": 14,\n", - " \"num_hidden_layers\": 24,\n", - " \"num_key_value_heads\": 2,\n", - " \"rms_norm_eps\": 1e-06,\n", - " \"rope_theta\": 1000000.0,\n", - " \"sliding_window\": 32768,\n", - " \"tie_word_embeddings\": true,\n", - " \"torch_dtype\": \"bfloat16\",\n", - " \"transformers_version\": \"4.42.3\",\n", - " \"use_cache\": true,\n", - " \"use_sliding_window\": false,\n", - " \"vocab_size\": 151936\n", - "}\n", - "\n", - "06/30/2024 16:39:54 - INFO - llamafactory.model.model_utils.quantization - Quantizing model to 4 bit with bitsandbytes.\n", - "06/30/2024 16:39:54 - INFO - llamafactory.model.patcher - Using KV cache for faster generation.\n", - "[INFO|modeling_utils.py:3556] 2024-06-30 16:39:54,705 >> loading weights file model.safetensors from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/model.safetensors\n", - "[INFO|modeling_utils.py:1531] 2024-06-30 16:39:55,523 >> Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16.\n", - "[INFO|configuration_utils.py:1000] 2024-06-30 16:39:55,526 >> Generate config GenerationConfig {\n", - " \"bos_token_id\": 151643,\n", - " \"eos_token_id\": 151645\n", - "}\n", - "\n", - "[INFO|modeling_utils.py:4364] 2024-06-30 16:40:17,339 >> All model checkpoint weights were used when initializing Qwen2ForCausalLM.\n", - "\n", - 
"[INFO|modeling_utils.py:4372] 2024-06-30 16:40:17,339 >> All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at Qwen/Qwen2-0.5B-Instruct.\n", - "If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training.\n", - "[INFO|configuration_utils.py:955] 2024-06-30 16:40:17,617 >> loading configuration file generation_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/generation_config.json\n", - "[INFO|configuration_utils.py:1000] 2024-06-30 16:40:17,617 >> Generate config GenerationConfig {\n", - " \"bos_token_id\": 151643,\n", - " \"do_sample\": true,\n", - " \"eos_token_id\": [\n", - " 151645,\n", - " 151643\n", - " ],\n", - " \"pad_token_id\": 151643,\n", - " \"repetition_penalty\": 1.1,\n", - " \"temperature\": 0.7,\n", - " \"top_k\": 20,\n", - " \"top_p\": 0.8\n", - "}\n", - "\n", - "06/30/2024 16:40:17 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.\n", - "06/30/2024 16:40:18 - INFO - llamafactory.model.adapter - Loaded adapter(s): llama-factory/saves/qwen2-0.5b/lora/sft/checkpoint-2240\n", - "06/30/2024 16:40:18 - INFO - llamafactory.model.loader - all params: 498,431,872\n", - "(2) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n", - "0.666 GB of memory reserved.\n", - "loading train/test data files\n", - "DatasetDict({\n", - " train: Dataset({\n", - " features: ['chinese', 'english', 'text', 'prompt'],\n", - " num_rows: 4528\n", - " })\n", - " test: Dataset({\n", - " features: ['chinese', 'english', 'text', 'prompt'],\n", - " num_rows: 1133\n", - " })\n", - "})\n", - "Evaluating model: Qwen/Qwen2-0.5B-Instruct\n", - " 0%| | 0/1133 [00:00\n", - "--------\n", - "step 2: Old Geng raised his gun, his triangular eye half-lit. 
The trigger sounded as he fired, bullets raining down like a cold stinker from skyward. The metal chattering sounded as if it had broken glass in the branches of the willows.\n", - "--------\n", - "step 3: Old Geng raised his gun, his triangular eye half-lit. The trigger sounded as he fired, bullets raining down like a cold stinker from skyward. The metal chattering sounded as if it had broken glass in the branches of the willows.\n", - "100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 1133/1133 [57:01<00:00, 3.02s/it]\n", - "(3) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n", - "4.221 GB of memory reserved.\n", - " chinese ... Qwen/Qwen2-0.5B-Instruct_checkpoint-2240\n", - "0 老耿端衷ζžͺοΌŒηœ―ηΌθ΅·δΈ€εͺδΈ‰θ§’ηœΌοΌŒδΈ€ζ‚ζ‰³ζœΊε“δΊ†ζžͺοΌŒε†°ι›Ήθˆ¬ηš„ι‡‘ιΊ»ι›€εŠˆε“©ε•ͺε•¦εΎ€δΈ‹θ½οΌŒι“η ‚ε­εœ¨ζŸ³ζžι—΄ι£ž... ... Old Geng raised his gun, his triangular eye ha...\n", - "\n", - "[1 rows x 6 columns]\n", - "{'accuracy': 0.00441306266548985, 'correct_ids': [147, 202, 364, 533, 850], 'meteor': 0.3141676906431015, 'bleu_scores': {'bleu': 0.05981782718505817, 'precisions': [0.2922991381706978, 0.08376151792634268, 0.033080163769061886, 0.01580821413223648], 'brevity_penalty': 1.0, 'length_ratio': 1.1914541238820802, 'translation_length': 35970, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.3493638878638674, 'rouge2': 0.1255400870123861, 'rougeL': 0.2910327113370838, 'rougeLsum': 0.2905461546619883}}\n", - "Epoch 5\n", - "loading env vars from: /home/inflaton/code/projects/courses/llm-finetuning/.env\n", - "Adding /home/inflaton/code/projects/courses/llm-finetuning to sys.path\n", - "πŸ¦₯ Unsloth: Will patch your computer to enable 2x faster free finetuning.\n", - "[nltk_data] Downloading package wordnet to /home/inflaton/nltk_data...\n", - "[nltk_data] Package wordnet is already up-to-date!\n", - "[nltk_data] Downloading package punkt to 
/home/inflaton/nltk_data...\n", - "[nltk_data] Package punkt is already up-to-date!\n", - "[nltk_data] Downloading package omw-1.4 to /home/inflaton/nltk_data...\n", - "[nltk_data] Package omw-1.4 is already up-to-date!\n", - "loading /home/inflaton/code/projects/courses/llm-finetuning/llm_toolkit/translation_engine.py\n", - "Qwen/Qwen2-0.5B-Instruct llama-factory/saves/qwen2-0.5b/lora/sft/checkpoint-2800 True datasets/mac/mac.tsv results/mac-results_lf.csv\n", - "(1) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n", - "0.0 GB of memory reserved.\n", - "loading model: Qwen/Qwen2-0.5B-Instruct\n", - "[INFO|tokenization_utils_base.py:2161] 2024-06-30 17:37:38,874 >> loading file vocab.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/vocab.json\n", - "[INFO|tokenization_utils_base.py:2161] 2024-06-30 17:37:38,874 >> loading file merges.txt from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/merges.txt\n", - "[INFO|tokenization_utils_base.py:2161] 2024-06-30 17:37:38,875 >> loading file tokenizer.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/tokenizer.json\n", - "[INFO|tokenization_utils_base.py:2161] 2024-06-30 17:37:38,875 >> loading file added_tokens.json from cache at None\n", - "[INFO|tokenization_utils_base.py:2161] 2024-06-30 17:37:38,875 >> loading file special_tokens_map.json from cache at None\n", - "[INFO|tokenization_utils_base.py:2161] 2024-06-30 17:37:38,875 >> loading file tokenizer_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/tokenizer_config.json\n", - "[WARNING|logging.py:313] 2024-06-30 17:37:39,004 >> Special tokens have been added in the vocabulary, make 
sure the associated word embeddings are fine-tuned or trained.\n", - "06/30/2024 17:37:39 - INFO - llamafactory.data.template - Replace eos token: <|im_end|>\n", - "06/30/2024 17:37:39 - INFO - llamafactory.data.template - Add <|im_start|> to stop words.\n", - "[INFO|configuration_utils.py:733] 2024-06-30 17:37:39,272 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/config.json\n", - "[INFO|configuration_utils.py:800] 2024-06-30 17:37:39,272 >> Model config Qwen2Config {\n", - " \"_name_or_path\": \"Qwen/Qwen2-0.5B-Instruct\",\n", - " \"architectures\": [\n", - " \"Qwen2ForCausalLM\"\n", - " ],\n", - " \"attention_dropout\": 0.0,\n", - " \"bos_token_id\": 151643,\n", - " \"eos_token_id\": 151645,\n", - " \"hidden_act\": \"silu\",\n", - " \"hidden_size\": 896,\n", - " \"initializer_range\": 0.02,\n", - " \"intermediate_size\": 4864,\n", - " \"max_position_embeddings\": 32768,\n", - " \"max_window_layers\": 24,\n", - " \"model_type\": \"qwen2\",\n", - " \"num_attention_heads\": 14,\n", - " \"num_hidden_layers\": 24,\n", - " \"num_key_value_heads\": 2,\n", - " \"rms_norm_eps\": 1e-06,\n", - " \"rope_theta\": 1000000.0,\n", - " \"sliding_window\": 32768,\n", - " \"tie_word_embeddings\": true,\n", - " \"torch_dtype\": \"bfloat16\",\n", - " \"transformers_version\": \"4.42.3\",\n", - " \"use_cache\": true,\n", - " \"use_sliding_window\": false,\n", - " \"vocab_size\": 151936\n", - "}\n", - "\n", - "06/30/2024 17:37:39 - INFO - llamafactory.model.model_utils.quantization - Quantizing model to 4 bit with bitsandbytes.\n", - "06/30/2024 17:37:39 - INFO - llamafactory.model.patcher - Using KV cache for faster generation.\n", - "[INFO|modeling_utils.py:3556] 2024-06-30 17:37:39,347 >> loading weights file model.safetensors from cache at 
/home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/model.safetensors\n", - "[INFO|modeling_utils.py:1531] 2024-06-30 17:37:41,000 >> Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16.\n", - "[INFO|configuration_utils.py:1000] 2024-06-30 17:37:41,003 >> Generate config GenerationConfig {\n", - " \"bos_token_id\": 151643,\n", - " \"eos_token_id\": 151645\n", - "}\n", - "\n", - "[INFO|modeling_utils.py:4364] 2024-06-30 17:38:03,532 >> All model checkpoint weights were used when initializing Qwen2ForCausalLM.\n", - "\n", - "[INFO|modeling_utils.py:4372] 2024-06-30 17:38:03,532 >> All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at Qwen/Qwen2-0.5B-Instruct.\n", - "If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training.\n", - "[INFO|configuration_utils.py:955] 2024-06-30 17:38:03,825 >> loading configuration file generation_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/generation_config.json\n", - "[INFO|configuration_utils.py:1000] 2024-06-30 17:38:03,825 >> Generate config GenerationConfig {\n", - " \"bos_token_id\": 151643,\n", - " \"do_sample\": true,\n", - " \"eos_token_id\": [\n", - " 151645,\n", - " 151643\n", - " ],\n", - " \"pad_token_id\": 151643,\n", - " \"repetition_penalty\": 1.1,\n", - " \"temperature\": 0.7,\n", - " \"top_k\": 20,\n", - " \"top_p\": 0.8\n", - "}\n", - "\n", - "06/30/2024 17:38:04 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.\n", - "06/30/2024 17:38:04 - INFO - llamafactory.model.adapter - Loaded adapter(s): llama-factory/saves/qwen2-0.5b/lora/sft/checkpoint-2800\n", - "06/30/2024 17:38:04 - INFO - llamafactory.model.loader - all params: 498,431,872\n", - "(2) 
GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n", - "0.666 GB of memory reserved.\n", - "loading train/test data files\n", - "DatasetDict({\n", - " train: Dataset({\n", - " features: ['chinese', 'english', 'text', 'prompt'],\n", - " num_rows: 4528\n", - " })\n", - " test: Dataset({\n", - " features: ['chinese', 'english', 'text', 'prompt'],\n", - " num_rows: 1133\n", - " })\n", - "})\n", - "Evaluating model: Qwen/Qwen2-0.5B-Instruct\n", - " 0%| | 0/1133 [00:00\n", - "--------\n", - "step 2: Old Geng took out his pistol, squinted over a triangular brow, then fired. A hail of bullets fell like ice-crystals from the sky: crisscrossing branches crackled with sounds like a bucketful of cold metal sparrows flying through the air.\n", - "--------\n", - "step 3: Old Geng took out his pistol, squinted over a triangular brow, then fired. A hail of bullets fell like ice-crystals from the sky: crisscrossing branches crackled with sounds like a bucketful of cold metal sparrows flying through the air.\n", - "100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 1133/1133 [44:54<00:00, 2.38s/it]\n", - "(3) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n", - "4.201 GB of memory reserved.\n", - " chinese ... Qwen/Qwen2-0.5B-Instruct_checkpoint-2800\n", - "0 老耿端衷ζžͺοΌŒηœ―ηΌθ΅·δΈ€εͺδΈ‰θ§’ηœΌοΌŒδΈ€ζ‚ζ‰³ζœΊε“δΊ†ζžͺοΌŒε†°ι›Ήθˆ¬ηš„ι‡‘ιΊ»ι›€εŠˆε“©ε•ͺε•¦εΎ€δΈ‹θ½οΌŒι“η ‚ε­εœ¨ζŸ³ζžι—΄ι£ž... ... 
Old Geng took out his pistol, squinted over a ...\n", - "\n", - "[1 rows x 7 columns]\n", - "{'accuracy': 0.00264783759929391, 'correct_ids': [147, 194, 364], 'meteor': 0.31468732087511564, 'bleu_scores': {'bleu': 0.06531154622295796, 'precisions': [0.31492039110270875, 0.09110635696821516, 0.03624564735675847, 0.017496635262449527], 'brevity_penalty': 1.0, 'length_ratio': 1.121331566743955, 'translation_length': 33853, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.3477119790584883, 'rouge2': 0.12383470549112005, 'rougeL': 0.28723768855041154, 'rougeLsum': 0.287515203604385}}\n", - "Epoch 6\n", - "loading env vars from: /home/inflaton/code/projects/courses/llm-finetuning/.env\n", - "Adding /home/inflaton/code/projects/courses/llm-finetuning to sys.path\n", - "πŸ¦₯ Unsloth: Will patch your computer to enable 2x faster free finetuning.\n", - "[nltk_data] Downloading package wordnet to /home/inflaton/nltk_data...\n", - "[nltk_data] Package wordnet is already up-to-date!\n", - "[nltk_data] Downloading package punkt to /home/inflaton/nltk_data...\n", - "[nltk_data] Package punkt is already up-to-date!\n", - "[nltk_data] Downloading package omw-1.4 to /home/inflaton/nltk_data...\n", - "[nltk_data] Package omw-1.4 is already up-to-date!\n", - "loading /home/inflaton/code/projects/courses/llm-finetuning/llm_toolkit/translation_engine.py\n", - "Qwen/Qwen2-0.5B-Instruct llama-factory/saves/qwen2-0.5b/lora/sft/checkpoint-3360 True datasets/mac/mac.tsv results/mac-results_lf.csv\n", - "(1) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. 
Max memory = 11.994 GB.\n", - "0.0 GB of memory reserved.\n", - "loading model: Qwen/Qwen2-0.5B-Instruct\n", - "[INFO|tokenization_utils_base.py:2161] 2024-06-30 18:23:20,733 >> loading file vocab.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/vocab.json\n", - "[INFO|tokenization_utils_base.py:2161] 2024-06-30 18:23:20,733 >> loading file merges.txt from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/merges.txt\n", - "[INFO|tokenization_utils_base.py:2161] 2024-06-30 18:23:20,733 >> loading file tokenizer.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/tokenizer.json\n", - "[INFO|tokenization_utils_base.py:2161] 2024-06-30 18:23:20,733 >> loading file added_tokens.json from cache at None\n", - "[INFO|tokenization_utils_base.py:2161] 2024-06-30 18:23:20,733 >> loading file special_tokens_map.json from cache at None\n", - "[INFO|tokenization_utils_base.py:2161] 2024-06-30 18:23:20,733 >> loading file tokenizer_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/tokenizer_config.json\n", - "[WARNING|logging.py:313] 2024-06-30 18:23:20,880 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n", - "06/30/2024 18:23:20 - INFO - llamafactory.data.template - Replace eos token: <|im_end|>\n", - "06/30/2024 18:23:20 - INFO - llamafactory.data.template - Add <|im_start|> to stop words.\n", - "[INFO|configuration_utils.py:733] 2024-06-30 18:23:21,195 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/config.json\n", - 
"[INFO|configuration_utils.py:800] 2024-06-30 18:23:21,195 >> Model config Qwen2Config {\n", - " \"_name_or_path\": \"Qwen/Qwen2-0.5B-Instruct\",\n", - " \"architectures\": [\n", - " \"Qwen2ForCausalLM\"\n", - " ],\n", - " \"attention_dropout\": 0.0,\n", - " \"bos_token_id\": 151643,\n", - " \"eos_token_id\": 151645,\n", - " \"hidden_act\": \"silu\",\n", - " \"hidden_size\": 896,\n", - " \"initializer_range\": 0.02,\n", - " \"intermediate_size\": 4864,\n", - " \"max_position_embeddings\": 32768,\n", - " \"max_window_layers\": 24,\n", - " \"model_type\": \"qwen2\",\n", - " \"num_attention_heads\": 14,\n", - " \"num_hidden_layers\": 24,\n", - " \"num_key_value_heads\": 2,\n", - " \"rms_norm_eps\": 1e-06,\n", - " \"rope_theta\": 1000000.0,\n", - " \"sliding_window\": 32768,\n", - " \"tie_word_embeddings\": true,\n", - " \"torch_dtype\": \"bfloat16\",\n", - " \"transformers_version\": \"4.42.3\",\n", - " \"use_cache\": true,\n", - " \"use_sliding_window\": false,\n", - " \"vocab_size\": 151936\n", - "}\n", - "\n", - "06/30/2024 18:23:21 - INFO - llamafactory.model.model_utils.quantization - Quantizing model to 4 bit with bitsandbytes.\n", - "06/30/2024 18:23:21 - INFO - llamafactory.model.patcher - Using KV cache for faster generation.\n", - "[INFO|modeling_utils.py:3556] 2024-06-30 18:23:21,271 >> loading weights file model.safetensors from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/model.safetensors\n", - "[INFO|modeling_utils.py:1531] 2024-06-30 18:23:23,604 >> Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16.\n", - "[INFO|configuration_utils.py:1000] 2024-06-30 18:23:23,608 >> Generate config GenerationConfig {\n", - " \"bos_token_id\": 151643,\n", - " \"eos_token_id\": 151645\n", - "}\n", - "\n", - "[INFO|modeling_utils.py:4364] 2024-06-30 18:23:50,830 >> All model checkpoint weights were used when initializing Qwen2ForCausalLM.\n", - "\n", - 
"[INFO|modeling_utils.py:4372] 2024-06-30 18:23:50,830 >> All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at Qwen/Qwen2-0.5B-Instruct.\n", - "If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training.\n", - "[INFO|configuration_utils.py:955] 2024-06-30 18:23:51,197 >> loading configuration file generation_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/generation_config.json\n", - "[INFO|configuration_utils.py:1000] 2024-06-30 18:23:51,197 >> Generate config GenerationConfig {\n", - " \"bos_token_id\": 151643,\n", - " \"do_sample\": true,\n", - " \"eos_token_id\": [\n", - " 151645,\n", - " 151643\n", - " ],\n", - " \"pad_token_id\": 151643,\n", - " \"repetition_penalty\": 1.1,\n", - " \"temperature\": 0.7,\n", - " \"top_k\": 20,\n", - " \"top_p\": 0.8\n", - "}\n", - "\n", - "06/30/2024 18:23:51 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.\n", - "06/30/2024 18:23:51 - INFO - llamafactory.model.adapter - Loaded adapter(s): llama-factory/saves/qwen2-0.5b/lora/sft/checkpoint-3360\n", - "06/30/2024 18:23:51 - INFO - llamafactory.model.loader - all params: 498,431,872\n", - "(2) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n", - "0.666 GB of memory reserved.\n", - "loading train/test data files\n", - "DatasetDict({\n", - " train: Dataset({\n", - " features: ['chinese', 'english', 'text', 'prompt'],\n", - " num_rows: 4528\n", - " })\n", - " test: Dataset({\n", - " features: ['chinese', 'english', 'text', 'prompt'],\n", - " num_rows: 1133\n", - " })\n", - "})\n", - "Evaluating model: Qwen/Qwen2-0.5B-Instruct\n", - " 0%| | 0/1133 [00:00\n", - "--------\n", - "step 2: Old Geng held his rifle up and cocked it over his right eye. 
Then the shotgun fired, loud as a bolt of golden sparrows that fell like ice-cold quakes down on the field. The chattering bits of iron were heard splashing across the field, cracking and crunching.\n", - "--------\n", - "step 3: Old Geng held his rifle up and cocked it over his right eye. Then the shotgun fired, loud as a bolt of golden sparrows that fell like ice-cold quakes down on the field. The chattering bits of iron were heard splashing across the field, cracking and crunching.\n", - "100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 1133/1133 [45:34<00:00, 2.41s/it]\n", - "(3) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n", - "4.26 GB of memory reserved.\n", - " chinese ... Qwen/Qwen2-0.5B-Instruct_checkpoint-3360\n", - "0 老耿端衷ζžͺοΌŒηœ―ηΌθ΅·δΈ€εͺδΈ‰θ§’ηœΌοΌŒδΈ€ζ‚ζ‰³ζœΊε“δΊ†ζžͺοΌŒε†°ι›Ήθˆ¬ηš„ι‡‘ιΊ»ι›€εŠˆε“©ε•ͺε•¦εΎ€δΈ‹θ½οΌŒι“η ‚ε­εœ¨ζŸ³ζžι—΄ι£ž... ... Old Geng held his rifle up and cocked it over ...\n", - "\n", - "[1 rows x 8 columns]\n", - "{'accuracy': 0.00176522506619594, 'correct_ids': [272, 364], 'meteor': 0.3060953047058868, 'bleu_scores': {'bleu': 0.06197290227987762, 'precisions': [0.30625790139064474, 0.08672151109263164, 0.03420510771689357, 0.01623686723973257], 'brevity_penalty': 1.0, 'length_ratio': 1.1528320635972176, 'translation_length': 34804, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.33981449502350625, 'rouge2': 0.11735200363049994, 'rougeL': 0.2798705836787463, 'rougeLsum': 0.27962230715315634}}\n", - "Epoch 7\n", - "loading env vars from: /home/inflaton/code/projects/courses/llm-finetuning/.env\n", - "Adding /home/inflaton/code/projects/courses/llm-finetuning to sys.path\n", - "πŸ¦₯ Unsloth: Will patch your computer to enable 2x faster free finetuning.\n", - "[nltk_data] Downloading package wordnet to /home/inflaton/nltk_data...\n", - "[nltk_data] Package wordnet is already up-to-date!\n", - "[nltk_data] Downloading 
package punkt to /home/inflaton/nltk_data...\n", - "[nltk_data] Package punkt is already up-to-date!\n", - "[nltk_data] Downloading package omw-1.4 to /home/inflaton/nltk_data...\n", - "[nltk_data] Package omw-1.4 is already up-to-date!\n", - "loading /home/inflaton/code/projects/courses/llm-finetuning/llm_toolkit/translation_engine.py\n", - "Qwen/Qwen2-0.5B-Instruct llama-factory/saves/qwen2-0.5b/lora/sft/checkpoint-3920 True datasets/mac/mac.tsv results/mac-results_lf.csv\n", - "(1) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n", - "0.0 GB of memory reserved.\n", - "loading model: Qwen/Qwen2-0.5B-Instruct\n", - "[INFO|tokenization_utils_base.py:2161] 2024-06-30 19:09:46,741 >> loading file vocab.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/vocab.json\n", - "[INFO|tokenization_utils_base.py:2161] 2024-06-30 19:09:46,741 >> loading file merges.txt from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/merges.txt\n", - "[INFO|tokenization_utils_base.py:2161] 2024-06-30 19:09:46,741 >> loading file tokenizer.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/tokenizer.json\n", - "[INFO|tokenization_utils_base.py:2161] 2024-06-30 19:09:46,741 >> loading file added_tokens.json from cache at None\n", - "[INFO|tokenization_utils_base.py:2161] 2024-06-30 19:09:46,741 >> loading file special_tokens_map.json from cache at None\n", - "[INFO|tokenization_utils_base.py:2161] 2024-06-30 19:09:46,741 >> loading file tokenizer_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/tokenizer_config.json\n", - "[WARNING|logging.py:313] 2024-06-30 19:09:46,876 >> Special tokens have been added in the 
vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n", - "06/30/2024 19:09:46 - INFO - llamafactory.data.template - Replace eos token: <|im_end|>\n", - "06/30/2024 19:09:46 - INFO - llamafactory.data.template - Add <|im_start|> to stop words.\n", - "[INFO|configuration_utils.py:733] 2024-06-30 19:09:47,204 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/config.json\n", - "[INFO|configuration_utils.py:800] 2024-06-30 19:09:47,204 >> Model config Qwen2Config {\n", - " \"_name_or_path\": \"Qwen/Qwen2-0.5B-Instruct\",\n", - " \"architectures\": [\n", - " \"Qwen2ForCausalLM\"\n", - " ],\n", - " \"attention_dropout\": 0.0,\n", - " \"bos_token_id\": 151643,\n", - " \"eos_token_id\": 151645,\n", - " \"hidden_act\": \"silu\",\n", - " \"hidden_size\": 896,\n", - " \"initializer_range\": 0.02,\n", - " \"intermediate_size\": 4864,\n", - " \"max_position_embeddings\": 32768,\n", - " \"max_window_layers\": 24,\n", - " \"model_type\": \"qwen2\",\n", - " \"num_attention_heads\": 14,\n", - " \"num_hidden_layers\": 24,\n", - " \"num_key_value_heads\": 2,\n", - " \"rms_norm_eps\": 1e-06,\n", - " \"rope_theta\": 1000000.0,\n", - " \"sliding_window\": 32768,\n", - " \"tie_word_embeddings\": true,\n", - " \"torch_dtype\": \"bfloat16\",\n", - " \"transformers_version\": \"4.42.3\",\n", - " \"use_cache\": true,\n", - " \"use_sliding_window\": false,\n", - " \"vocab_size\": 151936\n", - "}\n", - "\n", - "06/30/2024 19:09:47 - INFO - llamafactory.model.model_utils.quantization - Quantizing model to 4 bit with bitsandbytes.\n", - "06/30/2024 19:09:47 - INFO - llamafactory.model.patcher - Using KV cache for faster generation.\n", - "[INFO|modeling_utils.py:3556] 2024-06-30 19:09:47,246 >> loading weights file model.safetensors from cache at 
/home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/model.safetensors\n", - "[INFO|modeling_utils.py:1531] 2024-06-30 19:09:48,444 >> Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16.\n", - "[INFO|configuration_utils.py:1000] 2024-06-30 19:09:48,446 >> Generate config GenerationConfig {\n", - " \"bos_token_id\": 151643,\n", - " \"eos_token_id\": 151645\n", - "}\n", - "\n", - "[INFO|modeling_utils.py:4364] 2024-06-30 19:10:17,136 >> All model checkpoint weights were used when initializing Qwen2ForCausalLM.\n", - "\n", - "[INFO|modeling_utils.py:4372] 2024-06-30 19:10:17,136 >> All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at Qwen/Qwen2-0.5B-Instruct.\n", - "If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training.\n", - "[INFO|configuration_utils.py:955] 2024-06-30 19:10:17,747 >> loading configuration file generation_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/generation_config.json\n", - "[INFO|configuration_utils.py:1000] 2024-06-30 19:10:17,747 >> Generate config GenerationConfig {\n", - " \"bos_token_id\": 151643,\n", - " \"do_sample\": true,\n", - " \"eos_token_id\": [\n", - " 151645,\n", - " 151643\n", - " ],\n", - " \"pad_token_id\": 151643,\n", - " \"repetition_penalty\": 1.1,\n", - " \"temperature\": 0.7,\n", - " \"top_k\": 20,\n", - " \"top_p\": 0.8\n", - "}\n", - "\n", - "06/30/2024 19:10:18 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.\n", - "06/30/2024 19:10:18 - INFO - llamafactory.model.adapter - Loaded adapter(s): llama-factory/saves/qwen2-0.5b/lora/sft/checkpoint-3920\n", - "06/30/2024 19:10:18 - INFO - llamafactory.model.loader - all params: 498,431,872\n", - "(2) 
GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n", - "0.666 GB of memory reserved.\n", - "loading train/test data files\n", - "DatasetDict({\n", - " train: Dataset({\n", - " features: ['chinese', 'english', 'text', 'prompt'],\n", - " num_rows: 4528\n", - " })\n", - " test: Dataset({\n", - " features: ['chinese', 'english', 'text', 'prompt'],\n", - " num_rows: 1133\n", - " })\n", - "})\n", - "Evaluating model: Qwen/Qwen2-0.5B-Instruct\n", - " 0%| | 0/1133 [00:00\n", - "--------\n", - "step 2: Old Geng held his gun to his chest, eyes on a triangle shape, but the trigger sounded when he cocked it and fired: shot after shot of golden sparrows thundered down the slope, and shattering stones clattered as they fell through the air.\n", - "--------\n", - "step 3: Old Geng held his gun to his chest, eyes on a triangle shape, but the trigger sounded when he cocked it and fired: shot after shot of golden sparrows thundered down the slope, and shattering stones clattered as they fell through the air.\n", - "100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 1133/1133 [33:14<00:00, 1.76s/it]\n", - "(3) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n", - "1.057 GB of memory reserved.\n", - " chinese ... Qwen/Qwen2-0.5B-Instruct_checkpoint-3920\n", - "0 老耿端衷ζžͺοΌŒηœ―ηΌθ΅·δΈ€εͺδΈ‰θ§’ηœΌοΌŒδΈ€ζ‚ζ‰³ζœΊε“δΊ†ζžͺοΌŒε†°ι›Ήθˆ¬ηš„ι‡‘ιΊ»ι›€εŠˆε“©ε•ͺε•¦εΎ€δΈ‹θ½οΌŒι“η ‚ε­εœ¨ζŸ³ζžι—΄ι£ž... ... 
Old Geng held his gun to his chest, eyes on a ...\n", - "\n", - "[1 rows x 9 columns]\n", - "{'accuracy': 0.00088261253309797, 'correct_ids': [364], 'meteor': 0.29569751947150547, 'bleu_scores': {'bleu': 0.06290335358107121, 'precisions': [0.33640226628895187, 0.09157729444388761, 0.033815921952574386, 0.015028901734104046], 'brevity_penalty': 1.0, 'length_ratio': 1.0289499834382245, 'translation_length': 31064, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.328871406524778, 'rouge2': 0.10887785000250436, 'rougeL': 0.2694111761024649, 'rougeLsum': 0.2691332869747859}}\n", - "Epoch 8\n", - "loading env vars from: /home/inflaton/code/projects/courses/llm-finetuning/.env\n", - "Adding /home/inflaton/code/projects/courses/llm-finetuning to sys.path\n", - "πŸ¦₯ Unsloth: Will patch your computer to enable 2x faster free finetuning.\n", - "[nltk_data] Downloading package wordnet to /home/inflaton/nltk_data...\n", - "[nltk_data] Package wordnet is already up-to-date!\n", - "[nltk_data] Downloading package punkt to /home/inflaton/nltk_data...\n", - "[nltk_data] Package punkt is already up-to-date!\n", - "[nltk_data] Downloading package omw-1.4 to /home/inflaton/nltk_data...\n", - "[nltk_data] Package omw-1.4 is already up-to-date!\n", - "loading /home/inflaton/code/projects/courses/llm-finetuning/llm_toolkit/translation_engine.py\n", - "Qwen/Qwen2-0.5B-Instruct llama-factory/saves/qwen2-0.5b/lora/sft/checkpoint-4480 True datasets/mac/mac.tsv results/mac-results_lf.csv\n", - "(1) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. 
Max memory = 11.994 GB.\n", - "0.0 GB of memory reserved.\n", - "loading model: Qwen/Qwen2-0.5B-Instruct\n", - "[INFO|tokenization_utils_base.py:2161] 2024-06-30 19:43:50,488 >> loading file vocab.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/vocab.json\n", - "[INFO|tokenization_utils_base.py:2161] 2024-06-30 19:43:50,488 >> loading file merges.txt from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/merges.txt\n", - "[INFO|tokenization_utils_base.py:2161] 2024-06-30 19:43:50,488 >> loading file tokenizer.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/tokenizer.json\n", - "[INFO|tokenization_utils_base.py:2161] 2024-06-30 19:43:50,488 >> loading file added_tokens.json from cache at None\n", - "[INFO|tokenization_utils_base.py:2161] 2024-06-30 19:43:50,488 >> loading file special_tokens_map.json from cache at None\n", - "[INFO|tokenization_utils_base.py:2161] 2024-06-30 19:43:50,488 >> loading file tokenizer_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/tokenizer_config.json\n", - "[WARNING|logging.py:313] 2024-06-30 19:43:50,640 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n", - "06/30/2024 19:43:50 - INFO - llamafactory.data.template - Replace eos token: <|im_end|>\n", - "06/30/2024 19:43:50 - INFO - llamafactory.data.template - Add <|im_start|> to stop words.\n", - "[INFO|configuration_utils.py:733] 2024-06-30 19:43:50,918 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/config.json\n", - 
"[INFO|configuration_utils.py:800] 2024-06-30 19:43:50,918 >> Model config Qwen2Config {\n", - " \"_name_or_path\": \"Qwen/Qwen2-0.5B-Instruct\",\n", - " \"architectures\": [\n", - " \"Qwen2ForCausalLM\"\n", - " ],\n", - " \"attention_dropout\": 0.0,\n", - " \"bos_token_id\": 151643,\n", - " \"eos_token_id\": 151645,\n", - " \"hidden_act\": \"silu\",\n", - " \"hidden_size\": 896,\n", - " \"initializer_range\": 0.02,\n", - " \"intermediate_size\": 4864,\n", - " \"max_position_embeddings\": 32768,\n", - " \"max_window_layers\": 24,\n", - " \"model_type\": \"qwen2\",\n", - " \"num_attention_heads\": 14,\n", - " \"num_hidden_layers\": 24,\n", - " \"num_key_value_heads\": 2,\n", - " \"rms_norm_eps\": 1e-06,\n", - " \"rope_theta\": 1000000.0,\n", - " \"sliding_window\": 32768,\n", - " \"tie_word_embeddings\": true,\n", - " \"torch_dtype\": \"bfloat16\",\n", - " \"transformers_version\": \"4.42.3\",\n", - " \"use_cache\": true,\n", - " \"use_sliding_window\": false,\n", - " \"vocab_size\": 151936\n", - "}\n", - "\n", - "06/30/2024 19:43:50 - INFO - llamafactory.model.model_utils.quantization - Quantizing model to 4 bit with bitsandbytes.\n", - "06/30/2024 19:43:50 - INFO - llamafactory.model.patcher - Using KV cache for faster generation.\n", - "[INFO|modeling_utils.py:3556] 2024-06-30 19:43:50,973 >> loading weights file model.safetensors from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/model.safetensors\n", - "[INFO|modeling_utils.py:1531] 2024-06-30 19:43:51,791 >> Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16.\n", - "[INFO|configuration_utils.py:1000] 2024-06-30 19:43:51,794 >> Generate config GenerationConfig {\n", - " \"bos_token_id\": 151643,\n", - " \"eos_token_id\": 151645\n", - "}\n", - "\n", - "[INFO|modeling_utils.py:4364] 2024-06-30 19:44:16,853 >> All model checkpoint weights were used when initializing Qwen2ForCausalLM.\n", - "\n", - 
"[INFO|modeling_utils.py:4372] 2024-06-30 19:44:16,853 >> All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at Qwen/Qwen2-0.5B-Instruct.\n", - "If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training.\n", - "[INFO|configuration_utils.py:955] 2024-06-30 19:44:17,214 >> loading configuration file generation_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/generation_config.json\n", - "[INFO|configuration_utils.py:1000] 2024-06-30 19:44:17,214 >> Generate config GenerationConfig {\n", - " \"bos_token_id\": 151643,\n", - " \"do_sample\": true,\n", - " \"eos_token_id\": [\n", - " 151645,\n", - " 151643\n", - " ],\n", - " \"pad_token_id\": 151643,\n", - " \"repetition_penalty\": 1.1,\n", - " \"temperature\": 0.7,\n", - " \"top_k\": 20,\n", - " \"top_p\": 0.8\n", - "}\n", - "\n", - "06/30/2024 19:44:17 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.\n", - "06/30/2024 19:44:17 - INFO - llamafactory.model.adapter - Loaded adapter(s): llama-factory/saves/qwen2-0.5b/lora/sft/checkpoint-4480\n", - "06/30/2024 19:44:17 - INFO - llamafactory.model.loader - all params: 498,431,872\n", - "(2) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. 
Max memory = 11.994 GB.\n", - "0.666 GB of memory reserved.\n", - "loading train/test data files\n", - "DatasetDict({\n", - " train: Dataset({\n", - " features: ['chinese', 'english', 'text', 'prompt'],\n", - " num_rows: 4528\n", - " })\n", - " test: Dataset({\n", - " features: ['chinese', 'english', 'text', 'prompt'],\n", - " num_rows: 1133\n", - " })\n", - "})\n", - "Evaluating model: Qwen/Qwen2-0.5B-Instruct\n", - " 0%| | 0/1133 [00:00\n", - "--------\n", - "step 2: Old Geng took up his gun and raised a triangular brow – the cocking of the revolver started as soon as he lifted it. Bang! Bang! Bang! – hail was raining down from the heavens in a torrent of gold sparrows that sounded like hail as they whizzed down the path, rustling leaves as they passed by.\n", - "--------\n", - "step 3: Old Geng took up his gun and raised a triangular brow – the cocking of the revolver started as soon as he lifted it. Bang! Bang! Bang! – hail was raining down from the heavens in a torrent of gold sparrows that sounded like hail as they whizzed down the path, rustling leaves as they passed by.\n", - "100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 1133/1133 [34:03<00:00, 1.80s/it]\n", - "(3) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n", - "0.9 GB of memory reserved.\n", - " chinese ... Qwen/Qwen2-0.5B-Instruct_checkpoint-4480\n", - "0 老耿端衷ζžͺοΌŒηœ―ηΌθ΅·δΈ€εͺδΈ‰θ§’ηœΌοΌŒδΈ€ζ‚ζ‰³ζœΊε“δΊ†ζžͺοΌŒε†°ι›Ήθˆ¬ηš„ι‡‘ιΊ»ι›€εŠˆε“©ε•ͺε•¦εΎ€δΈ‹θ½οΏ½οΏ½ι“η ‚ε­εœ¨ζŸ³ζžι—΄ι£ž... ... 
Old Geng took up his gun and raised a triangul...\n", - "\n", - "[1 rows x 10 columns]\n", - "{'accuracy': 0.0, 'correct_ids': [], 'meteor': 0.29297589531864165, 'bleu_scores': {'bleu': 0.066715036654756, 'precisions': [0.33156006043817676, 0.0917577933735923, 0.03666926492018843, 0.017757733774927772], 'brevity_penalty': 1.0, 'length_ratio': 1.0522689632328586, 'translation_length': 31768, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.3235260151085271, 'rouge2': 0.10613228641914846, 'rougeL': 0.2654728857129883, 'rougeLsum': 0.26595119389766264}}\n", - "Epoch 9\n", - "loading env vars from: /home/inflaton/code/projects/courses/llm-finetuning/.env\n", - "Adding /home/inflaton/code/projects/courses/llm-finetuning to sys.path\n", - "πŸ¦₯ Unsloth: Will patch your computer to enable 2x faster free finetuning.\n", - "[nltk_data] Downloading package wordnet to /home/inflaton/nltk_data...\n", - "[nltk_data] Package wordnet is already up-to-date!\n", - "[nltk_data] Downloading package punkt to /home/inflaton/nltk_data...\n", - "[nltk_data] Package punkt is already up-to-date!\n", - "[nltk_data] Downloading package omw-1.4 to /home/inflaton/nltk_data...\n", - "[nltk_data] Package omw-1.4 is already up-to-date!\n", - "loading /home/inflaton/code/projects/courses/llm-finetuning/llm_toolkit/translation_engine.py\n", - "Qwen/Qwen2-0.5B-Instruct llama-factory/saves/qwen2-0.5b/lora/sft/checkpoint-5040 True datasets/mac/mac.tsv results/mac-results_lf.csv\n", - "(1) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. 
Max memory = 11.994 GB.\n", - "0.0 GB of memory reserved.\n", - "loading model: Qwen/Qwen2-0.5B-Instruct\n", - "[INFO|tokenization_utils_base.py:2161] 2024-06-30 20:18:42,543 >> loading file vocab.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/vocab.json\n", - "[INFO|tokenization_utils_base.py:2161] 2024-06-30 20:18:42,544 >> loading file merges.txt from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/merges.txt\n", - "[INFO|tokenization_utils_base.py:2161] 2024-06-30 20:18:42,544 >> loading file tokenizer.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/tokenizer.json\n", - "[INFO|tokenization_utils_base.py:2161] 2024-06-30 20:18:42,544 >> loading file added_tokens.json from cache at None\n", - "[INFO|tokenization_utils_base.py:2161] 2024-06-30 20:18:42,544 >> loading file special_tokens_map.json from cache at None\n", - "[INFO|tokenization_utils_base.py:2161] 2024-06-30 20:18:42,544 >> loading file tokenizer_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/tokenizer_config.json\n", - "[WARNING|logging.py:313] 2024-06-30 20:18:42,670 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n", - "06/30/2024 20:18:42 - INFO - llamafactory.data.template - Replace eos token: <|im_end|>\n", - "06/30/2024 20:18:42 - INFO - llamafactory.data.template - Add <|im_start|> to stop words.\n", - "[INFO|configuration_utils.py:733] 2024-06-30 20:18:42,995 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/config.json\n", - 
"[INFO|configuration_utils.py:800] 2024-06-30 20:18:42,995 >> Model config Qwen2Config {\n", - " \"_name_or_path\": \"Qwen/Qwen2-0.5B-Instruct\",\n", - " \"architectures\": [\n", - " \"Qwen2ForCausalLM\"\n", - " ],\n", - " \"attention_dropout\": 0.0,\n", - " \"bos_token_id\": 151643,\n", - " \"eos_token_id\": 151645,\n", - " \"hidden_act\": \"silu\",\n", - " \"hidden_size\": 896,\n", - " \"initializer_range\": 0.02,\n", - " \"intermediate_size\": 4864,\n", - " \"max_position_embeddings\": 32768,\n", - " \"max_window_layers\": 24,\n", - " \"model_type\": \"qwen2\",\n", - " \"num_attention_heads\": 14,\n", - " \"num_hidden_layers\": 24,\n", - " \"num_key_value_heads\": 2,\n", - " \"rms_norm_eps\": 1e-06,\n", - " \"rope_theta\": 1000000.0,\n", - " \"sliding_window\": 32768,\n", - " \"tie_word_embeddings\": true,\n", - " \"torch_dtype\": \"bfloat16\",\n", - " \"transformers_version\": \"4.42.3\",\n", - " \"use_cache\": true,\n", - " \"use_sliding_window\": false,\n", - " \"vocab_size\": 151936\n", - "}\n", - "\n", - "06/30/2024 20:18:42 - INFO - llamafactory.model.model_utils.quantization - Quantizing model to 4 bit with bitsandbytes.\n", - "06/30/2024 20:18:42 - INFO - llamafactory.model.patcher - Using KV cache for faster generation.\n", - "[INFO|modeling_utils.py:3556] 2024-06-30 20:18:43,139 >> loading weights file model.safetensors from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/model.safetensors\n", - "[INFO|modeling_utils.py:1531] 2024-06-30 20:18:44,397 >> Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16.\n", - "[INFO|configuration_utils.py:1000] 2024-06-30 20:18:44,400 >> Generate config GenerationConfig {\n", - " \"bos_token_id\": 151643,\n", - " \"eos_token_id\": 151645\n", - "}\n", - "\n", - "[INFO|modeling_utils.py:4364] 2024-06-30 20:19:10,704 >> All model checkpoint weights were used when initializing Qwen2ForCausalLM.\n", - "\n", - 
"[INFO|modeling_utils.py:4372] 2024-06-30 20:19:10,704 >> All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at Qwen/Qwen2-0.5B-Instruct.\n", - "If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training.\n", - "[INFO|configuration_utils.py:955] 2024-06-30 20:19:11,065 >> loading configuration file generation_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/generation_config.json\n", - "[INFO|configuration_utils.py:1000] 2024-06-30 20:19:11,065 >> Generate config GenerationConfig {\n", - " \"bos_token_id\": 151643,\n", - " \"do_sample\": true,\n", - " \"eos_token_id\": [\n", - " 151645,\n", - " 151643\n", - " ],\n", - " \"pad_token_id\": 151643,\n", - " \"repetition_penalty\": 1.1,\n", - " \"temperature\": 0.7,\n", - " \"top_k\": 20,\n", - " \"top_p\": 0.8\n", - "}\n", - "\n", - "06/30/2024 20:19:11 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.\n", - "06/30/2024 20:19:11 - INFO - llamafactory.model.adapter - Loaded adapter(s): llama-factory/saves/qwen2-0.5b/lora/sft/checkpoint-5040\n", - "06/30/2024 20:19:11 - INFO - llamafactory.model.loader - all params: 498,431,872\n", - "(2) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n", - "0.666 GB of memory reserved.\n", - "loading train/test data files\n", - "DatasetDict({\n", - " train: Dataset({\n", - " features: ['chinese', 'english', 'text', 'prompt'],\n", - " num_rows: 4528\n", - " })\n", - " test: Dataset({\n", - " features: ['chinese', 'english', 'text', 'prompt'],\n", - " num_rows: 1133\n", - " })\n", - "})\n", - "Evaluating model: Qwen/Qwen2-0.5B-Instruct\n", - " 0%| | 0/1133 [00:00\n", - "--------\n", - "step 2: Old Geng took out his pistol, but it was too late. 
Shot after shot of shotgun went off as he held the trigger, a bang-bang-gong-tingling fall from the din of bullets falling from the air.\n", - "--------\n", - "step 3: Old Geng took out his pistol, but it was too late. Shot after shot of shotgun went off as he held the trigger, a bang-bang-gong-tingling fall from the din of bullets falling from the air.\n", - "100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 1133/1133 [47:05<00:00, 2.49s/it]\n", - "(3) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n", - "4.221 GB of memory reserved.\n", - " chinese ... Qwen/Qwen2-0.5B-Instruct_checkpoint-5040\n", - "0 老耿端衷ζžͺοΌŒηœ―ηΌθ΅·δΈ€εͺδΈ‰θ§’ηœΌοΌŒδΈ€ζ‚ζ‰³ζœΊε“δΊ†ζžͺοΌŒε†°ι›Ήθˆ¬ηš„ι‡‘ιΊ»ι›€εŠˆε“©ε•ͺε•¦εΎ€δΈ‹θ½οΌŒι“η ‚ε­εœ¨ζŸ³ζžι—΄ι£ž... ... Old Geng took out his pistol, but it was too l...\n", - "\n", - "[1 rows x 11 columns]\n", - "{'accuracy': 0.0, 'correct_ids': [], 'meteor': 0.2833319356953958, 'bleu_scores': {'bleu': 0.05430760022077538, 'precisions': [0.28200039135660976, 0.0749133949191686, 0.029243256147051803, 0.01408015809300315], 'brevity_penalty': 1.0, 'length_ratio': 1.184928784365684, 'translation_length': 35773, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.3123182638202295, 'rouge2': 0.1006380742528073, 'rougeL': 0.25624416362806557, 'rougeLsum': 0.25609208337653155}}\n", - "Epoch 10\n", - "loading env vars from: /home/inflaton/code/projects/courses/llm-finetuning/.env\n", - "Adding /home/inflaton/code/projects/courses/llm-finetuning to sys.path\n", - "πŸ¦₯ Unsloth: Will patch your computer to enable 2x faster free finetuning.\n", - "[nltk_data] Downloading package wordnet to /home/inflaton/nltk_data...\n", - "[nltk_data] Package wordnet is already up-to-date!\n", - "[nltk_data] Downloading package punkt to /home/inflaton/nltk_data...\n", - "[nltk_data] Package punkt is already up-to-date!\n", - "[nltk_data] Downloading package omw-1.4 to 
/home/inflaton/nltk_data...\n", - "[nltk_data] Package omw-1.4 is already up-to-date!\n", - "loading /home/inflaton/code/projects/courses/llm-finetuning/llm_toolkit/translation_engine.py\n", - "Qwen/Qwen2-0.5B-Instruct llama-factory/saves/qwen2-0.5b/lora/sft/checkpoint-5600 True datasets/mac/mac.tsv results/mac-results_lf.csv\n", - "(1) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n", - "0.0 GB of memory reserved.\n", - "loading model: Qwen/Qwen2-0.5B-Instruct\n", - "[INFO|tokenization_utils_base.py:2161] 2024-06-30 21:06:41,264 >> loading file vocab.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/vocab.json\n", - "[INFO|tokenization_utils_base.py:2161] 2024-06-30 21:06:41,264 >> loading file merges.txt from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/merges.txt\n", - "[INFO|tokenization_utils_base.py:2161] 2024-06-30 21:06:41,264 >> loading file tokenizer.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/tokenizer.json\n", - "[INFO|tokenization_utils_base.py:2161] 2024-06-30 21:06:41,264 >> loading file added_tokens.json from cache at None\n", - "[INFO|tokenization_utils_base.py:2161] 2024-06-30 21:06:41,264 >> loading file special_tokens_map.json from cache at None\n", - "[INFO|tokenization_utils_base.py:2161] 2024-06-30 21:06:41,264 >> loading file tokenizer_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/tokenizer_config.json\n", - "[WARNING|logging.py:313] 2024-06-30 21:06:41,413 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n", - "06/30/2024 21:06:41 - INFO - llamafactory.data.template - Replace 
eos token: <|im_end|>\n", - "06/30/2024 21:06:41 - INFO - llamafactory.data.template - Add <|im_start|> to stop words.\n", - "[INFO|configuration_utils.py:733] 2024-06-30 21:06:41,679 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/config.json\n", - "[INFO|configuration_utils.py:800] 2024-06-30 21:06:41,680 >> Model config Qwen2Config {\n", - " \"_name_or_path\": \"Qwen/Qwen2-0.5B-Instruct\",\n", - " \"architectures\": [\n", - " \"Qwen2ForCausalLM\"\n", - " ],\n", - " \"attention_dropout\": 0.0,\n", - " \"bos_token_id\": 151643,\n", - " \"eos_token_id\": 151645,\n", - " \"hidden_act\": \"silu\",\n", - " \"hidden_size\": 896,\n", - " \"initializer_range\": 0.02,\n", - " \"intermediate_size\": 4864,\n", - " \"max_position_embeddings\": 32768,\n", - " \"max_window_layers\": 24,\n", - " \"model_type\": \"qwen2\",\n", - " \"num_attention_heads\": 14,\n", - " \"num_hidden_layers\": 24,\n", - " \"num_key_value_heads\": 2,\n", - " \"rms_norm_eps\": 1e-06,\n", - " \"rope_theta\": 1000000.0,\n", - " \"sliding_window\": 32768,\n", - " \"tie_word_embeddings\": true,\n", - " \"torch_dtype\": \"bfloat16\",\n", - " \"transformers_version\": \"4.42.3\",\n", - " \"use_cache\": true,\n", - " \"use_sliding_window\": false,\n", - " \"vocab_size\": 151936\n", - "}\n", - "\n", - "06/30/2024 21:06:41 - INFO - llamafactory.model.model_utils.quantization - Quantizing model to 4 bit with bitsandbytes.\n", - "06/30/2024 21:06:41 - INFO - llamafactory.model.patcher - Using KV cache for faster generation.\n", - "[INFO|modeling_utils.py:3556] 2024-06-30 21:06:41,746 >> loading weights file model.safetensors from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/model.safetensors\n", - "[INFO|modeling_utils.py:1531] 2024-06-30 21:06:42,649 >> Instantiating Qwen2ForCausalLM model under 
default dtype torch.bfloat16.\n", - "[INFO|configuration_utils.py:1000] 2024-06-30 21:06:42,653 >> Generate config GenerationConfig {\n", - " \"bos_token_id\": 151643,\n", - " \"eos_token_id\": 151645\n", - "}\n", - "\n", - "[INFO|modeling_utils.py:4364] 2024-06-30 21:07:13,550 >> All model checkpoint weights were used when initializing Qwen2ForCausalLM.\n", - "\n", - "[INFO|modeling_utils.py:4372] 2024-06-30 21:07:13,550 >> All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at Qwen/Qwen2-0.5B-Instruct.\n", - "If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training.\n", - "[INFO|configuration_utils.py:955] 2024-06-30 21:07:13,853 >> loading configuration file generation_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c291d6fce4804a1d39305f388dd32897d1f7acc4/generation_config.json\n", - "[INFO|configuration_utils.py:1000] 2024-06-30 21:07:13,853 >> Generate config GenerationConfig {\n", - " \"bos_token_id\": 151643,\n", - " \"do_sample\": true,\n", - " \"eos_token_id\": [\n", - " 151645,\n", - " 151643\n", - " ],\n", - " \"pad_token_id\": 151643,\n", - " \"repetition_penalty\": 1.1,\n", - " \"temperature\": 0.7,\n", - " \"top_k\": 20,\n", - " \"top_p\": 0.8\n", - "}\n", - "\n", - "06/30/2024 21:07:14 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.\n", - "06/30/2024 21:07:14 - INFO - llamafactory.model.adapter - Loaded adapter(s): llama-factory/saves/qwen2-0.5b/lora/sft/checkpoint-5600\n", - "06/30/2024 21:07:14 - INFO - llamafactory.model.loader - all params: 498,431,872\n", - "(2) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. 
Max memory = 11.994 GB.\n", - "0.666 GB of memory reserved.\n", - "loading train/test data files\n", - "DatasetDict({\n", - " train: Dataset({\n", - " features: ['chinese', 'english', 'text', 'prompt'],\n", - " num_rows: 4528\n", - " })\n", - " test: Dataset({\n", - " features: ['chinese', 'english', 'text', 'prompt'],\n", - " num_rows: 1133\n", - " })\n", - "})\n", - "Evaluating model: Qwen/Qwen2-0.5B-Instruct\n", - " 0%| | 0/1133 [00:00\n", - "--------\n", - "step 2: Old Geng took up his gun and fired – hammering rain! Yellow streaks flew as he fired a goose-pat of gold – and then there was the sound of gravel between his bullets – crunchy.\n", - "--------\n", - "step 3: Old Geng took up his gun and fired – hammering rain! Yellow streaks flew as he fired a goose-pat of gold – and then there was the sound of gravel between his bullets – crunchy.\n", - "100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 1133/1133 [48:43<00:00, 2.58s/it]\n", - "(3) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n", - "4.221 GB of memory reserved.\n", - " chinese ... Qwen/Qwen2-0.5B-Instruct_checkpoint-5600\n", - "0 老耿端衷ζžͺοΌŒηœ―ηΌθ΅·δΈ€εͺδΈ‰θ§’ηœΌοΌŒδΈ€ζ‚ζ‰³ζœΊε“δΊ†ζžͺοΌŒε†°ι›Ήθˆ¬ηš„ι‡‘ιΊ»ι›€εŠˆε“©ε•ͺε•¦εΎ€δΈ‹θ½οΌŒι“η ‚ε­εœ¨ζŸ³ζžι—΄ι£ž... ... 
Old Geng took up his gun and fired – hammering...\n", - "\n", - "[1 rows x 12 columns]\n", - "{'accuracy': 0.0, 'correct_ids': [], 'meteor': 0.28432663251720675, 'bleu_scores': {'bleu': 0.052792420940353475, 'precisions': [0.29167024596970476, 0.07445989937851435, 0.0279223562549752, 0.012809131261889664], 'brevity_penalty': 1.0, 'length_ratio': 1.156773766147731, 'translation_length': 34923, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.31243618674019946, 'rouge2': 0.09792736995151512, 'rougeL': 0.25604383226456534, 'rougeLsum': 0.2555907570933199}}\n", - "CPU times: user 12min 29s, sys: 4min 19s, total: 16min 48s\n", - "Wall time: 8h 34min 52s\n" - ] - } - ], - "source": [ - "%%time\n", - "\n", - "os.environ[\"MODEL_NAME\"] = \"Qwen/Qwen2-0.5B-Instruct\" \n", - "for i in range(1, num_train_epochs + 1):\n", - " print(f\"Epoch {i}\")\n", - " adapter_path = f\"llama-factory/saves/qwen2-0.5b/lora/sft/checkpoint-{560 * i}\"\n", - " os.environ[\"ADAPTER_NAME_OR_PATH\"] = adapter_path\n", - " !python llm_toolkit/eval.py " - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Epoch 1\n", - "loading env vars from: /home/inflaton/code/projects/courses/llm-finetuning/.env\n", - "Adding /home/inflaton/code/projects/courses/llm-finetuning to sys.path\n", - "πŸ¦₯ Unsloth: Will patch your computer to enable 2x faster free finetuning.\n", - "[nltk_data] Downloading package wordnet to /home/inflaton/nltk_data...\n", - "[nltk_data] Package wordnet is already up-to-date!\n", - "[nltk_data] Downloading package punkt to /home/inflaton/nltk_data...\n", - "[nltk_data] Package punkt is already up-to-date!\n", - "[nltk_data] Downloading package omw-1.4 to /home/inflaton/nltk_data...\n", - "[nltk_data] Package omw-1.4 is already up-to-date!\n", - "loading /home/inflaton/code/projects/courses/llm-finetuning/llm_toolkit/translation_engine.py\n", - "Qwen/Qwen2-1.5B-Instruct 
llama-factory/saves/qwen2-1.5b/lora/sft/checkpoint-560 True datasets/mac/mac.tsv results/mac-results_lf.csv\n", - "(1) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n", - "0.0 GB of memory reserved.\n", - "loading model: Qwen/Qwen2-1.5B-Instruct\n", - "[INFO|tokenization_utils_base.py:2161] 2024-06-30 21:56:19,887 >> loading file vocab.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/vocab.json\n", - "[INFO|tokenization_utils_base.py:2161] 2024-06-30 21:56:19,887 >> loading file merges.txt from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/merges.txt\n", - "[INFO|tokenization_utils_base.py:2161] 2024-06-30 21:56:19,887 >> loading file tokenizer.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/tokenizer.json\n", - "[INFO|tokenization_utils_base.py:2161] 2024-06-30 21:56:19,887 >> loading file added_tokens.json from cache at None\n", - "[INFO|tokenization_utils_base.py:2161] 2024-06-30 21:56:19,887 >> loading file special_tokens_map.json from cache at None\n", - "[INFO|tokenization_utils_base.py:2161] 2024-06-30 21:56:19,887 >> loading file tokenizer_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/tokenizer_config.json\n", - "[WARNING|logging.py:313] 2024-06-30 21:56:20,070 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n", - "06/30/2024 21:56:20 - INFO - llamafactory.data.template - Replace eos token: <|im_end|>\n", - "06/30/2024 21:56:20 - INFO - llamafactory.data.template - Add <|im_start|> to stop words.\n", - "[INFO|configuration_utils.py:733] 2024-06-30 21:56:20,393 >> loading configuration file 
config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/config.json\n", - "[INFO|configuration_utils.py:800] 2024-06-30 21:56:20,393 >> Model config Qwen2Config {\n", - " \"_name_or_path\": \"Qwen/Qwen2-1.5B-Instruct\",\n", - " \"architectures\": [\n", - " \"Qwen2ForCausalLM\"\n", - " ],\n", - " \"attention_dropout\": 0.0,\n", - " \"bos_token_id\": 151643,\n", - " \"eos_token_id\": 151645,\n", - " \"hidden_act\": \"silu\",\n", - " \"hidden_size\": 1536,\n", - " \"initializer_range\": 0.02,\n", - " \"intermediate_size\": 8960,\n", - " \"max_position_embeddings\": 32768,\n", - " \"max_window_layers\": 28,\n", - " \"model_type\": \"qwen2\",\n", - " \"num_attention_heads\": 12,\n", - " \"num_hidden_layers\": 28,\n", - " \"num_key_value_heads\": 2,\n", - " \"rms_norm_eps\": 1e-06,\n", - " \"rope_theta\": 1000000.0,\n", - " \"sliding_window\": 32768,\n", - " \"tie_word_embeddings\": true,\n", - " \"torch_dtype\": \"bfloat16\",\n", - " \"transformers_version\": \"4.42.3\",\n", - " \"use_cache\": true,\n", - " \"use_sliding_window\": false,\n", - " \"vocab_size\": 151936\n", - "}\n", - "\n", - "06/30/2024 21:56:20 - INFO - llamafactory.model.model_utils.quantization - Quantizing model to 4 bit with bitsandbytes.\n", - "06/30/2024 21:56:20 - INFO - llamafactory.model.patcher - Using KV cache for faster generation.\n", - "[INFO|modeling_utils.py:3556] 2024-06-30 21:56:20,457 >> loading weights file model.safetensors from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/model.safetensors\n", - "[INFO|modeling_utils.py:1531] 2024-06-30 21:56:22,769 >> Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16.\n", - "[INFO|configuration_utils.py:1000] 2024-06-30 21:56:22,772 >> Generate config GenerationConfig {\n", - " \"bos_token_id\": 151643,\n", - " \"eos_token_id\": 151645\n", - "}\n", 
- "\n", - "[INFO|modeling_utils.py:4364] 2024-06-30 21:58:45,740 >> All model checkpoint weights were used when initializing Qwen2ForCausalLM.\n", - "\n", - "[INFO|modeling_utils.py:4372] 2024-06-30 21:58:45,740 >> All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at Qwen/Qwen2-1.5B-Instruct.\n", - "If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training.\n", - "[INFO|configuration_utils.py:955] 2024-06-30 21:58:46,024 >> loading configuration file generation_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/generation_config.json\n", - "[INFO|configuration_utils.py:1000] 2024-06-30 21:58:46,024 >> Generate config GenerationConfig {\n", - " \"bos_token_id\": 151643,\n", - " \"do_sample\": true,\n", - " \"eos_token_id\": [\n", - " 151645,\n", - " 151643\n", - " ],\n", - " \"pad_token_id\": 151643,\n", - " \"repetition_penalty\": 1.1,\n", - " \"temperature\": 0.7,\n", - " \"top_k\": 20,\n", - " \"top_p\": 0.8\n", - "}\n", - "\n", - "06/30/2024 21:58:46 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.\n", - "06/30/2024 21:58:46 - INFO - llamafactory.model.adapter - Loaded adapter(s): llama-factory/saves/qwen2-1.5b/lora/sft/checkpoint-560\n", - "06/30/2024 21:58:46 - INFO - llamafactory.model.loader - all params: 1,552,946,688\n", - "(2) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. 
Max memory = 11.994 GB.\n", - "1.604 GB of memory reserved.\n", - "loading train/test data files\n", - "Map: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4528/4528 [00:00<00:00, 35828.73 examples/s]\n", - "Map: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 1133/1133 [00:00<00:00, 12322.75 examples/s]\n", - "DatasetDict({\n", - " train: Dataset({\n", - " features: ['chinese', 'english', 'text', 'prompt'],\n", - " num_rows: 4528\n", - " })\n", - " test: Dataset({\n", - " features: ['chinese', 'english', 'text', 'prompt'],\n", - " num_rows: 1133\n", - " })\n", - "})\n", - "Evaluating model: Qwen/Qwen2-1.5B-Instruct\n", - " 0%| | 0/1133 [00:00\n", - "--------\n", - "step 2: Grannie Geng held up his gun with one eye, narrowed it, raised the barrel of the rifle, fired a hail of bullets at the target.\n", - "--------\n", - "step 3: Grannie Geng held up his gun with one eye, narrowed it, raised the barrel of the rifle, fired a hail of bullets at the target.\n", - "100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 1133/1133 [44:42<00:00, 2.37s/it]\n", - "(3) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n", - "1.857 GB of memory reserved.\n", - " chinese ... Qwen/Qwen2-1.5B-Instruct_checkpoint-560\n", - "0 老耿端衷ζžͺοΌŒηœ―ηΌθ΅·δΈ€εͺδΈ‰θ§’ηœΌοΌŒδΈ€ζ‚ζ‰³ζœΊε“δΊ†ζžͺοΌŒε†°ι›Ήθˆ¬ηš„ι‡‘ιΊ»ι›€εŠˆε“©ε•ͺε•¦εΎ€δΈ‹θ½οΌŒι“η ‚ε­εœ¨ζŸ³ζžι—΄ι£ž... ... 
Grannie Geng held up his gun with one eye, nar...\n", - "\n", - "[1 rows x 13 columns]\n", - "{'accuracy': 0.00264783759929391, 'correct_ids': [240, 738, 1026], 'meteor': 0.3555548051770412, 'bleu_scores': {'bleu': 0.08837370077365968, 'precisions': [0.4154119950169069, 0.13452266152362585, 0.055553404823661494, 0.026475589021131892], 'brevity_penalty': 0.9281439603442432, 'length_ratio': 0.9306061609804571, 'translation_length': 28095, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.39474926445540526, 'rouge2': 0.14909336721544575, 'rougeL': 0.3340601663307491, 'rougeLsum': 0.33415584663948783}}\n", - "Epoch 2\n", - "loading env vars from: /home/inflaton/code/projects/courses/llm-finetuning/.env\n", - "Adding /home/inflaton/code/projects/courses/llm-finetuning to sys.path\n", - "πŸ¦₯ Unsloth: Will patch your computer to enable 2x faster free finetuning.\n", - "[nltk_data] Downloading package wordnet to /home/inflaton/nltk_data...\n", - "[nltk_data] Package wordnet is already up-to-date!\n", - "[nltk_data] Downloading package punkt to /home/inflaton/nltk_data...\n", - "[nltk_data] Package punkt is already up-to-date!\n", - "[nltk_data] Downloading package omw-1.4 to /home/inflaton/nltk_data...\n", - "[nltk_data] Package omw-1.4 is already up-to-date!\n", - "loading /home/inflaton/code/projects/courses/llm-finetuning/llm_toolkit/translation_engine.py\n", - "Qwen/Qwen2-1.5B-Instruct llama-factory/saves/qwen2-1.5b/lora/sft/checkpoint-1120 True datasets/mac/mac.tsv results/mac-results_lf.csv\n", - "(1) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. 
Max memory = 11.994 GB.\n", - "0.0 GB of memory reserved.\n", - "loading model: Qwen/Qwen2-1.5B-Instruct\n", - "[INFO|tokenization_utils_base.py:2161] 2024-06-30 22:43:48,381 >> loading file vocab.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/vocab.json\n", - "[INFO|tokenization_utils_base.py:2161] 2024-06-30 22:43:48,381 >> loading file merges.txt from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/merges.txt\n", - "[INFO|tokenization_utils_base.py:2161] 2024-06-30 22:43:48,381 >> loading file tokenizer.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/tokenizer.json\n", - "[INFO|tokenization_utils_base.py:2161] 2024-06-30 22:43:48,381 >> loading file added_tokens.json from cache at None\n", - "[INFO|tokenization_utils_base.py:2161] 2024-06-30 22:43:48,381 >> loading file special_tokens_map.json from cache at None\n", - "[INFO|tokenization_utils_base.py:2161] 2024-06-30 22:43:48,381 >> loading file tokenizer_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/tokenizer_config.json\n", - "[WARNING|logging.py:313] 2024-06-30 22:43:48,549 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n", - "06/30/2024 22:43:48 - INFO - llamafactory.data.template - Replace eos token: <|im_end|>\n", - "06/30/2024 22:43:48 - INFO - llamafactory.data.template - Add <|im_start|> to stop words.\n", - "[INFO|configuration_utils.py:733] 2024-06-30 22:43:48,826 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/config.json\n", - 
"[INFO|configuration_utils.py:800] 2024-06-30 22:43:48,826 >> Model config Qwen2Config {\n", - " \"_name_or_path\": \"Qwen/Qwen2-1.5B-Instruct\",\n", - " \"architectures\": [\n", - " \"Qwen2ForCausalLM\"\n", - " ],\n", - " \"attention_dropout\": 0.0,\n", - " \"bos_token_id\": 151643,\n", - " \"eos_token_id\": 151645,\n", - " \"hidden_act\": \"silu\",\n", - " \"hidden_size\": 1536,\n", - " \"initializer_range\": 0.02,\n", - " \"intermediate_size\": 8960,\n", - " \"max_position_embeddings\": 32768,\n", - " \"max_window_layers\": 28,\n", - " \"model_type\": \"qwen2\",\n", - " \"num_attention_heads\": 12,\n", - " \"num_hidden_layers\": 28,\n", - " \"num_key_value_heads\": 2,\n", - " \"rms_norm_eps\": 1e-06,\n", - " \"rope_theta\": 1000000.0,\n", - " \"sliding_window\": 32768,\n", - " \"tie_word_embeddings\": true,\n", - " \"torch_dtype\": \"bfloat16\",\n", - " \"transformers_version\": \"4.42.3\",\n", - " \"use_cache\": true,\n", - " \"use_sliding_window\": false,\n", - " \"vocab_size\": 151936\n", - "}\n", - "\n", - "06/30/2024 22:43:48 - INFO - llamafactory.model.model_utils.quantization - Quantizing model to 4 bit with bitsandbytes.\n", - "06/30/2024 22:43:48 - INFO - llamafactory.model.patcher - Using KV cache for faster generation.\n", - "[INFO|modeling_utils.py:3556] 2024-06-30 22:43:48,853 >> loading weights file model.safetensors from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/model.safetensors\n", - "[INFO|modeling_utils.py:1531] 2024-06-30 22:43:49,950 >> Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16.\n", - "[INFO|configuration_utils.py:1000] 2024-06-30 22:43:49,954 >> Generate config GenerationConfig {\n", - " \"bos_token_id\": 151643,\n", - " \"eos_token_id\": 151645\n", - "}\n", - "\n", - "[INFO|modeling_utils.py:4364] 2024-06-30 22:46:48,562 >> All model checkpoint weights were used when initializing Qwen2ForCausalLM.\n", - "\n", - 
"[INFO|modeling_utils.py:4372] 2024-06-30 22:46:48,562 >> All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at Qwen/Qwen2-1.5B-Instruct.\n", - "If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training.\n", - "[INFO|configuration_utils.py:955] 2024-06-30 22:46:48,846 >> loading configuration file generation_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/generation_config.json\n", - "[INFO|configuration_utils.py:1000] 2024-06-30 22:46:48,846 >> Generate config GenerationConfig {\n", - " \"bos_token_id\": 151643,\n", - " \"do_sample\": true,\n", - " \"eos_token_id\": [\n", - " 151645,\n", - " 151643\n", - " ],\n", - " \"pad_token_id\": 151643,\n", - " \"repetition_penalty\": 1.1,\n", - " \"temperature\": 0.7,\n", - " \"top_k\": 20,\n", - " \"top_p\": 0.8\n", - "}\n", - "\n", - "06/30/2024 22:46:51 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.\n", - "06/30/2024 22:46:54 - INFO - llamafactory.model.adapter - Loaded adapter(s): llama-factory/saves/qwen2-1.5b/lora/sft/checkpoint-1120\n", - "06/30/2024 22:46:55 - INFO - llamafactory.model.loader - all params: 1,552,946,688\n", - "(2) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n", - "1.604 GB of memory reserved.\n", - "loading train/test data files\n", - "DatasetDict({\n", - " train: Dataset({\n", - " features: ['chinese', 'english', 'text', 'prompt'],\n", - " num_rows: 4528\n", - " })\n", - " test: Dataset({\n", - " features: ['chinese', 'english', 'text', 'prompt'],\n", - " num_rows: 1133\n", - " })\n", - "})\n", - "Evaluating model: Qwen/Qwen2-1.5B-Instruct\n", - " 0%| | 0/1133 [00:00\n", - "--------\n", - "step 2: Old Geng raised his rifle and squinted at it through a slit in his eye. 
He squeezed the trigger and gold sparrows began to fall like rain. Iron sand scattered among the willow branches crackled.\n", - "--------\n", - "step 3: Old Geng raised his rifle and squinted at it through a slit in his eye. He squeezed the trigger and gold sparrows began to fall like rain. Iron sand scattered among the willow branches crackled.\n", - "100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 1133/1133 [54:52<00:00, 2.91s/it]\n", - "(3) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n", - "1.818 GB of memory reserved.\n", - " chinese ... Qwen/Qwen2-1.5B-Instruct_checkpoint-1120\n", - "0 老耿端衷ζžͺοΌŒηœ―ηΌθ΅·δΈ€εͺδΈ‰θ§’ηœΌοΌŒδΈ€ζ‚ζ‰³ζœΊε“δΊ†ζžͺοΌŒε†°ι›Ήθˆ¬ηš„ι‡‘ιΊ»ι›€εŠˆε“©ε•ͺε•¦εΎ€δΈ‹θ½οΌŒι“η ‚ε­εœ¨ζŸ³ζžι—΄ι£ž... ... Old Geng raised his rifle and squinted at it t...\n", - "\n", - "[1 rows x 14 columns]\n", - "{'accuracy': 0.00353045013239188, 'correct_ids': [77, 272, 381, 659], 'meteor': 0.364551066769633, 'bleu_scores': {'bleu': 0.09512979475404361, 'precisions': [0.41979252665206934, 0.1427074758661977, 0.06224115026959444, 0.03069440470838272], 'brevity_penalty': 0.9197334814475309, 'length_ratio': 0.9227890029811195, 'translation_length': 27859, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.40366781962223464, 'rouge2': 0.1631594243449107, 'rougeL': 0.34288741533227174, 'rougeLsum': 0.34268506193513737}}\n", - "Epoch 3\n", - "loading env vars from: /home/inflaton/code/projects/courses/llm-finetuning/.env\n", - "Adding /home/inflaton/code/projects/courses/llm-finetuning to sys.path\n", - "πŸ¦₯ Unsloth: Will patch your computer to enable 2x faster free finetuning.\n", - "[nltk_data] Downloading package wordnet to /home/inflaton/nltk_data...\n", - "[nltk_data] Package wordnet is already up-to-date!\n", - "[nltk_data] Downloading package punkt to /home/inflaton/nltk_data...\n", - "[nltk_data] Package punkt is already up-to-date!\n", - "[nltk_data] 
Downloading package omw-1.4 to /home/inflaton/nltk_data...\n", - "[nltk_data] Package omw-1.4 is already up-to-date!\n", - "loading /home/inflaton/code/projects/courses/llm-finetuning/llm_toolkit/translation_engine.py\n", - "Qwen/Qwen2-1.5B-Instruct llama-factory/saves/qwen2-1.5b/lora/sft/checkpoint-1680 True datasets/mac/mac.tsv results/mac-results_lf.csv\n", - "(1) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n", - "0.0 GB of memory reserved.\n", - "loading model: Qwen/Qwen2-1.5B-Instruct\n", - "[INFO|tokenization_utils_base.py:2161] 2024-06-30 23:42:11,002 >> loading file vocab.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/vocab.json\n", - "[INFO|tokenization_utils_base.py:2161] 2024-06-30 23:42:11,002 >> loading file merges.txt from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/merges.txt\n", - "[INFO|tokenization_utils_base.py:2161] 2024-06-30 23:42:11,002 >> loading file tokenizer.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/tokenizer.json\n", - "[INFO|tokenization_utils_base.py:2161] 2024-06-30 23:42:11,002 >> loading file added_tokens.json from cache at None\n", - "[INFO|tokenization_utils_base.py:2161] 2024-06-30 23:42:11,002 >> loading file special_tokens_map.json from cache at None\n", - "[INFO|tokenization_utils_base.py:2161] 2024-06-30 23:42:11,002 >> loading file tokenizer_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/tokenizer_config.json\n", - "[WARNING|logging.py:313] 2024-06-30 23:42:11,240 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n", - "06/30/2024 23:42:11 - INFO - 
llamafactory.data.template - Replace eos token: <|im_end|>\n", - "06/30/2024 23:42:11 - INFO - llamafactory.data.template - Add <|im_start|> to stop words.\n", - "[INFO|configuration_utils.py:733] 2024-06-30 23:42:11,554 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/config.json\n", - "[INFO|configuration_utils.py:800] 2024-06-30 23:42:11,554 >> Model config Qwen2Config {\n", - " \"_name_or_path\": \"Qwen/Qwen2-1.5B-Instruct\",\n", - " \"architectures\": [\n", - " \"Qwen2ForCausalLM\"\n", - " ],\n", - " \"attention_dropout\": 0.0,\n", - " \"bos_token_id\": 151643,\n", - " \"eos_token_id\": 151645,\n", - " \"hidden_act\": \"silu\",\n", - " \"hidden_size\": 1536,\n", - " \"initializer_range\": 0.02,\n", - " \"intermediate_size\": 8960,\n", - " \"max_position_embeddings\": 32768,\n", - " \"max_window_layers\": 28,\n", - " \"model_type\": \"qwen2\",\n", - " \"num_attention_heads\": 12,\n", - " \"num_hidden_layers\": 28,\n", - " \"num_key_value_heads\": 2,\n", - " \"rms_norm_eps\": 1e-06,\n", - " \"rope_theta\": 1000000.0,\n", - " \"sliding_window\": 32768,\n", - " \"tie_word_embeddings\": true,\n", - " \"torch_dtype\": \"bfloat16\",\n", - " \"transformers_version\": \"4.42.3\",\n", - " \"use_cache\": true,\n", - " \"use_sliding_window\": false,\n", - " \"vocab_size\": 151936\n", - "}\n", - "\n", - "06/30/2024 23:42:11 - INFO - llamafactory.model.model_utils.quantization - Quantizing model to 4 bit with bitsandbytes.\n", - "06/30/2024 23:42:11 - INFO - llamafactory.model.patcher - Using KV cache for faster generation.\n", - "[INFO|modeling_utils.py:3556] 2024-06-30 23:42:11,668 >> loading weights file model.safetensors from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/model.safetensors\n", - "[INFO|modeling_utils.py:1531] 2024-06-30 23:42:13,979 >> 
Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16.\n", - "[INFO|configuration_utils.py:1000] 2024-06-30 23:42:13,983 >> Generate config GenerationConfig {\n", - " \"bos_token_id\": 151643,\n", - " \"eos_token_id\": 151645\n", - "}\n", - "\n", - "[INFO|modeling_utils.py:4364] 2024-06-30 23:43:46,052 >> All model checkpoint weights were used when initializing Qwen2ForCausalLM.\n", - "\n", - "[INFO|modeling_utils.py:4372] 2024-06-30 23:43:46,052 >> All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at Qwen/Qwen2-1.5B-Instruct.\n", - "If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training.\n", - "[INFO|configuration_utils.py:955] 2024-06-30 23:43:47,155 >> loading configuration file generation_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/generation_config.json\n", - "[INFO|configuration_utils.py:1000] 2024-06-30 23:43:47,155 >> Generate config GenerationConfig {\n", - " \"bos_token_id\": 151643,\n", - " \"do_sample\": true,\n", - " \"eos_token_id\": [\n", - " 151645,\n", - " 151643\n", - " ],\n", - " \"pad_token_id\": 151643,\n", - " \"repetition_penalty\": 1.1,\n", - " \"temperature\": 0.7,\n", - " \"top_k\": 20,\n", - " \"top_p\": 0.8\n", - "}\n", - "\n", - "06/30/2024 23:43:47 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.\n", - "06/30/2024 23:43:48 - INFO - llamafactory.model.adapter - Loaded adapter(s): llama-factory/saves/qwen2-1.5b/lora/sft/checkpoint-1680\n", - "06/30/2024 23:43:48 - INFO - llamafactory.model.loader - all params: 1,552,946,688\n", - "(2) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. 
Max memory = 11.994 GB.\n", - "1.604 GB of memory reserved.\n", - "loading train/test data files\n", - "DatasetDict({\n", - " train: Dataset({\n", - " features: ['chinese', 'english', 'text', 'prompt'],\n", - " num_rows: 4528\n", - " })\n", - " test: Dataset({\n", - " features: ['chinese', 'english', 'text', 'prompt'],\n", - " num_rows: 1133\n", - " })\n", - "})\n", - "Evaluating model: Qwen/Qwen2-1.5B-Instruct\n", - " 0%| | 0/1133 [00:00\n", - "--------\n", - "step 2: Old Geng took his gun off the table and raised it to his eye. He squeezed the trigger and a hail of bullets fell from the sky, golden sparrows falling like rain as shrapnel flew through the air among the willows.\n", - "--------\n", - "step 3: Old Geng took his gun off the table and raised it to his eye. He squeezed the trigger and a hail of bullets fell from the sky, golden sparrows falling like rain as shrapnel flew through the air among the willows.\n", - "100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 1133/1133 [42:11<00:00, 2.23s/it]\n", - "(3) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n", - "1.838 GB of memory reserved.\n", - " chinese ... Qwen/Qwen2-1.5B-Instruct_checkpoint-1680\n", - "0 老耿端衷ζžͺοΌŒηœ―ηΌθ΅·δΈ€εͺδΈ‰θ§’ηœΌοΌŒδΈ€ζ‚ζ‰³ζœΊε“δΊ†ζžͺοΌŒε†°ι›Ήθˆ¬ηš„ι‡‘ιΊ»ι›€εŠˆε“©ε•ͺε•¦εΎ€δΈ‹θ½οΌŒι“η ‚ε­εœ¨ζŸ³ζžι—΄ι£ž... ... 
Old Geng took his gun off the table and raised...\n", - "\n", - "[1 rows x 15 columns]\n", - "{'accuracy': 0.00529567519858782, 'correct_ids': [77, 147, 199, 452, 738, 918], 'meteor': 0.3723931629938662, 'bleu_scores': {'bleu': 0.1007710645770402, 'precisions': [0.4158811367698076, 0.14392059553349876, 0.0641747868453106, 0.03384639860000795], 'brevity_penalty': 0.9437209131631352, 'length_ratio': 0.9452467704537927, 'translation_length': 28537, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.40370214820886885, 'rouge2': 0.1641473385689542, 'rougeL': 0.3423335232392143, 'rougeLsum': 0.3424044524649077}}\n", - "Epoch 4\n", - "loading env vars from: /home/inflaton/code/projects/courses/llm-finetuning/.env\n", - "Adding /home/inflaton/code/projects/courses/llm-finetuning to sys.path\n", - "πŸ¦₯ Unsloth: Will patch your computer to enable 2x faster free finetuning.\n", - "[nltk_data] Downloading package wordnet to /home/inflaton/nltk_data...\n", - "[nltk_data] Package wordnet is already up-to-date!\n", - "[nltk_data] Downloading package punkt to /home/inflaton/nltk_data...\n", - "[nltk_data] Package punkt is already up-to-date!\n", - "[nltk_data] Downloading package omw-1.4 to /home/inflaton/nltk_data...\n", - "[nltk_data] Package omw-1.4 is already up-to-date!\n", - "loading /home/inflaton/code/projects/courses/llm-finetuning/llm_toolkit/translation_engine.py\n", - "Qwen/Qwen2-1.5B-Instruct llama-factory/saves/qwen2-1.5b/lora/sft/checkpoint-2240 True datasets/mac/mac.tsv results/mac-results_lf.csv\n", - "(1) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. 
Max memory = 11.994 GB.\n", - "0.0 GB of memory reserved.\n", - "loading model: Qwen/Qwen2-1.5B-Instruct\n", - "[INFO|tokenization_utils_base.py:2161] 2024-07-01 00:26:19,392 >> loading file vocab.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/vocab.json\n", - "[INFO|tokenization_utils_base.py:2161] 2024-07-01 00:26:19,392 >> loading file merges.txt from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/merges.txt\n", - "[INFO|tokenization_utils_base.py:2161] 2024-07-01 00:26:19,392 >> loading file tokenizer.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/tokenizer.json\n", - "[INFO|tokenization_utils_base.py:2161] 2024-07-01 00:26:19,392 >> loading file added_tokens.json from cache at None\n", - "[INFO|tokenization_utils_base.py:2161] 2024-07-01 00:26:19,392 >> loading file special_tokens_map.json from cache at None\n", - "[INFO|tokenization_utils_base.py:2161] 2024-07-01 00:26:19,392 >> loading file tokenizer_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/tokenizer_config.json\n", - "[WARNING|logging.py:313] 2024-07-01 00:26:19,534 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n", - "07/01/2024 00:26:19 - INFO - llamafactory.data.template - Replace eos token: <|im_end|>\n", - "07/01/2024 00:26:19 - INFO - llamafactory.data.template - Add <|im_start|> to stop words.\n", - "[INFO|configuration_utils.py:733] 2024-07-01 00:26:19,883 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/config.json\n", - 
"[INFO|configuration_utils.py:800] 2024-07-01 00:26:19,883 >> Model config Qwen2Config {\n", - " \"_name_or_path\": \"Qwen/Qwen2-1.5B-Instruct\",\n", - " \"architectures\": [\n", - " \"Qwen2ForCausalLM\"\n", - " ],\n", - " \"attention_dropout\": 0.0,\n", - " \"bos_token_id\": 151643,\n", - " \"eos_token_id\": 151645,\n", - " \"hidden_act\": \"silu\",\n", - " \"hidden_size\": 1536,\n", - " \"initializer_range\": 0.02,\n", - " \"intermediate_size\": 8960,\n", - " \"max_position_embeddings\": 32768,\n", - " \"max_window_layers\": 28,\n", - " \"model_type\": \"qwen2\",\n", - " \"num_attention_heads\": 12,\n", - " \"num_hidden_layers\": 28,\n", - " \"num_key_value_heads\": 2,\n", - " \"rms_norm_eps\": 1e-06,\n", - " \"rope_theta\": 1000000.0,\n", - " \"sliding_window\": 32768,\n", - " \"tie_word_embeddings\": true,\n", - " \"torch_dtype\": \"bfloat16\",\n", - " \"transformers_version\": \"4.42.3\",\n", - " \"use_cache\": true,\n", - " \"use_sliding_window\": false,\n", - " \"vocab_size\": 151936\n", - "}\n", - "\n", - "07/01/2024 00:26:19 - INFO - llamafactory.model.model_utils.quantization - Quantizing model to 4 bit with bitsandbytes.\n", - "07/01/2024 00:26:19 - INFO - llamafactory.model.patcher - Using KV cache for faster generation.\n", - "[INFO|modeling_utils.py:3556] 2024-07-01 00:26:19,958 >> loading weights file model.safetensors from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/model.safetensors\n", - "[INFO|modeling_utils.py:1531] 2024-07-01 00:26:21,213 >> Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16.\n", - "[INFO|configuration_utils.py:1000] 2024-07-01 00:26:21,216 >> Generate config GenerationConfig {\n", - " \"bos_token_id\": 151643,\n", - " \"eos_token_id\": 151645\n", - "}\n", - "\n", - "[INFO|modeling_utils.py:4364] 2024-07-01 00:27:43,020 >> All model checkpoint weights were used when initializing Qwen2ForCausalLM.\n", - "\n", - 
"[INFO|modeling_utils.py:4372] 2024-07-01 00:27:43,020 >> All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at Qwen/Qwen2-1.5B-Instruct.\n", - "If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training.\n", - "[INFO|configuration_utils.py:955] 2024-07-01 00:27:43,422 >> loading configuration file generation_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/generation_config.json\n", - "[INFO|configuration_utils.py:1000] 2024-07-01 00:27:43,422 >> Generate config GenerationConfig {\n", - " \"bos_token_id\": 151643,\n", - " \"do_sample\": true,\n", - " \"eos_token_id\": [\n", - " 151645,\n", - " 151643\n", - " ],\n", - " \"pad_token_id\": 151643,\n", - " \"repetition_penalty\": 1.1,\n", - " \"temperature\": 0.7,\n", - " \"top_k\": 20,\n", - " \"top_p\": 0.8\n", - "}\n", - "\n", - "07/01/2024 00:27:43 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.\n", - "07/01/2024 00:27:44 - INFO - llamafactory.model.adapter - Loaded adapter(s): llama-factory/saves/qwen2-1.5b/lora/sft/checkpoint-2240\n", - "07/01/2024 00:27:44 - INFO - llamafactory.model.loader - all params: 1,552,946,688\n", - "(2) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n", - "1.604 GB of memory reserved.\n", - "loading train/test data files\n", - "DatasetDict({\n", - " train: Dataset({\n", - " features: ['chinese', 'english', 'text', 'prompt'],\n", - " num_rows: 4528\n", - " })\n", - " test: Dataset({\n", - " features: ['chinese', 'english', 'text', 'prompt'],\n", - " num_rows: 1133\n", - " })\n", - "})\n", - "Evaluating model: Qwen/Qwen2-1.5B-Instruct\n", - " 0%| | 0/1133 [00:00\n", - "--------\n", - "step 2: Old Geng raised his rifle and squeezed the trigger. 
The sound of gunfire joined the chattering rain as hundreds of sparrows fell from the sky, the pellets flying through the air between the willow twigs.\n", - "--------\n", - "step 3: Old Geng raised his rifle and squeezed the trigger. The sound of gunfire joined the chattering rain as hundreds of sparrows fell from the sky, the pellets flying through the air between the willow twigs.\n", - "100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 1133/1133 [40:27<00:00, 2.14s/it]\n", - "(3) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n", - "1.838 GB of memory reserved.\n", - " chinese ... Qwen/Qwen2-1.5B-Instruct_checkpoint-2240\n", - "0 老耿端衷ζžͺοΌŒηœ―ηΌθ΅·δΈ€εͺδΈ‰θ§’ηœΌοΌŒδΈ€ζ‚ζ‰³ζœΊε“δΊ†ζžͺοΌŒε†°ι›Ήθˆ¬ηš„ι‡‘ιΊ»ι›€εŠˆε“©ε•ͺε•¦εΎ€δΈ‹θ½οΌŒι“η ‚ε­εœ¨ζŸ³ζžι—΄ι£ž... ... Old Geng raised his rifle and squeezed the tri...\n", - "\n", - "[1 rows x 16 columns]\n", - "{'accuracy': 0.00264783759929391, 'correct_ids': [147, 199, 738], 'meteor': 0.35847259317675817, 'bleu_scores': {'bleu': 0.09681182585608442, 'precisions': [0.4169993042077123, 0.14579353556964927, 0.06572957431515054, 0.03353403579193845], 'brevity_penalty': 0.8998048931972519, 'length_ratio': 0.9045048029148725, 'translation_length': 27307, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.3940152835057211, 'rouge2': 0.16326412776493693, 'rougeL': 0.33702749255447373, 'rougeLsum': 0.3369782380738291}}\n", - "Epoch 5\n", - "loading env vars from: /home/inflaton/code/projects/courses/llm-finetuning/.env\n", - "Adding /home/inflaton/code/projects/courses/llm-finetuning to sys.path\n", - "πŸ¦₯ Unsloth: Will patch your computer to enable 2x faster free finetuning.\n", - "[nltk_data] Downloading package wordnet to /home/inflaton/nltk_data...\n", - "[nltk_data] Package wordnet is already up-to-date!\n", - "[nltk_data] Downloading package punkt to /home/inflaton/nltk_data...\n", - "[nltk_data] Package punkt is 
already up-to-date!\n", - "[nltk_data] Downloading package omw-1.4 to /home/inflaton/nltk_data...\n", - "[nltk_data] Package omw-1.4 is already up-to-date!\n", - "loading /home/inflaton/code/projects/courses/llm-finetuning/llm_toolkit/translation_engine.py\n", - "Qwen/Qwen2-1.5B-Instruct llama-factory/saves/qwen2-1.5b/lora/sft/checkpoint-2800 True datasets/mac/mac.tsv results/mac-results_lf.csv\n", - "(1) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n", - "0.0 GB of memory reserved.\n", - "loading model: Qwen/Qwen2-1.5B-Instruct\n", - "[INFO|tokenization_utils_base.py:2161] 2024-07-01 01:08:35,227 >> loading file vocab.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/vocab.json\n", - "[INFO|tokenization_utils_base.py:2161] 2024-07-01 01:08:35,227 >> loading file merges.txt from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/merges.txt\n", - "[INFO|tokenization_utils_base.py:2161] 2024-07-01 01:08:35,227 >> loading file tokenizer.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/tokenizer.json\n", - "[INFO|tokenization_utils_base.py:2161] 2024-07-01 01:08:35,227 >> loading file added_tokens.json from cache at None\n", - "[INFO|tokenization_utils_base.py:2161] 2024-07-01 01:08:35,227 >> loading file special_tokens_map.json from cache at None\n", - "[INFO|tokenization_utils_base.py:2161] 2024-07-01 01:08:35,227 >> loading file tokenizer_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/tokenizer_config.json\n", - "[WARNING|logging.py:313] 2024-07-01 01:08:35,401 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n", 
- "07/01/2024 01:08:35 - INFO - llamafactory.data.template - Replace eos token: <|im_end|>\n", - "07/01/2024 01:08:35 - INFO - llamafactory.data.template - Add <|im_start|> to stop words.\n", - "[INFO|configuration_utils.py:733] 2024-07-01 01:08:35,697 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/config.json\n", - "[INFO|configuration_utils.py:800] 2024-07-01 01:08:35,697 >> Model config Qwen2Config {\n", - " \"_name_or_path\": \"Qwen/Qwen2-1.5B-Instruct\",\n", - " \"architectures\": [\n", - " \"Qwen2ForCausalLM\"\n", - " ],\n", - " \"attention_dropout\": 0.0,\n", - " \"bos_token_id\": 151643,\n", - " \"eos_token_id\": 151645,\n", - " \"hidden_act\": \"silu\",\n", - " \"hidden_size\": 1536,\n", - " \"initializer_range\": 0.02,\n", - " \"intermediate_size\": 8960,\n", - " \"max_position_embeddings\": 32768,\n", - " \"max_window_layers\": 28,\n", - " \"model_type\": \"qwen2\",\n", - " \"num_attention_heads\": 12,\n", - " \"num_hidden_layers\": 28,\n", - " \"num_key_value_heads\": 2,\n", - " \"rms_norm_eps\": 1e-06,\n", - " \"rope_theta\": 1000000.0,\n", - " \"sliding_window\": 32768,\n", - " \"tie_word_embeddings\": true,\n", - " \"torch_dtype\": \"bfloat16\",\n", - " \"transformers_version\": \"4.42.3\",\n", - " \"use_cache\": true,\n", - " \"use_sliding_window\": false,\n", - " \"vocab_size\": 151936\n", - "}\n", - "\n", - "07/01/2024 01:08:35 - INFO - llamafactory.model.model_utils.quantization - Quantizing model to 4 bit with bitsandbytes.\n", - "07/01/2024 01:08:35 - INFO - llamafactory.model.patcher - Using KV cache for faster generation.\n", - "[INFO|modeling_utils.py:3556] 2024-07-01 01:08:35,772 >> loading weights file model.safetensors from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/model.safetensors\n", - "[INFO|modeling_utils.py:1531] 
2024-07-01 01:08:37,565 >> Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16.\n", - "[INFO|configuration_utils.py:1000] 2024-07-01 01:08:37,570 >> Generate config GenerationConfig {\n", - " \"bos_token_id\": 151643,\n", - " \"eos_token_id\": 151645\n", - "}\n", - "\n", - "[INFO|modeling_utils.py:4364] 2024-07-01 01:10:00,800 >> All model checkpoint weights were used when initializing Qwen2ForCausalLM.\n", - "\n", - "[INFO|modeling_utils.py:4372] 2024-07-01 01:10:00,800 >> All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at Qwen/Qwen2-1.5B-Instruct.\n", - "If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training.\n", - "[INFO|configuration_utils.py:955] 2024-07-01 01:10:01,095 >> loading configuration file generation_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/generation_config.json\n", - "[INFO|configuration_utils.py:1000] 2024-07-01 01:10:01,096 >> Generate config GenerationConfig {\n", - " \"bos_token_id\": 151643,\n", - " \"do_sample\": true,\n", - " \"eos_token_id\": [\n", - " 151645,\n", - " 151643\n", - " ],\n", - " \"pad_token_id\": 151643,\n", - " \"repetition_penalty\": 1.1,\n", - " \"temperature\": 0.7,\n", - " \"top_k\": 20,\n", - " \"top_p\": 0.8\n", - "}\n", - "\n", - "07/01/2024 01:10:01 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.\n", - "07/01/2024 01:10:02 - INFO - llamafactory.model.adapter - Loaded adapter(s): llama-factory/saves/qwen2-1.5b/lora/sft/checkpoint-2800\n", - "07/01/2024 01:10:02 - INFO - llamafactory.model.loader - all params: 1,552,946,688\n", - "(2) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. 
Max memory = 11.994 GB.\n", - "1.604 GB of memory reserved.\n", - "loading train/test data files\n", - "DatasetDict({\n", - " train: Dataset({\n", - " features: ['chinese', 'english', 'text', 'prompt'],\n", - " num_rows: 4528\n", - " })\n", - " test: Dataset({\n", - " features: ['chinese', 'english', 'text', 'prompt'],\n", - " num_rows: 1133\n", - " })\n", - "})\n", - "Evaluating model: Qwen/Qwen2-1.5B-Instruct\n", - " 0%| | 0/1133 [00:00\n", - "--------\n", - "step 2: Old Geng took aim and squeezed the trigger; dozens of gold-winged sparrows fell in a drenching rain, iron-shrapnel crackled among the willows, and a chorus of tiny explosions sounded over their heads.\n", - "--------\n", - "step 3: Old Geng took aim and squeezed the trigger; dozens of gold-winged sparrows fell in a drenching rain, iron-shrapnel crackled among the willows, and a chorus of tiny explosions sounded over their heads.\n", - "100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 1133/1133 [1:17:25<00:00, 4.10s/it]\n", - "(3) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n", - "5.197 GB of memory reserved.\n", - " chinese ... Qwen/Qwen2-1.5B-Instruct_checkpoint-2800\n", - "0 老耿端衷ζžͺοΌŒηœ―ηΌθ΅·δΈ€εͺδΈ‰θ§’ηœΌοΌŒδΈ€ζ‚ζ‰³ζœΊε“δΊ†ζžͺοΌŒε†°ι›Ήθˆ¬ηš„ι‡‘ιΊ»ι›€εŠˆε“©ε•ͺε•¦εΎ€δΈ‹θ½οΌŒι“η ‚ε­εœ¨ζŸ³ζžι—΄ι£ž... ... 
Old Geng took aim and squeezed the trigger; do...\n", - "\n", - "[1 rows x 17 columns]\n", - "{'accuracy': 0.00176522506619594, 'correct_ids': [147, 199], 'meteor': 0.35988930837184085, 'bleu_scores': {'bleu': 0.09029975816152737, 'precisions': [0.36273504273504276, 0.12144836028606404, 0.05442995653627549, 0.02772855206921714], 'brevity_penalty': 1.0, 'length_ratio': 1.0657502484266312, 'translation_length': 32175, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.3917385628494343, 'rouge2': 0.158275578220186, 'rougeL': 0.33145202576141436, 'rougeLsum': 0.331550843392171}}\n", - "Epoch 6\n", - "loading env vars from: /home/inflaton/code/projects/courses/llm-finetuning/.env\n", - "Adding /home/inflaton/code/projects/courses/llm-finetuning to sys.path\n", - "πŸ¦₯ Unsloth: Will patch your computer to enable 2x faster free finetuning.\n", - "[nltk_data] Downloading package wordnet to /home/inflaton/nltk_data...\n", - "[nltk_data] Package wordnet is already up-to-date!\n", - "[nltk_data] Downloading package punkt to /home/inflaton/nltk_data...\n", - "[nltk_data] Package punkt is already up-to-date!\n", - "[nltk_data] Downloading package omw-1.4 to /home/inflaton/nltk_data...\n", - "[nltk_data] Package omw-1.4 is already up-to-date!\n", - "loading /home/inflaton/code/projects/courses/llm-finetuning/llm_toolkit/translation_engine.py\n", - "Qwen/Qwen2-1.5B-Instruct llama-factory/saves/qwen2-1.5b/lora/sft/checkpoint-3360 True datasets/mac/mac.tsv results/mac-results_lf.csv\n", - "(1) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. 
Max memory = 11.994 GB.\n", - "0.0 GB of memory reserved.\n", - "loading model: Qwen/Qwen2-1.5B-Instruct\n", - "[INFO|tokenization_utils_base.py:2161] 2024-07-01 02:27:49,309 >> loading file vocab.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/vocab.json\n", - "[INFO|tokenization_utils_base.py:2161] 2024-07-01 02:27:49,309 >> loading file merges.txt from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/merges.txt\n", - "[INFO|tokenization_utils_base.py:2161] 2024-07-01 02:27:49,309 >> loading file tokenizer.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/tokenizer.json\n", - "[INFO|tokenization_utils_base.py:2161] 2024-07-01 02:27:49,309 >> loading file added_tokens.json from cache at None\n", - "[INFO|tokenization_utils_base.py:2161] 2024-07-01 02:27:49,309 >> loading file special_tokens_map.json from cache at None\n", - "[INFO|tokenization_utils_base.py:2161] 2024-07-01 02:27:49,309 >> loading file tokenizer_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/tokenizer_config.json\n", - "[WARNING|logging.py:313] 2024-07-01 02:27:49,467 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n", - "07/01/2024 02:27:49 - INFO - llamafactory.data.template - Replace eos token: <|im_end|>\n", - "07/01/2024 02:27:49 - INFO - llamafactory.data.template - Add <|im_start|> to stop words.\n", - "[INFO|configuration_utils.py:733] 2024-07-01 02:27:49,780 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/config.json\n", - 
"[INFO|configuration_utils.py:800] 2024-07-01 02:27:49,781 >> Model config Qwen2Config {\n", - " \"_name_or_path\": \"Qwen/Qwen2-1.5B-Instruct\",\n", - " \"architectures\": [\n", - " \"Qwen2ForCausalLM\"\n", - " ],\n", - " \"attention_dropout\": 0.0,\n", - " \"bos_token_id\": 151643,\n", - " \"eos_token_id\": 151645,\n", - " \"hidden_act\": \"silu\",\n", - " \"hidden_size\": 1536,\n", - " \"initializer_range\": 0.02,\n", - " \"intermediate_size\": 8960,\n", - " \"max_position_embeddings\": 32768,\n", - " \"max_window_layers\": 28,\n", - " \"model_type\": \"qwen2\",\n", - " \"num_attention_heads\": 12,\n", - " \"num_hidden_layers\": 28,\n", - " \"num_key_value_heads\": 2,\n", - " \"rms_norm_eps\": 1e-06,\n", - " \"rope_theta\": 1000000.0,\n", - " \"sliding_window\": 32768,\n", - " \"tie_word_embeddings\": true,\n", - " \"torch_dtype\": \"bfloat16\",\n", - " \"transformers_version\": \"4.42.3\",\n", - " \"use_cache\": true,\n", - " \"use_sliding_window\": false,\n", - " \"vocab_size\": 151936\n", - "}\n", - "\n", - "07/01/2024 02:27:49 - INFO - llamafactory.model.model_utils.quantization - Quantizing model to 4 bit with bitsandbytes.\n", - "07/01/2024 02:27:49 - INFO - llamafactory.model.patcher - Using KV cache for faster generation.\n", - "[INFO|modeling_utils.py:3556] 2024-07-01 02:27:49,851 >> loading weights file model.safetensors from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/model.safetensors\n", - "[INFO|modeling_utils.py:1531] 2024-07-01 02:27:51,890 >> Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16.\n", - "[INFO|configuration_utils.py:1000] 2024-07-01 02:27:51,895 >> Generate config GenerationConfig {\n", - " \"bos_token_id\": 151643,\n", - " \"eos_token_id\": 151645\n", - "}\n", - "\n", - "[INFO|modeling_utils.py:4364] 2024-07-01 02:29:12,004 >> All model checkpoint weights were used when initializing Qwen2ForCausalLM.\n", - "\n", - 
"[INFO|modeling_utils.py:4372] 2024-07-01 02:29:12,004 >> All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at Qwen/Qwen2-1.5B-Instruct.\n", - "If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training.\n", - "[INFO|configuration_utils.py:955] 2024-07-01 02:29:12,299 >> loading configuration file generation_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/generation_config.json\n", - "[INFO|configuration_utils.py:1000] 2024-07-01 02:29:12,299 >> Generate config GenerationConfig {\n", - " \"bos_token_id\": 151643,\n", - " \"do_sample\": true,\n", - " \"eos_token_id\": [\n", - " 151645,\n", - " 151643\n", - " ],\n", - " \"pad_token_id\": 151643,\n", - " \"repetition_penalty\": 1.1,\n", - " \"temperature\": 0.7,\n", - " \"top_k\": 20,\n", - " \"top_p\": 0.8\n", - "}\n", - "\n", - "07/01/2024 02:29:12 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.\n", - "07/01/2024 02:29:13 - INFO - llamafactory.model.adapter - Loaded adapter(s): llama-factory/saves/qwen2-1.5b/lora/sft/checkpoint-3360\n", - "07/01/2024 02:29:13 - INFO - llamafactory.model.loader - all params: 1,552,946,688\n", - "(2) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n", - "1.604 GB of memory reserved.\n", - "loading train/test data files\n", - "DatasetDict({\n", - " train: Dataset({\n", - " features: ['chinese', 'english', 'text', 'prompt'],\n", - " num_rows: 4528\n", - " })\n", - " test: Dataset({\n", - " features: ['chinese', 'english', 'text', 'prompt'],\n", - " num_rows: 1133\n", - " })\n", - "})\n", - "Evaluating model: Qwen/Qwen2-1.5B-Instruct\n", - " 0%| | 0/1133 [00:00\n", - "--------\n", - "step 2: Old Geng took a step forward, raised his pistol, and squeezed the trigger. 
The pellets of lead raining down from above exploded against the snow-covered ground. They flew through the air as sparks of iron grit crackled among the willows.\n", - "--------\n", - "step 3: Old Geng took a step forward, raised his pistol, and squeezed the trigger. The pellets of lead raining down from above exploded against the snow-covered ground. They flew through the air as sparks of iron grit crackled among the willows.\n", - "100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 1133/1133 [1:14:59<00:00, 3.97s/it]\n", - "(3) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n", - "5.178 GB of memory reserved.\n", - " chinese ... Qwen/Qwen2-1.5B-Instruct_checkpoint-3360\n", - "0 老耿端衷ζžͺοΌŒηœ―ηΌθ΅·δΈ€εͺδΈ‰θ§’ηœΌοΌŒδΈ€ζ‚ζ‰³ζœΊε“δΊ†ζžͺοΌŒε†°ι›Ήθˆ¬ηš„ι‡‘ιΊ»ι›€εŠˆε“©ε•ͺε•¦εΎ€δΈ‹θ½οΌŒι“η ‚ε­εœ¨ζŸ³ζžι—΄ι£ž... ... Old Geng took a step forward, raised his pisto...\n", - "\n", - "[1 rows x 18 columns]\n", - "{'accuracy': 0.00176522506619594, 'correct_ids': [147, 199], 'meteor': 0.3460642024871934, 'bleu_scores': {'bleu': 0.09384985027759411, 'precisions': [0.39390243902439026, 0.1306634744440817, 0.059353130319651975, 0.031256174181056626], 'brevity_penalty': 0.949408256548351, 'length_ratio': 0.9506459092414706, 'translation_length': 28700, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.37889769060024026, 'rouge2': 0.14962195702951014, 'rougeL': 0.32301072520504354, 'rougeLsum': 0.3229695536364973}}\n", - "Epoch 7\n", - "loading env vars from: /home/inflaton/code/projects/courses/llm-finetuning/.env\n", - "Adding /home/inflaton/code/projects/courses/llm-finetuning to sys.path\n", - "πŸ¦₯ Unsloth: Will patch your computer to enable 2x faster free finetuning.\n", - "[nltk_data] Downloading package wordnet to /home/inflaton/nltk_data...\n", - "[nltk_data] Package wordnet is already up-to-date!\n", - "[nltk_data] Downloading package punkt to /home/inflaton/nltk_data...\n", 
- "[nltk_data] Package punkt is already up-to-date!\n", - "[nltk_data] Downloading package omw-1.4 to /home/inflaton/nltk_data...\n", - "[nltk_data] Package omw-1.4 is already up-to-date!\n", - "loading /home/inflaton/code/projects/courses/llm-finetuning/llm_toolkit/translation_engine.py\n", - "Qwen/Qwen2-1.5B-Instruct llama-factory/saves/qwen2-1.5b/lora/sft/checkpoint-3920 True datasets/mac/mac.tsv results/mac-results_lf.csv\n", - "(1) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n", - "0.0 GB of memory reserved.\n", - "loading model: Qwen/Qwen2-1.5B-Instruct\n", - "[INFO|tokenization_utils_base.py:2161] 2024-07-01 03:44:33,600 >> loading file vocab.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/vocab.json\n", - "[INFO|tokenization_utils_base.py:2161] 2024-07-01 03:44:33,600 >> loading file merges.txt from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/merges.txt\n", - "[INFO|tokenization_utils_base.py:2161] 2024-07-01 03:44:33,600 >> loading file tokenizer.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/tokenizer.json\n", - "[INFO|tokenization_utils_base.py:2161] 2024-07-01 03:44:33,601 >> loading file added_tokens.json from cache at None\n", - "[INFO|tokenization_utils_base.py:2161] 2024-07-01 03:44:33,601 >> loading file special_tokens_map.json from cache at None\n", - "[INFO|tokenization_utils_base.py:2161] 2024-07-01 03:44:33,601 >> loading file tokenizer_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/tokenizer_config.json\n", - "[WARNING|logging.py:313] 2024-07-01 03:44:34,047 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings 
are fine-tuned or trained.\n", - "07/01/2024 03:44:34 - INFO - llamafactory.data.template - Replace eos token: <|im_end|>\n", - "07/01/2024 03:44:34 - INFO - llamafactory.data.template - Add <|im_start|> to stop words.\n", - "[INFO|configuration_utils.py:733] 2024-07-01 03:44:34,340 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/config.json\n", - "[INFO|configuration_utils.py:800] 2024-07-01 03:44:34,341 >> Model config Qwen2Config {\n", - " \"_name_or_path\": \"Qwen/Qwen2-1.5B-Instruct\",\n", - " \"architectures\": [\n", - " \"Qwen2ForCausalLM\"\n", - " ],\n", - " \"attention_dropout\": 0.0,\n", - " \"bos_token_id\": 151643,\n", - " \"eos_token_id\": 151645,\n", - " \"hidden_act\": \"silu\",\n", - " \"hidden_size\": 1536,\n", - " \"initializer_range\": 0.02,\n", - " \"intermediate_size\": 8960,\n", - " \"max_position_embeddings\": 32768,\n", - " \"max_window_layers\": 28,\n", - " \"model_type\": \"qwen2\",\n", - " \"num_attention_heads\": 12,\n", - " \"num_hidden_layers\": 28,\n", - " \"num_key_value_heads\": 2,\n", - " \"rms_norm_eps\": 1e-06,\n", - " \"rope_theta\": 1000000.0,\n", - " \"sliding_window\": 32768,\n", - " \"tie_word_embeddings\": true,\n", - " \"torch_dtype\": \"bfloat16\",\n", - " \"transformers_version\": \"4.42.3\",\n", - " \"use_cache\": true,\n", - " \"use_sliding_window\": false,\n", - " \"vocab_size\": 151936\n", - "}\n", - "\n", - "07/01/2024 03:44:34 - INFO - llamafactory.model.model_utils.quantization - Quantizing model to 4 bit with bitsandbytes.\n", - "07/01/2024 03:44:34 - INFO - llamafactory.model.patcher - Using KV cache for faster generation.\n", - "[INFO|modeling_utils.py:3556] 2024-07-01 03:44:34,397 >> loading weights file model.safetensors from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/model.safetensors\n", - 
"[INFO|modeling_utils.py:1531] 2024-07-01 03:44:35,481 >> Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16.\n", - "[INFO|configuration_utils.py:1000] 2024-07-01 03:44:35,484 >> Generate config GenerationConfig {\n", - " \"bos_token_id\": 151643,\n", - " \"eos_token_id\": 151645\n", - "}\n", - "\n", - "[INFO|modeling_utils.py:4364] 2024-07-01 03:45:57,180 >> All model checkpoint weights were used when initializing Qwen2ForCausalLM.\n", - "\n", - "[INFO|modeling_utils.py:4372] 2024-07-01 03:45:57,180 >> All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at Qwen/Qwen2-1.5B-Instruct.\n", - "If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training.\n", - "[INFO|configuration_utils.py:955] 2024-07-01 03:45:57,530 >> loading configuration file generation_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/generation_config.json\n", - "[INFO|configuration_utils.py:1000] 2024-07-01 03:45:57,530 >> Generate config GenerationConfig {\n", - " \"bos_token_id\": 151643,\n", - " \"do_sample\": true,\n", - " \"eos_token_id\": [\n", - " 151645,\n", - " 151643\n", - " ],\n", - " \"pad_token_id\": 151643,\n", - " \"repetition_penalty\": 1.1,\n", - " \"temperature\": 0.7,\n", - " \"top_k\": 20,\n", - " \"top_p\": 0.8\n", - "}\n", - "\n", - "07/01/2024 03:45:57 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.\n", - "07/01/2024 03:45:58 - INFO - llamafactory.model.adapter - Loaded adapter(s): llama-factory/saves/qwen2-1.5b/lora/sft/checkpoint-3920\n", - "07/01/2024 03:45:58 - INFO - llamafactory.model.loader - all params: 1,552,946,688\n", - "(2) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. 
Max memory = 11.994 GB.\n", - "1.604 GB of memory reserved.\n", - "loading train/test data files\n", - "DatasetDict({\n", - " train: Dataset({\n", - " features: ['chinese', 'english', 'text', 'prompt'],\n", - " num_rows: 4528\n", - " })\n", - " test: Dataset({\n", - " features: ['chinese', 'english', 'text', 'prompt'],\n", - " num_rows: 1133\n", - " })\n", - "})\n", - "Evaluating model: Qwen/Qwen2-1.5B-Instruct\n", - " 0%| | 0/1133 [00:00\n", - "--------\n", - "step 2: Old Geng raised his pistol, opened it up, and a few bullets flew out, like hailstones. Golden sparrows fell, and grit exploded among the willows, making a tinkling sound.\n", - "--------\n", - "step 3: Old Geng raised his pistol, opened it up, and a few bullets flew out, like hailstones. Golden sparrows fell, and grit exploded among the willows, making a tinkling sound.\n", - "100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 1133/1133 [1:15:29<00:00, 4.00s/it]\n", - "(3) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n", - "5.197 GB of memory reserved.\n", - " chinese ... Qwen/Qwen2-1.5B-Instruct_checkpoint-3920\n", - "0 老耿端衷ζžͺοΌŒηœ―ηΌθ΅·δΈ€εͺδΈ‰θ§’ηœΌοΌŒδΈ€ζ‚ζ‰³ζœΊε“δΊ†ζžͺοΌŒε†°ι›Ήθˆ¬ηš„ι‡‘ιΊ»ι›€εŠˆε“©ε•ͺε•¦εΎ€δΈ‹θ½οΌŒι“η ‚ε­εœ¨ζŸ³ζžι—΄ι£ž... ... 
Old Geng raised his pistol, opened it up, and ...\n", - "\n", - "[1 rows x 19 columns]\n", - "{'accuracy': 0.00176522506619594, 'correct_ids': [147, 199], 'meteor': 0.3479480952549209, 'bleu_scores': {'bleu': 0.08568897530454278, 'precisions': [0.34471041533934044, 0.11467889908256881, 0.051635392233515764, 0.02641279718624235], 'brevity_penalty': 1.0, 'length_ratio': 1.0917853593905267, 'translation_length': 32961, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.3755109220918789, 'rouge2': 0.14664341233690792, 'rougeL': 0.3172964023166135, 'rougeLsum': 0.31738234724622777}}\n", - "Epoch 8\n", - "loading env vars from: /home/inflaton/code/projects/courses/llm-finetuning/.env\n", - "Adding /home/inflaton/code/projects/courses/llm-finetuning to sys.path\n", - "πŸ¦₯ Unsloth: Will patch your computer to enable 2x faster free finetuning.\n", - "[nltk_data] Downloading package wordnet to /home/inflaton/nltk_data...\n", - "[nltk_data] Package wordnet is already up-to-date!\n", - "[nltk_data] Downloading package punkt to /home/inflaton/nltk_data...\n", - "[nltk_data] Package punkt is already up-to-date!\n", - "[nltk_data] Downloading package omw-1.4 to /home/inflaton/nltk_data...\n", - "[nltk_data] Package omw-1.4 is already up-to-date!\n", - "loading /home/inflaton/code/projects/courses/llm-finetuning/llm_toolkit/translation_engine.py\n", - "Qwen/Qwen2-1.5B-Instruct llama-factory/saves/qwen2-1.5b/lora/sft/checkpoint-4480 True datasets/mac/mac.tsv results/mac-results_lf.csv\n", - "(1) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. 
Max memory = 11.994 GB.\n", - "0.0 GB of memory reserved.\n", - "loading model: Qwen/Qwen2-1.5B-Instruct\n", - "[INFO|tokenization_utils_base.py:2161] 2024-07-01 05:01:48,632 >> loading file vocab.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/vocab.json\n", - "[INFO|tokenization_utils_base.py:2161] 2024-07-01 05:01:48,632 >> loading file merges.txt from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/merges.txt\n", - "[INFO|tokenization_utils_base.py:2161] 2024-07-01 05:01:48,632 >> loading file tokenizer.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/tokenizer.json\n", - "[INFO|tokenization_utils_base.py:2161] 2024-07-01 05:01:48,632 >> loading file added_tokens.json from cache at None\n", - "[INFO|tokenization_utils_base.py:2161] 2024-07-01 05:01:48,632 >> loading file special_tokens_map.json from cache at None\n", - "[INFO|tokenization_utils_base.py:2161] 2024-07-01 05:01:48,632 >> loading file tokenizer_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/tokenizer_config.json\n", - "[WARNING|logging.py:313] 2024-07-01 05:01:48,913 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n", - "07/01/2024 05:01:48 - INFO - llamafactory.data.template - Replace eos token: <|im_end|>\n", - "07/01/2024 05:01:48 - INFO - llamafactory.data.template - Add <|im_start|> to stop words.\n", - "[INFO|configuration_utils.py:733] 2024-07-01 05:01:49,230 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/config.json\n", - 
"[INFO|configuration_utils.py:800] 2024-07-01 05:01:49,230 >> Model config Qwen2Config {\n", - " \"_name_or_path\": \"Qwen/Qwen2-1.5B-Instruct\",\n", - " \"architectures\": [\n", - " \"Qwen2ForCausalLM\"\n", - " ],\n", - " \"attention_dropout\": 0.0,\n", - " \"bos_token_id\": 151643,\n", - " \"eos_token_id\": 151645,\n", - " \"hidden_act\": \"silu\",\n", - " \"hidden_size\": 1536,\n", - " \"initializer_range\": 0.02,\n", - " \"intermediate_size\": 8960,\n", - " \"max_position_embeddings\": 32768,\n", - " \"max_window_layers\": 28,\n", - " \"model_type\": \"qwen2\",\n", - " \"num_attention_heads\": 12,\n", - " \"num_hidden_layers\": 28,\n", - " \"num_key_value_heads\": 2,\n", - " \"rms_norm_eps\": 1e-06,\n", - " \"rope_theta\": 1000000.0,\n", - " \"sliding_window\": 32768,\n", - " \"tie_word_embeddings\": true,\n", - " \"torch_dtype\": \"bfloat16\",\n", - " \"transformers_version\": \"4.42.3\",\n", - " \"use_cache\": true,\n", - " \"use_sliding_window\": false,\n", - " \"vocab_size\": 151936\n", - "}\n", - "\n", - "07/01/2024 05:01:49 - INFO - llamafactory.model.model_utils.quantization - Quantizing model to 4 bit with bitsandbytes.\n", - "07/01/2024 05:01:49 - INFO - llamafactory.model.patcher - Using KV cache for faster generation.\n", - "[INFO|modeling_utils.py:3556] 2024-07-01 05:01:49,319 >> loading weights file model.safetensors from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/model.safetensors\n", - "[INFO|modeling_utils.py:1531] 2024-07-01 05:01:51,629 >> Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16.\n", - "[INFO|configuration_utils.py:1000] 2024-07-01 05:01:51,633 >> Generate config GenerationConfig {\n", - " \"bos_token_id\": 151643,\n", - " \"eos_token_id\": 151645\n", - "}\n", - "\n", - "[INFO|modeling_utils.py:4364] 2024-07-01 05:03:12,246 >> All model checkpoint weights were used when initializing Qwen2ForCausalLM.\n", - "\n", - 
"[INFO|modeling_utils.py:4372] 2024-07-01 05:03:12,246 >> All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at Qwen/Qwen2-1.5B-Instruct.\n", - "If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training.\n", - "[INFO|configuration_utils.py:955] 2024-07-01 05:03:12,762 >> loading configuration file generation_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/generation_config.json\n", - "[INFO|configuration_utils.py:1000] 2024-07-01 05:03:12,762 >> Generate config GenerationConfig {\n", - " \"bos_token_id\": 151643,\n", - " \"do_sample\": true,\n", - " \"eos_token_id\": [\n", - " 151645,\n", - " 151643\n", - " ],\n", - " \"pad_token_id\": 151643,\n", - " \"repetition_penalty\": 1.1,\n", - " \"temperature\": 0.7,\n", - " \"top_k\": 20,\n", - " \"top_p\": 0.8\n", - "}\n", - "\n", - "07/01/2024 05:03:13 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.\n", - "07/01/2024 05:03:13 - INFO - llamafactory.model.adapter - Loaded adapter(s): llama-factory/saves/qwen2-1.5b/lora/sft/checkpoint-4480\n", - "07/01/2024 05:03:13 - INFO - llamafactory.model.loader - all params: 1,552,946,688\n", - "(2) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n", - "1.604 GB of memory reserved.\n", - "loading train/test data files\n", - "DatasetDict({\n", - " train: Dataset({\n", - " features: ['chinese', 'english', 'text', 'prompt'],\n", - " num_rows: 4528\n", - " })\n", - " test: Dataset({\n", - " features: ['chinese', 'english', 'text', 'prompt'],\n", - " num_rows: 1133\n", - " })\n", - "})\n", - "Evaluating model: Qwen/Qwen2-1.5B-Instruct\n", - " 0%| | 0/1133 [00:00\n", - "--------\n", - "step 2: Old Geng took a shot with his rifle. 
A spray of bullets flew forth, like ice pellets, and a cloud of sparrows fell to the ground. Shot after shot, each one accompanied by a crack, exploded against the willows.\n", - "--------\n", - "step 3: Old Geng took a shot with his rifle. A spray of bullets flew forth, like ice pellets, and a cloud of sparrows fell to the ground. Shot after shot, each one accompanied by a crack, exploded against the willows.\n", - "100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 1133/1133 [1:16:24<00:00, 4.05s/it]\n", - "(3) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n", - "5.236 GB of memory reserved.\n", - " chinese ... Qwen/Qwen2-1.5B-Instruct_checkpoint-4480\n", - "0 老耿端衷ζžͺοΌŒηœ―ηΌθ΅·δΈ€εͺδΈ‰θ§’ηœΌοΌŒδΈ€ζ‚ζ‰³ζœΊε“δΊ†ζžͺοΌŒε†°ι›Ήθˆ¬ηš„ι‡‘ιΊ»ι›€εŠˆε“©ε•ͺε•¦εΎ€δΈ‹θ½οΌŒι“η ‚ε­εœ¨ζŸ³ζžι—΄ι£ž... ... Old Geng took a shot with his rifle. A spray o...\n", - "\n", - "[1 rows x 20 columns]\n", - "{'accuracy': 0.00176522506619594, 'correct_ids': [147, 199], 'meteor': 0.33844145976530193, 'bleu_scores': {'bleu': 0.08009132331873689, 'precisions': [0.33483795251421866, 0.10704716804785346, 0.047180778918814184, 0.024331389503317917], 'brevity_penalty': 1.0, 'length_ratio': 1.1007287181185823, 'translation_length': 33231, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.37147413848910177, 'rouge2': 0.14173580477944275, 'rougeL': 0.31332200211175076, 'rougeLsum': 0.3132659362806373}}\n", - "Epoch 9\n", - "loading env vars from: /home/inflaton/code/projects/courses/llm-finetuning/.env\n", - "Adding /home/inflaton/code/projects/courses/llm-finetuning to sys.path\n", - "πŸ¦₯ Unsloth: Will patch your computer to enable 2x faster free finetuning.\n", - "[nltk_data] Downloading package wordnet to /home/inflaton/nltk_data...\n", - "[nltk_data] Package wordnet is already up-to-date!\n", - "[nltk_data] Downloading package punkt to /home/inflaton/nltk_data...\n", - "[nltk_data] Package punkt 
is already up-to-date!\n", - "[nltk_data] Downloading package omw-1.4 to /home/inflaton/nltk_data...\n", - "[nltk_data] Package omw-1.4 is already up-to-date!\n", - "loading /home/inflaton/code/projects/courses/llm-finetuning/llm_toolkit/translation_engine.py\n", - "Qwen/Qwen2-1.5B-Instruct llama-factory/saves/qwen2-1.5b/lora/sft/checkpoint-5040 True datasets/mac/mac.tsv results/mac-results_lf.csv\n", - "(1) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n", - "0.0 GB of memory reserved.\n", - "loading model: Qwen/Qwen2-1.5B-Instruct\n", - "[INFO|tokenization_utils_base.py:2161] 2024-07-01 06:20:02,103 >> loading file vocab.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/vocab.json\n", - "[INFO|tokenization_utils_base.py:2161] 2024-07-01 06:20:02,103 >> loading file merges.txt from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/merges.txt\n", - "[INFO|tokenization_utils_base.py:2161] 2024-07-01 06:20:02,103 >> loading file tokenizer.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/tokenizer.json\n", - "[INFO|tokenization_utils_base.py:2161] 2024-07-01 06:20:02,103 >> loading file added_tokens.json from cache at None\n", - "[INFO|tokenization_utils_base.py:2161] 2024-07-01 06:20:02,103 >> loading file special_tokens_map.json from cache at None\n", - "[INFO|tokenization_utils_base.py:2161] 2024-07-01 06:20:02,103 >> loading file tokenizer_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/tokenizer_config.json\n", - "[WARNING|logging.py:313] 2024-07-01 06:20:02,237 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or 
trained.\n", - "07/01/2024 06:20:02 - INFO - llamafactory.data.template - Replace eos token: <|im_end|>\n", - "07/01/2024 06:20:02 - INFO - llamafactory.data.template - Add <|im_start|> to stop words.\n", - "[INFO|configuration_utils.py:733] 2024-07-01 06:20:02,540 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/config.json\n", - "[INFO|configuration_utils.py:800] 2024-07-01 06:20:02,540 >> Model config Qwen2Config {\n", - " \"_name_or_path\": \"Qwen/Qwen2-1.5B-Instruct\",\n", - " \"architectures\": [\n", - " \"Qwen2ForCausalLM\"\n", - " ],\n", - " \"attention_dropout\": 0.0,\n", - " \"bos_token_id\": 151643,\n", - " \"eos_token_id\": 151645,\n", - " \"hidden_act\": \"silu\",\n", - " \"hidden_size\": 1536,\n", - " \"initializer_range\": 0.02,\n", - " \"intermediate_size\": 8960,\n", - " \"max_position_embeddings\": 32768,\n", - " \"max_window_layers\": 28,\n", - " \"model_type\": \"qwen2\",\n", - " \"num_attention_heads\": 12,\n", - " \"num_hidden_layers\": 28,\n", - " \"num_key_value_heads\": 2,\n", - " \"rms_norm_eps\": 1e-06,\n", - " \"rope_theta\": 1000000.0,\n", - " \"sliding_window\": 32768,\n", - " \"tie_word_embeddings\": true,\n", - " \"torch_dtype\": \"bfloat16\",\n", - " \"transformers_version\": \"4.42.3\",\n", - " \"use_cache\": true,\n", - " \"use_sliding_window\": false,\n", - " \"vocab_size\": 151936\n", - "}\n", - "\n", - "07/01/2024 06:20:02 - INFO - llamafactory.model.model_utils.quantization - Quantizing model to 4 bit with bitsandbytes.\n", - "07/01/2024 06:20:02 - INFO - llamafactory.model.patcher - Using KV cache for faster generation.\n", - "[INFO|modeling_utils.py:3556] 2024-07-01 06:20:02,582 >> loading weights file model.safetensors from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/model.safetensors\n", - 
"[INFO|modeling_utils.py:1531] 2024-07-01 06:20:04,218 >> Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16.\n", - "[INFO|configuration_utils.py:1000] 2024-07-01 06:20:04,222 >> Generate config GenerationConfig {\n", - " \"bos_token_id\": 151643,\n", - " \"eos_token_id\": 151645\n", - "}\n", - "\n", - "[INFO|modeling_utils.py:4364] 2024-07-01 06:21:26,114 >> All model checkpoint weights were used when initializing Qwen2ForCausalLM.\n", - "\n", - "[INFO|modeling_utils.py:4372] 2024-07-01 06:21:26,115 >> All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at Qwen/Qwen2-1.5B-Instruct.\n", - "If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training.\n", - "[INFO|configuration_utils.py:955] 2024-07-01 06:21:26,406 >> loading configuration file generation_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/generation_config.json\n", - "[INFO|configuration_utils.py:1000] 2024-07-01 06:21:26,406 >> Generate config GenerationConfig {\n", - " \"bos_token_id\": 151643,\n", - " \"do_sample\": true,\n", - " \"eos_token_id\": [\n", - " 151645,\n", - " 151643\n", - " ],\n", - " \"pad_token_id\": 151643,\n", - " \"repetition_penalty\": 1.1,\n", - " \"temperature\": 0.7,\n", - " \"top_k\": 20,\n", - " \"top_p\": 0.8\n", - "}\n", - "\n", - "07/01/2024 06:21:26 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.\n", - "07/01/2024 06:21:27 - INFO - llamafactory.model.adapter - Loaded adapter(s): llama-factory/saves/qwen2-1.5b/lora/sft/checkpoint-5040\n", - "07/01/2024 06:21:27 - INFO - llamafactory.model.loader - all params: 1,552,946,688\n", - "(2) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. 
Max memory = 11.994 GB.\n", - "1.604 GB of memory reserved.\n", - "loading train/test data files\n", - "DatasetDict({\n", - " train: Dataset({\n", - " features: ['chinese', 'english', 'text', 'prompt'],\n", - " num_rows: 4528\n", - " })\n", - " test: Dataset({\n", - " features: ['chinese', 'english', 'text', 'prompt'],\n", - " num_rows: 1133\n", - " })\n", - "})\n", - "Evaluating model: Qwen/Qwen2-1.5B-Instruct\n", - " 0%| | 0/1133 [00:00\n", - "--------\n", - "step 2: Old Geng took a step forward, raised his rifle to his eye, and squeezed the trigger. Rifle pellets, bayoneted sparrows, rained down around him. Shotouts of iron sand flew everywhere, crackling as they went.\n", - "--------\n", - "step 3: Old Geng took a step forward, raised his rifle to his eye, and squeezed the trigger. Rifle pellets, bayoneted sparrows, rained down around him. Shotouts of iron sand flew everywhere, crackling as they went.\n", - "100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 1133/1133 [1:15:49<00:00, 4.02s/it]\n", - "(3) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n", - "5.197 GB of memory reserved.\n", - " chinese ... Qwen/Qwen2-1.5B-Instruct_checkpoint-5040\n", - "0 老耿端衷ζžͺοΌŒηœ―ηΌθ΅·δΈ€εͺδΈ‰θ§’ηœΌοΌŒδΈ€ζ‚ζ‰³ζœΊε“δΊ†ζžͺοΌŒε†°ι›Ήθˆ¬ηš„ι‡‘ιΊ»ι›€εŠˆε“©ε•ͺε•¦εΎ€δΈ‹θ½οΌŒι“η ‚ε­εœ¨ζŸ³ζžι—΄ι£ž... ... 
Old Geng took a step forward, raised his rifle...\n", - "\n", - "[1 rows x 21 columns]\n", - "{'accuracy': 0.00176522506619594, 'correct_ids': [147, 199], 'meteor': 0.3380289789419591, 'bleu_scores': {'bleu': 0.08738865032530332, 'precisions': [0.36355344170440107, 0.11703423082126911, 0.052124366910523356, 0.026296513331380018], 'brevity_penalty': 1.0, 'length_ratio': 1.0167936402782378, 'translation_length': 30697, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.3674967504985488, 'rouge2': 0.14110284985778096, 'rougeL': 0.3092157882639477, 'rougeLsum': 0.30969047388276916}}\n", - "Epoch 10\n", - "loading env vars from: /home/inflaton/code/projects/courses/llm-finetuning/.env\n", - "Adding /home/inflaton/code/projects/courses/llm-finetuning to sys.path\n", - "πŸ¦₯ Unsloth: Will patch your computer to enable 2x faster free finetuning.\n", - "[nltk_data] Downloading package wordnet to /home/inflaton/nltk_data...\n", - "[nltk_data] Package wordnet is already up-to-date!\n", - "[nltk_data] Downloading package punkt to /home/inflaton/nltk_data...\n", - "[nltk_data] Package punkt is already up-to-date!\n", - "[nltk_data] Downloading package omw-1.4 to /home/inflaton/nltk_data...\n", - "[nltk_data] Package omw-1.4 is already up-to-date!\n", - "loading /home/inflaton/code/projects/courses/llm-finetuning/llm_toolkit/translation_engine.py\n", - "Qwen/Qwen2-1.5B-Instruct llama-factory/saves/qwen2-1.5b/lora/sft/checkpoint-5600 True datasets/mac/mac.tsv results/mac-results_lf.csv\n", - "(1) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. 
Max memory = 11.994 GB.\n", - "0.0 GB of memory reserved.\n", - "loading model: Qwen/Qwen2-1.5B-Instruct\n", - "[INFO|tokenization_utils_base.py:2161] 2024-07-01 07:37:36,379 >> loading file vocab.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/vocab.json\n", - "[INFO|tokenization_utils_base.py:2161] 2024-07-01 07:37:36,379 >> loading file merges.txt from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/merges.txt\n", - "[INFO|tokenization_utils_base.py:2161] 2024-07-01 07:37:36,379 >> loading file tokenizer.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/tokenizer.json\n", - "[INFO|tokenization_utils_base.py:2161] 2024-07-01 07:37:36,379 >> loading file added_tokens.json from cache at None\n", - "[INFO|tokenization_utils_base.py:2161] 2024-07-01 07:37:36,379 >> loading file special_tokens_map.json from cache at None\n", - "[INFO|tokenization_utils_base.py:2161] 2024-07-01 07:37:36,379 >> loading file tokenizer_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/tokenizer_config.json\n", - "[WARNING|logging.py:313] 2024-07-01 07:37:36,515 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n", - "07/01/2024 07:37:36 - INFO - llamafactory.data.template - Replace eos token: <|im_end|>\n", - "07/01/2024 07:37:36 - INFO - llamafactory.data.template - Add <|im_start|> to stop words.\n", - "[INFO|configuration_utils.py:733] 2024-07-01 07:37:36,942 >> loading configuration file config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/config.json\n", - 
"[INFO|configuration_utils.py:800] 2024-07-01 07:37:36,943 >> Model config Qwen2Config {\n", - " \"_name_or_path\": \"Qwen/Qwen2-1.5B-Instruct\",\n", - " \"architectures\": [\n", - " \"Qwen2ForCausalLM\"\n", - " ],\n", - " \"attention_dropout\": 0.0,\n", - " \"bos_token_id\": 151643,\n", - " \"eos_token_id\": 151645,\n", - " \"hidden_act\": \"silu\",\n", - " \"hidden_size\": 1536,\n", - " \"initializer_range\": 0.02,\n", - " \"intermediate_size\": 8960,\n", - " \"max_position_embeddings\": 32768,\n", - " \"max_window_layers\": 28,\n", - " \"model_type\": \"qwen2\",\n", - " \"num_attention_heads\": 12,\n", - " \"num_hidden_layers\": 28,\n", - " \"num_key_value_heads\": 2,\n", - " \"rms_norm_eps\": 1e-06,\n", - " \"rope_theta\": 1000000.0,\n", - " \"sliding_window\": 32768,\n", - " \"tie_word_embeddings\": true,\n", - " \"torch_dtype\": \"bfloat16\",\n", - " \"transformers_version\": \"4.42.3\",\n", - " \"use_cache\": true,\n", - " \"use_sliding_window\": false,\n", - " \"vocab_size\": 151936\n", - "}\n", - "\n", - "07/01/2024 07:37:36 - INFO - llamafactory.model.model_utils.quantization - Quantizing model to 4 bit with bitsandbytes.\n", - "07/01/2024 07:37:36 - INFO - llamafactory.model.patcher - Using KV cache for faster generation.\n", - "[INFO|modeling_utils.py:3556] 2024-07-01 07:37:36,987 >> loading weights file model.safetensors from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/model.safetensors\n", - "[INFO|modeling_utils.py:1531] 2024-07-01 07:37:38,446 >> Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16.\n", - "[INFO|configuration_utils.py:1000] 2024-07-01 07:37:38,450 >> Generate config GenerationConfig {\n", - " \"bos_token_id\": 151643,\n", - " \"eos_token_id\": 151645\n", - "}\n", - "\n", - "[INFO|modeling_utils.py:4364] 2024-07-01 07:39:01,352 >> All model checkpoint weights were used when initializing Qwen2ForCausalLM.\n", - "\n", - 
"[INFO|modeling_utils.py:4372] 2024-07-01 07:39:01,352 >> All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at Qwen/Qwen2-1.5B-Instruct.\n", - "If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training.\n", - "[INFO|configuration_utils.py:955] 2024-07-01 07:39:01,658 >> loading configuration file generation_config.json from cache at /home/inflaton/.cache/huggingface/hub/models--Qwen--Qwen2-1.5B-Instruct/snapshots/ba1cf1846d7df0a0591d6c00649f57e798519da8/generation_config.json\n", - "[INFO|configuration_utils.py:1000] 2024-07-01 07:39:01,658 >> Generate config GenerationConfig {\n", - " \"bos_token_id\": 151643,\n", - " \"do_sample\": true,\n", - " \"eos_token_id\": [\n", - " 151645,\n", - " 151643\n", - " ],\n", - " \"pad_token_id\": 151643,\n", - " \"repetition_penalty\": 1.1,\n", - " \"temperature\": 0.7,\n", - " \"top_k\": 20,\n", - " \"top_p\": 0.8\n", - "}\n", - "\n", - "07/01/2024 07:39:02 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.\n", - "07/01/2024 07:39:02 - INFO - llamafactory.model.adapter - Loaded adapter(s): llama-factory/saves/qwen2-1.5b/lora/sft/checkpoint-5600\n", - "07/01/2024 07:39:02 - INFO - llamafactory.model.loader - all params: 1,552,946,688\n", - "(2) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. 
Max memory = 11.994 GB.\n", - "1.604 GB of memory reserved.\n", - "loading train/test data files\n", - "DatasetDict({\n", - " train: Dataset({\n", - " features: ['chinese', 'english', 'text', 'prompt'],\n", - " num_rows: 4528\n", - " })\n", - " test: Dataset({\n", - " features: ['chinese', 'english', 'text', 'prompt'],\n", - " num_rows: 1133\n", - " })\n", - "})\n", - "Evaluating model: Qwen/Qwen2-1.5B-Instruct\n", - " 0%| | 0/1133 [00:00\n", - "--------\n", - "step 2: Old Geng reached for his rifle, wedged it to his eye, took a squeeze, and firedβ€”a shower of lead pellets flew from the barrel, crackering through the air as they hit.\n", - "--------\n", - "step 3: Old Geng reached for his rifle, wedged it to his eye, took a squeeze, and firedβ€”a shower of lead pellets flew from the barrel, crackering through the air as they hit.\n", - "100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 1133/1133 [43:28<00:00, 2.30s/it]\n", - "(3) GPU = NVIDIA GeForce RTX 4080 Laptop GPU. Max memory = 11.994 GB.\n", - "1.877 GB of memory reserved.\n", - " chinese ... Qwen/Qwen2-1.5B-Instruct_checkpoint-5600\n", - "0 老耿端衷ζžͺοΌŒηœ―ηΌθ΅·δΈ€εͺδΈ‰θ§’ηœΌοΌŒδΈ€ζ‚ζ‰³ζœΊε“δΊ†ζžͺοΌŒε†°ι›Ήθˆ¬ηš„ι‡‘ιΊ»ι›€εŠˆε“©ε•ͺε•¦εΎ€δΈ‹θ½οΌŒι“η ‚ε­εœ¨ζŸ³ζžι—΄ι£ž... ... 
Old Geng reached for his rifle, wedged it to h...\n", - "\n", - "[1 rows x 22 columns]\n", - "{'accuracy': 0.00176522506619594, 'correct_ids': [147, 199], 'meteor': 0.3339867178782917, 'bleu_scores': {'bleu': 0.08544000315753703, 'precisions': [0.3757308441891476, 0.11972682649213914, 0.05255355422133274, 0.025644000928289626], 'brevity_penalty': 0.9682716284409708, 'length_ratio': 0.9687644915534945, 'translation_length': 29247, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.3631078400113029, 'rouge2': 0.13850862500702893, 'rougeL': 0.3081859195764205, 'rougeLsum': 0.30821718216431304}}\n", - "CPU times: user 21min 38s, sys: 7min 47s, total: 29min 25s\n", - "Wall time: 10h 26min 31s\n" - ] - } - ], - "source": [ - "%%time\n", - "\n", - "os.environ[\"MODEL_NAME\"] = \"Qwen/Qwen2-1.5B-Instruct\" \n", - "for i in range(1, num_train_epochs + 1):\n", - " print(f\"Epoch {i}\")\n", - " adapter_path = f\"llama-factory/saves/qwen2-1.5b/lora/sft/checkpoint-{560 * i}\"\n", - " os.environ[\"ADAPTER_NAME_OR_PATH\"] = adapter_path\n", - " !python llm_toolkit/eval.py " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%%time\n", - "\n", - "os.environ[\"MODEL_NAME\"] = \"Qwen/Qwen2-7B-Instruct\" \n", - "for i in range(1, num_train_epochs + 1):\n", - " print(f\"Epoch {i}\")\n", - " adapter_path = f\"llama-factory/saves/qwen2-7b/lora/sft/checkpoint-{560 * i}\"\n", - " os.environ[\"ADAPTER_NAME_OR_PATH\"] = adapter_path\n", - " !python llm_toolkit/eval.py " - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [], - "source": [ - "from langchain_openai import ChatOpenAI\n", - "\n", - "llm = ChatOpenAI(\n", - " model=\"gpt-4o\",\n", - " temperature=0,\n", - " max_tokens=None,\n", - " timeout=None,\n", - " max_retries=2,\n", - " # api_key=\"...\", # if you prefer to pass api key in directly instead of using env vars\n", - " base_url=\"http://localhost:8000/v1\",\n", - " #
organization=\"...\",\n", - " # other params...\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[nltk_data] Downloading package wordnet to /home/inflaton/nltk_data...\n", - "[nltk_data] Package wordnet is already up-to-date!\n", - "[nltk_data] Downloading package punkt to /home/inflaton/nltk_data...\n", - "[nltk_data] Package punkt is already up-to-date!\n", - "[nltk_data] Downloading package omw-1.4 to /home/inflaton/nltk_data...\n", - "[nltk_data] Package omw-1.4 is already up-to-date!\n" - ] - } - ], - "source": [ - "from llm_toolkit.translation_utils import *" - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'The body was found on the morning beach'" - ] - }, - "execution_count": 28, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "translate_via_llm(\"ζ­»θ€…ε‡Œζ™¨εŽ»ηš„ζ²™ζ»©\")" - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "metadata": {}, - "outputs": [], - "source": [ - "cache_dict = {}" - ] - }, - { - "cell_type": "code", - "execution_count": 34, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'ζ­»θ€…ε‡Œζ™¨εŽ»ηš„ζ²™ζ»©': 'The body was found on the morning beach'}" - ] - }, - "execution_count": 34, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "translate(\n", - " \"ζ­»θ€…ε‡Œζ™¨εŽ»ηš„ζ²™ζ»©\",\n", - " cache_dict=cache_dict,\n", - ")\n", - "cache_dict" - ] - } - ], - "metadata": { - "accelerator": "GPU", - "application/vnd.databricks.v1+notebook": { - "dashboards": [], - "environmentMetadata": null, - "language": "python", - "notebookMetadata": { - "pythonIndentUnit": 4 - }, - "notebookName": "07_MAC_+_Qwen2-7B-Instructi_Unsloth_train", - "widgets": {} - }, - "colab": { - "gpuType": "T4", - "provenance": [] - }, - "kernelspec": { - "display_name": "Python 3", - 
"name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.9" - }, - "widgets": { - "application/vnd.jupyter.widget-state+json": { - "036fc5746f43416db18c19ad8fd36677": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "06e806c82c7b4cbea31c5358dd9c3434": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "087b76a8b7514269b1f0ab29b062e444": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_a069d2ab23824f29aa320ac256e2cfe9", - "placeholder": "​", - "style": "IPY_MODEL_06e806c82c7b4cbea31c5358dd9c3434", - "value": "Map (num_proc=2): 100%" - } - }, - "09b76013aa9e45efb6deb23a7a0d0925": { 
- "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_dea41c5260884aa6879b5e1d1697b14f", - "placeholder": "​", - "style": "IPY_MODEL_89965917796a4f81b899fdc7685f33df", - "value": "config.json: 100%" - } - }, - "0a92c56bfa134ef583220d7ef0b13e17": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "0c34be936c8145d3ab41282f30a70713": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": 
null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "0f8b6bfe16894500838793f2491d403f": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "177c78fce95d4b4ab33057c5a048d693": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - 
"_view_name": "StyleView", - "description_width": "" - } - }, - "1f44c9ce1adf470cbb19784493ed209f": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_0c34be936c8145d3ab41282f30a70713", - "placeholder": "​", - "style": "IPY_MODEL_0a92c56bfa134ef583220d7ef0b13e17", - "value": "model.safetensors: 100%" - } - }, - "201b59ccd9f845e197029b57e424aefc": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "2157f01726d748f8a9ae4a00664430da": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "21db8a77b00d4a4e82fdfa608657531f": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": 
"@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "26e4202cca81496a90d15a0dd4ca9cf1": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_ba90fdb8822d47dab7ba203bee297f37", - "IPY_MODEL_61560ff6a36b44f4a9dfdae5c52791d4", - "IPY_MODEL_95fbe66647904c06a20f640630d6dc0e" - ], - "layout": "IPY_MODEL_57182a263d324a3dbf1471c74290a0d5" - } - }, - "27155728b6b84cb199c91c940095d0a8": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - 
"_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_6b91feeed5464877991ac2c207aebe7c", - "IPY_MODEL_cca8113c54c0495daedce1327bf9c68b", - "IPY_MODEL_2e63a29e2f7247bba5beede9a568c99f" - ], - "layout": "IPY_MODEL_5c9d781c28944f3eb86e2a6d44efdf18" - } - }, - "271ddaa553a042d09b6db7b450643d8f": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "2a58d04b428c46f4b3dbadd3bc6cd529": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - 
"visibility": null, - "width": null - } - }, - "2d18ddf6482c4d97829ac0e5a7b9868f": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_9f679ad3ec7f4fe8ad0510ffb57bc2ab", - "IPY_MODEL_f2df530d22c74977b249dd9fb5f4829b", - "IPY_MODEL_89b2ef0dbfea47ab8e6f8d659e3351d1" - ], - "layout": "IPY_MODEL_3056b148aa9f4e6e8aa3b61d26886255" - } - }, - "2e5087c76f98437cb5dc729230358cba": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - 
"2e63a29e2f7247bba5beede9a568c99f": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_b993eaec6b224440bf80c0958c6fb536", - "placeholder": "​", - "style": "IPY_MODEL_de868e26e7154f62aa86223a539ad421", - "value": " 464/464 [00:00<00:00, 27.1kB/s]" - } - }, - "2f6c70dd266c4816bfad3fd3d192929a": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "30307300bc4e4baf96560e30969a82b6": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_e36a3f9eff0e4cf68834d66b0213ae96", - "placeholder": "​", - "style": "IPY_MODEL_a0037bdccf254159becde630bee3d1db", - "value": "generation_config.json: 100%" - } - }, - "3056b148aa9f4e6e8aa3b61d26886255": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - 
"_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "30cdc32298134cb0be4d41615b9e5774": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - 
"margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "3572201bd4d74a58b7a665f9bdfdcdba": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "35b0e8c26d6640e9bd0ed7b242a423d8": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_2e5087c76f98437cb5dc729230358cba", - "max": 51760, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_036fc5746f43416db18c19ad8fd36677", - "value": 51760 - } - }, - "36166c7bcb854b34aca1f41a5d6ea50b": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - 
"description_width": "" - } - }, - "370692d819df41828b48c4ad446f977b": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "39b29a75374b45c0a22506010be2b84e": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_30cdc32298134cb0be4d41615b9e5774", - "max": 1179, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_47928317548c454bba6358ab132e8dee", - "value": 1179 
- } - }, - "3cf2dd993b5e4d3daecf61e4bab5a404": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_087b76a8b7514269b1f0ab29b062e444", - "IPY_MODEL_35b0e8c26d6640e9bd0ed7b242a423d8", - "IPY_MODEL_54ad89e05fd74576b9b8b5b5a10eaf8d" - ], - "layout": "IPY_MODEL_a41dc44766444a998bec2d777f249d23" - } - }, - "43dec2ede91341f5af60eb522e18e984": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "4463edd481c1467f914c7dcd6c6e6ffc": { - "model_module": 
"@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "47928317548c454bba6358ab132e8dee": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "49277aeeac16434a865a4d12308b1abc": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": 
"@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "4ae7e449e4ea4c729b5f34607c18ebae": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - 
"order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "4b2061b8a73c43ffb0c2f83daf0d0183": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "4c4c88d4c701450692fa0f6b0c5764b0": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - 
"grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "4c666f4ace3943f8b80ecd20e7503236": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "4ccedf0d93094e63b57a0f8a434fba06": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_4463edd481c1467f914c7dcd6c6e6ffc", - "max": 44307561, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_6d3b9a05db0b4dadb638c686faa0c40a", - "value": 44307561 - } - }, - "4dcf6ff672d24983a1877a8431709aa9": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": 
"1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_5807d5fb827d490fb3bc698f801ffff5", - "placeholder": "​", - "style": "IPY_MODEL_c4f2b06a82fd4987b8b659524a7b503b", - "value": "Generating train split: 100%" - } - }, - "4ea63adfce694725bdba878aef709dd3": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "5234566b1bfc4655b8d582ea5b46ed9f": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": 
"@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "54ad89e05fd74576b9b8b5b5a10eaf8d": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_fdb1941405ed4e4aa06019933892deb3", - "placeholder": "​", - "style": "IPY_MODEL_668d5377ca56426a99753867e6e24862", - "value": " 51760/51760 [01:02<00:00, 1131.51 examples/s]" - } - }, - "56aee4853b7740e6a977254f5d1fa66d": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "57182a263d324a3dbf1471c74290a0d5": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - 
"flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "5807d5fb827d490fb3bc698f801ffff5": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "5c9d781c28944f3eb86e2a6d44efdf18": { - "model_module": 
"@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "5f40db8173dd4d76b6ef5ed6d9ec8b6e": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - 
"grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "61560ff6a36b44f4a9dfdae5c52791d4": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_db19fc8d37db4e45a5790a876836d8c4", - "max": 11610, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_36166c7bcb854b34aca1f41a5d6ea50b", - "value": 11610 - } - }, - "6578fd7acdb54c4c93528ea431fd0144": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_370692d819df41828b48c4ad446f977b", - "placeholder": "​", - "style": "IPY_MODEL_a0bf9160eb2647409b3200270914b90f", - "value": " 50.6k/50.6k [00:00<00:00, 2.71MB/s]" - } - }, - "668d5377ca56426a99753867e6e24862": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": 
"DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "697f027529b54ee9956bae78a11e0611": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "69ac12aec0714318bf2c83d4f4e745f5": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "6b2012c3f88547af8884a9ea90e3164b": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_938f45f1b3e24118b815d96ae34ba86a", - "placeholder": "​", - "style": "IPY_MODEL_9367047a800747f79c6b225d92397846", - "value": " 44.3M/44.3M [00:01<00:00, 31.0MB/s]" - } - }, - "6b91feeed5464877991ac2c207aebe7c": { - 
"model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_4b2061b8a73c43ffb0c2f83daf0d0183", - "placeholder": "​", - "style": "IPY_MODEL_69ac12aec0714318bf2c83d4f4e745f5", - "value": "special_tokens_map.json: 100%" - } - }, - "6d3b9a05db0b4dadb638c686faa0c40a": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "6dbbedeca9314e66ae50e44ffa31a414": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "6e34619b45934040b6092e6fb01ea7fe": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": 
"LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "71ce208e20d6483abb9ed923510c86d7": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_d69dc491b3ab44d7852b21873ed7bb7f", - "placeholder": "​", - "style": "IPY_MODEL_f401d53bf28e44eb906bce6c05412662", - "value": " 51760/51760 [00:01<00:00, 45512.81 examples/s]" - } - }, - "7358cdad832342c983e31efb8754ab78": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - 
"align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "73e352a3404f4c7dad0737f57d29e92f": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_988a0e8c1f89446086858da0a891a79c", - "IPY_MODEL_4ccedf0d93094e63b57a0f8a434fba06", - "IPY_MODEL_6b2012c3f88547af8884a9ea90e3164b" - ], - "layout": "IPY_MODEL_7e29cb8dd4df4d5b94407cd8fd3f2011" - } - }, - "74501720ac7e4dbb911a4a99b3633bc6": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "78e5400bff924a92a4cc61c4ff18b182": { - "model_module": 
"@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_b9b313fd861948f5aba25b24b1518d30", - "placeholder": "​", - "style": "IPY_MODEL_4c666f4ace3943f8b80ecd20e7503236", - "value": " 1.18k/1.18k [00:00<00:00, 31.3kB/s]" - } - }, - "7975adbc2ec5489ea7fa0167e620d85c": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_6e34619b45934040b6092e6fb01ea7fe", - "max": 51760, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_271ddaa553a042d09b6db7b450643d8f", - "value": 51760 - } - }, - "7e29cb8dd4df4d5b94407cd8fd3f2011": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, 
- "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "810ff6c0e17d4fa09a30fef27eacff90": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "89965917796a4f81b899fdc7685f33df": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "89b2ef0dbfea47ab8e6f8d659e3351d1": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": 
"", - "description_tooltip": null, - "layout": "IPY_MODEL_b8908fa0df3743ecb9d12983a739104f", - "placeholder": "​", - "style": "IPY_MODEL_177c78fce95d4b4ab33057c5a048d693", - "value": " 9.09M/9.09M [00:00<00:00, 32.6MB/s]" - } - }, - "8b3505352a5a42bf910428c40ce40465": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_49277aeeac16434a865a4d12308b1abc", - "placeholder": "​", - "style": "IPY_MODEL_2157f01726d748f8a9ae4a00664430da", - "value": " 5.70G/5.70G [01:02<00:00, 30.1MB/s]" - } - }, - "8fc142b628fb40568730234de1cafde2": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_4ae7e449e4ea4c729b5f34607c18ebae", - "max": 172, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_3572201bd4d74a58b7a665f9bdfdcdba", - "value": 172 - } - }, - "9367047a800747f79c6b225d92397846": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": 
"@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "938f45f1b3e24118b815d96ae34ba86a": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "95fbe66647904c06a20f640630d6dc0e": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_b0a370dc20654b279b9680692e34418e", - "placeholder": "​", - "style": "IPY_MODEL_cfeb365ddf7548d58b2557f22737fcf5", - 
"value": " 11.6k/11.6k [00:00<00:00, 716kB/s]" - } - }, - "988a0e8c1f89446086858da0a891a79c": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_ad2be500fc164c0f86f33e914ef8e6a0", - "placeholder": "​", - "style": "IPY_MODEL_5234566b1bfc4655b8d582ea5b46ed9f", - "value": "Downloading data: 100%" - } - }, - "98c58f23f4d549518832cb2d18f796e8": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_09b76013aa9e45efb6deb23a7a0d0925", - "IPY_MODEL_39b29a75374b45c0a22506010be2b84e", - "IPY_MODEL_78e5400bff924a92a4cc61c4ff18b182" - ], - "layout": "IPY_MODEL_2a58d04b428c46f4b3dbadd3bc6cd529" - } - }, - "99fdbb0300c14c139d1937c646f0cfe7": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_7358cdad832342c983e31efb8754ab78", - "placeholder": "​", - "style": 
"IPY_MODEL_e9adf418296e436fb48bb9f78885598b", - "value": " 51760/51760 [00:01<00:00, 38665.95 examples/s]" - } - }, - "9f679ad3ec7f4fe8ad0510ffb57bc2ab": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_4ea63adfce694725bdba878aef709dd3", - "placeholder": "​", - "style": "IPY_MODEL_74501720ac7e4dbb911a4a99b3633bc6", - "value": "tokenizer.json: 100%" - } - }, - "a0037bdccf254159becde630bee3d1db": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "a069d2ab23824f29aa320ac256e2cfe9": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - 
"grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "a0bf9160eb2647409b3200270914b90f": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "a41dc44766444a998bec2d777f249d23": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - 
"object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "a8464a4c711e4e00aafdfc919b60d07e": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_fb995c740590427b882572c81d4e848c", - "placeholder": "​", - "style": "IPY_MODEL_201b59ccd9f845e197029b57e424aefc", - "value": " 172/172 [00:00<00:00, 12.0kB/s]" - } - }, - "a9f0cc51fc3d4d7b874c32dcf1c5bdf2": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": 
null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "ad2be500fc164c0f86f33e914ef8e6a0": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "b0240cd9a4554b29ae11f8051984a1c6": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_edaf890370314a218f138015faa0b05d", - "placeholder": "​", - "style": 
"IPY_MODEL_697f027529b54ee9956bae78a11e0611", - "value": "Map: 100%" - } - }, - "b0a370dc20654b279b9680692e34418e": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "b518dcee69074b87be73957cd810e7ed": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_d891f8d0b1fc462f8008d02bb2a15692", - "placeholder": "​", - "style": "IPY_MODEL_cced8fd7e998472794f3f3e3018956a5", - "value": "tokenizer_config.json: 100%" - } - }, - 
"b8908fa0df3743ecb9d12983a739104f": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "b993eaec6b224440bf80c0958c6fb536": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": 
null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "b9b313fd861948f5aba25b24b1518d30": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "ba90fdb8822d47dab7ba203bee297f37": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": 
"1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_0f8b6bfe16894500838793f2491d403f", - "placeholder": "​", - "style": "IPY_MODEL_bb19f6c747754682a514373a3a0535ba", - "value": "Downloading readme: 100%" - } - }, - "bb19f6c747754682a514373a3a0535ba": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "bc883d4cf13e4f8b8a4fe5f410cb6efd": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_e9159e03e61f4f56978ece9c3bca49b2", - "max": 51760, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_810ff6c0e17d4fa09a30fef27eacff90", - "value": 51760 - } - }, - "c161d94df0f04feba9542237e0856c22": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": 
"LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "c22f71b1f85843209d7e5321506b9cb9": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_1f44c9ce1adf470cbb19784493ed209f", - "IPY_MODEL_f1addc4479d849879e743cf9089e6540", - "IPY_MODEL_8b3505352a5a42bf910428c40ce40465" - ], - "layout": "IPY_MODEL_4c4c88d4c701450692fa0f6b0c5764b0" - } - }, - "c4f2b06a82fd4987b8b659524a7b503b": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - 
}, - "cca8113c54c0495daedce1327bf9c68b": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_e02f9b7849c64531835eb77b860d1c93", - "max": 464, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_56aee4853b7740e6a977254f5d1fa66d", - "value": 464 - } - }, - "cced8fd7e998472794f3f3e3018956a5": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "cf245afeb1c04f29a24d291608c3d157": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_b518dcee69074b87be73957cd810e7ed", - "IPY_MODEL_e29104486d594b2992d7285e0ef77371", - "IPY_MODEL_6578fd7acdb54c4c93528ea431fd0144" - ], - "layout": "IPY_MODEL_d35db8148a354c56aaac56dbae22536f" - } - }, - "cfe8cae0e22b495bafa221a63d13b283": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", 
- "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "cfeb365ddf7548d58b2557f22737fcf5": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "d1b47d39450d4019ae85c9b2f943eeaf": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - 
"_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_4dcf6ff672d24983a1877a8431709aa9", - "IPY_MODEL_7975adbc2ec5489ea7fa0167e620d85c", - "IPY_MODEL_71ce208e20d6483abb9ed923510c86d7" - ], - "layout": "IPY_MODEL_cfe8cae0e22b495bafa221a63d13b283" - } - }, - "d35db8148a354c56aaac56dbae22536f": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "d69dc491b3ab44d7852b21873ed7bb7f": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - 
"border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "d891f8d0b1fc462f8008d02bb2a15692": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": 
null - } - }, - "d8e5318cead340c4adbeaccc05d39225": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "daf4cd890b35422683d22fd30bc71e83": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_b0240cd9a4554b29ae11f8051984a1c6", - "IPY_MODEL_bc883d4cf13e4f8b8a4fe5f410cb6efd", - "IPY_MODEL_99fdbb0300c14c139d1937c646f0cfe7" - ], - "layout": "IPY_MODEL_c161d94df0f04feba9542237e0856c22" - } - }, - "db19fc8d37db4e45a5790a876836d8c4": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": 
null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "de868e26e7154f62aa86223a539ad421": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "dea41c5260884aa6879b5e1d1697b14f": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - 
"overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "e02f9b7849c64531835eb77b860d1c93": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "e29104486d594b2992d7285e0ef77371": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_a9f0cc51fc3d4d7b874c32dcf1c5bdf2", - "max": 50641, - "min": 
0, - "orientation": "horizontal", - "style": "IPY_MODEL_2f6c70dd266c4816bfad3fd3d192929a", - "value": 50641 - } - }, - "e36a3f9eff0e4cf68834d66b0213ae96": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "e9159e03e61f4f56978ece9c3bca49b2": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": 
null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "e9adf418296e436fb48bb9f78885598b": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "edaf890370314a218f138015faa0b05d": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": 
null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "f1addc4479d849879e743cf9089e6540": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_43dec2ede91341f5af60eb522e18e984", - "max": 5702746405, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_d8e5318cead340c4adbeaccc05d39225", - "value": 5702746405 - } - }, - "f2df530d22c74977b249dd9fb5f4829b": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_21db8a77b00d4a4e82fdfa608657531f", - "max": 9085698, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_6dbbedeca9314e66ae50e44ffa31a414", - "value": 9085698 - } - }, - "f401d53bf28e44eb906bce6c05412662": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": 
"@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "fb995c740590427b882572c81d4e848c": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "fce7a61c25ec4390af43d92b7c473a45": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - 
"IPY_MODEL_30307300bc4e4baf96560e30969a82b6", - "IPY_MODEL_8fc142b628fb40568730234de1cafde2", - "IPY_MODEL_a8464a4c711e4e00aafdfc919b60d07e" - ], - "layout": "IPY_MODEL_5f40db8173dd4d76b6ef5ed6d9ec8b6e" - } - }, - "fdb1941405ed4e4aa06019933892deb3": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - } - } - } - }, - "nbformat": 4, - "nbformat_minor": 0 -}