{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "id": "mo-H82fsy1jy" }, "outputs": [], "source": [ "%load_ext autoreload\n", "%autoreload 2" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "bYpMYw2Wz7Bv", "outputId": "cc9dce0a-c5f4-421c-ed41-d79c3a1b3577" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "workding dir: c:\\Users\\HT\\Documents\\URP\\logical-reasoning\n" ] } ], "source": [ "import os\n", "import sys\n", "from pathlib import Path\n", "\n", "workding_dir = str(Path.cwd().parent)\n", "os.chdir(workding_dir)\n", "sys.path.append(workding_dir)\n", "print(\"working dir:\", workding_dir)" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "aA2yLesz27M8", "outputId": "32909874-deee-44b8-c3de-5476cc3008f1" }, "outputs": [ { "data": { "text/plain": [ "False" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "need_to_setup_env = False\n", "need_to_setup_env" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "u0QXyHU_5DQR", "outputId": "54672b45-b5dc-48ef-efd2-5e8545e7b78b" }, "outputs": [], "source": [ "if need_to_setup_env:\n", " %pip install -r requirements.txt" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "RKmGaYU_5OkA", "outputId": "27c8c14b-1538-41e0-e3dd-dc37c650f5fd" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "loading env vars from: c:\\Users\\HT\\Documents\\URP\\logical-reasoning\\.env.qwen2_7b\n" ] }, { "data": { "text/plain": [ "True" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from dotenv import find_dotenv, load_dotenv\n", "\n", "found_dotenv = find_dotenv(\".env.qwen2_7b\")\n", "\n", "if len(found_dotenv) == 0:\n", " found_dotenv = find_dotenv(\".env.example\")\n", "print(f\"loading env vars from: {found_dotenv}\")\n", "load_dotenv(found_dotenv, override=True)" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "Xa7KxkuzUeS9", "outputId": "6c71b30e-7b02-44ef-feeb-df94989be7f3" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Qwen/Qwen2-7B None False datasets/mgtv results/mgtv-results_qwen2_7b.csv False\n" ] } ], "source": [ "import os\n", "\n", "model_name = os.getenv(\"MODEL_NAME\")\n", "adapter_name_or_path = os.getenv(\"ADAPTER_NAME_OR_PATH\")\n", "load_in_4bit = os.getenv(\"LOAD_IN_4BIT\") == \"true\"\n", "data_path = os.getenv(\"LOGICAL_REASONING_DATA_PATH\")\n", "results_path = os.getenv(\"LOGICAL_REASONING_RESULTS_PATH\")\n", "use_english_datasets = os.getenv(\"USE_ENGLISH_DATASETS\") == \"true\"\n", "\n", "print(model_name, adapter_name_or_path, load_in_4bit, data_path, results_path, use_english_datasets)" ] }, { "cell_type": "code", "execution_count": 19, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 379 }, "id": "goEFOG9Z5TvW", "outputId": "1491df15-1eca-43ac-89d2-ca1f74b42297" }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
textlabelanswertitlepuzzletruth
0偷的人信神吗不是NaN乡村之谜:消失的南瓜在甄家村里,有一个古老的传说:每年南瓜丰收的季节,南瓜田里总有一个最大的南瓜会不翼而飞,村民...真相原来与一位年迈的农夫有关。这位农夫年轻时,曾与一位美丽的姑娘相恋。他们约定在南瓜丰收的季...
1偷南瓜是为了来年丰收吗不是NaN乡村之谜:消失的南瓜在甄家村里,有一个古老的传说:每年南瓜丰收的季节,南瓜田里总有一个最大的南瓜会不翼而飞,村民...真相原来与一位年迈的农夫有关。这位农夫年轻时,曾与一位美丽的姑娘相恋。他们约定在南瓜丰收的季...
2村庄里的人喜欢南瓜嘛不重要NaN乡村之谜:消失的南瓜在甄家村里,有一个古老的传说:每年南瓜丰收的季节,南瓜田里总有一个最大的南瓜会不翼而飞,村民...真相原来与一位年迈的农夫有关。这位农夫年轻时,曾与一位美丽的姑娘相恋。他们约定在南瓜丰收的季...
3村庄里的人每年都需要用南瓜做祭品嘛不是NaN乡村之谜:消失的南瓜在甄家村里,有一个古老的传说:每年南瓜丰收的季节,南瓜田里总有一个最大的南瓜会不翼而飞,村民...真相原来与一位年迈的农夫有关。这位农夫年轻时,曾与一位美丽的姑娘相恋。他们约定在南瓜丰收的季...
4是村里的人偷的么NaN乡村之谜:消失的南瓜在甄家村里,有一个古老的传说:每年南瓜丰收的季节,南瓜田里总有一个最大的南瓜会不翼而飞,村民...真相原来与一位年迈的农夫有关。这位农夫年轻时,曾与一位美丽的姑娘相恋。他们约定在南瓜丰收的季...
\n", "
" ], "text/plain": [ " text label answer title \\\n", "0 偷的人信神吗 不是 NaN 乡村之谜:消失的南瓜 \n", "1 偷南瓜是为了来年丰收吗 不是 NaN 乡村之谜:消失的南瓜 \n", "2 村庄里的人喜欢南瓜嘛 不重要 NaN 乡村之谜:消失的南瓜 \n", "3 村庄里的人每年都需要用南瓜做祭品嘛 不是 NaN 乡村之谜:消失的南瓜 \n", "4 是村里的人偷的么 是 NaN 乡村之谜:消失的南瓜 \n", "\n", " puzzle \\\n", "0 在甄家村里,有一个古老的传说:每年南瓜丰收的季节,南瓜田里总有一个最大的南瓜会不翼而飞,村民... \n", "1 在甄家村里,有一个古老的传说:每年南瓜丰收的季节,南瓜田里总有一个最大的南瓜会不翼而飞,村民... \n", "2 在甄家村里,有一个古老的传说:每年南瓜丰收的季节,南瓜田里总有一个最大的南瓜会不翼而飞,村民... \n", "3 在甄家村里,有一个古老的传说:每年南瓜丰收的季节,南瓜田里总有一个最大的南瓜会不翼而飞,村民... \n", "4 在甄家村里,有一个古老的传说:每年南瓜丰收的季节,南瓜田里总有一个最大的南瓜会不翼而飞,村民... \n", "\n", " truth \n", "0 真相原来与一位年迈的农夫有关。这位农夫年轻时,曾与一位美丽的姑娘相恋。他们约定在南瓜丰收的季... \n", "1 真相原来与一位年迈的农夫有关。这位农夫年轻时,曾与一位美丽的姑娘相恋。他们约定在南瓜丰收的季... \n", "2 真相原来与一位年迈的农夫有关。这位农夫年轻时,曾与一位美丽的姑娘相恋。他们约定在南瓜丰收的季... \n", "3 真相原来与一位年迈的农夫有关。这位农夫年轻时,曾与一位美丽的姑娘相恋。他们约定在南瓜丰收的季... \n", "4 真相原来与一位年迈的农夫有关。这位农夫年轻时,曾与一位美丽的姑娘相恋。他们约定在南瓜丰收的季... " ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import pandas as pd\n", "df = pd.read_csv(\"datasets/mgtv/train.csv\")\n", "df.head()" ] }, { "cell_type": "code", "execution_count": 20, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "zPuVZfdRICtY", "outputId": "7b423111-fe0e-47ab-eeb8-438bc10c7930" }, "outputs": [ { "data": { "text/plain": [ "{'instruction': '你是一个逻辑游戏的主持人。游戏规则如下:1. 参与者会得到一个谜题。2. 参与者可以通过提问来获取线索,尝试解开谜题。3. 对于每个问题,主持人将根据实际情况回答以下五个选项之一:是、不是、不重要、回答正确、问法错误。4. 回答中不能添加任何其它信息,也不能省略选项中的任何一个字。例如,不可以把“不是”省略成“不”。5. 参与者需要根据回答来推理,并最终找出谜题的正确答案。请严格按照这些规则回答参与者提出的问题。谜题: {}实际情况: {}参与者提出的问题: {}',\n", " 'input': '谜题: 乡村之谜:消失的南瓜 在甄家村里,有一个古老的传说:每年南瓜丰收的季节,南瓜田里总有一个最大的南瓜会不翼而飞,村民们对此现象困惑不解。请找出南瓜失踪背后的原因。实际情况: 真相原来与一位年迈的农夫有关。这位农夫年轻时,曾与一位美丽的姑娘相恋。他们约定在南瓜丰收的季节结婚。然而,命运弄人,姑娘在婚礼前的一场意外中离世。悲伤的农夫为了纪念心爱的姑娘,每年都会将最大的南瓜偷走,放到姑娘的墓前,以此寄托自己的哀思。这一行为延续了多年,成为了乡村里一个神秘的传说。参与者提出的问题: 偷的人信神吗',\n", " 'output': '不是'}" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dataset_data = [\n", " {\n", " \"instruction\": \"你是一个逻辑游戏的主持人。游戏规则如下:1. 参与者会得到一个谜题。2. 参与者可以通过提问来获取线索,尝试解开谜题。3. 对于每个问题,主持人将根据实际情况回答以下五个选项之一:是、不是、不重要、回答正确、问法错误。4. 回答中不能添加任何其它信息,也不能省略选项中的任何一个字。例如,不可以把“不是”省略成“不”。5. 
参与者需要根据回答来推理,并最终找出谜题的正确答案。请严格按照这些规则回答参与者提出的问题。谜题: {}实际情况: {}参与者提出的问题: {}\",\n", " \"input\": \"谜题: \" + row_dict[\"title\"] + \" \" + row_dict[\"puzzle\"] + \"实际情况: \" + row_dict[\"truth\"] + \"参与者提出的问题: \" + row_dict[\"text\"],\n", " \"output\": row_dict[\"label\"]\n", " }\n", " for row_dict in df.to_dict(orient=\"records\")\n", "]\n", "\n", "dataset_data[0]" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "unuNtJc5_AIL", "outputId": "d3b87976-e32d-4b8f-fcc9-56b048604526" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "JSON file saved to /content/LLaMA-Factory/data/mgtv_train.json\n" ] } ], "source": [ "import os\n", "import json\n", "\n", "# Define the directory where you want to save the JSON file\n", "output_dir = \"/content/LLaMA-Factory/data/\"\n", "\n", "# Ensure the directory exists\n", "os.makedirs(output_dir, exist_ok=True)\n", "\n", "# Define the full path for the JSON file\n", "json_file_path = os.path.join(output_dir, \"mgtv_train.json\")\n", "\n", "# Save the dataset data to the specified path\n", "with open(json_file_path, \"w\") as f:\n", " json.dump(dataset_data, f)\n", "\n", "print(f\"JSON file saved to {json_file_path}\")\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "zyxvE1nfX8cq", "outputId": "1d3bddc5-289f-48b7-c2ce-5e9bd1684ea0" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "/content/LLaMA-Factory\n" ] } ], "source": [ "import json\n", "%cd /content/LLaMA-Factory/\n", "\n", "args = dict(\n", " model_name_or_path=\"Qwen/Qwen2-7B\", # fine-tune the Qwen/Qwen2-7B base model\n", "\n", " stage=\"sft\", # do supervised fine-tuning\n", " do_train=True,\n", " finetuning_type=\"lora\", # use LoRA adapters to save memory\n", " lora_target=\"all\", # attach LoRA adapters to all linear layers\n", " quantization_bit=4,\n", " loraplus_lr_ratio=16.0, # 16x base LoRA learning rate\n", "\n", " dataset=\"mgtv_train\",\n", " template=\"qwen\",\n", " cutoff_len=4096,\n", " max_samples=5000,\n", " overwrite_cache=\"true\",\n", " preprocessing_num_workers=16,\n", "\n", " output_dir=\"/content/qwen2-7b\",\n", " logging_steps=562,\n", " save_steps=562,\n", " plot_loss=\"true\",\n", " overwrite_output_dir=\"true\",\n", "\n", " per_device_train_batch_size=1, # batch size per device\n", " gradient_accumulation_steps=8, # effective batch size = 1 x 8 = 8\n", " learning_rate=0.001, # peak learning rate\n", " num_train_epochs=6.0, # number of training epochs\n", " lr_scheduler_type=\"cosine\", # use cosine learning rate scheduler\n", " warmup_ratio=0.1, # linear warmup over the first 10% of steps\n", " bf16=True,\n", " ddp_timeout=180000000, # ~5.71 years, effectively no timeout\n", "\n", " val_size=0.1,\n", " per_device_eval_batch_size=1,\n", " eval_strategy=\"steps\",\n", " eval_steps=562,\n", "\n", " report_to=\"wandb\",\n", ")\n", "\n", "with open(\"train_qwen2_7b.json\", \"w\", encoding=\"utf-8\") as f:\n", " json.dump(args, f, indent=2)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "QlYqm4TePib3" }, "outputs": [], "source": [ "with open(\"data/dataset_info.json\", 'r+') as file:\n", " # First we load existing data into a dict.\n", " file_data = json.load(file)\n", " # Register the mgtv_train dataset entry at the beginning of the dict.\n", " qwen2_7b = {\"mgtv_train\": {\n", " \"file_name\": \"mgtv_train.json\"\n", " }\n", " }\n", "\n", " qwen2_7b.update(file_data)\n", " file.seek(0)\n", " # convert back to json.\n", 
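"    # note: a file.truncate() after the dump would be needed if the updated JSON could ever be shorter than the original file; here the new content is strictly longer, so overwriting in place is safe\n",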
json.dump(qwen2_7b, file, indent=2)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "background_save": true, "base_uri": "https://localhost:8080/" }, "id": "VEuCMjMpITg-", "outputId": "76cf7882-3ae8-4c53-8d6c-c59b3557af0e" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "2024-07-15 14:34:28.658348: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n", "2024-07-15 14:34:28.710574: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n", "2024-07-15 14:34:28.710630: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n", "2024-07-15 14:34:28.712064: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n", "2024-07-15 14:34:28.719927: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", "To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", "2024-07-15 14:34:29.954969: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n", "07/15/2024 14:34:36 - WARNING - llamafactory.hparams.parser - We recommend enable `upcast_layernorm` in quantized training.\n", "07/15/2024 14:34:36 - INFO - llamafactory.hparams.parser - Process rank: 0, device: cuda:0, n_gpu: 1, distributed training: False, compute dtype: torch.bfloat16\n", "tokenizer_config.json: 100% 1.29k/1.29k [00:00<00:00, 9.74MB/s]\n", "vocab.json: 100% 2.78M/2.78M [00:00<00:00, 10.4MB/s]\n", "merges.txt: 100% 1.67M/1.67M [00:00<00:00, 6.67MB/s]\n", "tokenizer.json: 100% 7.03M/7.03M [00:00<00:00, 18.8MB/s]\n", "[INFO|tokenization_utils_base.py:2108] 2024-07-15 14:34:38,471 >> loading file vocab.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2-7B/snapshots/453ed1575b739b5b03ce3758b23befdb0967f40e/vocab.json\n", "[INFO|tokenization_utils_base.py:2108] 2024-07-15 14:34:38,471 >> loading file merges.txt from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2-7B/snapshots/453ed1575b739b5b03ce3758b23befdb0967f40e/merges.txt\n", "[INFO|tokenization_utils_base.py:2108] 2024-07-15 14:34:38,471 >> loading file tokenizer.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2-7B/snapshots/453ed1575b739b5b03ce3758b23befdb0967f40e/tokenizer.json\n", "[INFO|tokenization_utils_base.py:2108] 2024-07-15 14:34:38,471 >> loading file added_tokens.json from cache at None\n", "[INFO|tokenization_utils_base.py:2108] 2024-07-15 14:34:38,471 >> loading file special_tokens_map.json from cache at None\n", "[INFO|tokenization_utils_base.py:2108] 2024-07-15 14:34:38,471 >> loading file tokenizer_config.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2-7B/snapshots/453ed1575b739b5b03ce3758b23befdb0967f40e/tokenizer_config.json\n", "[WARNING|logging.py:314] 2024-07-15 14:34:38,733 >> Special tokens have been added in the 
vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n", "07/15/2024 14:34:38 - INFO - llamafactory.data.template - Replace eos token: <|im_end|>\n", "07/15/2024 14:34:38 - INFO - llamafactory.data.loader - Loading dataset mgtv_train.json...\n", "Generating train split: 25000 examples [00:01, 18396.69 examples/s]\n", "/usr/local/lib/python3.10/dist-packages/multiprocess/popen_fork.py:66: RuntimeWarning: os.fork() was called. os.fork() is incompatible with multithreaded code, and JAX is multithreaded, so this will likely lead to a deadlock.\n", " self.pid = os.fork()\n", "Converting format of dataset (num_proc=16): 100% 5000/5000 [00:00<00:00, 21217.02 examples/s]\n", "Running tokenizer on dataset (num_proc=16): 100% 5000/5000 [00:02<00:00, 1705.27 examples/s]\n", "training example:\n", "input_ids:\n", "[151644, 8948, 198, 2610, 525, 264, 10950, 17847, 13, 151645, 198, 151644, 872, 198, 56568, 101909, 104913, 99329, 9370, 106040, 1773, 99329, 104190, 104506, 5122, 16, 13, 26853, 224, 57218, 28946, 36993, 101051, 46944, 107969, 33872, 1773, 17, 13, 26853, 224, 57218, 28946, 105125, 107666, 36407, 45912, 105814, 3837, 104482, 117647, 107969, 33872, 1773, 18, 13, 69162, 34204, 103991, 86119, 3837, 106040, 44063, 100345, 107591, 102104, 87752, 105220, 109487, 100653, 5122, 20412, 5373, 99520, 5373, 16530, 99335, 5373, 102104, 88991, 5373, 56007, 24339, 32100, 1773, 19, 13, 49602, 252, 99590, 15946, 53153, 42855, 99885, 102158, 27369, 3837, 105827, 65770, 99475, 109487, 101047, 110281, 18600, 1773, 77557, 3837, 108620, 99360, 2073, 99520, 854, 65770, 99475, 12857, 2073, 16530, 55807, 20, 13, 26853, 224, 57218, 28946, 85106, 100345, 102104, 36407, 113272, 90395, 103941, 109363, 107969, 33872, 9370, 88991, 102349, 1773, 14880, 110439, 100001, 104190, 102104, 111842, 101080, 103936, 1773, 107969, 33872, 25, 4687, 107591, 25, 4687, 111842, 101080, 103936, 25, 5613, 107969, 33872, 25, 220, 100833, 53930, 107969, 5122, 102505, 9370, 115865, 73562, 109628, 45629, 105489, 3837, 104133, 111718, 106023, 5122, 101988, 115865, 110731, 9370, 105419, 3837, 115865, 99810, 69249, 59743, 104133, 104003, 115865, 36993, 16530, 101401, 68536, 99723, 3837, 115967, 104270, 102060, 110666, 112031, 1773, 14880, 109363, 115865, 110786, 101423, 104249, 1773, 107591, 25, 10236, 250, 253, 48921, 101221, 57218, 101961, 7948, 100894, 9370, 99288, 99818, 101063, 1773, 104269, 99288, 99818, 100774, 13343, 3837, 99798, 57218, 101961, 105664, 102373, 48921, 100271, 1773, 99650, 105616, 18493, 115865, 110731, 9370, 105419, 104388, 1773, 103968, 3837, 102606, 102115, 17340, 3837, 102373, 18493, 106340, 24562, 99774, 82224, 104424, 15946, 99372, 99244, 1773, 110597, 9370, 99288, 99818, 100012, 101416, 63109, 99242, 9370, 102373, 3837, 101988, 101938, 44063, 104003, 115865, 101329, 99314, 3837, 107974, 102373, 9370, 104575, 24562, 3837, 105699, 116418, 100005, 103000, 90663, 1773, 100147, 101070, 105443, 34187, 100097, 3837, 104989, 100833, 69249, 46944, 105190, 9370, 106023, 1773, 111842, 101080, 103936, 25, 4891, 223, 115, 100623, 21317, 99315, 101037, 151645, 198, 151644, 77091, 198, 99520, 151645]\n", "inputs:\n", "<|im_start|>system\n", "You are a helpful assistant.<|im_end|>\n", "<|im_start|>user\n", "你是一个逻辑游戏的主持人。游戏规则如下:1. 参与者会得到一个谜题。2. 参与者可以通过提问来获取线索,尝试解开谜题。3. 对于每个问题,主持人将根据实际情况回答以下五个选项之一:是、不是、不重要、回答正确、问法错误。4. 回答中不能添加任何其它信息,也不能省略选项中的任何一个字。例如,不可以把“不是”省略成“不”。5. 
参与者需要根据回答来推理,并最终找出谜题的正确答案。请严格按照这些规则回答参与者提出的问题。谜题: {}实际情况: {}参与者提出的问题: {}\n", "谜题: 乡村之谜:消失的南瓜 在甄家村里,有一个古老的传说:每年南瓜丰收的季节,南瓜田里总有一个最大的南瓜会不翼而飞,村民们对此现象困惑不解。请找出南瓜失踪背后的原因。实际情况: 真相原来与一位年迈的农夫有关。这位农夫年轻时,曾与一位美丽的姑娘相恋。他们约定在南瓜丰收的季节结婚。然而,命运弄人,姑娘在婚礼前的一场意外中离世。悲伤的农夫为了纪念心爱的姑娘,每年都会将最大的南瓜偷走,放到姑娘的墓前,以此寄托自己的哀思。这一行为延续了多年,成为了乡村里一个神秘的传说。参与者提出的问题: 偷的人信神吗<|im_end|>\n", "<|im_start|>assistant\n", "不是<|im_end|>\n", "label_ids:\n", "[-100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 99520, 151645]\n", "labels:\n", "不是<|im_end|>\n", "/usr/local/lib/python3.10/dist-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. 
If you want to force a new download, use `force_download=True`.\n", " warnings.warn(\n", "config.json: 100% 664/664 [00:00<00:00, 5.23MB/s]\n", "[INFO|configuration_utils.py:733] 2024-07-15 14:34:44,448 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2-7B/snapshots/453ed1575b739b5b03ce3758b23befdb0967f40e/config.json\n", "[INFO|configuration_utils.py:796] 2024-07-15 14:34:44,451 >> Model config Qwen2Config {\n", " \"_name_or_path\": \"Qwen/Qwen2-7B\",\n", " \"architectures\": [\n", " \"Qwen2ForCausalLM\"\n", " ],\n", " \"attention_dropout\": 0.0,\n", " \"bos_token_id\": 151643,\n", " \"eos_token_id\": 151643,\n", " \"hidden_act\": \"silu\",\n", " \"hidden_size\": 3584,\n", " \"initializer_range\": 0.02,\n", " \"intermediate_size\": 18944,\n", " \"max_position_embeddings\": 131072,\n", " \"max_window_layers\": 28,\n", " \"model_type\": \"qwen2\",\n", " \"num_attention_heads\": 28,\n", " \"num_hidden_layers\": 28,\n", " \"num_key_value_heads\": 4,\n", " \"rms_norm_eps\": 1e-06,\n", " \"rope_theta\": 1000000.0,\n", " \"sliding_window\": 131072,\n", " \"tie_word_embeddings\": false,\n", " \"torch_dtype\": \"bfloat16\",\n", " \"transformers_version\": \"4.41.2\",\n", " \"use_cache\": true,\n", " \"use_sliding_window\": false,\n", " \"vocab_size\": 152064\n", "}\n", "\n", "07/15/2024 14:34:44 - INFO - llamafactory.model.model_utils.quantization - Quantizing model to 4 bit with bitsandbytes.\n", "model.safetensors.index.json: 100% 27.8k/27.8k [00:00<00:00, 99.4MB/s]\n", "[INFO|modeling_utils.py:3474] 2024-07-15 14:34:44,977 >> loading weights file model.safetensors from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2-7B/snapshots/453ed1575b739b5b03ce3758b23befdb0967f40e/model.safetensors.index.json\n", "Downloading shards: 0% 0/4 [00:00> Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16.\n", "[INFO|configuration_utils.py:962] 2024-07-15 14:36:16,412 >> Generate config GenerationConfig {\n", " \"bos_token_id\": 151643,\n", " \"eos_token_id\": 151643\n", "}\n", "\n", "Loading checkpoint shards: 100% 4/4 [00:06<00:00, 1.55s/it]\n", "[INFO|modeling_utils.py:4280] 2024-07-15 14:36:26,291 >> All model checkpoint weights were used when initializing Qwen2ForCausalLM.\n", "\n", "[INFO|modeling_utils.py:4288] 2024-07-15 14:36:26,291 >> All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at Qwen/Qwen2-7B.\n", "If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training.\n", "generation_config.json: 100% 138/138 [00:00<00:00, 1.11MB/s]\n", "[INFO|configuration_utils.py:917] 2024-07-15 14:36:26,489 >> loading configuration file generation_config.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2-7B/snapshots/453ed1575b739b5b03ce3758b23befdb0967f40e/generation_config.json\n", "[INFO|configuration_utils.py:962] 2024-07-15 14:36:26,489 >> Generate config GenerationConfig {\n", " \"bos_token_id\": 151643,\n", " \"eos_token_id\": 151643,\n", " \"max_new_tokens\": 2048\n", "}\n", "\n", "07/15/2024 14:36:27 - INFO - llamafactory.model.model_utils.checkpointing - Gradient checkpointing enabled.\n", "07/15/2024 14:36:27 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.\n", "07/15/2024 14:36:27 - INFO - llamafactory.model.adapter - Upcasting trainable params to float32.\n", "07/15/2024 14:36:27 - INFO - llamafactory.model.adapter - 
Fine-tuning method: LoRA\n", "07/15/2024 14:36:27 - INFO - llamafactory.model.model_utils.misc - Found linear modules: q_proj,down_proj,o_proj,gate_proj,v_proj,up_proj,k_proj\n", "07/15/2024 14:36:27 - INFO - llamafactory.model.loader - trainable params: 20,185,088 || all params: 7,635,801,600 || trainable%: 0.2643\n", "[INFO|trainer.py:641] 2024-07-15 14:36:27,732 >> Using auto half precision backend\n", "07/15/2024 14:36:28 - INFO - llamafactory.train.trainer_utils - Using LoRA+ optimizer with loraplus lr ratio 16.00.\n", "[INFO|trainer.py:2078] 2024-07-15 14:36:28,977 >> ***** Running training *****\n", "[INFO|trainer.py:2079] 2024-07-15 14:36:28,977 >> Num examples = 4,500\n", "[INFO|trainer.py:2080] 2024-07-15 14:36:28,977 >> Num Epochs = 6\n", "[INFO|trainer.py:2081] 2024-07-15 14:36:28,977 >> Instantaneous batch size per device = 1\n", "[INFO|trainer.py:2084] 2024-07-15 14:36:28,977 >> Total train batch size (w. parallel, distributed & accumulation) = 8\n", "[INFO|trainer.py:2085] 2024-07-15 14:36:28,977 >> Gradient Accumulation steps = 8\n", "[INFO|trainer.py:2086] 2024-07-15 14:36:28,977 >> Total optimization steps = 3,372\n", "[INFO|trainer.py:2087] 2024-07-15 14:36:28,981 >> Number of trainable parameters = 20,185,088\n", "[INFO|integration_utils.py:723] 2024-07-15 14:36:28,986 >> Automatic Weights & Biases logging enabled, to disable set os.environ[\"WANDB_DISABLED\"] = \"true\"\n", "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m The `run_name` is currently set to the same value as `TrainingArguments.output_dir`. If this was not intended, please specify a different run name by setting the `TrainingArguments.run_name` parameter.\n", "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33minflaton-sg\u001b[0m (\u001b[33minflaton-ai\u001b[0m). Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n", "\u001b[34m\u001b[1mwandb\u001b[0m: Tracking run with wandb version 0.17.4\n", "\u001b[34m\u001b[1mwandb\u001b[0m: Run data is saved locally in \u001b[35m\u001b[1m/content/LLaMA-Factory/wandb/run-20240715_143630-ancw8jgs\u001b[0m\n", "\u001b[34m\u001b[1mwandb\u001b[0m: Run \u001b[1m`wandb offline`\u001b[0m to turn off syncing.\n", "\u001b[34m\u001b[1mwandb\u001b[0m: Syncing run \u001b[33m/content/qwen2-7b\u001b[0m\n", "\u001b[34m\u001b[1mwandb\u001b[0m: ⭐️ View project at \u001b[34m\u001b[4mhttps://wandb.ai/inflaton-ai/huggingface\u001b[0m\n", "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run at \u001b[34m\u001b[4mhttps://wandb.ai/inflaton-ai/huggingface/runs/ancw8jgs\u001b[0m\n", "{'loss': 1.9143, 'grad_norm': 2.1186106204986572, 'learning_rate': 0.000986610734407955, 'epoch': 1.0}\n", " 17% 562/3372 [1:01:09<5:02:54, 6.47s/it][INFO|trainer.py:3719] 2024-07-15 15:37:39,784 >> ***** Running Evaluation *****\n", "[INFO|trainer.py:3721] 2024-07-15 15:37:39,784 >> Num examples = 500\n", "[INFO|trainer.py:3724] 2024-07-15 15:37:39,785 >> Batch size = 1\n", "\n", " 0% 0/500 [00:00> Saving model checkpoint to /content/qwen2-7b/checkpoint-562\n", "/usr/local/lib/python3.10/dist-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. 
If you want to force a new download, use `force_download=True`.\n", " warnings.warn(\n", "[INFO|configuration_utils.py:733] 2024-07-15 15:39:50,101 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2-7B/snapshots/453ed1575b739b5b03ce3758b23befdb0967f40e/config.json\n", "[INFO|configuration_utils.py:796] 2024-07-15 15:39:50,102 >> Model config Qwen2Config {\n", " \"architectures\": [\n", " \"Qwen2ForCausalLM\"\n", " ],\n", " \"attention_dropout\": 0.0,\n", " \"bos_token_id\": 151643,\n", " \"eos_token_id\": 151643,\n", " \"hidden_act\": \"silu\",\n", " \"hidden_size\": 3584,\n", " \"initializer_range\": 0.02,\n", " \"intermediate_size\": 18944,\n", " \"max_position_embeddings\": 131072,\n", " \"max_window_layers\": 28,\n", " \"model_type\": \"qwen2\",\n", " \"num_attention_heads\": 28,\n", " \"num_hidden_layers\": 28,\n", " \"num_key_value_heads\": 4,\n", " \"rms_norm_eps\": 1e-06,\n", " \"rope_theta\": 1000000.0,\n", " \"sliding_window\": 131072,\n", " \"tie_word_embeddings\": false,\n", " \"torch_dtype\": \"bfloat16\",\n", " \"transformers_version\": \"4.41.2\",\n", " \"use_cache\": true,\n", " \"use_sliding_window\": false,\n", " \"vocab_size\": 152064\n", "}\n", "\n", "[INFO|tokenization_utils_base.py:2513] 2024-07-15 15:39:50,298 >> tokenizer config file saved in /content/qwen2-7b/checkpoint-562/tokenizer_config.json\n", "[INFO|tokenization_utils_base.py:2522] 2024-07-15 15:39:50,298 >> Special tokens file saved in /content/qwen2-7b/checkpoint-562/special_tokens_map.json\n", "{'loss': 0.847, 'grad_norm': 0.33948227763175964, 'learning_rate': 0.0008433439152121052, 'epoch': 2.0}\n", " 33% 1124/3372 [2:04:27<4:05:43, 6.56s/it][INFO|trainer.py:3719] 2024-07-15 16:40:58,115 >> ***** Running Evaluation *****\n", "[INFO|trainer.py:3721] 2024-07-15 16:40:58,115 >> Num examples = 500\n", "[INFO|trainer.py:3724] 2024-07-15 16:40:58,115 >> Batch size = 1\n", "\n", " 0% 0/500 [00:00> Saving model checkpoint to /content/qwen2-7b/checkpoint-1124\n", "/usr/local/lib/python3.10/dist-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. 
If you want to force a new download, use `force_download=True`.\n", " warnings.warn(\n", "[INFO|configuration_utils.py:733] 2024-07-15 16:43:11,686 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2-7B/snapshots/453ed1575b739b5b03ce3758b23befdb0967f40e/config.json\n", "[INFO|configuration_utils.py:796] 2024-07-15 16:43:11,687 >> Model config Qwen2Config {\n", " \"architectures\": [\n", " \"Qwen2ForCausalLM\"\n", " ],\n", " \"attention_dropout\": 0.0,\n", " \"bos_token_id\": 151643,\n", " \"eos_token_id\": 151643,\n", " \"hidden_act\": \"silu\",\n", " \"hidden_size\": 3584,\n", " \"initializer_range\": 0.02,\n", " \"intermediate_size\": 18944,\n", " \"max_position_embeddings\": 131072,\n", " \"max_window_layers\": 28,\n", " \"model_type\": \"qwen2\",\n", " \"num_attention_heads\": 28,\n", " \"num_hidden_layers\": 28,\n", " \"num_key_value_heads\": 4,\n", " \"rms_norm_eps\": 1e-06,\n", " \"rope_theta\": 1000000.0,\n", " \"sliding_window\": 131072,\n", " \"tie_word_embeddings\": false,\n", " \"torch_dtype\": \"bfloat16\",\n", " \"transformers_version\": \"4.41.2\",\n", " \"use_cache\": true,\n", " \"use_sliding_window\": false,\n", " \"vocab_size\": 152064\n", "}\n", "\n", "[INFO|tokenization_utils_base.py:2513] 2024-07-15 16:43:11,872 >> tokenizer config file saved in /content/qwen2-7b/checkpoint-1124/tokenizer_config.json\n", "[INFO|tokenization_utils_base.py:2522] 2024-07-15 16:43:11,873 >> Special tokens file saved in /content/qwen2-7b/checkpoint-1124/special_tokens_map.json\n", "{'loss': 0.5831, 'grad_norm': 0.08739642798900604, 'learning_rate': 0.0005870506865895984, 'epoch': 3.0}\n", " 50% 1686/3372 [3:08:20<3:02:02, 6.48s/it][INFO|trainer.py:3719] 2024-07-15 17:44:50,834 >> ***** Running Evaluation *****\n", "[INFO|trainer.py:3721] 2024-07-15 17:44:50,834 >> Num examples = 500\n", "[INFO|trainer.py:3724] 2024-07-15 17:44:50,835 >> Batch size = 1\n", "\n", " 0% 0/500 [00:00> Saving model checkpoint to /content/qwen2-7b/checkpoint-1686\n", "/usr/local/lib/python3.10/dist-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. 
If you want to force a new download, use `force_download=True`.\n", " warnings.warn(\n", "[INFO|configuration_utils.py:733] 2024-07-15 17:47:04,904 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2-7B/snapshots/453ed1575b739b5b03ce3758b23befdb0967f40e/config.json\n", "[INFO|configuration_utils.py:796] 2024-07-15 17:47:04,905 >> Model config Qwen2Config {\n", " \"architectures\": [\n", " \"Qwen2ForCausalLM\"\n", " ],\n", " \"attention_dropout\": 0.0,\n", " \"bos_token_id\": 151643,\n", " \"eos_token_id\": 151643,\n", " \"hidden_act\": \"silu\",\n", " \"hidden_size\": 3584,\n", " \"initializer_range\": 0.02,\n", " \"intermediate_size\": 18944,\n", " \"max_position_embeddings\": 131072,\n", " \"max_window_layers\": 28,\n", " \"model_type\": \"qwen2\",\n", " \"num_attention_heads\": 28,\n", " \"num_hidden_layers\": 28,\n", " \"num_key_value_heads\": 4,\n", " \"rms_norm_eps\": 1e-06,\n", " \"rope_theta\": 1000000.0,\n", " \"sliding_window\": 131072,\n", " \"tie_word_embeddings\": false,\n", " \"torch_dtype\": \"bfloat16\",\n", " \"transformers_version\": \"4.41.2\",\n", " \"use_cache\": true,\n", " \"use_sliding_window\": false,\n", " \"vocab_size\": 152064\n", "}\n", "\n", "[INFO|tokenization_utils_base.py:2513] 2024-07-15 17:47:05,097 >> tokenizer config file saved in /content/qwen2-7b/checkpoint-1686/tokenizer_config.json\n", "[INFO|tokenization_utils_base.py:2522] 2024-07-15 17:47:05,098 >> Special tokens file saved in /content/qwen2-7b/checkpoint-1686/special_tokens_map.json\n", "{'loss': 0.5469, 'grad_norm': 0.5723391771316528, 'learning_rate': 0.00030210098232438424, 'epoch': 4.0}\n", " 67% 2248/3372 [4:12:15<2:04:11, 6.63s/it][INFO|trainer.py:3719] 2024-07-15 18:48:45,813 >> ***** Running Evaluation *****\n", "[INFO|trainer.py:3721] 2024-07-15 18:48:45,813 >> Num examples = 500\n", "[INFO|trainer.py:3724] 2024-07-15 18:48:45,814 >> Batch size = 1\n", "\n", " 0% 0/500 [00:00> Saving model checkpoint to /content/qwen2-7b/checkpoint-2248\n", "/usr/local/lib/python3.10/dist-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. 
If you want to force a new download, use `force_download=True`.\n", " warnings.warn(\n", "[INFO|configuration_utils.py:733] 2024-07-15 18:50:59,783 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2-7B/snapshots/453ed1575b739b5b03ce3758b23befdb0967f40e/config.json\n", "[INFO|configuration_utils.py:796] 2024-07-15 18:50:59,784 >> Model config Qwen2Config {\n", " \"architectures\": [\n", " \"Qwen2ForCausalLM\"\n", " ],\n", " \"attention_dropout\": 0.0,\n", " \"bos_token_id\": 151643,\n", " \"eos_token_id\": 151643,\n", " \"hidden_act\": \"silu\",\n", " \"hidden_size\": 3584,\n", " \"initializer_range\": 0.02,\n", " \"intermediate_size\": 18944,\n", " \"max_position_embeddings\": 131072,\n", " \"max_window_layers\": 28,\n", " \"model_type\": \"qwen2\",\n", " \"num_attention_heads\": 28,\n", " \"num_hidden_layers\": 28,\n", " \"num_key_value_heads\": 4,\n", " \"rms_norm_eps\": 1e-06,\n", " \"rope_theta\": 1000000.0,\n", " \"sliding_window\": 131072,\n", " \"tie_word_embeddings\": false,\n", " \"torch_dtype\": \"bfloat16\",\n", " \"transformers_version\": \"4.41.2\",\n", " \"use_cache\": true,\n", " \"use_sliding_window\": false,\n", " \"vocab_size\": 152064\n", "}\n", "\n", "[INFO|tokenization_utils_base.py:2513] 2024-07-15 18:50:59,970 >> tokenizer config file saved in /content/qwen2-7b/checkpoint-2248/tokenizer_config.json\n", "[INFO|tokenization_utils_base.py:2522] 2024-07-15 18:50:59,970 >> Special tokens file saved in /content/qwen2-7b/checkpoint-2248/special_tokens_map.json\n", "{'loss': 0.5323, 'grad_norm': 0.26129332184791565, 'learning_rate': 8.229824704832284e-05, 'epoch': 5.0}\n", " 83% 2810/3372 [5:16:10<1:01:38, 6.58s/it][INFO|trainer.py:3719] 2024-07-15 19:52:41,540 >> ***** Running Evaluation *****\n", "[INFO|trainer.py:3721] 2024-07-15 19:52:41,541 >> Num examples = 500\n", "[INFO|trainer.py:3724] 2024-07-15 19:52:41,541 >> Batch size = 1\n", "\n", " 0% 0/500 [00:00> Saving model checkpoint to /content/qwen2-7b/checkpoint-2810\n", "/usr/local/lib/python3.10/dist-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. 
If you want to force a new download, use `force_download=True`.\n", " warnings.warn(\n", "[INFO|configuration_utils.py:733] 2024-07-15 19:54:55,909 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2-7B/snapshots/453ed1575b739b5b03ce3758b23befdb0967f40e/config.json\n", "[INFO|configuration_utils.py:796] 2024-07-15 19:54:55,910 >> Model config Qwen2Config {\n", " \"architectures\": [\n", " \"Qwen2ForCausalLM\"\n", " ],\n", " \"attention_dropout\": 0.0,\n", " \"bos_token_id\": 151643,\n", " \"eos_token_id\": 151643,\n", " \"hidden_act\": \"silu\",\n", " \"hidden_size\": 3584,\n", " \"initializer_range\": 0.02,\n", " \"intermediate_size\": 18944,\n", " \"max_position_embeddings\": 131072,\n", " \"max_window_layers\": 28,\n", " \"model_type\": \"qwen2\",\n", " \"num_attention_heads\": 28,\n", " \"num_hidden_layers\": 28,\n", " \"num_key_value_heads\": 4,\n", " \"rms_norm_eps\": 1e-06,\n", " \"rope_theta\": 1000000.0,\n", " \"sliding_window\": 131072,\n", " \"tie_word_embeddings\": false,\n", " \"torch_dtype\": \"bfloat16\",\n", " \"transformers_version\": \"4.41.2\",\n", " \"use_cache\": true,\n", " \"use_sliding_window\": false,\n", " \"vocab_size\": 152064\n", "}\n", "\n", "[INFO|tokenization_utils_base.py:2513] 2024-07-15 19:54:56,097 >> tokenizer config file saved in /content/qwen2-7b/checkpoint-2810/tokenizer_config.json\n", "[INFO|tokenization_utils_base.py:2522] 2024-07-15 19:54:56,098 >> Special tokens file saved in /content/qwen2-7b/checkpoint-2810/special_tokens_map.json\n", "{'loss': 0.5229, 'grad_norm': 0.18251831829547882, 'learning_rate': 0.0, 'epoch': 5.99}\n", "100% 3372/3372 [6:20:08<00:00, 6.71s/it][INFO|trainer.py:3719] 2024-07-15 20:56:39,578 >> ***** Running Evaluation *****\n", "[INFO|trainer.py:3721] 2024-07-15 20:56:39,578 >> Num examples = 500\n", "[INFO|trainer.py:3724] 2024-07-15 20:56:39,578 >> Batch size = 1\n", "\n", " 0% 0/500 [00:00> Saving model checkpoint to /content/qwen2-7b/checkpoint-3372\n", "/usr/local/lib/python3.10/dist-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. 
If you want to force a new download, use `force_download=True`.\n", " warnings.warn(\n", "[INFO|configuration_utils.py:733] 2024-07-15 20:58:53,452 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2-7B/snapshots/453ed1575b739b5b03ce3758b23befdb0967f40e/config.json\n", "[INFO|configuration_utils.py:796] 2024-07-15 20:58:53,453 >> Model config Qwen2Config {\n", " \"architectures\": [\n", " \"Qwen2ForCausalLM\"\n", " ],\n", " \"attention_dropout\": 0.0,\n", " \"bos_token_id\": 151643,\n", " \"eos_token_id\": 151643,\n", " \"hidden_act\": \"silu\",\n", " \"hidden_size\": 3584,\n", " \"initializer_range\": 0.02,\n", " \"intermediate_size\": 18944,\n", " \"max_position_embeddings\": 131072,\n", " \"max_window_layers\": 28,\n", " \"model_type\": \"qwen2\",\n", " \"num_attention_heads\": 28,\n", " \"num_hidden_layers\": 28,\n", " \"num_key_value_heads\": 4,\n", " \"rms_norm_eps\": 1e-06,\n", " \"rope_theta\": 1000000.0,\n", " \"sliding_window\": 131072,\n", " \"tie_word_embeddings\": false,\n", " \"torch_dtype\": \"bfloat16\",\n", " \"transformers_version\": \"4.41.2\",\n", " \"use_cache\": true,\n", " \"use_sliding_window\": false,\n", " \"vocab_size\": 152064\n", "}\n", "\n", "[INFO|tokenization_utils_base.py:2513] 2024-07-15 20:58:53,632 >> tokenizer config file saved in /content/qwen2-7b/checkpoint-3372/tokenizer_config.json\n", "[INFO|tokenization_utils_base.py:2522] 2024-07-15 20:58:53,633 >> Special tokens file saved in /content/qwen2-7b/checkpoint-3372/special_tokens_map.json\n", "[INFO|trainer.py:2329] 2024-07-15 20:58:54,110 >> \n", "\n", "Training completed. Do not forget to share your model on huggingface.co/models =)\n", "\n", "\n", "{'train_runtime': 22945.1289, 'train_samples_per_second': 1.177, 'train_steps_per_second': 0.147, 'train_loss': 0.8244021631786125, 'epoch': 5.99}\n", "100% 3372/3372 [6:22:23<00:00, 6.80s/it]\n", "[INFO|trainer.py:3410] 2024-07-15 20:58:54,115 >> Saving model checkpoint to /content/qwen2-7b\n", "/usr/local/lib/python3.10/dist-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. 
If you want to force a new download, use `force_download=True`.\n", " warnings.warn(\n", "[INFO|configuration_utils.py:733] 2024-07-15 20:58:54,406 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2-7B/snapshots/453ed1575b739b5b03ce3758b23befdb0967f40e/config.json\n", "[INFO|configuration_utils.py:796] 2024-07-15 20:58:54,407 >> Model config Qwen2Config {\n", " \"architectures\": [\n", " \"Qwen2ForCausalLM\"\n", " ],\n", " \"attention_dropout\": 0.0,\n", " \"bos_token_id\": 151643,\n", " \"eos_token_id\": 151643,\n", " \"hidden_act\": \"silu\",\n", " \"hidden_size\": 3584,\n", " \"initializer_range\": 0.02,\n", " \"intermediate_size\": 18944,\n", " \"max_position_embeddings\": 131072,\n", " \"max_window_layers\": 28,\n", " \"model_type\": \"qwen2\",\n", " \"num_attention_heads\": 28,\n", " \"num_hidden_layers\": 28,\n", " \"num_key_value_heads\": 4,\n", " \"rms_norm_eps\": 1e-06,\n", " \"rope_theta\": 1000000.0,\n", " \"sliding_window\": 131072,\n", " \"tie_word_embeddings\": false,\n", " \"torch_dtype\": \"bfloat16\",\n", " \"transformers_version\": \"4.41.2\",\n", " \"use_cache\": true,\n", " \"use_sliding_window\": false,\n", " \"vocab_size\": 152064\n", "}\n", "\n", "[INFO|tokenization_utils_base.py:2513] 2024-07-15 20:58:54,600 >> tokenizer config file saved in /content/qwen2-7b/tokenizer_config.json\n", "[INFO|tokenization_utils_base.py:2522] 2024-07-15 20:58:54,600 >> Special tokens file saved in /content/qwen2-7b/special_tokens_map.json\n", "***** train metrics *****\n", " epoch = 5.9947\n", " total_flos = 396658758GF\n", " train_loss = 0.8244\n", " train_runtime = 6:22:25.12\n", " train_samples_per_second = 1.177\n", " train_steps_per_second = 0.147\n", "Figure saved at: /content/qwen2-7b/training_loss.png\n", "Figure saved at: /content/qwen2-7b/training_eval_loss.png\n", "Figure saved at: /content/qwen2-7b/training_eval_accuracy.png\n", "[INFO|trainer.py:3719] 2024-07-15 20:58:55,263 >> ***** Running Evaluation *****\n", "[INFO|trainer.py:3721] 2024-07-15 20:58:55,264 >> Num examples = 500\n", "[INFO|trainer.py:3724] 2024-07-15 20:58:55,264 >> Batch size = 1\n", "100% 500/500 [02:13<00:00, 3.74it/s]\n", "***** eval metrics *****\n", " epoch = 5.9947\n", " eval_accuracy = 0.7747\n", " eval_loss = 0.513\n", " eval_runtime = 0:02:13.97\n", " eval_samples_per_second = 3.732\n", " eval_steps_per_second = 3.732\n", "[INFO|modelcard.py:450] 2024-07-15 21:01:09,246 >> Dropping the following result as it does not have all the necessary fields:\n", "{'task': {'name': 'Causal Language Modeling', 'type': 'text-generation'}, 'metrics': [{'name': 'Accuracy', 'type': 'accuracy', 'value': 0.7746666666666665}]}\n", "\u001b[34m\u001b[1mwandb\u001b[0m: \n", "\u001b[34m\u001b[1mwandb\u001b[0m: Run history:\n", "\u001b[34m\u001b[1mwandb\u001b[0m: eval/accuracy ▁▅█████\n", "\u001b[34m\u001b[1mwandb\u001b[0m: eval/loss █▄▁▁▁▁▁\n", "\u001b[34m\u001b[1mwandb\u001b[0m: eval/runtime ▁▇█▇█▇█\n", "\u001b[34m\u001b[1mwandb\u001b[0m: eval/samples_per_second █▂▁▂▁▂▁\n", "\u001b[34m\u001b[1mwandb\u001b[0m: eval/steps_per_second █▂▁▂▁▂▁\n", "\u001b[34m\u001b[1mwandb\u001b[0m: train/epoch ▁▁▂▂▄▄▅▅▇▇████\n", "\u001b[34m\u001b[1mwandb\u001b[0m: train/global_step ▁▁▂▂▄▄▅▅▇▇████\n", "\u001b[34m\u001b[1mwandb\u001b[0m: train/grad_norm █▂▁▃▂▁\n", "\u001b[34m\u001b[1mwandb\u001b[0m: train/learning_rate █▇▅▃▂▁\n", "\u001b[34m\u001b[1mwandb\u001b[0m: train/loss █▃▁▁▁▁\n", "\u001b[34m\u001b[1mwandb\u001b[0m: \n", "\u001b[34m\u001b[1mwandb\u001b[0m: Run summary:\n", 
"\u001b[34m\u001b[1mwandb\u001b[0m: eval/accuracy 0.77467\n", "\u001b[34m\u001b[1mwandb\u001b[0m: eval/loss 0.51301\n", "\u001b[34m\u001b[1mwandb\u001b[0m: eval/runtime 133.9784\n", "\u001b[34m\u001b[1mwandb\u001b[0m: eval/samples_per_second 3.732\n", "\u001b[34m\u001b[1mwandb\u001b[0m: eval/steps_per_second 3.732\n", "\u001b[34m\u001b[1mwandb\u001b[0m: total_flos 4.259090989881262e+17\n", "\u001b[34m\u001b[1mwandb\u001b[0m: train/epoch 5.99467\n", "\u001b[34m\u001b[1mwandb\u001b[0m: train/global_step 3372\n", "\u001b[34m\u001b[1mwandb\u001b[0m: train/grad_norm 0.18252\n", "\u001b[34m\u001b[1mwandb\u001b[0m: train/learning_rate 0.0\n", "\u001b[34m\u001b[1mwandb\u001b[0m: train/loss 0.5229\n", "\u001b[34m\u001b[1mwandb\u001b[0m: train_loss 0.8244\n", "\u001b[34m\u001b[1mwandb\u001b[0m: train_runtime 22945.1289\n", "\u001b[34m\u001b[1mwandb\u001b[0m: train_samples_per_second 1.177\n", "\u001b[34m\u001b[1mwandb\u001b[0m: train_steps_per_second 0.147\n", "\u001b[34m\u001b[1mwandb\u001b[0m: \n", "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run \u001b[33m/content/qwen2-7b\u001b[0m at: \u001b[34m\u001b[4mhttps://wandb.ai/inflaton-ai/huggingface/runs/ancw8jgs\u001b[0m\n", "\u001b[34m\u001b[1mwandb\u001b[0m: ⭐️ View project at: \u001b[34m\u001b[4mhttps://wandb.ai/inflaton-ai/huggingface\u001b[0m\n", "\u001b[34m\u001b[1mwandb\u001b[0m: Synced 5 W&B file(s), 0 media file(s), 0 artifact file(s) and 0 other file(s)\n", "\u001b[34m\u001b[1mwandb\u001b[0m: Find logs at: \u001b[35m\u001b[1m./wandb/run-20240715_143630-ancw8jgs/logs\u001b[0m\n", "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m The new W&B backend becomes opt-out in version 0.18.0; try it out with `wandb.require(\"core\")`! See https://wandb.me/wandb-core for more information.\n" ] } ], "source": [ "!llamafactory-cli train train_qwen2_7b.json" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "background_save": true }, "id": "PPHT4JDoIvGk", "outputId": "868b542e-3cf3-4f96-b3ff-944d48f66e9e" }, "outputs": [ { "data": { "application/vnd.google.colaboratory.intrinsic+json": { "type": "string" }, "text/plain": [ "'/content/drive/MyDrive/runs/qwen2-7b'" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import shutil\n", "shutil.move(\"/content/qwen2-7b\", \"/content/drive/MyDrive/runs\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 35 }, "id": "KQolpdAGpUqx", "outputId": "23443a95-ec97-4633-87d6-18d2f27d979e" }, "outputs": [ { "data": { "application/vnd.google.colaboratory.intrinsic+json": { "type": "string" }, "text/plain": [ "'/content/qwen2-7b'" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import shutil\n", "shutil.move(\"/content/drive/MyDrive/runs\", \"/content/qwen2-7b\", )" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "mzmNMevzVer3" }, "outputs": [], "source": [ "def evaluate_model_all_epochs(model_name, adapter_path_base, num_train_epochs, start_epoch=0, load_in_4bit=True, num_of_entries=-1):\n", " os.environ[\"MODEL_NAME\"] = model_name\n", " os.environ[\"LOAD_IN_4BIT\"] = \"true\" if load_in_4bit else \"false\"\n", " for i in range(start_epoch, num_train_epochs + 1):\n", " print(f\"Epoch {i}\")\n", " if i == 0:\n", " os.unsetenv(\"ADAPTER_NAME_OR_PATH\")\n", " else:\n", " adapter_path = f\"{adapter_path_base}/checkpoint-{562 * i}\"\n", " os.environ[\"ADAPTER_NAME_OR_PATH\"] = adapter_path\n", "\n", 
" !python llm_toolkit/eval_logical_reasoning.py {num_of_entries}" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "3THuVusvVtt8", "outputId": "2095b621-3aff-4215-f61d-0711acd42e63" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Epoch 0\n", "loading env vars from: /content/drive/.shortcut-targets-by-id/1E09lTnfbsjtTgQg65dQ3y9D2R6l8waxR/logical-reasoning/.env\n", "Adding /content/drive/.shortcut-targets-by-id/1E09lTnfbsjtTgQg65dQ3y9D2R6l8waxR/logical-reasoning to sys.path\n", "2024-07-16 03:59:05.588323: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n", "2024-07-16 03:59:05.639368: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n", "2024-07-16 03:59:05.639412: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n", "2024-07-16 03:59:05.640960: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n", "2024-07-16 03:59:05.648585: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", "To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", "2024-07-16 03:59:06.864846: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n", "loading /content/drive/.shortcut-targets-by-id/1E09lTnfbsjtTgQg65dQ3y9D2R6l8waxR/logical-reasoning/llm_toolkit/logical_reasoning_utils.py\n", "Qwen/Qwen2-7B None False datasets/mgtv results/mgtv-results_02_qwen2_7b_colab.csv\n", "(1) GPU = NVIDIA L4. Max memory = 22.168 GB.\n", "0.0 GB of memory reserved.\n", "loading model: Qwen/Qwen2-7B\n", "tokenizer_config.json: 100% 1.29k/1.29k [00:00<00:00, 8.65MB/s]\n", "vocab.json: 100% 2.78M/2.78M [00:00<00:00, 8.33MB/s]\n", "merges.txt: 100% 1.67M/1.67M [00:00<00:00, 6.20MB/s]\n", "tokenizer.json: 100% 7.03M/7.03M [00:00<00:00, 15.8MB/s]\n", "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n", "config.json: 100% 664/664 [00:00<00:00, 4.79MB/s]\n", "model.safetensors.index.json: 100% 27.8k/27.8k [00:00<00:00, 74.4MB/s]\n", "Downloading shards: 0% 0/4 [00:00system\n", "You are an expert in logical reasoning.<|im_end|>\n", "<|im_start|>user\n", "你是一个逻辑游戏的主持人。游戏规则如下:\n", "\n", "1. 参与者会得到一个谜题。\n", "2. 参与者可以通过提问来获取线索,尝试解开谜题。\n", "3. 对于每个问题,主持人将根据实际情况回答以下五个选项之一:是、不是、不重要、回答正确、问法错误。\n", "4. 回答中不能添加任何其它信息,也不能省略选项中的任何一个字。例如,不可以把“不是”省略成“不”。\n", "5. 
参与者需要根据回答来推理,并最终找出谜题的正确答案。\n", "\n", "请严格按照这些规则回答参与者提出的问题。\n", "\n", "谜题: 在远离城市喧嚣的海边小屋,一天清晨,邻居发现甄加索僵卧在沙滩上,已无生命迹象。现场没有发现任何打斗的迹象。请问甄加索的死因是什么?\n", "\n", "实际情况: 甄加索是一位热爱自然的画家,他每年都会来到这个海边小屋寻找灵感。在他生命的最后几天,他一直在创作一幅描绘海洋生物的画作。在画即将完成的前一天晚上,他骑着自行车外出,打算在海边观赏夜景。然而,他在沙滩上意外发现了一只搁浅的海豚,为了救助这只海豚,他耗费了极大的体力,最终成功将其送回海中。筋疲力尽的甄加索在沙滩上睡着了,由于他患有严重的心脏病,却未告知旁人,在寒冷的海风中,他的心脏停止了跳动。因此,警方在现场只发现了车轮痕迹和未完成的画作,而没有发现任何他杀的迹象。\n", "\n", "参与者提出的问题: 甄加索是自杀吗\n", "<|im_end|>\n", "<|im_start|>assistant\n", "不是<|endoftext|>\n", "--------------------------------------------------\n", "prompt: <|im_start|>system\n", "You are an expert in logical reasoning.<|im_end|>\n", "<|im_start|>user\n", "你是一个逻辑游戏的主持人。游戏规则如下:\n", "\n", "1. 参与者会得到一个谜题。\n", "2. 参与者可以通过提问来获取线索,尝试解开谜题。\n", "3. 对于每个问题,主持人将根据实际情况回答以下五个选项之一:是、不是、不重要、回答正确、问法错误。\n", "4. 回答中不能添加任何其它信息,也不能省略选项中的任何一个字。例如,不可以把“不是”省略成“不”。\n", "5. 参与者需要根据回答来推理,并最终找出谜题的正确答案。\n", "\n", "请严格按照这些规则回答参与者提出的问题。\n", "\n", "谜题: 在远离城市喧嚣的海边小屋,一天清晨,邻居发现甄加索僵卧在沙滩上,已无生命迹象。现场没有发现任何打斗的迹象。请问甄加索的死因是什么?\n", "\n", "实际情况: 甄加索是一位热爱自然的画家,他每年都会来到这个海边小屋寻找灵感。在他生命的最后几天,他一直在创作一幅描绘海洋生物的画作。在画即将完成的前一天晚上,他骑着自行车外出,打算在海边观赏夜景。然而,他在沙滩上意外发现了一只搁浅的海豚,为了救助这只海豚,他耗费了极大的体力,最终成功将其送回海中。筋疲力尽的甄加索在沙滩上睡着了,由于他患有严重的心脏病,却未告知旁人,在寒冷的海风中,他的心脏停止了跳动。因此,警方在现场只发现了车轮痕迹和未完成的画作,而没有发现任何他杀的迹象。\n", "\n", "参与者提出的问题: 甄加索是自杀吗\n", "<|im_end|>\n", "<|im_start|>assistant\n", "\n", "--------------------------------------------------\n", "text: 死者受伤了吗\n", "--------------------------------------------------\n", "label: 不是\n", "--------------------------------------------------\n", "answer: nan\n", "--------------------------------------------------\n", "title: 甄庄哭声\n", "--------------------------------------------------\n", "puzzle: 在一个安静的夜晚,小村庄的湖边突然传来了阵阵哭泣声。第二天早晨,村长甄锐发现湖边的石头上放着一顶破旧的帽子,但没有人知道这顶帽子是从哪里来的,哭泣声又是为何。请还原故事真相。\n", "--------------------------------------------------\n", "truth: 原来,这顶破旧的帽子属于一个小男孩,他小时候与爷爷在湖边生活。爷爷教他钓鱼、游泳,还告诉他湖中的海龟是他们的朋友。后来,小男孩随父母去了城市生活,但每年夏天都会回到村子探望爷爷。然而,去年夏天,爷爷因病去世,小男孩伤心欲绝。今年夏天,他回到村子,来到湖边,想起和爷爷的美好回忆,忍不住哭泣。他将爷爷的帽子放在湖边的石头上,希望能让爷爷的在天之灵得到安慰。那晚的哭泣声正是小男孩在祭莫他亲爱的爷爷。\n", "--------------------------------------------------\n", "train_text: <|im_start|>system\n", "You are an expert in logical reasoning.<|im_end|>\n", "<|im_start|>user\n", "你是一个逻辑游戏的主持人。游戏规则如下:\n", "\n", "1. 参与者会得到一个谜题。\n", "2. 参与者可以通过提问来获取线索,尝试解开谜题。\n", "3. 对于每个问题,主持人将根据实际情况回答以下五个选项之一:是、不是、不重要、回答正确、问法错误。\n", "4. 回答中不能添加任何其它信息,也不能省略选项中的任何一个字。例如,不可以把“不是”省略成“不”。\n", "5. 参与者需要根据回答来推理,并最终找出谜题的正确答案。\n", "\n", "请严格按照这些规则回答参与者提出的问题。\n", "\n", "谜题: 在一个安静的夜晚,小村庄的湖边突然传来了阵阵哭泣声。第二天早晨,村长甄锐发现湖边的石头上放着一顶破旧的帽子,但没有人知道这顶帽子是从哪里来的,哭泣声又是为何。请还原故事真相。\n", "\n", "实际情况: 原来,这顶破旧的帽子属于一个小男孩,他小时候与爷爷在湖边生活。爷爷教他钓鱼、游泳,还告诉他湖中的海龟是他们的朋友。后来,小男孩随父母去了城市生活,但每年夏天都会回到村子探望爷爷。然而,去年夏天,爷爷因病去世,小男孩伤心欲绝。今年夏天,他回到村子,来到湖边,想起和爷爷的美好回忆,忍不住哭泣。他将爷爷的帽子放在湖边的石头上,希望能让爷爷的在天之灵得到安慰。那晚的哭泣声正是小男孩在祭莫他亲爱的爷爷。\n", "\n", "参与者提出的问题: 死者受伤了吗\n", "<|im_end|>\n", "<|im_start|>assistant\n", "不是<|endoftext|>\n", "--------------------------------------------------\n", "prompt: <|im_start|>system\n", "You are an expert in logical reasoning.<|im_end|>\n", "<|im_start|>user\n", "你是一个逻辑游戏的主持人。游戏规则如下:\n", "\n", "1. 参与者会得到一个谜题。\n", "2. 参与者可以通过提问来获取线索,尝试解开谜题。\n", "3. 对于每个问题,主持人将根据实际情况回答以下五个选项之一:是、不是、不重要、回答正确、问法错误。\n", "4. 回答中不能添加任何其它信息,也不能省略选项中的任何一个字。例如,不可以把“不是”省略成“不”。\n", "5. 
参与者需要根据回答来推理,并最终找出谜题的正确答案。\n", "\n", "请严格按照这些规则回答参与者提出的问题。\n", "\n", "谜题: 在一个安静的夜晚,小村庄的湖边突然传来了阵阵哭泣声。第二天早晨,村长甄锐发现湖边的石头上放着一顶破旧的帽子,但没有人知道这顶帽子是从哪里来的,哭泣声又是为何。请还原故事真相。\n", "\n", "实际情况: 原来,这顶破旧的帽子属于一个小男孩,他小时候与爷爷在湖边生活。爷爷教他钓鱼、游泳,还告诉他湖中的海龟是他们的朋友。后来,小男孩随父母去了城市生活,但每年夏天都会回到村子探望爷爷。然而,去年夏天,爷爷因病去世,小男孩伤心欲绝。今年夏天,他回到村子,来到湖边,想起和爷爷的美好回忆,忍不住哭泣。他将爷爷的帽子放在湖边的石头上,希望能让爷爷的在天之灵得到安慰。那晚的哭泣声正是小男孩在祭莫他亲爱的爷爷。\n", "\n", "参与者提出的问题: 死者受伤了吗\n", "<|im_end|>\n", "<|im_start|>assistant\n", "\n", "Evaluating model: Qwen/Qwen2-7B\n", " 0% 0/3000 [00:00\n", " predictions = eval_model(model, tokenizer, datasets[\"test\"])\n", " File \"/content/drive/.shortcut-targets-by-id/1E09lTnfbsjtTgQg65dQ3y9D2R6l8waxR/logical-reasoning/llm_toolkit/logical_reasoning_utils.py\", line 215, in eval_model\n", " outputs = model.generate(**inputs, max_new_tokens=4096, use_cache=False)\n", " File \"/usr/local/lib/python3.10/dist-packages/torch/utils/_contextlib.py\", line 115, in decorate_context\n", " return func(*args, **kwargs)\n", " File \"/usr/local/lib/python3.10/dist-packages/transformers/generation/utils.py\", line 1758, in generate\n", " result = self._sample(\n", " File \"/usr/local/lib/python3.10/dist-packages/transformers/generation/utils.py\", line 2397, in _sample\n", " outputs = self(\n", " File \"/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py\", line 1532, in _wrapped_call_impl\n", " return self._call_impl(*args, **kwargs)\n", " File \"/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py\", line 1541, in _call_impl\n", " return forward_call(*args, **kwargs)\n", " File \"/usr/local/lib/python3.10/dist-packages/transformers/models/qwen2/modeling_qwen2.py\", line 1163, in forward\n", " logits = logits.float()\n", "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 2.00 GiB. GPU \n", "Epoch 1\n", "loading env vars from: /content/drive/.shortcut-targets-by-id/1E09lTnfbsjtTgQg65dQ3y9D2R6l8waxR/logical-reasoning/.env\n", "Adding /content/drive/.shortcut-targets-by-id/1E09lTnfbsjtTgQg65dQ3y9D2R6l8waxR/logical-reasoning to sys.path\n", "2024-07-16 04:36:42.030763: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. 
To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n", "2024-07-16 04:36:42.082994: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n", "2024-07-16 04:36:42.083052: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n", "2024-07-16 04:36:42.084468: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n", "2024-07-16 04:36:42.092383: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", "To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", "2024-07-16 04:36:43.353969: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n", "loading /content/drive/.shortcut-targets-by-id/1E09lTnfbsjtTgQg65dQ3y9D2R6l8waxR/logical-reasoning/llm_toolkit/logical_reasoning_utils.py\n", "Qwen/Qwen2-7B /content/qwen2-7b/qwen2-7b/checkpoint-562 False datasets/mgtv results/mgtv-results_02_qwen2_7b_colab.csv\n", "(1) GPU = NVIDIA L4. Max memory = 22.168 GB.\n", "0.0 GB of memory reserved.\n", "loading model: Qwen/Qwen2-7B\n", "[INFO|tokenization_utils_base.py:2108] 2024-07-16 04:36:49,648 >> loading file vocab.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2-7B/snapshots/453ed1575b739b5b03ce3758b23befdb0967f40e/vocab.json\n", "[INFO|tokenization_utils_base.py:2108] 2024-07-16 04:36:49,648 >> loading file merges.txt from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2-7B/snapshots/453ed1575b739b5b03ce3758b23befdb0967f40e/merges.txt\n", "[INFO|tokenization_utils_base.py:2108] 2024-07-16 04:36:49,648 >> loading file tokenizer.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2-7B/snapshots/453ed1575b739b5b03ce3758b23befdb0967f40e/tokenizer.json\n", "[INFO|tokenization_utils_base.py:2108] 2024-07-16 04:36:49,648 >> loading file added_tokens.json from cache at None\n", "[INFO|tokenization_utils_base.py:2108] 2024-07-16 04:36:49,648 >> loading file special_tokens_map.json from cache at None\n", "[INFO|tokenization_utils_base.py:2108] 2024-07-16 04:36:49,648 >> loading file tokenizer_config.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2-7B/snapshots/453ed1575b739b5b03ce3758b23befdb0967f40e/tokenizer_config.json\n", "[WARNING|logging.py:314] 2024-07-16 04:36:49,914 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n", "07/16/2024 04:36:49 - INFO - llamafactory.data.template - Replace eos token: <|im_end|>\n", "07/16/2024 04:36:49 - INFO - llamafactory.data.template - Add <|im_start|> to stop words.\n", "/usr/local/lib/python3.10/dist-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. 
If you want to force a new download, use `force_download=True`.\n", " warnings.warn(\n", "[INFO|configuration_utils.py:733] 2024-07-16 04:36:50,018 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2-7B/snapshots/453ed1575b739b5b03ce3758b23befdb0967f40e/config.json\n", "[INFO|configuration_utils.py:796] 2024-07-16 04:36:50,019 >> Model config Qwen2Config {\n", " \"_name_or_path\": \"Qwen/Qwen2-7B\",\n", " \"architectures\": [\n", " \"Qwen2ForCausalLM\"\n", " ],\n", " \"attention_dropout\": 0.0,\n", " \"bos_token_id\": 151643,\n", " \"eos_token_id\": 151643,\n", " \"hidden_act\": \"silu\",\n", " \"hidden_size\": 3584,\n", " \"initializer_range\": 0.02,\n", " \"intermediate_size\": 18944,\n", " \"max_position_embeddings\": 131072,\n", " \"max_window_layers\": 28,\n", " \"model_type\": \"qwen2\",\n", " \"num_attention_heads\": 28,\n", " \"num_hidden_layers\": 28,\n", " \"num_key_value_heads\": 4,\n", " \"rms_norm_eps\": 1e-06,\n", " \"rope_theta\": 1000000.0,\n", " \"sliding_window\": 131072,\n", " \"tie_word_embeddings\": false,\n", " \"torch_dtype\": \"bfloat16\",\n", " \"transformers_version\": \"4.41.2\",\n", " \"use_cache\": true,\n", " \"use_sliding_window\": false,\n", " \"vocab_size\": 152064\n", "}\n", "\n", "07/16/2024 04:36:50 - INFO - llamafactory.model.patcher - Using KV cache for faster generation.\n", "[INFO|modeling_utils.py:3474] 2024-07-16 04:36:50,051 >> loading weights file model.safetensors from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2-7B/snapshots/453ed1575b739b5b03ce3758b23befdb0967f40e/model.safetensors.index.json\n", "[INFO|modeling_utils.py:1519] 2024-07-16 04:36:50,054 >> Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16.\n", "[INFO|configuration_utils.py:962] 2024-07-16 04:36:50,055 >> Generate config GenerationConfig {\n", " \"bos_token_id\": 151643,\n", " \"eos_token_id\": 151643\n", "}\n", "\n", "Loading checkpoint shards: 100% 4/4 [00:06<00:00, 1.66s/it]\n", "[INFO|modeling_utils.py:4280] 2024-07-16 04:36:59,526 >> All model checkpoint weights were used when initializing Qwen2ForCausalLM.\n", "\n", "[INFO|modeling_utils.py:4288] 2024-07-16 04:36:59,526 >> All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at Qwen/Qwen2-7B.\n", "If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training.\n", "[INFO|configuration_utils.py:917] 2024-07-16 04:36:59,673 >> loading configuration file generation_config.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2-7B/snapshots/453ed1575b739b5b03ce3758b23befdb0967f40e/generation_config.json\n", "[INFO|configuration_utils.py:962] 2024-07-16 04:36:59,673 >> Generate config GenerationConfig {\n", " \"bos_token_id\": 151643,\n", " \"eos_token_id\": 151643,\n", " \"max_new_tokens\": 2048\n", "}\n", "\n", "07/16/2024 04:37:00 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.\n", "07/16/2024 04:37:01 - INFO - llamafactory.model.adapter - Merged 1 adapter(s).\n", "07/16/2024 04:37:01 - INFO - llamafactory.model.adapter - Loaded adapter(s): /content/qwen2-7b/qwen2-7b/checkpoint-562\n", "07/16/2024 04:37:01 - INFO - llamafactory.model.loader - all params: 7,615,616,512\n", "(2) GPU = NVIDIA L4. 
Max memory = 22.168 GB.\n", "16.521 GB of memory reserved.\n", "loading train/test data files\n", "Map: 100% 25000/25000 [00:01<00:00, 22266.31 examples/s]\n", "Map: 100% 3000/3000 [00:00<00:00, 22229.64 examples/s]\n", "DatasetDict({\n", " train: Dataset({\n", " features: ['text', 'label', 'answer', 'title', 'puzzle', 'truth', 'train_text', 'prompt'],\n", " num_rows: 25000\n", " })\n", " test: Dataset({\n", " features: ['text', 'label', 'answer', 'title', 'puzzle', 'truth', 'train_text', 'prompt'],\n", " num_rows: 3000\n", " })\n", "})\n", "--------------------------------------------------\n", "text: 甄加索是自杀吗\n", "--------------------------------------------------\n", "label: 不是\n", "--------------------------------------------------\n", "answer: nan\n", "--------------------------------------------------\n", "title: 海岸之谜\n", "--------------------------------------------------\n", "puzzle: 在远离城市喧嚣的海边小屋,一天清晨,邻居发现甄加索僵卧在沙滩上,已无生命迹象。现场没有发现任何打斗的迹象。请问甄加索的死因是什么?\n", "--------------------------------------------------\n", "truth: 甄加索是一位热爱自然的画家,他每年都会来到这个海边小屋寻找灵感。在他生命的最后几天,他一直在创作一幅描绘海洋生物的画作。在画即将完成的前一天晚上,他骑着自行车外出,打算在海边观赏夜景。然而,他在沙滩上意外发现了一只搁浅的海豚,为了救助这只海豚,他耗费了极大的体力,最终成功将其送回海中。筋疲力尽的甄加索在沙滩上睡着了,由于他患有严重的心脏病,却未告知旁人,在寒冷的海风中,他的心脏停止了跳动。因此,警方在现场只发现了车轮痕迹和未完成的画作,而没有发现任何他杀的迹象。\n", "--------------------------------------------------\n", "train_text: <|im_start|>system\n", "You are an expert in logical reasoning.<|im_end|>\n", "<|im_start|>user\n", "你是一个逻辑游戏的主持人。游戏规则如下:\n", "\n", "1. 参与者会得到一个谜题。\n", "2. 参与者可以通过提问来获取线索,尝试解开谜题。\n", "3. 对于每个问题,主持人将根据实际情况回答以下五个选项之一:是、不是、不重要、回答正确、问法错误。\n", "4. 回答中不能添加任何其它信息,也不能省略选项中的任何一个字。例如,不可以把“不是”省略成“不”。\n", "5. 参与者需要根据回答来推理,并最终找出谜题的正确答案。\n", "\n", "请严格按照这些规则回答参与者提出的问题。\n", "\n", "谜题: 在远离城市喧嚣的海边小屋,一天清晨,邻居发现甄加索僵卧在沙滩上,已无生命迹象。现场没有发现任何打斗的迹象。请问甄加索的死因是什么?\n", "\n", "实际情况: 甄加索是一位热爱自然的画家,他每年都会来到这个海边小屋寻找灵感。在他生命的最后几天,他一直在创作一幅描绘海洋生物的画作。在画即将完成的前一天晚上,他骑着自行车外出,打算在海边观赏夜景。然而,他在沙滩上意外发现了一只搁浅的海豚,为了救助这只海豚,他耗费了极大的体力,最终成功将其送回海中。筋疲力尽的甄加索在沙滩上睡着了,由于他患有严重的心脏病,却未告知旁人,在寒冷的海风中,他的心脏停止了跳动。因此,警方在现场只发现了车轮痕迹和未完成的画作,而没有发现任何他杀的迹象。\n", "\n", "参与者提出的问题: 甄加索是自杀吗\n", "<|im_end|>\n", "<|im_start|>assistant\n", "不是<|im_end|>\n", "--------------------------------------------------\n", "prompt: <|im_start|>system\n", "You are an expert in logical reasoning.<|im_end|>\n", "<|im_start|>user\n", "你是一个逻辑游戏的主持人。游戏规则如下:\n", "\n", "1. 参与者会得到一个谜题。\n", "2. 参与者可以通过提问来获取线索,尝试解开谜题。\n", "3. 对于每个问题,主持人将根据实际情况回答以下五个选项之一:是、不是、不重要、回答正确、问法错误。\n", "4. 回答中不能添加任何其它信息,也不能省略选项中的任何一个字。例如,不可以把“不是”省略成“不”。\n", "5. 
参与者需要根据回答来推理,并最终找出谜题的正确答案。\n", "\n", "请严格按照这些规则回答参与者提出的问题。\n", "\n", "谜题: 在远离城市喧嚣的海边小屋,一天清晨,邻居发现甄加索僵卧在沙滩上,已无生命迹象。现场没有发现任何打斗的迹象。请问甄加索的死因是什么?\n", "\n", "实际情况: 甄加索是一位热爱自然的画家,他每年都会来到这个海边小屋寻找灵感。在他生命的最后几天,他一直在创作一幅描绘海洋生物的画作。在画即将完成的前一天晚上,他骑着自行车外出,打算在海边观赏夜景。然而,他在沙滩上意外发现了一只搁浅的海豚,为了救助这只海豚,他耗费了极大的体力,最终成功将其送回海中。筋疲力尽的甄加索在沙滩上睡着了,由于他患有严重的心脏病,却未告知旁人,在寒冷的海风中,他的心脏停止了跳动。因此,警方在现场只发现了车轮痕迹和未完成的画作,而没有发现任何他杀的迹象。\n", "\n", "参与者提出的问题: 甄加索是自杀吗\n", "<|im_end|>\n", "<|im_start|>assistant\n", "\n", "--------------------------------------------------\n", "text: 死者受伤了吗\n", "--------------------------------------------------\n", "label: 不是\n", "--------------------------------------------------\n", "answer: nan\n", "--------------------------------------------------\n", "title: 甄庄哭声\n", "--------------------------------------------------\n", "puzzle: 在一个安静的夜晚,小村庄的湖边突然传来了阵阵哭泣声。第二天早晨,村长甄锐发现湖边的石头上放着一顶破旧的帽子,但没有人知道这顶帽子是从哪里来的,哭泣声又是为何。请还原故事真相。\n", "--------------------------------------------------\n", "truth: 原来,这顶破旧的帽子属于一个小男孩,他小时候与爷爷在湖边生活。爷爷教他钓鱼、游泳,还告诉他湖中的海龟是他们的朋友。后来,小男孩随父母去了城市生活,但每年夏天都会回到村子探望爷爷。然而,去年夏天,爷爷因病去世,小男孩伤心欲绝。今年夏天,他回到村子,来到湖边,想起和爷爷的美好回忆,忍不住哭泣。他将爷爷的帽子放在湖边的石头上,希望能让爷爷的在天之灵得到安慰。那晚的哭泣声正是小男孩在祭莫他亲爱的爷爷。\n", "--------------------------------------------------\n", "train_text: <|im_start|>system\n", "You are an expert in logical reasoning.<|im_end|>\n", "<|im_start|>user\n", "你是一个逻辑游戏的主持人。游戏规则如下:\n", "\n", "1. 参与者会得到一个谜题。\n", "2. 参与者可以通过提问来获取线索,尝试解开谜题。\n", "3. 对于每个问题,主持人将根据实际情况回答以下五个选项之一:是、不是、不重要、回答正确、问法错误。\n", "4. 回答中不能添加任何其它信息,也不能省略选项中的任何一个字。例如,不可以把“不是”省略成“不”。\n", "5. 参与者需要根据回答来推理,并最终找出谜题的正确答案。\n", "\n", "请严格按照这些规则回答参与者提出的问题。\n", "\n", "谜题: 在一个安静的夜晚,小村庄的湖边突然传来了阵阵哭泣声。第二天早晨,村长甄锐发现湖边的石头上放着一顶破旧的帽子,但没有人知道这顶帽子是从哪里来的,哭泣声又是为何。请还原故事真相。\n", "\n", "实际情况: 原来,这顶破旧的帽子属于一个小男孩,他小时候与爷爷在湖边生活。爷爷教他钓鱼、游泳,还告诉他湖中的海龟是他们的朋友。后来,小男孩随父母去了城市生活,但每年夏天都会回到村子探望爷爷。然而,去年夏天,爷爷因病去世,小男孩伤心欲绝。今年夏天,他回到村子,来到湖边,想起和爷爷的美好回忆,忍不住哭泣。他将爷爷的帽子放在湖边的石头上,希望能让爷爷的在天之灵得到安慰。那晚的哭泣声正是小男孩在祭莫他亲爱的爷爷。\n", "\n", "参与者提出的问题: 死者受伤了吗\n", "<|im_end|>\n", "<|im_start|>assistant\n", "不是<|im_end|>\n", "--------------------------------------------------\n", "prompt: <|im_start|>system\n", "You are an expert in logical reasoning.<|im_end|>\n", "<|im_start|>user\n", "你是一个逻辑游戏的主持人。游戏规则如下:\n", "\n", "1. 参与者会得到一个谜题。\n", "2. 参与者可以通过提问来获取线索,尝试解开谜题。\n", "3. 对于每个问题,主持人将根据实际情况回答以下五个选项之一:是、不是、不重要、回答正确、问法错误。\n", "4. 回答中不能添加任何其它信息,也不能省略选项中的任何一个字。例如,不可以把“不是”省略成“不”。\n", "5. 
参与者需要根据回答来推理,并最终找出谜题的正确答案。\n", "\n", "请严格按照这些规则回答参与者提出的问题。\n", "\n", "谜题: 在一个安静的夜晚,小村庄的湖边突然传来了阵阵哭泣声。第二天早晨,村长甄锐发现湖边的石头上放着一顶破旧的帽子,但没有人知道这顶帽子是从哪里来的,哭泣声又是为何。请还原故事真相。\n", "\n", "实际情况: 原来,这顶破旧的帽子属于一个小男孩,他小时候与爷爷在湖边生活。爷爷教他钓鱼、游泳,还告诉他湖中的海龟是他们的朋友。后来,小男孩随父母去了城市生活,但每年夏天都会回到村子探望爷爷。然而,去年夏天,爷爷因病去世,小男孩伤心欲绝。今年夏天,他回到村子,来到湖边,想起和爷爷的美好回忆,忍不住哭泣。他将爷爷的帽子放在湖边的石头上,希望能让爷爷的在天之灵得到安慰。那晚的哭泣声正是小男孩在祭莫他亲爱的爷爷。\n", "\n", "参与者提出的问题: 死者受伤了吗\n", "<|im_end|>\n", "<|im_start|>assistant\n", "\n", "Evaluating model: Qwen/Qwen2-7B\n", " 0% 0/3000 [00:00> Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.\n", " 0% 0/3000 [30:43\n", " predictions = eval_model(model, tokenizer, datasets[\"test\"])\n", " File \"/content/drive/.shortcut-targets-by-id/1E09lTnfbsjtTgQg65dQ3y9D2R6l8waxR/logical-reasoning/llm_toolkit/logical_reasoning_utils.py\", line 215, in eval_model\n", " outputs = model.generate(**inputs, max_new_tokens=4096, use_cache=False)\n", " File \"/usr/local/lib/python3.10/dist-packages/torch/utils/_contextlib.py\", line 115, in decorate_context\n", " return func(*args, **kwargs)\n", " File \"/usr/local/lib/python3.10/dist-packages/transformers/generation/utils.py\", line 1758, in generate\n", " result = self._sample(\n", " File \"/usr/local/lib/python3.10/dist-packages/transformers/generation/utils.py\", line 2397, in _sample\n", " outputs = self(\n", " File \"/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py\", line 1532, in _wrapped_call_impl\n", " return self._call_impl(*args, **kwargs)\n", " File \"/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py\", line 1541, in _call_impl\n", " return forward_call(*args, **kwargs)\n", " File \"/usr/local/lib/python3.10/dist-packages/transformers/models/qwen2/modeling_qwen2.py\", line 1163, in forward\n", " logits = logits.float()\n", "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 1.92 GiB. GPU \n", "Epoch 2\n", "loading env vars from: /content/drive/.shortcut-targets-by-id/1E09lTnfbsjtTgQg65dQ3y9D2R6l8waxR/logical-reasoning/.env\n", "Adding /content/drive/.shortcut-targets-by-id/1E09lTnfbsjtTgQg65dQ3y9D2R6l8waxR/logical-reasoning to sys.path\n", "2024-07-16 05:07:51.574401: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. 
To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n", "2024-07-16 05:07:51.624732: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n", "2024-07-16 05:07:51.624785: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n", "2024-07-16 05:07:51.626182: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n", "2024-07-16 05:07:51.633853: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", "To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", "2024-07-16 05:07:52.903770: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n", "loading /content/drive/.shortcut-targets-by-id/1E09lTnfbsjtTgQg65dQ3y9D2R6l8waxR/logical-reasoning/llm_toolkit/logical_reasoning_utils.py\n", "Qwen/Qwen2-7B /content/qwen2-7b/qwen2-7b/checkpoint-1124 False datasets/mgtv results/mgtv-results_02_qwen2_7b_colab.csv\n", "(1) GPU = NVIDIA L4. Max memory = 22.168 GB.\n", "0.0 GB of memory reserved.\n", "loading model: Qwen/Qwen2-7B\n", "[INFO|tokenization_utils_base.py:2108] 2024-07-16 05:07:59,358 >> loading file vocab.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2-7B/snapshots/453ed1575b739b5b03ce3758b23befdb0967f40e/vocab.json\n", "[INFO|tokenization_utils_base.py:2108] 2024-07-16 05:07:59,358 >> loading file merges.txt from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2-7B/snapshots/453ed1575b739b5b03ce3758b23befdb0967f40e/merges.txt\n", "[INFO|tokenization_utils_base.py:2108] 2024-07-16 05:07:59,358 >> loading file tokenizer.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2-7B/snapshots/453ed1575b739b5b03ce3758b23befdb0967f40e/tokenizer.json\n", "[INFO|tokenization_utils_base.py:2108] 2024-07-16 05:07:59,358 >> loading file added_tokens.json from cache at None\n", "[INFO|tokenization_utils_base.py:2108] 2024-07-16 05:07:59,358 >> loading file special_tokens_map.json from cache at None\n", "[INFO|tokenization_utils_base.py:2108] 2024-07-16 05:07:59,358 >> loading file tokenizer_config.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2-7B/snapshots/453ed1575b739b5b03ce3758b23befdb0967f40e/tokenizer_config.json\n", "[WARNING|logging.py:314] 2024-07-16 05:07:59,635 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n", "07/16/2024 05:07:59 - INFO - llamafactory.data.template - Replace eos token: <|im_end|>\n", "07/16/2024 05:07:59 - INFO - llamafactory.data.template - Add <|im_start|> to stop words.\n", "/usr/local/lib/python3.10/dist-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. 
If you want to force a new download, use `force_download=True`.\n", " warnings.warn(\n", "[INFO|configuration_utils.py:733] 2024-07-16 05:07:59,725 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2-7B/snapshots/453ed1575b739b5b03ce3758b23befdb0967f40e/config.json\n", "[INFO|configuration_utils.py:796] 2024-07-16 05:07:59,727 >> Model config Qwen2Config {\n", " \"_name_or_path\": \"Qwen/Qwen2-7B\",\n", " \"architectures\": [\n", " \"Qwen2ForCausalLM\"\n", " ],\n", " \"attention_dropout\": 0.0,\n", " \"bos_token_id\": 151643,\n", " \"eos_token_id\": 151643,\n", " \"hidden_act\": \"silu\",\n", " \"hidden_size\": 3584,\n", " \"initializer_range\": 0.02,\n", " \"intermediate_size\": 18944,\n", " \"max_position_embeddings\": 131072,\n", " \"max_window_layers\": 28,\n", " \"model_type\": \"qwen2\",\n", " \"num_attention_heads\": 28,\n", " \"num_hidden_layers\": 28,\n", " \"num_key_value_heads\": 4,\n", " \"rms_norm_eps\": 1e-06,\n", " \"rope_theta\": 1000000.0,\n", " \"sliding_window\": 131072,\n", " \"tie_word_embeddings\": false,\n", " \"torch_dtype\": \"bfloat16\",\n", " \"transformers_version\": \"4.41.2\",\n", " \"use_cache\": true,\n", " \"use_sliding_window\": false,\n", " \"vocab_size\": 152064\n", "}\n", "\n", "07/16/2024 05:07:59 - INFO - llamafactory.model.patcher - Using KV cache for faster generation.\n", "[INFO|modeling_utils.py:3474] 2024-07-16 05:07:59,758 >> loading weights file model.safetensors from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2-7B/snapshots/453ed1575b739b5b03ce3758b23befdb0967f40e/model.safetensors.index.json\n", "[INFO|modeling_utils.py:1519] 2024-07-16 05:07:59,761 >> Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16.\n", "[INFO|configuration_utils.py:962] 2024-07-16 05:07:59,762 >> Generate config GenerationConfig {\n", " \"bos_token_id\": 151643,\n", " \"eos_token_id\": 151643\n", "}\n", "\n", "Loading checkpoint shards: 100% 4/4 [00:05<00:00, 1.44s/it]\n", "[INFO|modeling_utils.py:4280] 2024-07-16 05:08:08,371 >> All model checkpoint weights were used when initializing Qwen2ForCausalLM.\n", "\n", "[INFO|modeling_utils.py:4288] 2024-07-16 05:08:08,371 >> All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at Qwen/Qwen2-7B.\n", "If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training.\n", "[INFO|configuration_utils.py:917] 2024-07-16 05:08:08,465 >> loading configuration file generation_config.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2-7B/snapshots/453ed1575b739b5b03ce3758b23befdb0967f40e/generation_config.json\n", "[INFO|configuration_utils.py:962] 2024-07-16 05:08:08,465 >> Generate config GenerationConfig {\n", " \"bos_token_id\": 151643,\n", " \"eos_token_id\": 151643,\n", " \"max_new_tokens\": 2048\n", "}\n", "\n", "07/16/2024 05:08:09 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.\n", "07/16/2024 05:08:09 - INFO - llamafactory.model.adapter - Merged 1 adapter(s).\n", "07/16/2024 05:08:09 - INFO - llamafactory.model.adapter - Loaded adapter(s): /content/qwen2-7b/qwen2-7b/checkpoint-1124\n", "07/16/2024 05:08:09 - INFO - llamafactory.model.loader - all params: 7,615,616,512\n", "(2) GPU = NVIDIA L4. 
Max memory = 22.168 GB.\n", "16.521 GB of memory reserved.\n", "loading train/test data files\n", "DatasetDict({\n", " train: Dataset({\n", " features: ['text', 'label', 'answer', 'title', 'puzzle', 'truth', 'train_text', 'prompt'],\n", " num_rows: 25000\n", " })\n", " test: Dataset({\n", " features: ['text', 'label', 'answer', 'title', 'puzzle', 'truth', 'train_text', 'prompt'],\n", " num_rows: 3000\n", " })\n", "})\n", "--------------------------------------------------\n", "text: 甄加索是自杀吗\n", "--------------------------------------------------\n", "label: 不是\n", "--------------------------------------------------\n", "answer: nan\n", "--------------------------------------------------\n", "title: 海岸之谜\n", "--------------------------------------------------\n", "puzzle: 在远离城市喧嚣的海边小屋,一天清晨,邻居发现甄加索僵卧在沙滩上,已无生命迹象。现场没有发现任何打斗的迹象。请问甄加索的死因是什么?\n", "--------------------------------------------------\n", "truth: 甄加索是一位热爱自然的画家,他每年都会来到这个海边小屋寻找灵感。在他生命的最后几天,他一直在创作一幅描绘海洋生物的画作。在画即将完成的前一天晚上,他骑着自行车外出,打算在海边观赏夜景。然而,他在沙滩上意外发现了一只搁浅的海豚,为了救助这只海豚,他耗费了极大的体力,最终成功将其送回海中。筋疲力尽的甄加索在沙滩上睡着了,由于他患有严重的心脏病,却未告知旁人,在寒冷的海风中,他的心脏停止了跳动。因此,警方在现场只发现了车轮痕迹和未完成的画作,而没有发现任何他杀的迹象。\n", "--------------------------------------------------\n", "train_text: <|im_start|>system\n", "You are an expert in logical reasoning.<|im_end|>\n", "<|im_start|>user\n", "你是一个逻辑游戏的主持人。游戏规则如下:\n", "\n", "1. 参与者会得到一个谜题。\n", "2. 参与者可以通过提问来获取线索,尝试解开谜题。\n", "3. 对于每个问题,主持人将根据实际情况回答以下五个选项之一:是、不是、不重要、回答正确、问法错误。\n", "4. 回答中不能添加任何其它信息,也不能省略选项中的任何一个字。例如,不可以把“不是”省略成“不”。\n", "5. 参与者需要根据回答来推理,并最终找出谜题的正确答案。\n", "\n", "请严格按照这些规则回答参与者提出的问题。\n", "\n", "谜题: 在远离城市喧嚣的海边小屋,一天清晨,邻居发现甄加索僵卧在沙滩上,已无生命迹象。现场没有发现任何打斗的迹象。请问甄加索的死因是什么?\n", "\n", "实际情况: 甄加索是一位热爱自然的画家,他每年都会来到这个海边小屋寻找灵感。在他生命的最后几天,他一直在创作一幅描绘海洋生物的画作。在画即将完成的前一天晚上,他骑着自行车外出,打算在海边观赏夜景。然而,他在沙滩上意外发现了一只搁浅的海豚,为了救助这只海豚,他耗费了极大的体力,最终成功将其送回海中。筋疲力尽的甄加索在沙滩上睡着了,由于他患有严重的心脏病,却未告知旁人,在寒冷的海风中,他的心脏停止了跳动。因此,警方在现场只发现了车轮痕迹和未完成的画作,而没有发现任何他杀的迹象。\n", "\n", "参与者提出的问题: 甄加索是自杀吗\n", "<|im_end|>\n", "<|im_start|>assistant\n", "不是<|im_end|>\n", "--------------------------------------------------\n", "prompt: <|im_start|>system\n", "You are an expert in logical reasoning.<|im_end|>\n", "<|im_start|>user\n", "你是一个逻辑游戏的主持人。游戏规则如下:\n", "\n", "1. 参与者会得到一个谜题。\n", "2. 参与者可以通过提问来获取线索,尝试解开谜题。\n", "3. 对于每个问题,主持人将根据实际情况回答以下五个选项之一:是、不是、不重要、回答正确、问法错误。\n", "4. 回答中不能添加任何其它信息,也不能省略选项中的任何一个字。例如,不可以把“不是”省略成“不”。\n", "5. 
参与者需要根据回答来推理,并最终找出谜题的正确答案。\n", "\n", "请严格按照这些规则回答参与者提出的问题。\n", "\n", "谜题: 在远离城市喧嚣的海边小屋,一天清晨,邻居发现甄加索僵卧在沙滩上,已无生命迹象。现场没有发现任何打斗的迹象。请问甄加索的死因是什么?\n", "\n", "实际情况: 甄加索是一位热爱自然的画家,他每年都会来到这个海边小屋寻找灵感。在他生命的最后几天,他一直在创作一幅描绘海洋生物的画作。在画即将完成的前一天晚上,他骑着自行车外出,打算在海边观赏夜景。然而,他在沙滩上意外发现了一只搁浅的海豚,为了救助这只海豚,他耗费了极大的体力,最终成功将其送回海中。筋疲力尽的甄加索在沙滩上睡着了,由于他患有严重的心脏病,却未告知旁人,在寒冷的海风中,他的心脏停止了跳动。因此,警方在现场只发现了车轮痕迹和未完成的画作,而没有发现任何他杀的迹象。\n", "\n", "参与者提出的问题: 甄加索是自杀吗\n", "<|im_end|>\n", "<|im_start|>assistant\n", "\n", "--------------------------------------------------\n", "text: 死者受伤了吗\n", "--------------------------------------------------\n", "label: 不是\n", "--------------------------------------------------\n", "answer: nan\n", "--------------------------------------------------\n", "title: 甄庄哭声\n", "--------------------------------------------------\n", "puzzle: 在一个安静的夜晚,小村庄的湖边突然传来了阵阵哭泣声。第二天早晨,村长甄锐发现湖边的石头上放着一顶破旧的帽子,但没有人知道这顶帽子是从哪里来的,哭泣声又是为何。请还原故事真相。\n", "--------------------------------------------------\n", "truth: 原来,这顶破旧的帽子属于一个小男孩,他小时候与爷爷在湖边生活。爷爷教他钓鱼、游泳,还告诉他湖中的海龟是他们的朋友。后来,小男孩随父母去了城市生活,但每年夏天都会回到村子探望爷爷。然而,去年夏天,爷爷因病去世,小男孩伤心欲绝。今年夏天,他回到村子,来到湖边,想起和爷爷的美好回忆,忍不住哭泣。他将爷爷的帽子放在湖边的石头上,希望能让爷爷的在天之灵得到安慰。那晚的哭泣声正是小男孩在祭莫他亲爱的爷爷。\n", "--------------------------------------------------\n", "train_text: <|im_start|>system\n", "You are an expert in logical reasoning.<|im_end|>\n", "<|im_start|>user\n", "你是一个逻辑游戏的主持人。游戏规则如下:\n", "\n", "1. 参与者会得到一个谜题。\n", "2. 参与者可以通过提问来获取线索,尝试解开谜题。\n", "3. 对于每个问题,主持人将根据实际情况回答以下五个选项之一:是、不是、不重要、回答正确、问法错误。\n", "4. 回答中不能添加任何其它信息,也不能省略选项中的任何一个字。例如,不可以把“不是”省略成“不”。\n", "5. 参与者需要根据回答来推理,并最终找出谜题的正确答案。\n", "\n", "请严格按照这些规则回答参与者提出的问题。\n", "\n", "谜题: 在一个安静的夜晚,小村庄的湖边突然传来了阵阵哭泣声。第二天早晨,村长甄锐发现湖边的石头上放着一顶破旧的帽子,但没有人知道这顶帽子是从哪里来的,哭泣声又是为何。请还原故事真相。\n", "\n", "实际情况: 原来,这顶破旧的帽子属于一个小男孩,他小时候与爷爷在湖边生活。爷爷教他钓鱼、游泳,还告诉他湖中的海龟是他们的朋友。后来,小男孩随父母去了城市生活,但每年夏天都会回到村子探望爷爷。然而,去年夏天,爷爷因病去世,小男孩伤心欲绝。今年夏天,他回到村子,来到湖边,想起和爷爷的美好回忆,忍不住哭泣。他将爷爷的帽子放在湖边的石头上,希望能让爷爷的在天之灵得到安慰。那晚的哭泣声正是小男孩在祭莫他亲爱的爷爷。\n", "\n", "参与者提出的问题: 死者受伤了吗\n", "<|im_end|>\n", "<|im_start|>assistant\n", "不是<|im_end|>\n", "--------------------------------------------------\n", "prompt: <|im_start|>system\n", "You are an expert in logical reasoning.<|im_end|>\n", "<|im_start|>user\n", "你是一个逻辑游戏的主持人。游戏规则如下:\n", "\n", "1. 参与者会得到一个谜题。\n", "2. 参与者可以通过提问来获取线索,尝试解开谜题。\n", "3. 对于每个问题,主持人将根据实际情况回答以下五个选项之一:是、不是、不重要、回答正确、问法错误。\n", "4. 回答中不能添加任何其它信息,也不能省略选项中的任何一个字。例如,不可以把“不是”省略成“不”。\n", "5. 
参与者需要根据回答来推理,并最终找出谜题的正确答案。\n", "\n", "请严格按照这些规则回答参与者提出的问题。\n", "\n", "谜题: 在一个安静的夜晚,小村庄的湖边突然传来了阵阵哭泣声。第二天早晨,村长甄锐发现湖边的石头上放着一顶破旧的帽子,但没有人知道这顶帽子是从哪里来的,哭泣声又是为何。请还原故事真相。\n", "\n", "实际情况: 原来,这顶破旧的帽子属于一个小男孩,他小时候与爷爷在湖边生活。爷爷教他钓鱼、游泳,还告诉他湖中的海龟是他们的朋友。后来,小男孩随父母去了城市生活,但每年夏天都会回到村子探望爷爷。然而,去年夏天,爷爷因病去世,小男孩伤心欲绝。今年夏天,他回到村子,来到湖边,想起和爷爷的美好回忆,忍不住哭泣。他将爷爷的帽子放在湖边的石头上,希望能让爷爷的在天之灵得到安慰。那晚的哭泣声正是小男孩在祭莫他亲爱的爷爷。\n", "\n", "参与者提出的问题: 死者受伤了吗\n", "<|im_end|>\n", "<|im_start|>assistant\n", "\n", "Evaluating model: Qwen/Qwen2-7B\n", " 0% 0/3000 [00:00> Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.\n", " 0% 0/3000 [31:25\n", " predictions = eval_model(model, tokenizer, datasets[\"test\"])\n", " File \"/content/drive/.shortcut-targets-by-id/1E09lTnfbsjtTgQg65dQ3y9D2R6l8waxR/logical-reasoning/llm_toolkit/logical_reasoning_utils.py\", line 215, in eval_model\n", " outputs = model.generate(**inputs, max_new_tokens=4096, use_cache=False)\n", " File \"/usr/local/lib/python3.10/dist-packages/torch/utils/_contextlib.py\", line 115, in decorate_context\n", " return func(*args, **kwargs)\n", " File \"/usr/local/lib/python3.10/dist-packages/transformers/generation/utils.py\", line 1758, in generate\n", " result = self._sample(\n", " File \"/usr/local/lib/python3.10/dist-packages/transformers/generation/utils.py\", line 2397, in _sample\n", " outputs = self(\n", " File \"/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py\", line 1532, in _wrapped_call_impl\n", " return self._call_impl(*args, **kwargs)\n", " File \"/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py\", line 1541, in _call_impl\n", " return forward_call(*args, **kwargs)\n", " File \"/usr/local/lib/python3.10/dist-packages/transformers/models/qwen2/modeling_qwen2.py\", line 1163, in forward\n", " logits = logits.float()\n", "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 1.92 GiB. GPU \n", "Epoch 3\n", "loading env vars from: /content/drive/.shortcut-targets-by-id/1E09lTnfbsjtTgQg65dQ3y9D2R6l8waxR/logical-reasoning/.env\n", "Adding /content/drive/.shortcut-targets-by-id/1E09lTnfbsjtTgQg65dQ3y9D2R6l8waxR/logical-reasoning to sys.path\n", "2024-07-16 05:39:41.116319: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. 
To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n", "2024-07-16 05:39:41.166809: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n", "2024-07-16 05:39:41.166878: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n", "2024-07-16 05:39:41.168319: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n", "2024-07-16 05:39:41.175971: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", "To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", "2024-07-16 05:39:42.445909: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n", "loading /content/drive/.shortcut-targets-by-id/1E09lTnfbsjtTgQg65dQ3y9D2R6l8waxR/logical-reasoning/llm_toolkit/logical_reasoning_utils.py\n", "Qwen/Qwen2-7B /content/qwen2-7b/qwen2-7b/checkpoint-1686 False datasets/mgtv results/mgtv-results_02_qwen2_7b_colab.csv\n", "(1) GPU = NVIDIA L4. Max memory = 22.168 GB.\n", "0.0 GB of memory reserved.\n", "loading model: Qwen/Qwen2-7B\n", "[INFO|tokenization_utils_base.py:2108] 2024-07-16 05:39:48,848 >> loading file vocab.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2-7B/snapshots/453ed1575b739b5b03ce3758b23befdb0967f40e/vocab.json\n", "[INFO|tokenization_utils_base.py:2108] 2024-07-16 05:39:48,849 >> loading file merges.txt from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2-7B/snapshots/453ed1575b739b5b03ce3758b23befdb0967f40e/merges.txt\n", "[INFO|tokenization_utils_base.py:2108] 2024-07-16 05:39:48,849 >> loading file tokenizer.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2-7B/snapshots/453ed1575b739b5b03ce3758b23befdb0967f40e/tokenizer.json\n", "[INFO|tokenization_utils_base.py:2108] 2024-07-16 05:39:48,849 >> loading file added_tokens.json from cache at None\n", "[INFO|tokenization_utils_base.py:2108] 2024-07-16 05:39:48,849 >> loading file special_tokens_map.json from cache at None\n", "[INFO|tokenization_utils_base.py:2108] 2024-07-16 05:39:48,849 >> loading file tokenizer_config.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2-7B/snapshots/453ed1575b739b5b03ce3758b23befdb0967f40e/tokenizer_config.json\n", "[WARNING|logging.py:314] 2024-07-16 05:39:49,128 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n", "07/16/2024 05:39:49 - INFO - llamafactory.data.template - Replace eos token: <|im_end|>\n", "07/16/2024 05:39:49 - INFO - llamafactory.data.template - Add <|im_start|> to stop words.\n", "/usr/local/lib/python3.10/dist-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. 
If you want to force a new download, use `force_download=True`.\n", " warnings.warn(\n", "[INFO|configuration_utils.py:733] 2024-07-16 05:39:49,227 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2-7B/snapshots/453ed1575b739b5b03ce3758b23befdb0967f40e/config.json\n", "[INFO|configuration_utils.py:796] 2024-07-16 05:39:49,228 >> Model config Qwen2Config {\n", " \"_name_or_path\": \"Qwen/Qwen2-7B\",\n", " \"architectures\": [\n", " \"Qwen2ForCausalLM\"\n", " ],\n", " \"attention_dropout\": 0.0,\n", " \"bos_token_id\": 151643,\n", " \"eos_token_id\": 151643,\n", " \"hidden_act\": \"silu\",\n", " \"hidden_size\": 3584,\n", " \"initializer_range\": 0.02,\n", " \"intermediate_size\": 18944,\n", " \"max_position_embeddings\": 131072,\n", " \"max_window_layers\": 28,\n", " \"model_type\": \"qwen2\",\n", " \"num_attention_heads\": 28,\n", " \"num_hidden_layers\": 28,\n", " \"num_key_value_heads\": 4,\n", " \"rms_norm_eps\": 1e-06,\n", " \"rope_theta\": 1000000.0,\n", " \"sliding_window\": 131072,\n", " \"tie_word_embeddings\": false,\n", " \"torch_dtype\": \"bfloat16\",\n", " \"transformers_version\": \"4.41.2\",\n", " \"use_cache\": true,\n", " \"use_sliding_window\": false,\n", " \"vocab_size\": 152064\n", "}\n", "\n", "07/16/2024 05:39:49 - INFO - llamafactory.model.patcher - Using KV cache for faster generation.\n", "[INFO|modeling_utils.py:3474] 2024-07-16 05:39:49,260 >> loading weights file model.safetensors from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2-7B/snapshots/453ed1575b739b5b03ce3758b23befdb0967f40e/model.safetensors.index.json\n", "[INFO|modeling_utils.py:1519] 2024-07-16 05:39:49,263 >> Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16.\n", "[INFO|configuration_utils.py:962] 2024-07-16 05:39:49,264 >> Generate config GenerationConfig {\n", " \"bos_token_id\": 151643,\n", " \"eos_token_id\": 151643\n", "}\n", "\n", "Loading checkpoint shards: 100% 4/4 [00:05<00:00, 1.43s/it]\n", "[INFO|modeling_utils.py:4280] 2024-07-16 05:39:57,929 >> All model checkpoint weights were used when initializing Qwen2ForCausalLM.\n", "\n", "[INFO|modeling_utils.py:4288] 2024-07-16 05:39:57,929 >> All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at Qwen/Qwen2-7B.\n", "If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training.\n", "[INFO|configuration_utils.py:917] 2024-07-16 05:39:58,030 >> loading configuration file generation_config.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2-7B/snapshots/453ed1575b739b5b03ce3758b23befdb0967f40e/generation_config.json\n", "[INFO|configuration_utils.py:962] 2024-07-16 05:39:58,030 >> Generate config GenerationConfig {\n", " \"bos_token_id\": 151643,\n", " \"eos_token_id\": 151643,\n", " \"max_new_tokens\": 2048\n", "}\n", "\n", "07/16/2024 05:39:58 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.\n", "07/16/2024 05:39:59 - INFO - llamafactory.model.adapter - Merged 1 adapter(s).\n", "07/16/2024 05:39:59 - INFO - llamafactory.model.adapter - Loaded adapter(s): /content/qwen2-7b/qwen2-7b/checkpoint-1686\n", "07/16/2024 05:39:59 - INFO - llamafactory.model.loader - all params: 7,615,616,512\n", "(2) GPU = NVIDIA L4. 
Max memory = 22.168 GB.\n", "16.521 GB of memory reserved.\n", "loading train/test data files\n", "DatasetDict({\n", " train: Dataset({\n", " features: ['text', 'label', 'answer', 'title', 'puzzle', 'truth', 'train_text', 'prompt'],\n", " num_rows: 25000\n", " })\n", " test: Dataset({\n", " features: ['text', 'label', 'answer', 'title', 'puzzle', 'truth', 'train_text', 'prompt'],\n", " num_rows: 3000\n", " })\n", "})\n", "--------------------------------------------------\n", "text: 甄加索是自杀吗\n", "--------------------------------------------------\n", "label: 不是\n", "--------------------------------------------------\n", "answer: nan\n", "--------------------------------------------------\n", "title: 海岸之谜\n", "--------------------------------------------------\n", "puzzle: 在远离城市喧嚣的海边小屋,一天清晨,邻居发现甄加索僵卧在沙滩上,已无生命迹象。现场没有发现任何打斗的迹象。请问甄加索的死因是什么?\n", "--------------------------------------------------\n", "truth: 甄加索是一位热爱自然的画家,他每年都会来到这个海边小屋寻找灵感。在他生命的最后几天,他一直在创作一幅描绘海洋生物的画作。在画即将完成的前一天晚上,他骑着自行车外出,打算在海边观赏夜景。然而,他在沙滩上意外发现了一只搁浅的海豚,为了救助这只海豚,他耗费了极大的体力,最终成功将其送回海中。筋疲力尽的甄加索在沙滩上睡着了,由于他患有严重的心脏病,却未告知旁人,在寒冷的海风中,他的心脏停止了跳动。因此,警方在现场只发现了车轮痕迹和未完成的画作,而没有发现任何他杀的迹象。\n", "--------------------------------------------------\n", "train_text: <|im_start|>system\n", "You are an expert in logical reasoning.<|im_end|>\n", "<|im_start|>user\n", "你是一个逻辑游戏的主持人。游戏规则如下:\n", "\n", "1. 参与者会得到一个谜题。\n", "2. 参与者可以通过提问来获取线索,尝试解开谜题。\n", "3. 对于每个问题,主持人将根据实际情况回答以下五个选项之一:是、不是、不重要、回答正确、问法错误。\n", "4. 回答中不能添加任何其它信息,也不能省略选项中的任何一个字。例如,不可以把“不是”省略成“不”。\n", "5. 参与者需要根据回答来推理,并最终找出谜题的正确答案。\n", "\n", "请严格按照这些规则回答参与者提出的问题。\n", "\n", "谜题: 在远离城市喧嚣的海边小屋,一天清晨,邻居发现甄加索僵卧在沙滩上,已无生命迹象。现场没有发现任何打斗的迹象。请问甄加索的死因是什么?\n", "\n", "实际情况: 甄加索是一位热爱自然的画家,他每年都会来到这个海边小屋寻找灵感。在他生命的最后几天,他一直在创作一幅描绘海洋生物的画作。在画即将完成的前一天晚上,他骑着自行车外出,打算在海边观赏夜景。然而,他在沙滩上意外发现了一只搁浅的海豚,为了救助这只海豚,他耗费了极大的体力,最终成功将其送回海中。筋疲力尽的甄加索在沙滩上睡着了,由于他患有严重的心脏病,却未告知旁人,在寒冷的海风中,他的心脏停止了跳动。因此,警方在现场只发现了车轮痕迹和未完成的画作,而没有发现任何他杀的迹象。\n", "\n", "参与者提出的问题: 甄加索是自杀吗\n", "<|im_end|>\n", "<|im_start|>assistant\n", "不是<|im_end|>\n", "--------------------------------------------------\n", "prompt: <|im_start|>system\n", "You are an expert in logical reasoning.<|im_end|>\n", "<|im_start|>user\n", "你是一个逻辑游戏的主持人。游戏规则如下:\n", "\n", "1. 参与者会得到一个谜题。\n", "2. 参与者可以通过提问来获取线索,尝试解开谜题。\n", "3. 对于每个问题,主持人将根据实际情况回答以下五个选项之一:是、不是、不重要、回答正确、问法错误。\n", "4. 回答中不能添加任何其它信息,也不能省略选项中的任何一个字。例如,不可以把“不是”省略成“不”。\n", "5. 
参与者需要根据回答来推理,并最终找出谜题的正确答案。\n", "\n", "请严格按照这些规则回答参与者提出的问题。\n", "\n", "谜题: 在远离城市喧嚣的海边小屋,一天清晨,邻居发现甄加索僵卧在沙滩上,已无生命迹象。现场没有发现任何打斗的迹象。请问甄加索的死因是什么?\n", "\n", "实际情况: 甄加索是一位热爱自然的画家,他每年都会来到这个海边小屋寻找灵感。在他生命的最后几天,他一直在创作一幅描绘海洋生物的画作。在画即将完成的前一天晚上,他骑着自行车外出,打算在海边观赏夜景。然而,他在沙滩上意外发现了一只搁浅的海豚,为了救助这只海豚,他耗费了极大的体力,最终成功将其送回海中。筋疲力尽的甄加索在沙滩上睡着了,由于他患有严重的心脏病,却未告知旁人,在寒冷的海风中,他的心脏停止了跳动。因此,警方在现场只发现了车轮痕迹和未完成的画作,而没有发现任何他杀的迹象。\n", "\n", "参与者提出的问题: 甄加索是自杀吗\n", "<|im_end|>\n", "<|im_start|>assistant\n", "\n", "--------------------------------------------------\n", "text: 死者受伤了吗\n", "--------------------------------------------------\n", "label: 不是\n", "--------------------------------------------------\n", "answer: nan\n", "--------------------------------------------------\n", "title: 甄庄哭声\n", "--------------------------------------------------\n", "puzzle: 在一个安静的夜晚,小村庄的湖边突然传来了阵阵哭泣声。第二天早晨,村长甄锐发现湖边的石头上放着一顶破旧的帽子,但没有人知道这顶帽子是从哪里来的,哭泣声又是为何。请还原故事真相。\n", "--------------------------------------------------\n", "truth: 原来,这顶破旧的帽子属于一个小男孩,他小时候与爷爷在湖边生活。爷爷教他钓鱼、游泳,还告诉他湖中的海龟是他们的朋友。后来,小男孩随父母去了城市生活,但每年夏天都会回到村子探望爷爷。然而,去年夏天,爷爷因病去世,小男孩伤心欲绝。今年夏天,他回到村子,来到湖边,想起和爷爷的美好回忆,忍不住哭泣。他将爷爷的帽子放在湖边的石头上,希望能让爷爷的在天之灵得到安慰。那晚的哭泣声正是小男孩在祭莫他亲爱的爷爷。\n", "--------------------------------------------------\n", "train_text: <|im_start|>system\n", "You are an expert in logical reasoning.<|im_end|>\n", "<|im_start|>user\n", "你是一个逻辑游戏的主持人。游戏规则如下:\n", "\n", "1. 参与者会得到一个谜题。\n", "2. 参与者可以通过提问来获取线索,尝试解开谜题。\n", "3. 对于每个问题,主持人将根据实际情况回答以下五个选项之一:是、不是、不重要、回答正确、问法错误。\n", "4. 回答中不能添加任何其它信息,也不能省略选项中的任何一个字。例如,不可以把“不是”省略成“不”。\n", "5. 参与者需要根据回答来推理,并最终找出谜题的正确答案。\n", "\n", "请严格按照这些规则回答参与者提出的问题。\n", "\n", "谜题: 在一个安静的夜晚,小村庄的湖边突然传来了阵阵哭泣声。第二天早晨,村长甄锐发现湖边的石头上放着一顶破旧的帽子,但没有人知道这顶帽子是从哪里来的,哭泣声又是为何。请还原故事真相。\n", "\n", "实际情况: 原来,这顶破旧的帽子属于一个小男孩,他小时候与爷爷在湖边生活。爷爷教他钓鱼、游泳,还告诉他湖中的海龟是他们的朋友。后来,小男孩随父母去了城市生活,但每年夏天都会回到村子探望爷爷。然而,去年夏天,爷爷因病去世,小男孩伤心欲绝。今年夏天,他回到村子,来到湖边,想起和爷爷的美好回忆,忍不住哭泣。他将爷爷的帽子放在湖边的石头上,希望能让爷爷的在天之灵得到安慰。那晚的哭泣声正是小男孩在祭莫他亲爱的爷爷。\n", "\n", "参与者提出的问题: 死者受伤了吗\n", "<|im_end|>\n", "<|im_start|>assistant\n", "不是<|im_end|>\n", "--------------------------------------------------\n", "prompt: <|im_start|>system\n", "You are an expert in logical reasoning.<|im_end|>\n", "<|im_start|>user\n", "你是一个逻辑游戏的主持人。游戏规则如下:\n", "\n", "1. 参与者会得到一个谜题。\n", "2. 参与者可以通过提问来获取线索,尝试解开谜题。\n", "3. 对于每个问题,主持人将根据实际情况回答以下五个选项之一:是、不是、不重要、回答正确、问法错误。\n", "4. 回答中不能添加任何其它信息,也不能省略选项中的任何一个字。例如,不可以把“不是”省略成“不”。\n", "5. 
参与者需要根据回答来推理,并最终找出谜题的正确答案。\n", "\n", "请严格按照这些规则回答参与者提出的问题。\n", "\n", "谜题: 在一个安静的夜晚,小村庄的湖边突然传来了阵阵哭泣声。第二天早晨,村长甄锐发现湖边的石头上放着一顶破旧的帽子,但没有人知道这顶帽子是从哪里来的,哭泣声又是为何。请还原故事真相。\n", "\n", "实际情况: 原来,这顶破旧的帽子属于一个小男孩,他小时候与爷爷在湖边生活。爷爷教他钓鱼、游泳,还告诉他湖中的海龟是他们的朋友。后来,小男孩随父母去了城市生活,但每年夏天都会回到村子探望爷爷。然而,去年夏天,爷爷因病去世,小男孩伤心欲绝。今年夏天,他回到村子,来到湖边,想起和爷爷的美好回忆,忍不住哭泣。他将爷爷的帽子放在湖边的石头上,希望能让爷爷的在天之灵得到安慰。那晚的哭泣声正是小男孩在祭莫他亲爱的爷爷。\n", "\n", "参与者提出的问题: 死者受伤了吗\n", "<|im_end|>\n", "<|im_start|>assistant\n", "\n", "Evaluating model: Qwen/Qwen2-7B\n", " 0% 0/3000 [00:00> Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.\n", " 0% 0/3000 [01:33\n", " predictions = eval_model(model, tokenizer, datasets[\"test\"])\n", " File \"/content/drive/.shortcut-targets-by-id/1E09lTnfbsjtTgQg65dQ3y9D2R6l8waxR/logical-reasoning/llm_toolkit/logical_reasoning_utils.py\", line 215, in eval_model\n", " outputs = model.generate(**inputs, max_new_tokens=4096, use_cache=False)\n", " File \"/usr/local/lib/python3.10/dist-packages/torch/utils/_contextlib.py\", line 114, in decorate_context\n", " with ctx_factory():\n", " File \"/usr/local/lib/python3.10/dist-packages/torch/autograd/grad_mode.py\", line 84, in __exit__\n", " torch.set_grad_enabled(self.prev)\n", " File \"/usr/local/lib/python3.10/dist-packages/torch/autograd/grad_mode.py\", line 183, in __init__\n", " def __init__(self, mode: bool) -> None:\n", "KeyboardInterrupt\n", "Epoch 4\n" ] } ], "source": [ "%%time\n", "\n", "evaluate_model_all_epochs(\"Qwen/Qwen2-7B\", \"/content/qwen2-7b/qwen2-7b\", 4, start_epoch=0, load_in_4bit=False, num_of_entries=-1)" ] }
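, { "cell_type": "markdown", "metadata": {}, "source": [ "A note on the repeated CUDA OOM above: every epoch fails inside `model.generate(**inputs, max_new_tokens=4096, use_cache=False)` at `logits = logits.float()`. With `use_cache=False`, generation re-scores the full sequence at every decoding step, so the fp32 logits tensor is roughly `seq_len * vocab_size * 4` bytes; at about 3,500 prompt tokens and `vocab_size = 152064` that is about 2 GiB, matching the failed allocations. The sketch below is a minimal alternative, not the project's `eval_model`: `generate_answer` is a hypothetical helper and assumes an already-loaded `model` and `tokenizer`. Because the expected answers are short labels (是、不是、不重要、回答正确、问法错误), a small `max_new_tokens` plus the KV cache should keep per-step logits tiny." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import torch\n", "\n", "\n", "def generate_answer(model, tokenizer, prompt, max_new_tokens=16):\n", "    # Hypothetical helper (not part of llm_toolkit): single-prompt generation\n", "    # tuned to avoid allocating full-sequence fp32 logits as seen above.\n", "    inputs = tokenizer(prompt, return_tensors=\"pt\").to(model.device)\n", "    with torch.no_grad():\n", "        outputs = model.generate(\n", "            **inputs,\n", "            max_new_tokens=max_new_tokens,  # answers are short labels, not 4096 tokens\n", "            use_cache=True,  # KV cache: logits computed for one position per step\n", "        )\n", "    # Decode only the newly generated tokens, dropping the prompt.\n", "    new_tokens = outputs[0][inputs[\"input_ids\"].shape[-1]:]\n", "    return tokenizer.decode(new_tokens, skip_special_tokens=True)\n", "\n", "\n", "# Rough size of the tensor that failed to allocate with use_cache=False:\n", "seq_len, vocab_size = 3500, 152064\n", "print(f\"full-sequence fp32 logits: {seq_len * vocab_size * 4 / 2**30:.2f} GiB\")\n", "\n", "# When evaluating several checkpoints in one process, releasing cached GPU\n", "# blocks between runs can also help:\n", "# torch.cuda.empty_cache()" ] } ], "metadata": { "accelerator": "GPU", "colab": { "gpuType": "L4", "provenance": [] }, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.4" } }, "nbformat": 4, "nbformat_minor": 0 }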