{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "id": "mo-H82fsy1jy" }, "outputs": [], "source": [ "%load_ext autoreload\n", "%autoreload 2" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "bYpMYw2Wz7Bv", "outputId": "cc9dce0a-c5f4-421c-ed41-d79c3a1b3577" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "workding dir: c:\\Users\\HT\\Documents\\URP\\logical-reasoning\n" ] } ], "source": [ "import os\n", "import sys\n", "from pathlib import Path\n", "\n", "workding_dir = str(Path.cwd().parent)\n", "os.chdir(workding_dir)\n", "sys.path.append(workding_dir)\n", "print(\"working dir:\", workding_dir)" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "aA2yLesz27M8", "outputId": "32909874-deee-44b8-c3de-5476cc3008f1" }, "outputs": [ { "data": { "text/plain": [ "False" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "need_to_setup_env = False\n", "need_to_setup_env" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "u0QXyHU_5DQR", "outputId": "54672b45-b5dc-48ef-efd2-5e8545e7b78b" }, "outputs": [], "source": [ "if need_to_setup_env:\n", " %pip install -r requirements.txt" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "RKmGaYU_5OkA", "outputId": "27c8c14b-1538-41e0-e3dd-dc37c650f5fd" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "loading env vars from: c:\\Users\\HT\\Documents\\URP\\logical-reasoning\\.env.qwen2_7b\n" ] }, { "data": { "text/plain": [ "True" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from dotenv import find_dotenv, load_dotenv\n", "\n", "found_dotenv = find_dotenv(\".env.qwen2_7b\")\n", "\n", "if len(found_dotenv) == 0:\n", " found_dotenv = find_dotenv(\".env.example\")\n", "print(f\"loading env vars from: {found_dotenv}\")\n", "load_dotenv(found_dotenv, override=True)" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "Xa7KxkuzUeS9", "outputId": "6c71b30e-7b02-44ef-feeb-df94989be7f3" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Qwen/Qwen2-7B None False datasets/mgtv results/mgtv-results_qwen2_7b.csv False\n" ] } ], "source": [ "import os\n", "\n", "model_name = os.getenv(\"MODEL_NAME\")\n", "adapter_name_or_path = os.getenv(\"ADAPTER_NAME_OR_PATH\")\n", "load_in_4bit = os.getenv(\"LOAD_IN_4BIT\") == \"true\"\n", "data_path = os.getenv(\"LOGICAL_REASONING_DATA_PATH\")\n", "results_path = os.getenv(\"LOGICAL_REASONING_RESULTS_PATH\")\n", "use_english_datasets = os.getenv(\"USE_ENGLISH_DATASETS\") == \"true\"\n", "\n", "print(model_name, adapter_name_or_path, load_in_4bit, data_path, results_path, use_english_datasets)" ] }, { "cell_type": "code", "execution_count": 19, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 379 }, "id": "goEFOG9Z5TvW", "outputId": "1491df15-1eca-43ac-89d2-ca1f74b42297" }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
textlabelanswertitlepuzzletruth
0偷的人信神吗不是NaN乡村之谜:消失的南瓜在甄家村里,有一个古老的传说:每年南瓜丰收的季节,南瓜田里总有一个最大的南瓜会不翼而飞,村民...真相原来与一位年迈的农夫有关。这位农夫年轻时,曾与一位美丽的姑娘相恋。他们约定在南瓜丰收的季...
1偷南瓜是为了来年丰收吗不是NaN乡村之谜:消失的南瓜在甄家村里,有一个古老的传说:每年南瓜丰收的季节,南瓜田里总有一个最大的南瓜会不翼而飞,村民...真相原来与一位年迈的农夫有关。这位农夫年轻时,曾与一位美丽的姑娘相恋。他们约定在南瓜丰收的季...
2村庄里的人喜欢南瓜嘛不重要NaN乡村之谜:消失的南瓜在甄家村里,有一个古老的传说:每年南瓜丰收的季节,南瓜田里总有一个最大的南瓜会不翼而飞,村民...真相原来与一位年迈的农夫有关。这位农夫年轻时,曾与一位美丽的姑娘相恋。他们约定在南瓜丰收的季...
3村庄里的人每年都需要用南瓜做祭品嘛不是NaN乡村之谜:消失的南瓜在甄家村里,有一个古老的传说:每年南瓜丰收的季节,南瓜田里总有一个最大的南瓜会不翼而飞,村民...真相原来与一位年迈的农夫有关。这位农夫年轻时,曾与一位美丽的姑娘相恋。他们约定在南瓜丰收的季...
4是村里的人偷的么NaN乡村之谜:消失的南瓜在甄家村里,有一个古老的传说:每年南瓜丰收的季节,南瓜田里总有一个最大的南瓜会不翼而飞,村民...真相原来与一位年迈的农夫有关。这位农夫年轻时,曾与一位美丽的姑娘相恋。他们约定在南瓜丰收的季...
\n", "
" ], "text/plain": [ " text label answer title \\\n", "0 偷的人信神吗 不是 NaN 乡村之谜:消失的南瓜 \n", "1 偷南瓜是为了来年丰收吗 不是 NaN 乡村之谜:消失的南瓜 \n", "2 村庄里的人喜欢南瓜嘛 不重要 NaN 乡村之谜:消失的南瓜 \n", "3 村庄里的人每年都需要用南瓜做祭品嘛 不是 NaN 乡村之谜:消失的南瓜 \n", "4 是村里的人偷的么 是 NaN 乡村之谜:消失的南瓜 \n", "\n", " puzzle \\\n", "0 在甄家村里,有一个古老的传说:每年南瓜丰收的季节,南瓜田里总有一个最大的南瓜会不翼而飞,村民... \n", "1 在甄家村里,有一个古老的传说:每年南瓜丰收的季节,南瓜田里总有一个最大的南瓜会不翼而飞,村民... \n", "2 在甄家村里,有一个古老的传说:每年南瓜丰收的季节,南瓜田里总有一个最大的南瓜会不翼而飞,村民... \n", "3 在甄家村里,有一个古老的传说:每年南瓜丰收的季节,南瓜田里总有一个最大的南瓜会不翼而飞,村民... \n", "4 在甄家村里,有一个古老的传说:每年南瓜丰收的季节,南瓜田里总有一个最大的南瓜会不翼而飞,村民... \n", "\n", " truth \n", "0 真相原来与一位年迈的农夫有关。这位农夫年轻时,曾与一位美丽的姑娘相恋。他们约定在南瓜丰收的季... \n", "1 真相原来与一位年迈的农夫有关。这位农夫年轻时,曾与一位美丽的姑娘相恋。他们约定在南瓜丰收的季... \n", "2 真相原来与一位年迈的农夫有关。这位农夫年轻时,曾与一位美丽的姑娘相恋。他们约定在南瓜丰收的季... \n", "3 真相原来与一位年迈的农夫有关。这位农夫年轻时,曾与一位美丽的姑娘相恋。他们约定在南瓜丰收的季... \n", "4 真相原来与一位年迈的农夫有关。这位农夫年轻时,曾与一位美丽的姑娘相恋。他们约定在南瓜丰收的季... " ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import pandas as pd\n", "df = pd.read_csv(\"datasets/mgtv/train.csv\")\n", "df.head()" ] }, { "cell_type": "code", "execution_count": 20, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "zPuVZfdRICtY", "outputId": "7b423111-fe0e-47ab-eeb8-438bc10c7930" }, "outputs": [ { "data": { "text/plain": [ "{'instruction': '你是一个逻辑游戏的主持人。游戏规则如下:1. 参与者会得到一个谜题。2. 参与者可以通过提问来获取线索,尝试解开谜题。3. 对于每个问题,主持人将根据实际情况回答以下五个选项之一:是、不是、不重要、回答正确、问法错误。4. 回答中不能添加任何其它信息,也不能省略选项中的任何一个字。例如,不可以把“不是”省略成“不”。5. 参与者需要根据回答来推理,并最终找出谜题的正确答案。请严格按照这些规则回答参与者提出的问题。谜题: {}实际情况: {}参与者提出的问题: {}',\n", " 'input': '谜题: 乡村之谜:消失的南瓜 在甄家村里,有一个古老的传说:每年南瓜丰收的季节,南瓜田里总有一个最大的南瓜会不翼而飞,村民们对此现象困惑不解。请找出南瓜失踪背后的原因。实际情况: 真相原来与一位年迈的农夫有关。这位农夫年轻时,曾与一位美丽的姑娘相恋。他们约定在南瓜丰收的季节结婚。然而,命运弄人,姑娘在婚礼前的一场意外中离世。悲伤的农夫为了纪念心爱的姑娘,每年都会将最大的南瓜偷走,放到姑娘的墓前,以此寄托自己的哀思。这一行为延续了多年,成为了乡村里一个神秘的传说。参与者提出的问题: 偷的人信神吗',\n", " 'output': '不是'}" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dataset_data = [\n", " {\n", " \"instruction\": \"你是一个逻辑游戏的主持人。游戏规则如下:1. 参与者会得到一个谜题。2. 参与者可以通过提问来获取线索,尝试解开谜题。3. 对于每个问题,主持人将根据实际情况回答以下五个选项之一:是、不是、不重要、回答正确、问法错误。4. 回答中不能添加任何其它信息,也不能省略选项中的任何一个字。例如,不可以把“不是”省略成“不”。5. 
参与者需要根据回答来推理,并最终找出谜题的正确答案。请严格按照这些规则回答参与者提出的问题。谜题: {}实际情况: {}参与者提出的问题: {}\",\n", " \"input\": \"谜题: \" + row_dict[\"title\"] + \" \" + row_dict[\"puzzle\"] + \"实际情况: \" + row_dict[\"truth\"] + \"参与者提出的问题: \" + row_dict[\"text\"],\n", " \"output\": row_dict[\"label\"]\n", " }\n", " for row_dict in df.to_dict(orient=\"records\")\n", "]\n", "\n", "dataset_data[0]" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "unuNtJc5_AIL", "outputId": "d3b87976-e32d-4b8f-fcc9-56b048604526" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "JSON file saved to /content/LLaMA-Factory/data/mgtv_train.json\n" ] } ], "source": [ "import os\n", "import json\n", "\n", "# Define the directory where you want to save the JSON file\n", "output_dir = \"/content/LLaMA-Factory/data/\"\n", "\n", "# Ensure the directory exists\n", "os.makedirs(output_dir, exist_ok=True)\n", "\n", "# Define the full path for the JSON file\n", "json_file_path = os.path.join(output_dir, \"mgtv_train.json\")\n", "\n", "# Save the dataset data to the specified path\n", "with open(json_file_path, \"w\") as f:\n", " json.dump(dataset_data, f)\n", "\n", "print(f\"JSON file saved to {json_file_path}\")\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "zyxvE1nfX8cq", "outputId": "1d3bddc5-289f-48b7-c2ce-5e9bd1684ea0" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "/content/LLaMA-Factory\n" ] } ], "source": [ "import json\n", "%cd /content/LLaMA-Factory/\n", "\n", "args = dict(\n", " model_name_or_path=\"Qwen/Qwen2-7B\", # fine-tune the Qwen/Qwen2-7B base model\n", "\n", " stage=\"sft\", # do supervised fine-tuning\n", " do_train=True,\n", " finetuning_type=\"lora\", # use LoRA adapters to save memory\n", " lora_target=\"all\", # attach LoRA adapters to all linear layers\n", " quantization_bit=4,\n", " loraplus_lr_ratio=16.0, # 16x base LoRA learning rate\n", "\n", " dataset=\"mgtv_train\",\n", " template=\"qwen\",\n", " cutoff_len=4096,\n", " max_samples=5000,\n", " overwrite_cache=\"true\",\n", " preprocessing_num_workers=16,\n", "\n", " output_dir=\"/content/qwen2-7b\",\n", " logging_steps=562,\n", " save_steps=562,\n", " plot_loss=\"true\",\n", " overwrite_output_dir=\"true\",\n", "\n", " per_device_train_batch_size=1, # batch size per device\n", " gradient_accumulation_steps=8, # effective batch size = 1 x 8 = 8\n", " learning_rate=0.001, # peak learning rate\n", " num_train_epochs=6.0, # number of training epochs\n", " lr_scheduler_type=\"cosine\", # use cosine learning rate scheduler\n", " warmup_ratio=0.1, # linear warmup over the first 10% of steps\n", " bf16=True,\n", " ddp_timeout=180000000, # ~5.71 years, effectively no timeout\n", "\n", " val_size=0.1,\n", " per_device_eval_batch_size=1,\n", " eval_strategy=\"steps\",\n", " eval_steps=562,\n", "\n", " report_to=\"wandb\",\n", ")\n", "\n", "with open(\"train_qwen2_7b.json\", \"w\", encoding=\"utf-8\") as f:\n", " json.dump(args, f, indent=2)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "QlYqm4TePib3" }, "outputs": [], "source": [ "with open(\"data/dataset_info.json\", 'r+') as file:\n", " # First we load existing data into a dict.\n", " file_data = json.load(file)\n", " # Register the mgtv_train dataset entry at the beginning of the dict.\n", " qwen2_7b = {\"mgtv_train\": {\n", " \"file_name\": \"mgtv_train.json\"\n", " }\n", " }\n", "\n", " qwen2_7b.update(file_data)\n", " file.seek(0)\n", " # convert back to json.\n", 
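"    # note: a file.truncate() after the dump would be needed if the updated JSON could ever be shorter than the original file; here the new content is strictly longer, so overwriting in place is safe\n",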
json.dump(qwen2_7b, file, indent=2)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "background_save": true, "base_uri": "https://localhost:8080/" }, "id": "VEuCMjMpITg-", "outputId": "76cf7882-3ae8-4c53-8d6c-c59b3557af0e" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "2024-07-15 14:34:28.658348: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n", "2024-07-15 14:34:28.710574: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n", "2024-07-15 14:34:28.710630: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n", "2024-07-15 14:34:28.712064: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n", "2024-07-15 14:34:28.719927: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", "To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", "2024-07-15 14:34:29.954969: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n", "07/15/2024 14:34:36 - WARNING - llamafactory.hparams.parser - We recommend enable `upcast_layernorm` in quantized training.\n", "07/15/2024 14:34:36 - INFO - llamafactory.hparams.parser - Process rank: 0, device: cuda:0, n_gpu: 1, distributed training: False, compute dtype: torch.bfloat16\n", "tokenizer_config.json: 100% 1.29k/1.29k [00:00<00:00, 9.74MB/s]\n", "vocab.json: 100% 2.78M/2.78M [00:00<00:00, 10.4MB/s]\n", "merges.txt: 100% 1.67M/1.67M [00:00<00:00, 6.67MB/s]\n", "tokenizer.json: 100% 7.03M/7.03M [00:00<00:00, 18.8MB/s]\n", "[INFO|tokenization_utils_base.py:2108] 2024-07-15 14:34:38,471 >> loading file vocab.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2-7B/snapshots/453ed1575b739b5b03ce3758b23befdb0967f40e/vocab.json\n", "[INFO|tokenization_utils_base.py:2108] 2024-07-15 14:34:38,471 >> loading file merges.txt from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2-7B/snapshots/453ed1575b739b5b03ce3758b23befdb0967f40e/merges.txt\n", "[INFO|tokenization_utils_base.py:2108] 2024-07-15 14:34:38,471 >> loading file tokenizer.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2-7B/snapshots/453ed1575b739b5b03ce3758b23befdb0967f40e/tokenizer.json\n", "[INFO|tokenization_utils_base.py:2108] 2024-07-15 14:34:38,471 >> loading file added_tokens.json from cache at None\n", "[INFO|tokenization_utils_base.py:2108] 2024-07-15 14:34:38,471 >> loading file special_tokens_map.json from cache at None\n", "[INFO|tokenization_utils_base.py:2108] 2024-07-15 14:34:38,471 >> loading file tokenizer_config.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2-7B/snapshots/453ed1575b739b5b03ce3758b23befdb0967f40e/tokenizer_config.json\n", "[WARNING|logging.py:314] 2024-07-15 14:34:38,733 >> Special tokens have been added in the 
vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n", "07/15/2024 14:34:38 - INFO - llamafactory.data.template - Replace eos token: <|im_end|>\n", "07/15/2024 14:34:38 - INFO - llamafactory.data.loader - Loading dataset mgtv_train.json...\n", "Generating train split: 25000 examples [00:01, 18396.69 examples/s]\n", "/usr/local/lib/python3.10/dist-packages/multiprocess/popen_fork.py:66: RuntimeWarning: os.fork() was called. os.fork() is incompatible with multithreaded code, and JAX is multithreaded, so this will likely lead to a deadlock.\n", " self.pid = os.fork()\n", "Converting format of dataset (num_proc=16): 100% 5000/5000 [00:00<00:00, 21217.02 examples/s]\n", "Running tokenizer on dataset (num_proc=16): 100% 5000/5000 [00:02<00:00, 1705.27 examples/s]\n", "training example:\n", "input_ids:\n", "[151644, 8948, 198, 2610, 525, 264, 10950, 17847, 13, 151645, 198, 151644, 872, 198, 56568, 101909, 104913, 99329, 9370, 106040, 1773, 99329, 104190, 104506, 5122, 16, 13, 26853, 224, 57218, 28946, 36993, 101051, 46944, 107969, 33872, 1773, 17, 13, 26853, 224, 57218, 28946, 105125, 107666, 36407, 45912, 105814, 3837, 104482, 117647, 107969, 33872, 1773, 18, 13, 69162, 34204, 103991, 86119, 3837, 106040, 44063, 100345, 107591, 102104, 87752, 105220, 109487, 100653, 5122, 20412, 5373, 99520, 5373, 16530, 99335, 5373, 102104, 88991, 5373, 56007, 24339, 32100, 1773, 19, 13, 49602, 252, 99590, 15946, 53153, 42855, 99885, 102158, 27369, 3837, 105827, 65770, 99475, 109487, 101047, 110281, 18600, 1773, 77557, 3837, 108620, 99360, 2073, 99520, 854, 65770, 99475, 12857, 2073, 16530, 55807, 20, 13, 26853, 224, 57218, 28946, 85106, 100345, 102104, 36407, 113272, 90395, 103941, 109363, 107969, 33872, 9370, 88991, 102349, 1773, 14880, 110439, 100001, 104190, 102104, 111842, 101080, 103936, 1773, 107969, 33872, 25, 4687, 107591, 25, 4687, 111842, 101080, 103936, 25, 5613, 107969, 33872, 25, 220, 100833, 53930, 107969, 5122, 102505, 9370, 115865, 73562, 109628, 45629, 105489, 3837, 104133, 111718, 106023, 5122, 101988, 115865, 110731, 9370, 105419, 3837, 115865, 99810, 69249, 59743, 104133, 104003, 115865, 36993, 16530, 101401, 68536, 99723, 3837, 115967, 104270, 102060, 110666, 112031, 1773, 14880, 109363, 115865, 110786, 101423, 104249, 1773, 107591, 25, 10236, 250, 253, 48921, 101221, 57218, 101961, 7948, 100894, 9370, 99288, 99818, 101063, 1773, 104269, 99288, 99818, 100774, 13343, 3837, 99798, 57218, 101961, 105664, 102373, 48921, 100271, 1773, 99650, 105616, 18493, 115865, 110731, 9370, 105419, 104388, 1773, 103968, 3837, 102606, 102115, 17340, 3837, 102373, 18493, 106340, 24562, 99774, 82224, 104424, 15946, 99372, 99244, 1773, 110597, 9370, 99288, 99818, 100012, 101416, 63109, 99242, 9370, 102373, 3837, 101988, 101938, 44063, 104003, 115865, 101329, 99314, 3837, 107974, 102373, 9370, 104575, 24562, 3837, 105699, 116418, 100005, 103000, 90663, 1773, 100147, 101070, 105443, 34187, 100097, 3837, 104989, 100833, 69249, 46944, 105190, 9370, 106023, 1773, 111842, 101080, 103936, 25, 4891, 223, 115, 100623, 21317, 99315, 101037, 151645, 198, 151644, 77091, 198, 99520, 151645]\n", "inputs:\n", "<|im_start|>system\n", "You are a helpful assistant.<|im_end|>\n", "<|im_start|>user\n", "你是一个逻辑游戏的主持人。游戏规则如下:1. 参与者会得到一个谜题。2. 参与者可以通过提问来获取线索,尝试解开谜题。3. 对于每个问题,主持人将根据实际情况回答以下五个选项之一:是、不是、不重要、回答正确、问法错误。4. 回答中不能添加任何其它信息,也不能省略选项中的任何一个字。例如,不可以把“不是”省略成“不”。5. 
参与者需要根据回答来推理,并最终找出谜题的正确答案。请严格按照这些规则回答参与者提出的问题。谜题: {}实际情况: {}参与者提出的问题: {}\n", "谜题: 乡村之谜:消失的南瓜 在甄家村里,有一个古老的传说:每年南瓜丰收的季节,南瓜田里总有一个最大的南瓜会不翼而飞,村民们对此现象困惑不解。请找出南瓜失踪背后的原因。实际情况: 真相原来与一位年迈的农夫有关。这位农夫年轻时,曾与一位美丽的姑娘相恋。他们约定在南瓜丰收的季节结婚。然而,命运弄人,姑娘在婚礼前的一场意外中离世。悲伤的农夫为了纪念心爱的姑娘,每年都会将最大的南瓜偷走,放到姑娘的墓前,以此寄托自己的哀思。这一行为延续了多年,成为了乡村里一个神秘的传说。参与者提出的问题: 偷的人信神吗<|im_end|>\n", "<|im_start|>assistant\n", "不是<|im_end|>\n", "label_ids:\n", "[-100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 99520, 151645]\n", "labels:\n", "不是<|im_end|>\n", "/usr/local/lib/python3.10/dist-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. 
If you want to force a new download, use `force_download=True`.\n", " warnings.warn(\n", "config.json: 100% 664/664 [00:00<00:00, 5.23MB/s]\n", "[INFO|configuration_utils.py:733] 2024-07-15 14:34:44,448 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2-7B/snapshots/453ed1575b739b5b03ce3758b23befdb0967f40e/config.json\n", "[INFO|configuration_utils.py:796] 2024-07-15 14:34:44,451 >> Model config Qwen2Config {\n", " \"_name_or_path\": \"Qwen/Qwen2-7B\",\n", " \"architectures\": [\n", " \"Qwen2ForCausalLM\"\n", " ],\n", " \"attention_dropout\": 0.0,\n", " \"bos_token_id\": 151643,\n", " \"eos_token_id\": 151643,\n", " \"hidden_act\": \"silu\",\n", " \"hidden_size\": 3584,\n", " \"initializer_range\": 0.02,\n", " \"intermediate_size\": 18944,\n", " \"max_position_embeddings\": 131072,\n", " \"max_window_layers\": 28,\n", " \"model_type\": \"qwen2\",\n", " \"num_attention_heads\": 28,\n", " \"num_hidden_layers\": 28,\n", " \"num_key_value_heads\": 4,\n", " \"rms_norm_eps\": 1e-06,\n", " \"rope_theta\": 1000000.0,\n", " \"sliding_window\": 131072,\n", " \"tie_word_embeddings\": false,\n", " \"torch_dtype\": \"bfloat16\",\n", " \"transformers_version\": \"4.41.2\",\n", " \"use_cache\": true,\n", " \"use_sliding_window\": false,\n", " \"vocab_size\": 152064\n", "}\n", "\n", "07/15/2024 14:34:44 - INFO - llamafactory.model.model_utils.quantization - Quantizing model to 4 bit with bitsandbytes.\n", "model.safetensors.index.json: 100% 27.8k/27.8k [00:00<00:00, 99.4MB/s]\n", "[INFO|modeling_utils.py:3474] 2024-07-15 14:34:44,977 >> loading weights file model.safetensors from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2-7B/snapshots/453ed1575b739b5b03ce3758b23befdb0967f40e/model.safetensors.index.json\n", "Downloading shards: 0% 0/4 [00:00> Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16.\n", "[INFO|configuration_utils.py:962] 2024-07-15 14:36:16,412 >> Generate config GenerationConfig {\n", " \"bos_token_id\": 151643,\n", " \"eos_token_id\": 151643\n", "}\n", "\n", "Loading checkpoint shards: 100% 4/4 [00:06<00:00, 1.55s/it]\n", "[INFO|modeling_utils.py:4280] 2024-07-15 14:36:26,291 >> All model checkpoint weights were used when initializing Qwen2ForCausalLM.\n", "\n", "[INFO|modeling_utils.py:4288] 2024-07-15 14:36:26,291 >> All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at Qwen/Qwen2-7B.\n", "If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training.\n", "generation_config.json: 100% 138/138 [00:00<00:00, 1.11MB/s]\n", "[INFO|configuration_utils.py:917] 2024-07-15 14:36:26,489 >> loading configuration file generation_config.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2-7B/snapshots/453ed1575b739b5b03ce3758b23befdb0967f40e/generation_config.json\n", "[INFO|configuration_utils.py:962] 2024-07-15 14:36:26,489 >> Generate config GenerationConfig {\n", " \"bos_token_id\": 151643,\n", " \"eos_token_id\": 151643,\n", " \"max_new_tokens\": 2048\n", "}\n", "\n", "07/15/2024 14:36:27 - INFO - llamafactory.model.model_utils.checkpointing - Gradient checkpointing enabled.\n", "07/15/2024 14:36:27 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.\n", "07/15/2024 14:36:27 - INFO - llamafactory.model.adapter - Upcasting trainable params to float32.\n", "07/15/2024 14:36:27 - INFO - llamafactory.model.adapter - 
Fine-tuning method: LoRA\n", "07/15/2024 14:36:27 - INFO - llamafactory.model.model_utils.misc - Found linear modules: q_proj,down_proj,o_proj,gate_proj,v_proj,up_proj,k_proj\n", "07/15/2024 14:36:27 - INFO - llamafactory.model.loader - trainable params: 20,185,088 || all params: 7,635,801,600 || trainable%: 0.2643\n", "[INFO|trainer.py:641] 2024-07-15 14:36:27,732 >> Using auto half precision backend\n", "07/15/2024 14:36:28 - INFO - llamafactory.train.trainer_utils - Using LoRA+ optimizer with loraplus lr ratio 16.00.\n", "[INFO|trainer.py:2078] 2024-07-15 14:36:28,977 >> ***** Running training *****\n", "[INFO|trainer.py:2079] 2024-07-15 14:36:28,977 >> Num examples = 4,500\n", "[INFO|trainer.py:2080] 2024-07-15 14:36:28,977 >> Num Epochs = 6\n", "[INFO|trainer.py:2081] 2024-07-15 14:36:28,977 >> Instantaneous batch size per device = 1\n", "[INFO|trainer.py:2084] 2024-07-15 14:36:28,977 >> Total train batch size (w. parallel, distributed & accumulation) = 8\n", "[INFO|trainer.py:2085] 2024-07-15 14:36:28,977 >> Gradient Accumulation steps = 8\n", "[INFO|trainer.py:2086] 2024-07-15 14:36:28,977 >> Total optimization steps = 3,372\n", "[INFO|trainer.py:2087] 2024-07-15 14:36:28,981 >> Number of trainable parameters = 20,185,088\n", "[INFO|integration_utils.py:723] 2024-07-15 14:36:28,986 >> Automatic Weights & Biases logging enabled, to disable set os.environ[\"WANDB_DISABLED\"] = \"true\"\n", "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m The `run_name` is currently set to the same value as `TrainingArguments.output_dir`. If this was not intended, please specify a different run name by setting the `TrainingArguments.run_name` parameter.\n", "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33minflaton-sg\u001b[0m (\u001b[33minflaton-ai\u001b[0m). Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n", "\u001b[34m\u001b[1mwandb\u001b[0m: Tracking run with wandb version 0.17.4\n", "\u001b[34m\u001b[1mwandb\u001b[0m: Run data is saved locally in \u001b[35m\u001b[1m/content/LLaMA-Factory/wandb/run-20240715_143630-ancw8jgs\u001b[0m\n", "\u001b[34m\u001b[1mwandb\u001b[0m: Run \u001b[1m`wandb offline`\u001b[0m to turn off syncing.\n", "\u001b[34m\u001b[1mwandb\u001b[0m: Syncing run \u001b[33m/content/qwen2-7b\u001b[0m\n", "\u001b[34m\u001b[1mwandb\u001b[0m: ⭐️ View project at \u001b[34m\u001b[4mhttps://wandb.ai/inflaton-ai/huggingface\u001b[0m\n", "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run at \u001b[34m\u001b[4mhttps://wandb.ai/inflaton-ai/huggingface/runs/ancw8jgs\u001b[0m\n", "{'loss': 1.9143, 'grad_norm': 2.1186106204986572, 'learning_rate': 0.000986610734407955, 'epoch': 1.0}\n", " 17% 562/3372 [1:01:09<5:02:54, 6.47s/it][INFO|trainer.py:3719] 2024-07-15 15:37:39,784 >> ***** Running Evaluation *****\n", "[INFO|trainer.py:3721] 2024-07-15 15:37:39,784 >> Num examples = 500\n", "[INFO|trainer.py:3724] 2024-07-15 15:37:39,785 >> Batch size = 1\n", "\n", " 0% 0/500 [00:00> Saving model checkpoint to /content/qwen2-7b/checkpoint-562\n", "/usr/local/lib/python3.10/dist-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. 
If you want to force a new download, use `force_download=True`.\n", " warnings.warn(\n", "[INFO|configuration_utils.py:733] 2024-07-15 15:39:50,101 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2-7B/snapshots/453ed1575b739b5b03ce3758b23befdb0967f40e/config.json\n", "[INFO|configuration_utils.py:796] 2024-07-15 15:39:50,102 >> Model config Qwen2Config {\n", " \"architectures\": [\n", " \"Qwen2ForCausalLM\"\n", " ],\n", " \"attention_dropout\": 0.0,\n", " \"bos_token_id\": 151643,\n", " \"eos_token_id\": 151643,\n", " \"hidden_act\": \"silu\",\n", " \"hidden_size\": 3584,\n", " \"initializer_range\": 0.02,\n", " \"intermediate_size\": 18944,\n", " \"max_position_embeddings\": 131072,\n", " \"max_window_layers\": 28,\n", " \"model_type\": \"qwen2\",\n", " \"num_attention_heads\": 28,\n", " \"num_hidden_layers\": 28,\n", " \"num_key_value_heads\": 4,\n", " \"rms_norm_eps\": 1e-06,\n", " \"rope_theta\": 1000000.0,\n", " \"sliding_window\": 131072,\n", " \"tie_word_embeddings\": false,\n", " \"torch_dtype\": \"bfloat16\",\n", " \"transformers_version\": \"4.41.2\",\n", " \"use_cache\": true,\n", " \"use_sliding_window\": false,\n", " \"vocab_size\": 152064\n", "}\n", "\n", "[INFO|tokenization_utils_base.py:2513] 2024-07-15 15:39:50,298 >> tokenizer config file saved in /content/qwen2-7b/checkpoint-562/tokenizer_config.json\n", "[INFO|tokenization_utils_base.py:2522] 2024-07-15 15:39:50,298 >> Special tokens file saved in /content/qwen2-7b/checkpoint-562/special_tokens_map.json\n", "{'loss': 0.847, 'grad_norm': 0.33948227763175964, 'learning_rate': 0.0008433439152121052, 'epoch': 2.0}\n", " 33% 1124/3372 [2:04:27<4:05:43, 6.56s/it][INFO|trainer.py:3719] 2024-07-15 16:40:58,115 >> ***** Running Evaluation *****\n", "[INFO|trainer.py:3721] 2024-07-15 16:40:58,115 >> Num examples = 500\n", "[INFO|trainer.py:3724] 2024-07-15 16:40:58,115 >> Batch size = 1\n", "\n", " 0% 0/500 [00:00> Saving model checkpoint to /content/qwen2-7b/checkpoint-1124\n", "/usr/local/lib/python3.10/dist-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. 
If you want to force a new download, use `force_download=True`.\n", " warnings.warn(\n", "[INFO|configuration_utils.py:733] 2024-07-15 16:43:11,686 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2-7B/snapshots/453ed1575b739b5b03ce3758b23befdb0967f40e/config.json\n", "[INFO|configuration_utils.py:796] 2024-07-15 16:43:11,687 >> Model config Qwen2Config {\n", " \"architectures\": [\n", " \"Qwen2ForCausalLM\"\n", " ],\n", " \"attention_dropout\": 0.0,\n", " \"bos_token_id\": 151643,\n", " \"eos_token_id\": 151643,\n", " \"hidden_act\": \"silu\",\n", " \"hidden_size\": 3584,\n", " \"initializer_range\": 0.02,\n", " \"intermediate_size\": 18944,\n", " \"max_position_embeddings\": 131072,\n", " \"max_window_layers\": 28,\n", " \"model_type\": \"qwen2\",\n", " \"num_attention_heads\": 28,\n", " \"num_hidden_layers\": 28,\n", " \"num_key_value_heads\": 4,\n", " \"rms_norm_eps\": 1e-06,\n", " \"rope_theta\": 1000000.0,\n", " \"sliding_window\": 131072,\n", " \"tie_word_embeddings\": false,\n", " \"torch_dtype\": \"bfloat16\",\n", " \"transformers_version\": \"4.41.2\",\n", " \"use_cache\": true,\n", " \"use_sliding_window\": false,\n", " \"vocab_size\": 152064\n", "}\n", "\n", "[INFO|tokenization_utils_base.py:2513] 2024-07-15 16:43:11,872 >> tokenizer config file saved in /content/qwen2-7b/checkpoint-1124/tokenizer_config.json\n", "[INFO|tokenization_utils_base.py:2522] 2024-07-15 16:43:11,873 >> Special tokens file saved in /content/qwen2-7b/checkpoint-1124/special_tokens_map.json\n", "{'loss': 0.5831, 'grad_norm': 0.08739642798900604, 'learning_rate': 0.0005870506865895984, 'epoch': 3.0}\n", " 50% 1686/3372 [3:08:20<3:02:02, 6.48s/it][INFO|trainer.py:3719] 2024-07-15 17:44:50,834 >> ***** Running Evaluation *****\n", "[INFO|trainer.py:3721] 2024-07-15 17:44:50,834 >> Num examples = 500\n", "[INFO|trainer.py:3724] 2024-07-15 17:44:50,835 >> Batch size = 1\n", "\n", " 0% 0/500 [00:00> Saving model checkpoint to /content/qwen2-7b/checkpoint-1686\n", "/usr/local/lib/python3.10/dist-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. 
If you want to force a new download, use `force_download=True`.\n", " warnings.warn(\n", "[INFO|configuration_utils.py:733] 2024-07-15 17:47:04,904 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2-7B/snapshots/453ed1575b739b5b03ce3758b23befdb0967f40e/config.json\n", "[INFO|configuration_utils.py:796] 2024-07-15 17:47:04,905 >> Model config Qwen2Config {\n", " \"architectures\": [\n", " \"Qwen2ForCausalLM\"\n", " ],\n", " \"attention_dropout\": 0.0,\n", " \"bos_token_id\": 151643,\n", " \"eos_token_id\": 151643,\n", " \"hidden_act\": \"silu\",\n", " \"hidden_size\": 3584,\n", " \"initializer_range\": 0.02,\n", " \"intermediate_size\": 18944,\n", " \"max_position_embeddings\": 131072,\n", " \"max_window_layers\": 28,\n", " \"model_type\": \"qwen2\",\n", " \"num_attention_heads\": 28,\n", " \"num_hidden_layers\": 28,\n", " \"num_key_value_heads\": 4,\n", " \"rms_norm_eps\": 1e-06,\n", " \"rope_theta\": 1000000.0,\n", " \"sliding_window\": 131072,\n", " \"tie_word_embeddings\": false,\n", " \"torch_dtype\": \"bfloat16\",\n", " \"transformers_version\": \"4.41.2\",\n", " \"use_cache\": true,\n", " \"use_sliding_window\": false,\n", " \"vocab_size\": 152064\n", "}\n", "\n", "[INFO|tokenization_utils_base.py:2513] 2024-07-15 17:47:05,097 >> tokenizer config file saved in /content/qwen2-7b/checkpoint-1686/tokenizer_config.json\n", "[INFO|tokenization_utils_base.py:2522] 2024-07-15 17:47:05,098 >> Special tokens file saved in /content/qwen2-7b/checkpoint-1686/special_tokens_map.json\n", "{'loss': 0.5469, 'grad_norm': 0.5723391771316528, 'learning_rate': 0.00030210098232438424, 'epoch': 4.0}\n", " 67% 2248/3372 [4:12:15<2:04:11, 6.63s/it][INFO|trainer.py:3719] 2024-07-15 18:48:45,813 >> ***** Running Evaluation *****\n", "[INFO|trainer.py:3721] 2024-07-15 18:48:45,813 >> Num examples = 500\n", "[INFO|trainer.py:3724] 2024-07-15 18:48:45,814 >> Batch size = 1\n", "\n", " 0% 0/500 [00:00> Saving model checkpoint to /content/qwen2-7b/checkpoint-2248\n", "/usr/local/lib/python3.10/dist-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. 
If you want to force a new download, use `force_download=True`.\n", " warnings.warn(\n", "[INFO|configuration_utils.py:733] 2024-07-15 18:50:59,783 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2-7B/snapshots/453ed1575b739b5b03ce3758b23befdb0967f40e/config.json\n", "[INFO|configuration_utils.py:796] 2024-07-15 18:50:59,784 >> Model config Qwen2Config {\n", " \"architectures\": [\n", " \"Qwen2ForCausalLM\"\n", " ],\n", " \"attention_dropout\": 0.0,\n", " \"bos_token_id\": 151643,\n", " \"eos_token_id\": 151643,\n", " \"hidden_act\": \"silu\",\n", " \"hidden_size\": 3584,\n", " \"initializer_range\": 0.02,\n", " \"intermediate_size\": 18944,\n", " \"max_position_embeddings\": 131072,\n", " \"max_window_layers\": 28,\n", " \"model_type\": \"qwen2\",\n", " \"num_attention_heads\": 28,\n", " \"num_hidden_layers\": 28,\n", " \"num_key_value_heads\": 4,\n", " \"rms_norm_eps\": 1e-06,\n", " \"rope_theta\": 1000000.0,\n", " \"sliding_window\": 131072,\n", " \"tie_word_embeddings\": false,\n", " \"torch_dtype\": \"bfloat16\",\n", " \"transformers_version\": \"4.41.2\",\n", " \"use_cache\": true,\n", " \"use_sliding_window\": false,\n", " \"vocab_size\": 152064\n", "}\n", "\n", "[INFO|tokenization_utils_base.py:2513] 2024-07-15 18:50:59,970 >> tokenizer config file saved in /content/qwen2-7b/checkpoint-2248/tokenizer_config.json\n", "[INFO|tokenization_utils_base.py:2522] 2024-07-15 18:50:59,970 >> Special tokens file saved in /content/qwen2-7b/checkpoint-2248/special_tokens_map.json\n", "{'loss': 0.5323, 'grad_norm': 0.26129332184791565, 'learning_rate': 8.229824704832284e-05, 'epoch': 5.0}\n", " 83% 2810/3372 [5:16:10<1:01:38, 6.58s/it][INFO|trainer.py:3719] 2024-07-15 19:52:41,540 >> ***** Running Evaluation *****\n", "[INFO|trainer.py:3721] 2024-07-15 19:52:41,541 >> Num examples = 500\n", "[INFO|trainer.py:3724] 2024-07-15 19:52:41,541 >> Batch size = 1\n", "\n", " 0% 0/500 [00:00> Saving model checkpoint to /content/qwen2-7b/checkpoint-2810\n", "/usr/local/lib/python3.10/dist-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. 
If you want to force a new download, use `force_download=True`.\n", " warnings.warn(\n", "[INFO|configuration_utils.py:733] 2024-07-15 19:54:55,909 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2-7B/snapshots/453ed1575b739b5b03ce3758b23befdb0967f40e/config.json\n", "[INFO|configuration_utils.py:796] 2024-07-15 19:54:55,910 >> Model config Qwen2Config {\n", " \"architectures\": [\n", " \"Qwen2ForCausalLM\"\n", " ],\n", " \"attention_dropout\": 0.0,\n", " \"bos_token_id\": 151643,\n", " \"eos_token_id\": 151643,\n", " \"hidden_act\": \"silu\",\n", " \"hidden_size\": 3584,\n", " \"initializer_range\": 0.02,\n", " \"intermediate_size\": 18944,\n", " \"max_position_embeddings\": 131072,\n", " \"max_window_layers\": 28,\n", " \"model_type\": \"qwen2\",\n", " \"num_attention_heads\": 28,\n", " \"num_hidden_layers\": 28,\n", " \"num_key_value_heads\": 4,\n", " \"rms_norm_eps\": 1e-06,\n", " \"rope_theta\": 1000000.0,\n", " \"sliding_window\": 131072,\n", " \"tie_word_embeddings\": false,\n", " \"torch_dtype\": \"bfloat16\",\n", " \"transformers_version\": \"4.41.2\",\n", " \"use_cache\": true,\n", " \"use_sliding_window\": false,\n", " \"vocab_size\": 152064\n", "}\n", "\n", "[INFO|tokenization_utils_base.py:2513] 2024-07-15 19:54:56,097 >> tokenizer config file saved in /content/qwen2-7b/checkpoint-2810/tokenizer_config.json\n", "[INFO|tokenization_utils_base.py:2522] 2024-07-15 19:54:56,098 >> Special tokens file saved in /content/qwen2-7b/checkpoint-2810/special_tokens_map.json\n", "{'loss': 0.5229, 'grad_norm': 0.18251831829547882, 'learning_rate': 0.0, 'epoch': 5.99}\n", "100% 3372/3372 [6:20:08<00:00, 6.71s/it][INFO|trainer.py:3719] 2024-07-15 20:56:39,578 >> ***** Running Evaluation *****\n", "[INFO|trainer.py:3721] 2024-07-15 20:56:39,578 >> Num examples = 500\n", "[INFO|trainer.py:3724] 2024-07-15 20:56:39,578 >> Batch size = 1\n", "\n", " 0% 0/500 [00:00> Saving model checkpoint to /content/qwen2-7b/checkpoint-3372\n", "/usr/local/lib/python3.10/dist-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. 
If you want to force a new download, use `force_download=True`.\n", " warnings.warn(\n", "[INFO|configuration_utils.py:733] 2024-07-15 20:58:53,452 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2-7B/snapshots/453ed1575b739b5b03ce3758b23befdb0967f40e/config.json\n", "[INFO|configuration_utils.py:796] 2024-07-15 20:58:53,453 >> Model config Qwen2Config {\n", " \"architectures\": [\n", " \"Qwen2ForCausalLM\"\n", " ],\n", " \"attention_dropout\": 0.0,\n", " \"bos_token_id\": 151643,\n", " \"eos_token_id\": 151643,\n", " \"hidden_act\": \"silu\",\n", " \"hidden_size\": 3584,\n", " \"initializer_range\": 0.02,\n", " \"intermediate_size\": 18944,\n", " \"max_position_embeddings\": 131072,\n", " \"max_window_layers\": 28,\n", " \"model_type\": \"qwen2\",\n", " \"num_attention_heads\": 28,\n", " \"num_hidden_layers\": 28,\n", " \"num_key_value_heads\": 4,\n", " \"rms_norm_eps\": 1e-06,\n", " \"rope_theta\": 1000000.0,\n", " \"sliding_window\": 131072,\n", " \"tie_word_embeddings\": false,\n", " \"torch_dtype\": \"bfloat16\",\n", " \"transformers_version\": \"4.41.2\",\n", " \"use_cache\": true,\n", " \"use_sliding_window\": false,\n", " \"vocab_size\": 152064\n", "}\n", "\n", "[INFO|tokenization_utils_base.py:2513] 2024-07-15 20:58:53,632 >> tokenizer config file saved in /content/qwen2-7b/checkpoint-3372/tokenizer_config.json\n", "[INFO|tokenization_utils_base.py:2522] 2024-07-15 20:58:53,633 >> Special tokens file saved in /content/qwen2-7b/checkpoint-3372/special_tokens_map.json\n", "[INFO|trainer.py:2329] 2024-07-15 20:58:54,110 >> \n", "\n", "Training completed. Do not forget to share your model on huggingface.co/models =)\n", "\n", "\n", "{'train_runtime': 22945.1289, 'train_samples_per_second': 1.177, 'train_steps_per_second': 0.147, 'train_loss': 0.8244021631786125, 'epoch': 5.99}\n", "100% 3372/3372 [6:22:23<00:00, 6.80s/it]\n", "[INFO|trainer.py:3410] 2024-07-15 20:58:54,115 >> Saving model checkpoint to /content/qwen2-7b\n", "/usr/local/lib/python3.10/dist-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. 
If you want to force a new download, use `force_download=True`.\n", " warnings.warn(\n", "[INFO|configuration_utils.py:733] 2024-07-15 20:58:54,406 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2-7B/snapshots/453ed1575b739b5b03ce3758b23befdb0967f40e/config.json\n", "[INFO|configuration_utils.py:796] 2024-07-15 20:58:54,407 >> Model config Qwen2Config {\n", " \"architectures\": [\n", " \"Qwen2ForCausalLM\"\n", " ],\n", " \"attention_dropout\": 0.0,\n", " \"bos_token_id\": 151643,\n", " \"eos_token_id\": 151643,\n", " \"hidden_act\": \"silu\",\n", " \"hidden_size\": 3584,\n", " \"initializer_range\": 0.02,\n", " \"intermediate_size\": 18944,\n", " \"max_position_embeddings\": 131072,\n", " \"max_window_layers\": 28,\n", " \"model_type\": \"qwen2\",\n", " \"num_attention_heads\": 28,\n", " \"num_hidden_layers\": 28,\n", " \"num_key_value_heads\": 4,\n", " \"rms_norm_eps\": 1e-06,\n", " \"rope_theta\": 1000000.0,\n", " \"sliding_window\": 131072,\n", " \"tie_word_embeddings\": false,\n", " \"torch_dtype\": \"bfloat16\",\n", " \"transformers_version\": \"4.41.2\",\n", " \"use_cache\": true,\n", " \"use_sliding_window\": false,\n", " \"vocab_size\": 152064\n", "}\n", "\n", "[INFO|tokenization_utils_base.py:2513] 2024-07-15 20:58:54,600 >> tokenizer config file saved in /content/qwen2-7b/tokenizer_config.json\n", "[INFO|tokenization_utils_base.py:2522] 2024-07-15 20:58:54,600 >> Special tokens file saved in /content/qwen2-7b/special_tokens_map.json\n", "***** train metrics *****\n", " epoch = 5.9947\n", " total_flos = 396658758GF\n", " train_loss = 0.8244\n", " train_runtime = 6:22:25.12\n", " train_samples_per_second = 1.177\n", " train_steps_per_second = 0.147\n", "Figure saved at: /content/qwen2-7b/training_loss.png\n", "Figure saved at: /content/qwen2-7b/training_eval_loss.png\n", "Figure saved at: /content/qwen2-7b/training_eval_accuracy.png\n", "[INFO|trainer.py:3719] 2024-07-15 20:58:55,263 >> ***** Running Evaluation *****\n", "[INFO|trainer.py:3721] 2024-07-15 20:58:55,264 >> Num examples = 500\n", "[INFO|trainer.py:3724] 2024-07-15 20:58:55,264 >> Batch size = 1\n", "100% 500/500 [02:13<00:00, 3.74it/s]\n", "***** eval metrics *****\n", " epoch = 5.9947\n", " eval_accuracy = 0.7747\n", " eval_loss = 0.513\n", " eval_runtime = 0:02:13.97\n", " eval_samples_per_second = 3.732\n", " eval_steps_per_second = 3.732\n", "[INFO|modelcard.py:450] 2024-07-15 21:01:09,246 >> Dropping the following result as it does not have all the necessary fields:\n", "{'task': {'name': 'Causal Language Modeling', 'type': 'text-generation'}, 'metrics': [{'name': 'Accuracy', 'type': 'accuracy', 'value': 0.7746666666666665}]}\n", "\u001b[34m\u001b[1mwandb\u001b[0m: \n", "\u001b[34m\u001b[1mwandb\u001b[0m: Run history:\n", "\u001b[34m\u001b[1mwandb\u001b[0m: eval/accuracy ▁▅█████\n", "\u001b[34m\u001b[1mwandb\u001b[0m: eval/loss █▄▁▁▁▁▁\n", "\u001b[34m\u001b[1mwandb\u001b[0m: eval/runtime ▁▇█▇█▇█\n", "\u001b[34m\u001b[1mwandb\u001b[0m: eval/samples_per_second █▂▁▂▁▂▁\n", "\u001b[34m\u001b[1mwandb\u001b[0m: eval/steps_per_second █▂▁▂▁▂▁\n", "\u001b[34m\u001b[1mwandb\u001b[0m: train/epoch ▁▁▂▂▄▄▅▅▇▇████\n", "\u001b[34m\u001b[1mwandb\u001b[0m: train/global_step ▁▁▂▂▄▄▅▅▇▇████\n", "\u001b[34m\u001b[1mwandb\u001b[0m: train/grad_norm █▂▁▃▂▁\n", "\u001b[34m\u001b[1mwandb\u001b[0m: train/learning_rate █▇▅▃▂▁\n", "\u001b[34m\u001b[1mwandb\u001b[0m: train/loss █▃▁▁▁▁\n", "\u001b[34m\u001b[1mwandb\u001b[0m: \n", "\u001b[34m\u001b[1mwandb\u001b[0m: Run summary:\n", 
"\u001b[34m\u001b[1mwandb\u001b[0m: eval/accuracy 0.77467\n", "\u001b[34m\u001b[1mwandb\u001b[0m: eval/loss 0.51301\n", "\u001b[34m\u001b[1mwandb\u001b[0m: eval/runtime 133.9784\n", "\u001b[34m\u001b[1mwandb\u001b[0m: eval/samples_per_second 3.732\n", "\u001b[34m\u001b[1mwandb\u001b[0m: eval/steps_per_second 3.732\n", "\u001b[34m\u001b[1mwandb\u001b[0m: total_flos 4.259090989881262e+17\n", "\u001b[34m\u001b[1mwandb\u001b[0m: train/epoch 5.99467\n", "\u001b[34m\u001b[1mwandb\u001b[0m: train/global_step 3372\n", "\u001b[34m\u001b[1mwandb\u001b[0m: train/grad_norm 0.18252\n", "\u001b[34m\u001b[1mwandb\u001b[0m: train/learning_rate 0.0\n", "\u001b[34m\u001b[1mwandb\u001b[0m: train/loss 0.5229\n", "\u001b[34m\u001b[1mwandb\u001b[0m: train_loss 0.8244\n", "\u001b[34m\u001b[1mwandb\u001b[0m: train_runtime 22945.1289\n", "\u001b[34m\u001b[1mwandb\u001b[0m: train_samples_per_second 1.177\n", "\u001b[34m\u001b[1mwandb\u001b[0m: train_steps_per_second 0.147\n", "\u001b[34m\u001b[1mwandb\u001b[0m: \n", "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run \u001b[33m/content/qwen2-7b\u001b[0m at: \u001b[34m\u001b[4mhttps://wandb.ai/inflaton-ai/huggingface/runs/ancw8jgs\u001b[0m\n", "\u001b[34m\u001b[1mwandb\u001b[0m: ⭐️ View project at: \u001b[34m\u001b[4mhttps://wandb.ai/inflaton-ai/huggingface\u001b[0m\n", "\u001b[34m\u001b[1mwandb\u001b[0m: Synced 5 W&B file(s), 0 media file(s), 0 artifact file(s) and 0 other file(s)\n", "\u001b[34m\u001b[1mwandb\u001b[0m: Find logs at: \u001b[35m\u001b[1m./wandb/run-20240715_143630-ancw8jgs/logs\u001b[0m\n", "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m The new W&B backend becomes opt-out in version 0.18.0; try it out with `wandb.require(\"core\")`! See https://wandb.me/wandb-core for more information.\n" ] } ], "source": [ "!llamafactory-cli train train_qwen2_7b.json" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "background_save": true }, "id": "PPHT4JDoIvGk", "outputId": "868b542e-3cf3-4f96-b3ff-944d48f66e9e" }, "outputs": [ { "data": { "application/vnd.google.colaboratory.intrinsic+json": { "type": "string" }, "text/plain": [ "'/content/drive/MyDrive/runs/qwen2-7b'" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import shutil\n", "shutil.move(\"/content/qwen2-7b\", \"/content/drive/MyDrive/runs\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 35 }, "id": "KQolpdAGpUqx", "outputId": "23443a95-ec97-4633-87d6-18d2f27d979e" }, "outputs": [ { "data": { "application/vnd.google.colaboratory.intrinsic+json": { "type": "string" }, "text/plain": [ "'/content/qwen2-7b'" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import shutil\n", "shutil.move(\"/content/drive/MyDrive/runs\", \"/content/qwen2-7b\", )" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "mzmNMevzVer3" }, "outputs": [], "source": [ "def evaluate_model_all_epochs(model_name, adapter_path_base, num_train_epochs, start_epoch=0, load_in_4bit=True, num_of_entries=-1):\n", " os.environ[\"MODEL_NAME\"] = model_name\n", " os.environ[\"LOAD_IN_4BIT\"] = \"true\" if load_in_4bit else \"false\"\n", " for i in range(start_epoch, num_train_epochs + 1):\n", " print(f\"Epoch {i}\")\n", " if i == 0:\n", " os.unsetenv(\"ADAPTER_NAME_OR_PATH\")\n", " else:\n", " adapter_path = f\"{adapter_path_base}/checkpoint-{562 * i}\"\n", " os.environ[\"ADAPTER_NAME_OR_PATH\"] = adapter_path\n", "\n", 
" !python llm_toolkit/eval_logical_reasoning.py {num_of_entries}" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "3THuVusvVtt8", "outputId": "2095b621-3aff-4215-f61d-0711acd42e63" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Epoch 0\n", "loading env vars from: /content/drive/.shortcut-targets-by-id/1E09lTnfbsjtTgQg65dQ3y9D2R6l8waxR/logical-reasoning/.env\n", "Adding /content/drive/.shortcut-targets-by-id/1E09lTnfbsjtTgQg65dQ3y9D2R6l8waxR/logical-reasoning to sys.path\n", "2024-07-16 03:59:05.588323: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n", "2024-07-16 03:59:05.639368: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n", "2024-07-16 03:59:05.639412: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n", "2024-07-16 03:59:05.640960: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n", "2024-07-16 03:59:05.648585: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", "To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", "2024-07-16 03:59:06.864846: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n", "loading /content/drive/.shortcut-targets-by-id/1E09lTnfbsjtTgQg65dQ3y9D2R6l8waxR/logical-reasoning/llm_toolkit/logical_reasoning_utils.py\n", "Qwen/Qwen2-7B None False datasets/mgtv results/mgtv-results_02_qwen2_7b_colab.csv\n", "(1) GPU = NVIDIA L4. Max memory = 22.168 GB.\n", "0.0 GB of memory reserved.\n", "loading model: Qwen/Qwen2-7B\n", "tokenizer_config.json: 100% 1.29k/1.29k [00:00<00:00, 8.65MB/s]\n", "vocab.json: 100% 2.78M/2.78M [00:00<00:00, 8.33MB/s]\n", "merges.txt: 100% 1.67M/1.67M [00:00<00:00, 6.20MB/s]\n", "tokenizer.json: 100% 7.03M/7.03M [00:00<00:00, 15.8MB/s]\n", "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n", "config.json: 100% 664/664 [00:00<00:00, 4.79MB/s]\n", "model.safetensors.index.json: 100% 27.8k/27.8k [00:00<00:00, 74.4MB/s]\n", "Downloading shards: 0% 0/4 [00:00system\n", "You are an expert in logical reasoning.<|im_end|>\n", "<|im_start|>user\n", "你是一个逻辑游戏的主持人。游戏规则如下:\n", "\n", "1. 参与者会得到一个谜题。\n", "2. 参与者可以通过提问来获取线索,尝试解开谜题。\n", "3. 对于每个问题,主持人将根据实际情况回答以下五个选项之一:是、不是、不重要、回答正确、问法错误。\n", "4. 回答中不能添加任何其它信息,也不能省略选项中的任何一个字。例如,不可以把“不是”省略成“不”。\n", "5. 
参与者需要根据回答来推理,并最终找出谜题的正确答案。\n", "\n", "请严格按照这些规则回答参与者提出的问题。\n", "\n", "谜题: 在远离城市喧嚣的海边小屋,一天清晨,邻居发现甄加索僵卧在沙滩上,已无生命迹象。现场没有发现任何打斗的迹象。请问甄加索的死因是什么?\n", "\n", "实际情况: 甄加索是一位热爱自然的画家,他每年都会来到这个海边小屋寻找灵感。在他生命的最后几天,他一直在创作一幅描绘海洋生物的画作。在画即将完成的前一天晚上,他骑着自行车外出,打算在海边观赏夜景。然而,他在沙滩上意外发现了一只搁浅的海豚,为了救助这只海豚,他耗费了极大的体力,最终成功将其送回海中。筋疲力尽的甄加索在沙滩上睡着了,由于他患有严重的心脏病,却未告知旁人,在寒冷的海风中,他的心脏停止了跳动。因此,警方在现场只发现了车轮痕迹和未完成的画作,而没有发现任何他杀的迹象。\n", "\n", "参与者提出的问题: 甄加索是自杀吗\n", "<|im_end|>\n", "<|im_start|>assistant\n", "不是<|endoftext|>\n", "--------------------------------------------------\n", "prompt: <|im_start|>system\n", "You are an expert in logical reasoning.<|im_end|>\n", "<|im_start|>user\n", "你是一个逻辑游戏的主持人。游戏规则如下:\n", "\n", "1. 参与者会得到一个谜题。\n", "2. 参与者可以通过提问来获取线索,尝试解开谜题。\n", "3. 对于每个问题,主持人将根据实际情况回答以下五个选项之一:是、不是、不重要、回答正确、问法错误。\n", "4. 回答中不能添加任何其它信息,也不能省略选项中的任何一个字。例如,不可以把“不是”省略成“不”。\n", "5. 参与者需要根据回答来推理,并最终找出谜题的正确答案。\n", "\n", "请严格按照这些规则回答参与者提出的问题。\n", "\n", "谜题: 在远离城市喧嚣的海边小屋,一天清晨,邻居发现甄加索僵卧在沙滩上,已无生命迹象。现场没有发现任何打斗的迹象。请问甄加索的死因是什么?\n", "\n", "实际情况: 甄加索是一位热爱自然的画家,他每年都会来到这个海边小屋寻找灵感。在他生命的最后几天,他一直在创作一幅描绘海洋生物的画作。在画即将完成的前一天晚上,他骑着自行车外出,打算在海边观赏夜景。然而,他在沙滩上意外发现了一只搁浅的海豚,为了救助这只海豚,他耗费了极大的体力,最终成功将其送回海中。筋疲力尽的甄加索在沙滩上睡着了,由于他患有严重的心脏病,却未告知旁人,在寒冷的海风中,他的心脏停止了跳动。因此,警方在现场只发现了车轮痕迹和未完成的画作,而没有发现任何他杀的迹象。\n", "\n", "参与者提出的问题: 甄加索是自杀吗\n", "<|im_end|>\n", "<|im_start|>assistant\n", "\n", "--------------------------------------------------\n", "text: 死者受伤了吗\n", "--------------------------------------------------\n", "label: 不是\n", "--------------------------------------------------\n", "answer: nan\n", "--------------------------------------------------\n", "title: 甄庄哭声\n", "--------------------------------------------------\n", "puzzle: 在一个安静的夜晚,小村庄的湖边突然传来了阵阵哭泣声。第二天早晨,村长甄锐发现湖边的石头上放着一顶破旧的帽子,但没有人知道这顶帽子是从哪里来的,哭泣声又是为何。请还原故事真相。\n", "--------------------------------------------------\n", "truth: 原来,这顶破旧的帽子属于一个小男孩,他小时候与爷爷在湖边生活。爷爷教他钓鱼、游泳,还告诉他湖中的海龟是他们的朋友。后来,小男孩随父母去了城市生活,但每年夏天都会回到村子探望爷爷。然而,去年夏天,爷爷因病去世,小男孩伤心欲绝。今年夏天,他回到村子,来到湖边,想起和爷爷的美好回忆,忍不住哭泣。他将爷爷的帽子放在湖边的石头上,希望能让爷爷的在天之灵得到安慰。那晚的哭泣声正是小男孩在祭莫他亲爱的爷爷。\n", "--------------------------------------------------\n", "train_text: <|im_start|>system\n", "You are an expert in logical reasoning.<|im_end|>\n", "<|im_start|>user\n", "你是一个逻辑游戏的主持人。游戏规则如下:\n", "\n", "1. 参与者会得到一个谜题。\n", "2. 参与者可以通过提问来获取线索,尝试解开谜题。\n", "3. 对于每个问题,主持人将根据实际情况回答以下五个选项之一:是、不是、不重要、回答正确、问法错误。\n", "4. 回答中不能添加任何其它信息,也不能省略选项中的任何一个字。例如,不可以把“不是”省略成“不”。\n", "5. 参与者需要根据回答来推理,并最终找出谜题的正确答案。\n", "\n", "请严格按照这些规则回答参与者提出的问题。\n", "\n", "谜题: 在一个安静的夜晚,小村庄的湖边突然传来了阵阵哭泣声。第二天早晨,村长甄锐发现湖边的石头上放着一顶破旧的帽子,但没有人知道这顶帽子是从哪里来的,哭泣声又是为何。请还原故事真相。\n", "\n", "实际情况: 原来,这顶破旧的帽子属于一个小男孩,他小时候与爷爷在湖边生活。爷爷教他钓鱼、游泳,还告诉他湖中的海龟是他们的朋友。后来,小男孩随父母去了城市生活,但每年夏天都会回到村子探望爷爷。然而,去年夏天,爷爷因病去世,小男孩伤心欲绝。今年夏天,他回到村子,来到湖边,想起和爷爷的美好回忆,忍不住哭泣。他将爷爷的帽子放在湖边的石头上,希望能让爷爷的在天之灵得到安慰。那晚的哭泣声正是小男孩在祭莫他亲爱的爷爷。\n", "\n", "参与者提出的问题: 死者受伤了吗\n", "<|im_end|>\n", "<|im_start|>assistant\n", "不是<|endoftext|>\n", "--------------------------------------------------\n", "prompt: <|im_start|>system\n", "You are an expert in logical reasoning.<|im_end|>\n", "<|im_start|>user\n", "你是一个逻辑游戏的主持人。游戏规则如下:\n", "\n", "1. 参与者会得到一个谜题。\n", "2. 参与者可以通过提问来获取线索,尝试解开谜题。\n", "3. 对于每个问题,主持人将根据实际情况回答以下五个选项之一:是、不是、不重要、回答正确、问法错误。\n", "4. 回答中不能添加任何其它信息,也不能省略选项中的任何一个字。例如,不可以把“不是”省略成“不”。\n", "5. 
参与者需要根据回答来推理,并最终找出谜题的正确答案。\n", "\n", "请严格按照这些规则回答参与者提出的问题。\n", "\n", "谜题: 在一个安静的夜晚,小村庄的湖边突然传来了阵阵哭泣声。第二天早晨,村长甄锐发现湖边的石头上放着一顶破旧的帽子,但没有人知道这顶帽子是从哪里来的,哭泣声又是为何。请还原故事真相。\n", "\n", "实际情况: 原来,这顶破旧的帽子属于一个小男孩,他小时候与爷爷在湖边生活。爷爷教他钓鱼、游泳,还告诉他湖中的海龟是他们的朋友。后来,小男孩随父母去了城市生活,但每年夏天都会回到村子探望爷爷。然而,去年夏天,爷爷因病去世,小男孩伤心欲绝。今年夏天,他回到村子,来到湖边,想起和爷爷的美好回忆,忍不住哭泣。他将爷爷的帽子放在湖边的石头上,希望能让爷爷的在天之灵得到安慰。那晚的哭泣声正是小男孩在祭莫他亲爱的爷爷。\n", "\n", "参与者提出的问题: 死者受伤了吗\n", "<|im_end|>\n", "<|im_start|>assistant\n", "\n", "Evaluating model: Qwen/Qwen2-7B\n", " 0% 0/3000 [00:00\n", " predictions = eval_model(model, tokenizer, datasets[\"test\"])\n", " File \"/content/drive/.shortcut-targets-by-id/1E09lTnfbsjtTgQg65dQ3y9D2R6l8waxR/logical-reasoning/llm_toolkit/logical_reasoning_utils.py\", line 215, in eval_model\n", " outputs = model.generate(**inputs, max_new_tokens=4096, use_cache=False)\n", " File \"/usr/local/lib/python3.10/dist-packages/torch/utils/_contextlib.py\", line 115, in decorate_context\n", " return func(*args, **kwargs)\n", " File \"/usr/local/lib/python3.10/dist-packages/transformers/generation/utils.py\", line 1758, in generate\n", " result = self._sample(\n", " File \"/usr/local/lib/python3.10/dist-packages/transformers/generation/utils.py\", line 2397, in _sample\n", " outputs = self(\n", " File \"/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py\", line 1532, in _wrapped_call_impl\n", " return self._call_impl(*args, **kwargs)\n", " File \"/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py\", line 1541, in _call_impl\n", " return forward_call(*args, **kwargs)\n", " File \"/usr/local/lib/python3.10/dist-packages/transformers/models/qwen2/modeling_qwen2.py\", line 1163, in forward\n", " logits = logits.float()\n", "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 2.00 GiB. GPU \n", "Epoch 1\n", "loading env vars from: /content/drive/.shortcut-targets-by-id/1E09lTnfbsjtTgQg65dQ3y9D2R6l8waxR/logical-reasoning/.env\n", "Adding /content/drive/.shortcut-targets-by-id/1E09lTnfbsjtTgQg65dQ3y9D2R6l8waxR/logical-reasoning to sys.path\n", "2024-07-16 04:36:42.030763: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. 
To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n", "2024-07-16 04:36:42.082994: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n", "2024-07-16 04:36:42.083052: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n", "2024-07-16 04:36:42.084468: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n", "2024-07-16 04:36:42.092383: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", "To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", "2024-07-16 04:36:43.353969: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n", "loading /content/drive/.shortcut-targets-by-id/1E09lTnfbsjtTgQg65dQ3y9D2R6l8waxR/logical-reasoning/llm_toolkit/logical_reasoning_utils.py\n", "Qwen/Qwen2-7B /content/qwen2-7b/qwen2-7b/checkpoint-562 False datasets/mgtv results/mgtv-results_02_qwen2_7b_colab.csv\n", "(1) GPU = NVIDIA L4. Max memory = 22.168 GB.\n", "0.0 GB of memory reserved.\n", "loading model: Qwen/Qwen2-7B\n", "[INFO|tokenization_utils_base.py:2108] 2024-07-16 04:36:49,648 >> loading file vocab.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2-7B/snapshots/453ed1575b739b5b03ce3758b23befdb0967f40e/vocab.json\n", "[INFO|tokenization_utils_base.py:2108] 2024-07-16 04:36:49,648 >> loading file merges.txt from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2-7B/snapshots/453ed1575b739b5b03ce3758b23befdb0967f40e/merges.txt\n", "[INFO|tokenization_utils_base.py:2108] 2024-07-16 04:36:49,648 >> loading file tokenizer.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2-7B/snapshots/453ed1575b739b5b03ce3758b23befdb0967f40e/tokenizer.json\n", "[INFO|tokenization_utils_base.py:2108] 2024-07-16 04:36:49,648 >> loading file added_tokens.json from cache at None\n", "[INFO|tokenization_utils_base.py:2108] 2024-07-16 04:36:49,648 >> loading file special_tokens_map.json from cache at None\n", "[INFO|tokenization_utils_base.py:2108] 2024-07-16 04:36:49,648 >> loading file tokenizer_config.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2-7B/snapshots/453ed1575b739b5b03ce3758b23befdb0967f40e/tokenizer_config.json\n", "[WARNING|logging.py:314] 2024-07-16 04:36:49,914 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n", "07/16/2024 04:36:49 - INFO - llamafactory.data.template - Replace eos token: <|im_end|>\n", "07/16/2024 04:36:49 - INFO - llamafactory.data.template - Add <|im_start|> to stop words.\n", "/usr/local/lib/python3.10/dist-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. 
If you want to force a new download, use `force_download=True`.\n", " warnings.warn(\n", "[INFO|configuration_utils.py:733] 2024-07-16 04:36:50,018 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2-7B/snapshots/453ed1575b739b5b03ce3758b23befdb0967f40e/config.json\n", "[INFO|configuration_utils.py:796] 2024-07-16 04:36:50,019 >> Model config Qwen2Config {\n", " \"_name_or_path\": \"Qwen/Qwen2-7B\",\n", " \"architectures\": [\n", " \"Qwen2ForCausalLM\"\n", " ],\n", " \"attention_dropout\": 0.0,\n", " \"bos_token_id\": 151643,\n", " \"eos_token_id\": 151643,\n", " \"hidden_act\": \"silu\",\n", " \"hidden_size\": 3584,\n", " \"initializer_range\": 0.02,\n", " \"intermediate_size\": 18944,\n", " \"max_position_embeddings\": 131072,\n", " \"max_window_layers\": 28,\n", " \"model_type\": \"qwen2\",\n", " \"num_attention_heads\": 28,\n", " \"num_hidden_layers\": 28,\n", " \"num_key_value_heads\": 4,\n", " \"rms_norm_eps\": 1e-06,\n", " \"rope_theta\": 1000000.0,\n", " \"sliding_window\": 131072,\n", " \"tie_word_embeddings\": false,\n", " \"torch_dtype\": \"bfloat16\",\n", " \"transformers_version\": \"4.41.2\",\n", " \"use_cache\": true,\n", " \"use_sliding_window\": false,\n", " \"vocab_size\": 152064\n", "}\n", "\n", "07/16/2024 04:36:50 - INFO - llamafactory.model.patcher - Using KV cache for faster generation.\n", "[INFO|modeling_utils.py:3474] 2024-07-16 04:36:50,051 >> loading weights file model.safetensors from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2-7B/snapshots/453ed1575b739b5b03ce3758b23befdb0967f40e/model.safetensors.index.json\n", "[INFO|modeling_utils.py:1519] 2024-07-16 04:36:50,054 >> Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16.\n", "[INFO|configuration_utils.py:962] 2024-07-16 04:36:50,055 >> Generate config GenerationConfig {\n", " \"bos_token_id\": 151643,\n", " \"eos_token_id\": 151643\n", "}\n", "\n", "Loading checkpoint shards: 100% 4/4 [00:06<00:00, 1.66s/it]\n", "[INFO|modeling_utils.py:4280] 2024-07-16 04:36:59,526 >> All model checkpoint weights were used when initializing Qwen2ForCausalLM.\n", "\n", "[INFO|modeling_utils.py:4288] 2024-07-16 04:36:59,526 >> All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at Qwen/Qwen2-7B.\n", "If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training.\n", "[INFO|configuration_utils.py:917] 2024-07-16 04:36:59,673 >> loading configuration file generation_config.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2-7B/snapshots/453ed1575b739b5b03ce3758b23befdb0967f40e/generation_config.json\n", "[INFO|configuration_utils.py:962] 2024-07-16 04:36:59,673 >> Generate config GenerationConfig {\n", " \"bos_token_id\": 151643,\n", " \"eos_token_id\": 151643,\n", " \"max_new_tokens\": 2048\n", "}\n", "\n", "07/16/2024 04:37:00 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.\n", "07/16/2024 04:37:01 - INFO - llamafactory.model.adapter - Merged 1 adapter(s).\n", "07/16/2024 04:37:01 - INFO - llamafactory.model.adapter - Loaded adapter(s): /content/qwen2-7b/qwen2-7b/checkpoint-562\n", "07/16/2024 04:37:01 - INFO - llamafactory.model.loader - all params: 7,615,616,512\n", "(2) GPU = NVIDIA L4. 
Max memory = 22.168 GB.\n", "16.521 GB of memory reserved.\n", "loading train/test data files\n", "Map: 100% 25000/25000 [00:01<00:00, 22266.31 examples/s]\n", "Map: 100% 3000/3000 [00:00<00:00, 22229.64 examples/s]\n", "DatasetDict({\n", " train: Dataset({\n", " features: ['text', 'label', 'answer', 'title', 'puzzle', 'truth', 'train_text', 'prompt'],\n", " num_rows: 25000\n", " })\n", " test: Dataset({\n", " features: ['text', 'label', 'answer', 'title', 'puzzle', 'truth', 'train_text', 'prompt'],\n", " num_rows: 3000\n", " })\n", "})\n", "--------------------------------------------------\n", "text: 甄加索是自杀吗\n", "--------------------------------------------------\n", "label: 不是\n", "--------------------------------------------------\n", "answer: nan\n", "--------------------------------------------------\n", "title: 海岸之谜\n", "--------------------------------------------------\n", "puzzle: 在远离城市喧嚣的海边小屋,一天清晨,邻居发现甄加索僵卧在沙滩上,已无生命迹象。现场没有发现任何打斗的迹象。请问甄加索的死因是什么?\n", "--------------------------------------------------\n", "truth: 甄加索是一位热爱自然的画家,他每年都会来到这个海边小屋寻找灵感。在他生命的最后几天,他一直在创作一幅描绘海洋生物的画作。在画即将完成的前一天晚上,他骑着自行车外出,打算在海边观赏夜景。然而,他在沙滩上意外发现了一只搁浅的海豚,为了救助这只海豚,他耗费了极大的体力,最终成功将其送回海中。筋疲力尽的甄加索在沙滩上睡着了,由于他患有严重的心脏病,却未告知旁人,在寒冷的海风中,他的心脏停止了跳动。因此,警方在现场只发现了车轮痕迹和未完成的画作,而没有发现任何他杀的迹象。\n", "--------------------------------------------------\n", "train_text: <|im_start|>system\n", "You are an expert in logical reasoning.<|im_end|>\n", "<|im_start|>user\n", "你是一个逻辑游戏的主持人。游戏规则如下:\n", "\n", "1. 参与者会得到一个谜题。\n", "2. 参与者可以通过提问来获取线索,尝试解开谜题。\n", "3. 对于每个问题,主持人将根据实际情况回答以下五个选项之一:是、不是、不重要、回答正确、问法错误。\n", "4. 回答中不能添加任何其它信息,也不能省略选项中的任何一个字。例如,不可以把“不是”省略成“不”。\n", "5. 参与者需要根据回答来推理,并最终找出谜题的正确答案。\n", "\n", "请严格按照这些规则回答参与者提出的问题。\n", "\n", "谜题: 在远离城市喧嚣的海边小屋,一天清晨,邻居发现甄加索僵卧在沙滩上,已无生命迹象。现场没有发现任何打斗的迹象。请问甄加索的死因是什么?\n", "\n", "实际情况: 甄加索是一位热爱自然的画家,他每年都会来到这个海边小屋寻找灵感。在他生命的最后几天,他一直在创作一幅描绘海洋生物的画作。在画即将完成的前一天晚上,他骑着自行车外出,打算在海边观赏夜景。然而,他在沙滩上意外发现了一只搁浅的海豚,为了救助这只海豚,他耗费了极大的体力,最终成功将其送回海中。筋疲力尽的甄加索在沙滩上睡着了,由于他患有严重的心脏病,却未告知旁人,在寒冷的海风中,他的心脏停止了跳动。因此,警方在现场只发现了车轮痕迹和未完成的画作,而没有发现任何他杀的迹象。\n", "\n", "参与者提出的问题: 甄加索是自杀吗\n", "<|im_end|>\n", "<|im_start|>assistant\n", "不是<|im_end|>\n", "--------------------------------------------------\n", "prompt: <|im_start|>system\n", "You are an expert in logical reasoning.<|im_end|>\n", "<|im_start|>user\n", "你是一个逻辑游戏的主持人。游戏规则如下:\n", "\n", "1. 参与者会得到一个谜题。\n", "2. 参与者可以通过提问来获取线索,尝试解开谜题。\n", "3. 对于每个问题,主持人将根据实际情况回答以下五个选项之一:是、不是、不重要、回答正确、问法错误。\n", "4. 回答中不能添加任何其它信息,也不能省略选项中的任何一个字。例如,不可以把“不是”省略成“不”。\n", "5. 
参与者需要根据回答来推理,并最终找出谜题的正确答案。\n", "\n", "请严格按照这些规则回答参与者提出的问题。\n", "\n", "谜题: 在远离城市喧嚣的海边小屋,一天清晨,邻居发现甄加索僵卧在沙滩上,已无生命迹象。现场没有发现任何打斗的迹象。请问甄加索的死因是什么?\n", "\n", "实际情况: 甄加索是一位热爱自然的画家,他每年都会来到这个海边小屋寻找灵感。在他生命的最后几天,他一直在创作一幅描绘海洋生物的画作。在画即将完成的前一天晚上,他骑着自行车外出,打算在海边观赏夜景。然而,他在沙滩上意外发现了一只搁浅的海豚,为了救助这只海豚,他耗费了极大的体力,最终成功将其送回海中。筋疲力尽的甄加索在沙滩上睡着了,由于他患有严重的心脏病,却未告知旁人,在寒冷的海风中,他的心脏停止了跳动。因此,警方在现场只发现了车轮痕迹和未完成的画作,而没有发现任何他杀的迹象。\n", "\n", "参与者提出的问题: 甄加索是自杀吗\n", "<|im_end|>\n", "<|im_start|>assistant\n", "\n", "--------------------------------------------------\n", "text: 死者受伤了吗\n", "--------------------------------------------------\n", "label: 不是\n", "--------------------------------------------------\n", "answer: nan\n", "--------------------------------------------------\n", "title: 甄庄哭声\n", "--------------------------------------------------\n", "puzzle: 在一个安静的夜晚,小村庄的湖边突然传来了阵阵哭泣声。第二天早晨,村长甄锐发现湖边的石头上放着一顶破旧的帽子,但没有人知道这顶帽子是从哪里来的,哭泣声又是为何。请还原故事真相。\n", "--------------------------------------------------\n", "truth: 原来,这顶破旧的帽子属于一个小男孩,他小时候与爷爷在湖边生活。爷爷教他钓鱼、游泳,还告诉他湖中的海龟是他们的朋友。后来,小男孩随父母去了城市生活,但每年夏天都会回到村子探望爷爷。然而,去年夏天,爷爷因病去世,小男孩伤心欲绝。今年夏天,他回到村子,来到湖边,想起和爷爷的美好回忆,忍不住哭泣。他将爷爷的帽子放在湖边的石头上,希望能让爷爷的在天之灵得到安慰。那晚的哭泣声正是小男孩在祭莫他亲爱的爷爷。\n", "--------------------------------------------------\n", "train_text: <|im_start|>system\n", "You are an expert in logical reasoning.<|im_end|>\n", "<|im_start|>user\n", "你是一个逻辑游戏的主持人。游戏规则如下:\n", "\n", "1. 参与者会得到一个谜题。\n", "2. 参与者可以通过提问来获取线索,尝试解开谜题。\n", "3. 对于每个问题,主持人将根据实际情况回答以下五个选项之一:是、不是、不重要、回答正确、问法错误。\n", "4. 回答中不能添加任何其它信息,也不能省略选项中的任何一个字。例如,不可以把“不是”省略成“不”。\n", "5. 参与者需要根据回答来推理,并最终找出谜题的正确答案。\n", "\n", "请严格按照这些规则回答参与者提出的问题。\n", "\n", "谜题: 在一个安静的夜晚,小村庄的湖边突然传来了阵阵哭泣声。第二天早晨,村长甄锐发现湖边的石头上放着一顶破旧的帽子,但没有人知道这顶帽子是从哪里来的,哭泣声又是为何。请还原故事真相。\n", "\n", "实际情况: 原来,这顶破旧的帽子属于一个小男孩,他小时候与爷爷在湖边生活。爷爷教他钓鱼、游泳,还告诉他湖中的海龟是他们的朋友。后来,小男孩随父母去了城市生活,但每年夏天都会回到村子探望爷爷。然而,去年夏天,爷爷因病去世,小男孩伤心欲绝。今年夏天,他回到村子,来到湖边,想起和爷爷的美好回忆,忍不住哭泣。他将爷爷的帽子放在湖边的石头上,希望能让爷爷的在天之灵得到安慰。那晚的哭泣声正是小男孩在祭莫他亲爱的爷爷。\n", "\n", "参与者提出的问题: 死者受伤了吗\n", "<|im_end|>\n", "<|im_start|>assistant\n", "不是<|im_end|>\n", "--------------------------------------------------\n", "prompt: <|im_start|>system\n", "You are an expert in logical reasoning.<|im_end|>\n", "<|im_start|>user\n", "你是一个逻辑游戏的主持人。游戏规则如下:\n", "\n", "1. 参与者会得到一个谜题。\n", "2. 参与者可以通过提问来获取线索,尝试解开谜题。\n", "3. 对于每个问题,主持人将根据实际情况回答以下五个选项之一:是、不是、不重要、回答正确、问法错误。\n", "4. 回答中不能添加任何其它信息,也不能省略选项中的任何一个字。例如,不可以把“不是”省略成“不”。\n", "5. 
参与者需要根据回答来推理,并最终找出谜题的正确答案。\n", "\n", "请严格按照这些规则回答参与者提出的问题。\n", "\n", "谜题: 在一个安静的夜晚,小村庄的湖边突然传来了阵阵哭泣声。第二天早晨,村长甄锐发现湖边的石头上放着一顶破旧的帽子,但没有人知道这顶帽子是从哪里来的,哭泣声又是为何。请还原故事真相。\n", "\n", "实际情况: 原来,这顶破旧的帽子属于一个小男孩,他小时候与爷爷在湖边生活。爷爷教他钓鱼、游泳,还告诉他湖中的海龟是他们的朋友。后来,小男孩随父母去了城市生活,但每年夏天都会回到村子探望爷爷。然而,去年夏天,爷爷因病去世,小男孩伤心欲绝。今年夏天,他回到村子,来到湖边,想起和爷爷的美好回忆,忍不住哭泣。他将爷爷的帽子放在湖边的石头上,希望能让爷爷的在天之灵得到安慰。那晚的哭泣声正是小男孩在祭莫他亲爱的爷爷。\n", "\n", "参与者提出的问题: 死者受伤了吗\n", "<|im_end|>\n", "<|im_start|>assistant\n", "\n", "Evaluating model: Qwen/Qwen2-7B\n", " 0% 0/3000 [00:00> Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.\n", " 0% 0/3000 [30:43\n", " predictions = eval_model(model, tokenizer, datasets[\"test\"])\n", " File \"/content/drive/.shortcut-targets-by-id/1E09lTnfbsjtTgQg65dQ3y9D2R6l8waxR/logical-reasoning/llm_toolkit/logical_reasoning_utils.py\", line 215, in eval_model\n", " outputs = model.generate(**inputs, max_new_tokens=4096, use_cache=False)\n", " File \"/usr/local/lib/python3.10/dist-packages/torch/utils/_contextlib.py\", line 115, in decorate_context\n", " return func(*args, **kwargs)\n", " File \"/usr/local/lib/python3.10/dist-packages/transformers/generation/utils.py\", line 1758, in generate\n", " result = self._sample(\n", " File \"/usr/local/lib/python3.10/dist-packages/transformers/generation/utils.py\", line 2397, in _sample\n", " outputs = self(\n", " File \"/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py\", line 1532, in _wrapped_call_impl\n", " return self._call_impl(*args, **kwargs)\n", " File \"/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py\", line 1541, in _call_impl\n", " return forward_call(*args, **kwargs)\n", " File \"/usr/local/lib/python3.10/dist-packages/transformers/models/qwen2/modeling_qwen2.py\", line 1163, in forward\n", " logits = logits.float()\n", "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 1.92 GiB. GPU \n", "Epoch 2\n", "loading env vars from: /content/drive/.shortcut-targets-by-id/1E09lTnfbsjtTgQg65dQ3y9D2R6l8waxR/logical-reasoning/.env\n", "Adding /content/drive/.shortcut-targets-by-id/1E09lTnfbsjtTgQg65dQ3y9D2R6l8waxR/logical-reasoning to sys.path\n", "2024-07-16 05:07:51.574401: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. 
To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n", "2024-07-16 05:07:51.624732: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n", "2024-07-16 05:07:51.624785: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n", "2024-07-16 05:07:51.626182: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n", "2024-07-16 05:07:51.633853: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", "To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", "2024-07-16 05:07:52.903770: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n", "loading /content/drive/.shortcut-targets-by-id/1E09lTnfbsjtTgQg65dQ3y9D2R6l8waxR/logical-reasoning/llm_toolkit/logical_reasoning_utils.py\n", "Qwen/Qwen2-7B /content/qwen2-7b/qwen2-7b/checkpoint-1124 False datasets/mgtv results/mgtv-results_02_qwen2_7b_colab.csv\n", "(1) GPU = NVIDIA L4. Max memory = 22.168 GB.\n", "0.0 GB of memory reserved.\n", "loading model: Qwen/Qwen2-7B\n", "[INFO|tokenization_utils_base.py:2108] 2024-07-16 05:07:59,358 >> loading file vocab.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2-7B/snapshots/453ed1575b739b5b03ce3758b23befdb0967f40e/vocab.json\n", "[INFO|tokenization_utils_base.py:2108] 2024-07-16 05:07:59,358 >> loading file merges.txt from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2-7B/snapshots/453ed1575b739b5b03ce3758b23befdb0967f40e/merges.txt\n", "[INFO|tokenization_utils_base.py:2108] 2024-07-16 05:07:59,358 >> loading file tokenizer.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2-7B/snapshots/453ed1575b739b5b03ce3758b23befdb0967f40e/tokenizer.json\n", "[INFO|tokenization_utils_base.py:2108] 2024-07-16 05:07:59,358 >> loading file added_tokens.json from cache at None\n", "[INFO|tokenization_utils_base.py:2108] 2024-07-16 05:07:59,358 >> loading file special_tokens_map.json from cache at None\n", "[INFO|tokenization_utils_base.py:2108] 2024-07-16 05:07:59,358 >> loading file tokenizer_config.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2-7B/snapshots/453ed1575b739b5b03ce3758b23befdb0967f40e/tokenizer_config.json\n", "[WARNING|logging.py:314] 2024-07-16 05:07:59,635 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n", "07/16/2024 05:07:59 - INFO - llamafactory.data.template - Replace eos token: <|im_end|>\n", "07/16/2024 05:07:59 - INFO - llamafactory.data.template - Add <|im_start|> to stop words.\n", "/usr/local/lib/python3.10/dist-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. 
If you want to force a new download, use `force_download=True`.\n", " warnings.warn(\n", "[INFO|configuration_utils.py:733] 2024-07-16 05:07:59,725 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2-7B/snapshots/453ed1575b739b5b03ce3758b23befdb0967f40e/config.json\n", "[INFO|configuration_utils.py:796] 2024-07-16 05:07:59,727 >> Model config Qwen2Config {\n", " \"_name_or_path\": \"Qwen/Qwen2-7B\",\n", " \"architectures\": [\n", " \"Qwen2ForCausalLM\"\n", " ],\n", " \"attention_dropout\": 0.0,\n", " \"bos_token_id\": 151643,\n", " \"eos_token_id\": 151643,\n", " \"hidden_act\": \"silu\",\n", " \"hidden_size\": 3584,\n", " \"initializer_range\": 0.02,\n", " \"intermediate_size\": 18944,\n", " \"max_position_embeddings\": 131072,\n", " \"max_window_layers\": 28,\n", " \"model_type\": \"qwen2\",\n", " \"num_attention_heads\": 28,\n", " \"num_hidden_layers\": 28,\n", " \"num_key_value_heads\": 4,\n", " \"rms_norm_eps\": 1e-06,\n", " \"rope_theta\": 1000000.0,\n", " \"sliding_window\": 131072,\n", " \"tie_word_embeddings\": false,\n", " \"torch_dtype\": \"bfloat16\",\n", " \"transformers_version\": \"4.41.2\",\n", " \"use_cache\": true,\n", " \"use_sliding_window\": false,\n", " \"vocab_size\": 152064\n", "}\n", "\n", "07/16/2024 05:07:59 - INFO - llamafactory.model.patcher - Using KV cache for faster generation.\n", "[INFO|modeling_utils.py:3474] 2024-07-16 05:07:59,758 >> loading weights file model.safetensors from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2-7B/snapshots/453ed1575b739b5b03ce3758b23befdb0967f40e/model.safetensors.index.json\n", "[INFO|modeling_utils.py:1519] 2024-07-16 05:07:59,761 >> Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16.\n", "[INFO|configuration_utils.py:962] 2024-07-16 05:07:59,762 >> Generate config GenerationConfig {\n", " \"bos_token_id\": 151643,\n", " \"eos_token_id\": 151643\n", "}\n", "\n", "Loading checkpoint shards: 100% 4/4 [00:05<00:00, 1.44s/it]\n", "[INFO|modeling_utils.py:4280] 2024-07-16 05:08:08,371 >> All model checkpoint weights were used when initializing Qwen2ForCausalLM.\n", "\n", "[INFO|modeling_utils.py:4288] 2024-07-16 05:08:08,371 >> All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at Qwen/Qwen2-7B.\n", "If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training.\n", "[INFO|configuration_utils.py:917] 2024-07-16 05:08:08,465 >> loading configuration file generation_config.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2-7B/snapshots/453ed1575b739b5b03ce3758b23befdb0967f40e/generation_config.json\n", "[INFO|configuration_utils.py:962] 2024-07-16 05:08:08,465 >> Generate config GenerationConfig {\n", " \"bos_token_id\": 151643,\n", " \"eos_token_id\": 151643,\n", " \"max_new_tokens\": 2048\n", "}\n", "\n", "07/16/2024 05:08:09 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.\n", "07/16/2024 05:08:09 - INFO - llamafactory.model.adapter - Merged 1 adapter(s).\n", "07/16/2024 05:08:09 - INFO - llamafactory.model.adapter - Loaded adapter(s): /content/qwen2-7b/qwen2-7b/checkpoint-1124\n", "07/16/2024 05:08:09 - INFO - llamafactory.model.loader - all params: 7,615,616,512\n", "(2) GPU = NVIDIA L4. 
Max memory = 22.168 GB.\n", "16.521 GB of memory reserved.\n", "loading train/test data files\n", "DatasetDict({\n", " train: Dataset({\n", " features: ['text', 'label', 'answer', 'title', 'puzzle', 'truth', 'train_text', 'prompt'],\n", " num_rows: 25000\n", " })\n", " test: Dataset({\n", " features: ['text', 'label', 'answer', 'title', 'puzzle', 'truth', 'train_text', 'prompt'],\n", " num_rows: 3000\n", " })\n", "})\n", "--------------------------------------------------\n", "text: 甄加索是自杀吗\n", "--------------------------------------------------\n", "label: 不是\n", "--------------------------------------------------\n", "answer: nan\n", "--------------------------------------------------\n", "title: 海岸之谜\n", "--------------------------------------------------\n", "puzzle: 在远离城市喧嚣的海边小屋,一天清晨,邻居发现甄加索僵卧在沙滩上,已无生命迹象。现场没有发现任何打斗的迹象。请问甄加索的死因是什么?\n", "--------------------------------------------------\n", "truth: 甄加索是一位热爱自然的画家,他每年都会来到这个海边小屋寻找灵感。在他生命的最后几天,他一直在创作一幅描绘海洋生物的画作。在画即将完成的前一天晚上,他骑着自行车外出,打算在海边观赏夜景。然而,他在沙滩上意外发现了一只搁浅的海豚,为了救助这只海豚,他耗费了极大的体力,最终成功将其送回海中。筋疲力尽的甄加索在沙滩上睡着了,由于他患有严重的心脏病,却未告知旁人,在寒冷的海风中,他的心脏停止了跳动。因此,警方在现场只发现了车轮痕迹和未完成的画作,而没有发现任何他杀的迹象。\n", "--------------------------------------------------\n", "train_text: <|im_start|>system\n", "You are an expert in logical reasoning.<|im_end|>\n", "<|im_start|>user\n", "你是一个逻辑游戏的主持人。游戏规则如下:\n", "\n", "1. 参与者会得到一个谜题。\n", "2. 参与者可以通过提问来获取线索,尝试解开谜题。\n", "3. 对于每个问题,主持人将根据实际情况回答以下五个选项之一:是、不是、不重要、回答正确、问法错误。\n", "4. 回答中不能添加任何其它信息,也不能省略选项中的任何一个字。例如,不可以把“不是”省略成“不”。\n", "5. 参与者需要根据回答来推理,并最终找出谜题的正确答案。\n", "\n", "请严格按照这些规则回答参与者提出的问题。\n", "\n", "谜题: 在远离城市喧嚣的海边小屋,一天清晨,邻居发现甄加索僵卧在沙滩上,已无生命迹象。现场没有发现任何打斗的迹象。请问甄加索的死因是什么?\n", "\n", "实际情况: 甄加索是一位热爱自然的画家,他每年都会来到这个海边小屋寻找灵感。在他生命的最后几天,他一直在创作一幅描绘海洋生物的画作。在画即将完成的前一天晚上,他骑着自行车外出,打算在海边观赏夜景。然而,他在沙滩上意外发现了一只搁浅的海豚,为了救助这只海豚,他耗费了极大的体力,最终成功将其送回海中。筋疲力尽的甄加索在沙滩上睡着了,由于他患有严重的心脏病,却未告知旁人,在寒冷的海风中,他的心脏停止了跳动。因此,警方在现场只发现了车轮痕迹和未完成的画作,而没有发现任何他杀的迹象。\n", "\n", "参与者提出的问题: 甄加索是自杀吗\n", "<|im_end|>\n", "<|im_start|>assistant\n", "不是<|im_end|>\n", "--------------------------------------------------\n", "prompt: <|im_start|>system\n", "You are an expert in logical reasoning.<|im_end|>\n", "<|im_start|>user\n", "你是一个逻辑游戏的主持人。游戏规则如下:\n", "\n", "1. 参与者会得到一个谜题。\n", "2. 参与者可以通过提问来获取线索,尝试解开谜题。\n", "3. 对于每个问题,主持人将根据实际情况回答以下五个选项之一:是、不是、不重要、回答正确、问法错误。\n", "4. 回答中不能添加任何其它信息,也不能省略选项中的任何一个字。例如,不可以把“不是”省略成“不”。\n", "5. 
参与者需要根据回答来推理,并最终找出谜题的正确答案。\n", "\n", "请严格按照这些规则回答参与者提出的问题。\n", "\n", "谜题: 在远离城市喧嚣的海边小屋,一天清晨,邻居发现甄加索僵卧在沙滩上,已无生命迹象。现场没有发现任何打斗的迹象。请问甄加索的死因是什么?\n", "\n", "实际情况: 甄加索是一位热爱自然的画家,他每年都会来到这个海边小屋寻找灵感。在他生命的最后几天,他一直在创作一幅描绘海洋生物的画作。在画即将完成的前一天晚上,他骑着自行车外出,打算在海边观赏夜景。然而,他在沙滩上意外发现了一只搁浅的海豚,为了救助这只海豚,他耗费了极大的体力,最终成功将其送回海中。筋疲力尽的甄加索在沙滩上睡着了,由于他患有严重的心脏病,却未告知旁人,在寒冷的海风中,他的心脏停止了跳动。因此,警方在现场只发现了车轮痕迹和未完成的画作,而没有发现任何他杀的迹象。\n", "\n", "参与者提出的问题: 甄加索是自杀吗\n", "<|im_end|>\n", "<|im_start|>assistant\n", "\n", "--------------------------------------------------\n", "text: 死者受伤了吗\n", "--------------------------------------------------\n", "label: 不是\n", "--------------------------------------------------\n", "answer: nan\n", "--------------------------------------------------\n", "title: 甄庄哭声\n", "--------------------------------------------------\n", "puzzle: 在一个安静的夜晚,小村庄的湖边突然传来了阵阵哭泣声。第二天早晨,村长甄锐发现湖边的石头上放着一顶破旧的帽子,但没有人知道这顶帽子是从哪里来的,哭泣声又是为何。请还原故事真相。\n", "--------------------------------------------------\n", "truth: 原来,这顶破旧的帽子属于一个小男孩,他小时候与爷爷在湖边生活。爷爷教他钓鱼、游泳,还告诉他湖中的海龟是他们的朋友。后来,小男孩随父母去了城市生活,但每年夏天都会回到村子探望爷爷。然而,去年夏天,爷爷因病去世,小男孩伤心欲绝。今年夏天,他回到村子,来到湖边,想起和爷爷的美好回忆,忍不住哭泣。他将爷爷的帽子放在湖边的石头上,希望能让爷爷的在天之灵得到安慰。那晚的哭泣声正是小男孩在祭莫他亲爱的爷爷。\n", "--------------------------------------------------\n", "train_text: <|im_start|>system\n", "You are an expert in logical reasoning.<|im_end|>\n", "<|im_start|>user\n", "你是一个逻辑游戏的主持人。游戏规则如下:\n", "\n", "1. 参与者会得到一个谜题。\n", "2. 参与者可以通过提问来获取线索,尝试解开谜题。\n", "3. 对于每个问题,主持人将根据实际情况回答以下五个选项之一:是、不是、不重要、回答正确、问法错误。\n", "4. 回答中不能添加任何其它信息,也不能省略选项中的任何一个字。例如,不可以把“不是”省略成“不”。\n", "5. 参与者需要根据回答来推理,并最终找出谜题的正确答案。\n", "\n", "请严格按照这些规则回答参与者提出的问题。\n", "\n", "谜题: 在一个安静的夜晚,小村庄的湖边突然传来了阵阵哭泣声。第二天早晨,村长甄锐发现湖边的石头上放着一顶破旧的帽子,但没有人知道这顶帽子是从哪里来的,哭泣声又是为何。请还原故事真相。\n", "\n", "实际情况: 原来,这顶破旧的帽子属于一个小男孩,他小时候与爷爷在湖边生活。爷爷教他钓鱼、游泳,还告诉他湖中的海龟是他们的朋友。后来,小男孩随父母去了城市生活,但每年夏天都会回到村子探望爷爷。然而,去年夏天,爷爷因病去世,小男孩伤心欲绝。今年夏天,他回到村子,来到湖边,想起和爷爷的美好回忆,忍不住哭泣。他将爷爷的帽子放在湖边的石头上,希望能让爷爷的在天之灵得到安慰。那晚的哭泣声正是小男孩在祭莫他亲爱的爷爷。\n", "\n", "参与者提出的问题: 死者受伤了吗\n", "<|im_end|>\n", "<|im_start|>assistant\n", "不是<|im_end|>\n", "--------------------------------------------------\n", "prompt: <|im_start|>system\n", "You are an expert in logical reasoning.<|im_end|>\n", "<|im_start|>user\n", "你是一个逻辑游戏的主持人。游戏规则如下:\n", "\n", "1. 参与者会得到一个谜题。\n", "2. 参与者可以通过提问来获取线索,尝试解开谜题。\n", "3. 对于每个问题,主持人将根据实际情况回答以下五个选项之一:是、不是、不重要、回答正确、问法错误。\n", "4. 回答中不能添加任何其它信息,也不能省略选项中的任何一个字。例如,不可以把“不是”省略成“不”。\n", "5. 
参与者需要根据回答来推理,并最终找出谜题的正确答案。\n", "\n", "请严格按照这些规则回答参与者提出的问题。\n", "\n", "谜题: 在一个安静的夜晚,小村庄的湖边突然传来了阵阵哭泣声。第二天早晨,村长甄锐发现湖边的石头上放着一顶破旧的帽子,但没有人知道这顶帽子是从哪里来的,哭泣声又是为何。请还原故事真相。\n", "\n", "实际情况: 原来,这顶破旧的帽子属于一个小男孩,他小时候与爷爷在湖边生活。爷爷教他钓鱼、游泳,还告诉他湖中的海龟是他们的朋友。后来,小男孩随父母去了城市生活,但每年夏天都会回到村子探望爷爷。然而,去年夏天,爷爷因病去世,小男孩伤心欲绝。今年夏天,他回到村子,来到湖边,想起和爷爷的美好回忆,忍不住哭泣。他将爷爷的帽子放在湖边的石头上,希望能让爷爷的在天之灵得到安慰。那晚的哭泣声正是小男孩在祭莫他亲爱的爷爷。\n", "\n", "参与者提出的问题: 死者受伤了吗\n", "<|im_end|>\n", "<|im_start|>assistant\n", "\n", "Evaluating model: Qwen/Qwen2-7B\n", " 0% 0/3000 [00:00> Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.\n", " 0% 0/3000 [31:25\n", " predictions = eval_model(model, tokenizer, datasets[\"test\"])\n", " File \"/content/drive/.shortcut-targets-by-id/1E09lTnfbsjtTgQg65dQ3y9D2R6l8waxR/logical-reasoning/llm_toolkit/logical_reasoning_utils.py\", line 215, in eval_model\n", " outputs = model.generate(**inputs, max_new_tokens=4096, use_cache=False)\n", " File \"/usr/local/lib/python3.10/dist-packages/torch/utils/_contextlib.py\", line 115, in decorate_context\n", " return func(*args, **kwargs)\n", " File \"/usr/local/lib/python3.10/dist-packages/transformers/generation/utils.py\", line 1758, in generate\n", " result = self._sample(\n", " File \"/usr/local/lib/python3.10/dist-packages/transformers/generation/utils.py\", line 2397, in _sample\n", " outputs = self(\n", " File \"/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py\", line 1532, in _wrapped_call_impl\n", " return self._call_impl(*args, **kwargs)\n", " File \"/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py\", line 1541, in _call_impl\n", " return forward_call(*args, **kwargs)\n", " File \"/usr/local/lib/python3.10/dist-packages/transformers/models/qwen2/modeling_qwen2.py\", line 1163, in forward\n", " logits = logits.float()\n", "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 1.92 GiB. GPU \n", "Epoch 3\n", "loading env vars from: /content/drive/.shortcut-targets-by-id/1E09lTnfbsjtTgQg65dQ3y9D2R6l8waxR/logical-reasoning/.env\n", "Adding /content/drive/.shortcut-targets-by-id/1E09lTnfbsjtTgQg65dQ3y9D2R6l8waxR/logical-reasoning to sys.path\n", "2024-07-16 05:39:41.116319: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. 
To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n", "2024-07-16 05:39:41.166809: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n", "2024-07-16 05:39:41.166878: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n", "2024-07-16 05:39:41.168319: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n", "2024-07-16 05:39:41.175971: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", "To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", "2024-07-16 05:39:42.445909: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n", "loading /content/drive/.shortcut-targets-by-id/1E09lTnfbsjtTgQg65dQ3y9D2R6l8waxR/logical-reasoning/llm_toolkit/logical_reasoning_utils.py\n", "Qwen/Qwen2-7B /content/qwen2-7b/qwen2-7b/checkpoint-1686 False datasets/mgtv results/mgtv-results_02_qwen2_7b_colab.csv\n", "(1) GPU = NVIDIA L4. Max memory = 22.168 GB.\n", "0.0 GB of memory reserved.\n", "loading model: Qwen/Qwen2-7B\n", "[INFO|tokenization_utils_base.py:2108] 2024-07-16 05:39:48,848 >> loading file vocab.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2-7B/snapshots/453ed1575b739b5b03ce3758b23befdb0967f40e/vocab.json\n", "[INFO|tokenization_utils_base.py:2108] 2024-07-16 05:39:48,849 >> loading file merges.txt from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2-7B/snapshots/453ed1575b739b5b03ce3758b23befdb0967f40e/merges.txt\n", "[INFO|tokenization_utils_base.py:2108] 2024-07-16 05:39:48,849 >> loading file tokenizer.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2-7B/snapshots/453ed1575b739b5b03ce3758b23befdb0967f40e/tokenizer.json\n", "[INFO|tokenization_utils_base.py:2108] 2024-07-16 05:39:48,849 >> loading file added_tokens.json from cache at None\n", "[INFO|tokenization_utils_base.py:2108] 2024-07-16 05:39:48,849 >> loading file special_tokens_map.json from cache at None\n", "[INFO|tokenization_utils_base.py:2108] 2024-07-16 05:39:48,849 >> loading file tokenizer_config.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2-7B/snapshots/453ed1575b739b5b03ce3758b23befdb0967f40e/tokenizer_config.json\n", "[WARNING|logging.py:314] 2024-07-16 05:39:49,128 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n", "07/16/2024 05:39:49 - INFO - llamafactory.data.template - Replace eos token: <|im_end|>\n", "07/16/2024 05:39:49 - INFO - llamafactory.data.template - Add <|im_start|> to stop words.\n", "/usr/local/lib/python3.10/dist-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. 
If you want to force a new download, use `force_download=True`.\n", " warnings.warn(\n", "[INFO|configuration_utils.py:733] 2024-07-16 05:39:49,227 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2-7B/snapshots/453ed1575b739b5b03ce3758b23befdb0967f40e/config.json\n", "[INFO|configuration_utils.py:796] 2024-07-16 05:39:49,228 >> Model config Qwen2Config {\n", " \"_name_or_path\": \"Qwen/Qwen2-7B\",\n", " \"architectures\": [\n", " \"Qwen2ForCausalLM\"\n", " ],\n", " \"attention_dropout\": 0.0,\n", " \"bos_token_id\": 151643,\n", " \"eos_token_id\": 151643,\n", " \"hidden_act\": \"silu\",\n", " \"hidden_size\": 3584,\n", " \"initializer_range\": 0.02,\n", " \"intermediate_size\": 18944,\n", " \"max_position_embeddings\": 131072,\n", " \"max_window_layers\": 28,\n", " \"model_type\": \"qwen2\",\n", " \"num_attention_heads\": 28,\n", " \"num_hidden_layers\": 28,\n", " \"num_key_value_heads\": 4,\n", " \"rms_norm_eps\": 1e-06,\n", " \"rope_theta\": 1000000.0,\n", " \"sliding_window\": 131072,\n", " \"tie_word_embeddings\": false,\n", " \"torch_dtype\": \"bfloat16\",\n", " \"transformers_version\": \"4.41.2\",\n", " \"use_cache\": true,\n", " \"use_sliding_window\": false,\n", " \"vocab_size\": 152064\n", "}\n", "\n", "07/16/2024 05:39:49 - INFO - llamafactory.model.patcher - Using KV cache for faster generation.\n", "[INFO|modeling_utils.py:3474] 2024-07-16 05:39:49,260 >> loading weights file model.safetensors from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2-7B/snapshots/453ed1575b739b5b03ce3758b23befdb0967f40e/model.safetensors.index.json\n", "[INFO|modeling_utils.py:1519] 2024-07-16 05:39:49,263 >> Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16.\n", "[INFO|configuration_utils.py:962] 2024-07-16 05:39:49,264 >> Generate config GenerationConfig {\n", " \"bos_token_id\": 151643,\n", " \"eos_token_id\": 151643\n", "}\n", "\n", "Loading checkpoint shards: 100% 4/4 [00:05<00:00, 1.43s/it]\n", "[INFO|modeling_utils.py:4280] 2024-07-16 05:39:57,929 >> All model checkpoint weights were used when initializing Qwen2ForCausalLM.\n", "\n", "[INFO|modeling_utils.py:4288] 2024-07-16 05:39:57,929 >> All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at Qwen/Qwen2-7B.\n", "If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training.\n", "[INFO|configuration_utils.py:917] 2024-07-16 05:39:58,030 >> loading configuration file generation_config.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2-7B/snapshots/453ed1575b739b5b03ce3758b23befdb0967f40e/generation_config.json\n", "[INFO|configuration_utils.py:962] 2024-07-16 05:39:58,030 >> Generate config GenerationConfig {\n", " \"bos_token_id\": 151643,\n", " \"eos_token_id\": 151643,\n", " \"max_new_tokens\": 2048\n", "}\n", "\n", "07/16/2024 05:39:58 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.\n", "07/16/2024 05:39:59 - INFO - llamafactory.model.adapter - Merged 1 adapter(s).\n", "07/16/2024 05:39:59 - INFO - llamafactory.model.adapter - Loaded adapter(s): /content/qwen2-7b/qwen2-7b/checkpoint-1686\n", "07/16/2024 05:39:59 - INFO - llamafactory.model.loader - all params: 7,615,616,512\n", "(2) GPU = NVIDIA L4. 
Max memory = 22.168 GB.\n", "16.521 GB of memory reserved.\n", "loading train/test data files\n", "DatasetDict({\n", " train: Dataset({\n", " features: ['text', 'label', 'answer', 'title', 'puzzle', 'truth', 'train_text', 'prompt'],\n", " num_rows: 25000\n", " })\n", " test: Dataset({\n", " features: ['text', 'label', 'answer', 'title', 'puzzle', 'truth', 'train_text', 'prompt'],\n", " num_rows: 3000\n", " })\n", "})\n", "--------------------------------------------------\n", "text: 甄加索是自杀吗\n", "--------------------------------------------------\n", "label: 不是\n", "--------------------------------------------------\n", "answer: nan\n", "--------------------------------------------------\n", "title: 海岸之谜\n", "--------------------------------------------------\n", "puzzle: 在远离城市喧嚣的海边小屋,一天清晨,邻居发现甄加索僵卧在沙滩上,已无生命迹象。现场没有发现任何打斗的迹象。请问甄加索的死因是什么?\n", "--------------------------------------------------\n", "truth: 甄加索是一位热爱自然的画家,他每年都会来到这个海边小屋寻找灵感。在他生命的最后几天,他一直在创作一幅描绘海洋生物的画作。在画即将完成的前一天晚上,他骑着自行车外出,打算在海边观赏夜景。然而,他在沙滩上意外发现了一只搁浅的海豚,为了救助这只海豚,他耗费了极大的体力,最终成功将其送回海中。筋疲力尽的甄加索在沙滩上睡着了,由于他患有严重的心脏病,却未告知旁人,在寒冷的海风中,他的心脏停止了跳动。因此,警方在现场只发现了车轮痕迹和未完成的画作,而没有发现任何他杀的迹象。\n", "--------------------------------------------------\n", "train_text: <|im_start|>system\n", "You are an expert in logical reasoning.<|im_end|>\n", "<|im_start|>user\n", "你是一个逻辑游戏的主持人。游戏规则如下:\n", "\n", "1. 参与者会得到一个谜题。\n", "2. 参与者可以通过提问来获取线索,尝试解开谜题。\n", "3. 对于每个问题,主持人将根据实际情况回答以下五个选项之一:是、不是、不重要、回答正确、问法错误。\n", "4. 回答中不能添加任何其它信息,也不能省略选项中的任何一个字。例如,不可以把“不是”省略成“不”。\n", "5. 参与者需要根据回答来推理,并最终找出谜题的正确答案。\n", "\n", "请严格按照这些规则回答参与者提出的问题。\n", "\n", "谜题: 在远离城市喧嚣的海边小屋,一天清晨,邻居发现甄加索僵卧在沙滩上,已无生命迹象。现场没有发现任何打斗的迹象。请问甄加索的死因是什么?\n", "\n", "实际情况: 甄加索是一位热爱自然的画家,他每年都会来到这个海边小屋寻找灵感。在他生命的最后几天,他一直在创作一幅描绘海洋生物的画作。在画即将完成的前一天晚上,他骑着自行车外出,打算在海边观赏夜景。然而,他在沙滩上意外发现了一只搁浅的海豚,为了救助这只海豚,他耗费了极大的体力,最终成功将其送回海中。筋疲力尽的甄加索在沙滩上睡着了,由于他患有严重的心脏病,却未告知旁人,在寒冷的海风中,他的心脏停止了跳动。因此,警方在现场只发现了车轮痕迹和未完成的画作,而没有发现任何他杀的迹象。\n", "\n", "参与者提出的问题: 甄加索是自杀吗\n", "<|im_end|>\n", "<|im_start|>assistant\n", "不是<|im_end|>\n", "--------------------------------------------------\n", "prompt: <|im_start|>system\n", "You are an expert in logical reasoning.<|im_end|>\n", "<|im_start|>user\n", "你是一个逻辑游戏的主持人。游戏规则如下:\n", "\n", "1. 参与者会得到一个谜题。\n", "2. 参与者可以通过提问来获取线索,尝试解开谜题。\n", "3. 对于每个问题,主持人将根据实际情况回答以下五个选项之一:是、不是、不重要、回答正确、问法错误。\n", "4. 回答中不能添加任何其它信息,也不能省略选项中的任何一个字。例如,不可以把“不是”省略成“不”。\n", "5. 
参与者需要根据回答来推理,并最终找出谜题的正确答案。\n", "\n", "请严格按照这些规则回答参与者提出的问题。\n", "\n", "谜题: 在远离城市喧嚣的海边小屋,一天清晨,邻居发现甄加索僵卧在沙滩上,已无生命迹象。现场没有发现任何打斗的迹象。请问甄加索的死因是什么?\n", "\n", "实际情况: 甄加索是一位热爱自然的画家,他每年都会来到这个海边小屋寻找灵感。在他生命的最后几天,他一直在创作一幅描绘海洋生物的画作。在画即将完成的前一天晚上,他骑着自行车外出,打算在海边观赏夜景。然而,他在沙滩上意外发现了一只搁浅的海豚,为了救助这只海豚,他耗费了极大的体力,最终成功将其送回海中。筋疲力尽的甄加索在沙滩上睡着了,由于他患有严重的心脏病,却未告知旁人,在寒冷的海风中,他的心脏停止了跳动。因此,警方在现场只发现了车轮痕迹和未完成的画作,而没有发现任何他杀的迹象。\n", "\n", "参与者提出的问题: 甄加索是自杀吗\n", "<|im_end|>\n", "<|im_start|>assistant\n", "\n", "--------------------------------------------------\n", "text: 死者受伤了吗\n", "--------------------------------------------------\n", "label: 不是\n", "--------------------------------------------------\n", "answer: nan\n", "--------------------------------------------------\n", "title: 甄庄哭声\n", "--------------------------------------------------\n", "puzzle: 在一个安静的夜晚,小村庄的湖边突然传来了阵阵哭泣声。第二天早晨,村长甄锐发现湖边的石头上放着一顶破旧的帽子,但没有人知道这顶帽子是从哪里来的,哭泣声又是为何。请还原故事真相。\n", "--------------------------------------------------\n", "truth: 原来,这顶破旧的帽子属于一个小男孩,他小时候与爷爷在湖边生活。爷爷教他钓鱼、游泳,还告诉他湖中的海龟是他们的朋友。后来,小男孩随父母去了城市生活,但每年夏天都会回到村子探望爷爷。然而,去年夏天,爷爷因病去世,小男孩伤心欲绝。今年夏天,他回到村子,来到湖边,想起和爷爷的美好回忆,忍不住哭泣。他将爷爷的帽子放在湖边的石头上,希望能让爷爷的在天之灵得到安慰。那晚的哭泣声正是小男孩在祭莫他亲爱的爷爷。\n", "--------------------------------------------------\n", "train_text: <|im_start|>system\n", "You are an expert in logical reasoning.<|im_end|>\n", "<|im_start|>user\n", "你是一个逻辑游戏的主持人。游戏规则如下:\n", "\n", "1. 参与者会得到一个谜题。\n", "2. 参与者可以通过提问来获取线索,尝试解开谜题。\n", "3. 对于每个问题,主持人将根据实际情况回答以下五个选项之一:是、不是、不重要、回答正确、问法错误。\n", "4. 回答中不能添加任何其它信息,也不能省略选项中的任何一个字。例如,不可以把“不是”省略成“不”。\n", "5. 参与者需要根据回答来推理,并最终找出谜题的正确答案。\n", "\n", "请严格按照这些规则回答参与者提出的问题。\n", "\n", "谜题: 在一个安静的夜晚,小村庄的湖边突然传来了阵阵哭泣声。第二天早晨,村长甄锐发现湖边的石头上放着一顶破旧的帽子,但没有人知道这顶帽子是从哪里来的,哭泣声又是为何。请还原故事真相。\n", "\n", "实际情况: 原来,这顶破旧的帽子属于一个小男孩,他小时候与爷爷在湖边生活。爷爷教他钓鱼、游泳,还告诉他湖中的海龟是他们的朋友。后来,小男孩随父母去了城市生活,但每年夏天都会回到村子探望爷爷。然而,去年夏天,爷爷因病去世,小男孩伤心欲绝。今年夏天,他回到村子,来到湖边,想起和爷爷的美好回忆,忍不住哭泣。他将爷爷的帽子放在湖边的石头上,希望能让爷爷的在天之灵得到安慰。那晚的哭泣声正是小男孩在祭莫他亲爱的爷爷。\n", "\n", "参与者提出的问题: 死者受伤了吗\n", "<|im_end|>\n", "<|im_start|>assistant\n", "不是<|im_end|>\n", "--------------------------------------------------\n", "prompt: <|im_start|>system\n", "You are an expert in logical reasoning.<|im_end|>\n", "<|im_start|>user\n", "你是一个逻辑游戏的主持人。游戏规则如下:\n", "\n", "1. 参与者会得到一个谜题。\n", "2. 参与者可以通过提问来获取线索,尝试解开谜题。\n", "3. 对于每个问题,主持人将根据实际情况回答以下五个选项之一:是、不是、不重要、回答正确、问法错误。\n", "4. 回答中不能添加任何其它信息,也不能省略选项中的任何一个字。例如,不可以把“不是”省略成“不”。\n", "5. 
参与者需要根据回答来推理,并最终找出谜题的正确答案。\n", "\n", "请严格按照这些规则回答参与者提出的问题。\n", "\n", "谜题: 在一个安静的夜晚,小村庄的湖边突然传来了阵阵哭泣声。第二天早晨,村长甄锐发现湖边的石头上放着一顶破旧的帽子,但没有人知道这顶帽子是从哪里来的,哭泣声又是为何。请还原故事真相。\n", "\n", "实际情况: 原来,这顶破旧的帽子属于一个小男孩,他小时候与爷爷在湖边生活。爷爷教他钓鱼、游泳,还告诉他湖中的海龟是他们的朋友。后来,小男孩随父母去了城市生活,但每年夏天都会回到村子探望爷爷。然而,去年夏天,爷爷因病去世,小男孩伤心欲绝。今年夏天,他回到村子,来到湖边,想起和爷爷的美好回忆,忍不住哭泣。他将爷爷的帽子放在湖边的石头上,希望能让爷爷的在天之灵得到安慰。那晚的哭泣声正是小男孩在祭莫他亲爱的爷爷。\n", "\n", "参与者提出的问题: 死者受伤了吗\n", "<|im_end|>\n", "<|im_start|>assistant\n", "\n", "Evaluating model: Qwen/Qwen2-7B\n", " 0% 0/3000 [00:00> Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.\n", " 0% 0/3000 [01:33\n", " predictions = eval_model(model, tokenizer, datasets[\"test\"])\n", " File \"/content/drive/.shortcut-targets-by-id/1E09lTnfbsjtTgQg65dQ3y9D2R6l8waxR/logical-reasoning/llm_toolkit/logical_reasoning_utils.py\", line 215, in eval_model\n", " outputs = model.generate(**inputs, max_new_tokens=4096, use_cache=False)\n", " File \"/usr/local/lib/python3.10/dist-packages/torch/utils/_contextlib.py\", line 114, in decorate_context\n", " with ctx_factory():\n", " File \"/usr/local/lib/python3.10/dist-packages/torch/autograd/grad_mode.py\", line 84, in __exit__\n", " torch.set_grad_enabled(self.prev)\n", " File \"/usr/local/lib/python3.10/dist-packages/torch/autograd/grad_mode.py\", line 183, in __init__\n", " def __init__(self, mode: bool) -> None:\n", "KeyboardInterrupt\n", "Epoch 4\n" ] } ], "source": [ "%%time\n", "\n", "evaluate_model_all_epochs(\"Qwen/Qwen2-7B\", \"/content/qwen2-7b/qwen2-7b\", 4, start_epoch=0, load_in_4bit=False, num_of_entries=-1)" ] }
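, { "cell_type": "markdown", "metadata": {}, "source": [ "A note on the repeated CUDA OOM above: every epoch fails inside `model.generate(**inputs, max_new_tokens=4096, use_cache=False)` at `logits = logits.float()`. With `use_cache=False`, generation re-scores the full sequence at every decoding step, so the fp32 logits tensor is roughly `seq_len * vocab_size * 4` bytes; at about 3,500 prompt tokens and `vocab_size = 152064` that is about 2 GiB, matching the failed allocations. The sketch below is a minimal alternative, not the project's `eval_model`: `generate_answer` is a hypothetical helper and assumes an already-loaded `model` and `tokenizer`. Because the expected answers are short labels (是、不是、不重要、回答正确、问法错误), a small `max_new_tokens` plus the KV cache should keep per-step logits tiny." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import torch\n", "\n", "\n", "def generate_answer(model, tokenizer, prompt, max_new_tokens=16):\n", "    # Hypothetical helper (not part of llm_toolkit): single-prompt generation\n", "    # tuned to avoid allocating full-sequence fp32 logits as seen above.\n", "    inputs = tokenizer(prompt, return_tensors=\"pt\").to(model.device)\n", "    with torch.no_grad():\n", "        outputs = model.generate(\n", "            **inputs,\n", "            max_new_tokens=max_new_tokens,  # answers are short labels, not 4096 tokens\n", "            use_cache=True,  # KV cache: logits computed for one position per step\n", "        )\n", "    # Decode only the newly generated tokens, dropping the prompt.\n", "    new_tokens = outputs[0][inputs[\"input_ids\"].shape[-1]:]\n", "    return tokenizer.decode(new_tokens, skip_special_tokens=True)\n", "\n", "\n", "# Rough size of the tensor that failed to allocate with use_cache=False:\n", "seq_len, vocab_size = 3500, 152064\n", "print(f\"full-sequence fp32 logits: {seq_len * vocab_size * 4 / 2**30:.2f} GiB\")\n", "\n", "# When evaluating several checkpoints in one process, releasing cached GPU\n", "# blocks between runs can also help:\n", "# torch.cuda.empty_cache()" ] } ], "metadata": { "accelerator": "GPU", "colab": { "gpuType": "L4", "provenance": [] }, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.4" } }, "nbformat": 4, "nbformat_minor": 0 }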