diff --git "a/competition/12b_InternLM_Push_LoRA_to_hub.ipynb" "b/competition/12b_InternLM_Push_LoRA_to_hub.ipynb"
--- "a/competition/12b_InternLM_Push_LoRA_to_hub.ipynb"
+++ "b/competition/12b_InternLM_Push_LoRA_to_hub.ipynb"
@@ -1 +1 @@
-{"cells":[{"cell_type":"code","execution_count":1,"metadata":{"executionInfo":{"elapsed":476,"status":"ok","timestamp":1720679526275,"user":{"displayName":"HUANG DONGHAO _","userId":"00977795705617022768"},"user_tz":-480},"id":"uWKRSV6eZsCn"},"outputs":[],"source":["%load_ext autoreload\n","%autoreload 2"]},{"cell_type":"code","execution_count":2,"metadata":{"application/vnd.databricks.v1+cell":{"cellMetadata":{"byteLimit":2048000,"rowLimit":10000},"inputWidgets":{},"nuid":"eb33b19f-1206-41ee-84e2-e6258a12eef7","showTitle":false,"title":""},"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":2534,"status":"ok","timestamp":1720679529344,"user":{"displayName":"HUANG DONGHAO _","userId":"00977795705617022768"},"user_tz":-480},"id":"xwFh14uiZBrI","outputId":"d767799c-34c2-46a5-f052-378146a55321"},"outputs":[],"source":["from pathlib import Path\n","\n","try:\n"," from google.colab import drive\n","\n"," drive.mount(\"/content/drive\")\n"," workding_dir = \"/content/drive/MyDrive/logical-reasoning/\"\n","except ModuleNotFoundError:\n"," workding_dir = str(Path.cwd().parent)"]},{"cell_type":"code","execution_count":3,"metadata":{"application/vnd.databricks.v1+cell":{"cellMetadata":{"byteLimit":2048000,"rowLimit":10000},"inputWidgets":{},"nuid":"6d394937-6c99-4a7c-9d32-7600a280032f","showTitle":false,"title":""},"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":5,"status":"ok","timestamp":1720679529345,"user":{"displayName":"HUANG DONGHAO _","userId":"00977795705617022768"},"user_tz":-480},"id":"G5pNu3zgZBrL","outputId":"160a554f-fb08-4aa0-bc00-0422fb7c1fac"},"outputs":[{"name":"stdout","output_type":"stream","text":["workding dir: /Users/inflaton/code/engd/projects/logical-reasoning\n"]}],"source":["import os\n","import sys\n","from pathlib import Path\n","\n","os.chdir(workding_dir)\n","sys.path.append(workding_dir)\n","print(\"workding dir:\", workding_dir)"]},{"cell_type":"code","execution_count":4,"metadata":{"application/vnd.databricks.v1+cell":{"cellMetadata":{"byteLimit":2048000,"rowLimit":10000},"inputWidgets":{},"nuid":"9f67ec60-2f24-411c-84eb-0dd664b44775","showTitle":false,"title":""},"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":3,"status":"ok","timestamp":1720679529345,"user":{"displayName":"HUANG DONGHAO _","userId":"00977795705617022768"},"user_tz":-480},"id":"hPCC-6m7ZBrM","outputId":"c7aa2c96-5e99-440a-c148-201d79465ff9"},"outputs":[{"name":"stdout","output_type":"stream","text":["loading env vars from: /Users/inflaton/code/engd/projects/logical-reasoning/.env\n"]},{"data":{"text/plain":["True"]},"execution_count":4,"metadata":{},"output_type":"execute_result"}],"source":["from dotenv import find_dotenv, load_dotenv\n","\n","found_dotenv = find_dotenv(\".env\")\n","\n","if len(found_dotenv) == 0:\n"," found_dotenv = find_dotenv(\".env.example\")\n","print(f\"loading env vars from: {found_dotenv}\")\n","load_dotenv(found_dotenv, override=True)"]},{"cell_type":"code","execution_count":5,"metadata":{"application/vnd.databricks.v1+cell":{"cellMetadata":{"byteLimit":2048000,"rowLimit":10000},"inputWidgets":{},"nuid":"f1597656-8042-4878-9d3b-9ebfb8dd86dc","showTitle":false,"title":""},"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":3,"status":"ok","timestamp":1720679529345,"user":{"displayName":"HUANG DONGHAO 
_","userId":"00977795705617022768"},"user_tz":-480},"id":"1M3IraVtZBrM","outputId":"29ab35f6-2970-4ade-d85d-3174acf8cda0"},"outputs":[{"name":"stdout","output_type":"stream","text":["internlm/internlm2_5-7b-chat-1m llama-factory/saves/internlm2_5_7b/lora/sft_bf16_p2_full/checkpoint-88 False datasets/mgtv results/mgtv-results_m3.csv\n"]}],"source":["import os\n","\n","model_name = os.getenv(\"MODEL_NAME\")\n","adapter_name_or_path = os.getenv(\"ADAPTER_NAME_OR_PATH\")\n","load_in_4bit = os.getenv(\"LOAD_IN_4BIT\") == \"true\"\n","data_path = os.getenv(\"LOGICAL_REASONING_DATA_PATH\")\n","results_path = os.getenv(\"LOGICAL_REASONING_RESULTS_PATH\")\n","use_english_datasets = os.getenv(\"USE_ENGLISH_DATASETS\") == \"true\"\n","\n","print(model_name, adapter_name_or_path, load_in_4bit, data_path, results_path)"]},{"cell_type":"code","execution_count":6,"metadata":{"application/vnd.databricks.v1+cell":{"cellMetadata":{"byteLimit":2048000,"rowLimit":10000},"inputWidgets":{},"nuid":"b2a43943-9324-4839-9a47-cfa72de2244b","showTitle":false,"title":""},"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":564,"status":"ok","timestamp":1720679529907,"user":{"displayName":"HUANG DONGHAO _","userId":"00977795705617022768"},"user_tz":-480},"id":"UgMvt6dIZBrM","outputId":"ce37581c-fd26-46c2-ad87-d933d99f68f7"},"outputs":[{"name":"stdout","output_type":"stream","text":["Python 3.11.9\n","\u001b[33mWARNING: Package(s) not found: flash-attn\u001b[0m\u001b[33m\n","\u001b[0mCPU times: user 4.51 ms, sys: 7.82 ms, total: 12.3 ms\n","Wall time: 652 ms\n"]}],"source":["%%time\n","!python --version\n","!pip show flash-attn"]},{"cell_type":"code","execution_count":7,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":1685,"status":"ok","timestamp":1720679531591,"user":{"displayName":"HUANG DONGHAO _","userId":"00977795705617022768"},"user_tz":-480},"id":"ZuS_FsLyZBrN","outputId":"2cba0105-c505-4395-afbd-2f2fee6581d0"},"outputs":[{"name":"stdout","output_type":"stream","text":["loading /Users/inflaton/code/engd/projects/logical-reasoning/llm_toolkit/logical_reasoning_utils.py\n","MPS is available\n"]}],"source":["from llm_toolkit.llm_utils import *\n","from llm_toolkit.logical_reasoning_utils import *\n","\n","device = check_gpu()"]},{"cell_type":"code","execution_count":8,"metadata":{},"outputs":[{"name":"stdout","output_type":"stream","text":["loading model: llama-factory/saves/internlm2_5_7b/lora/sft_bf16_p2_full/checkpoint-88 with adapter: None\n"]},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"59cbac92c15441b8b96eeb2079d80a9f","version_major":2,"version_minor":0},"text/plain":["Loading checkpoint shards: 0%| | 0/8 [00:00, ?it/s]"]},"metadata":{},"output_type":"display_data"},{"name":"stdout","output_type":"stream","text":["CPU times: user 2.49 s, sys: 5.85 s, total: 8.34 s\n","Wall time: 27.5 s\n"]}],"source":["%%time\n","\n","# model, tokenizer = load_model(model_name, adapter_name_or_path=adapter_name_or_path, using_llama_factory=False)\n","model, tokenizer = load_model(adapter_name_or_path, using_llama_factory=False)"]},{"cell_type":"code","execution_count":9,"metadata":{},"outputs":[{"name":"stderr","output_type":"stream","text":["/Users/inflaton/anaconda3/envs/logical-reasoning/lib/python3.11/site-packages/transformers/integrations/peft.py:399: FutureWarning: The `active_adapter` method is deprecated and will be removed in a future version.\n"," 
warnings.warn(\n"]},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"6a97d694bb2c45288969b8f4806e9db3","version_major":2,"version_minor":0},"text/plain":["adapter_model.safetensors: 0%| | 0.00/37.8M [00:00, ?B/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"text/plain":["CommitInfo(commit_url='https://huggingface.co/inflaton-ai/InternLM_2_5-7b_LoRA-Adapter/commit/822b89e129372ed5bc8372489ff6dcfef7d19cd7', commit_message='Upload InternLM2ForCausalLM', commit_description='', oid='822b89e129372ed5bc8372489ff6dcfef7d19cd7', pr_url=None, pr_revision=None, pr_num=None)"]},"execution_count":9,"metadata":{},"output_type":"execute_result"}],"source":["model.push_to_hub(\"inflaton-ai/InternLM_2_5-7b_LoRA-Adapter\")"]},{"cell_type":"code","execution_count":10,"metadata":{},"outputs":[],"source":["del model, tokenizer"]},{"cell_type":"code","execution_count":12,"metadata":{},"outputs":[{"name":"stdout","output_type":"stream","text":["loading model: internlm/internlm2_5-7b-chat-1m with adapter: inflaton-ai/InternLM_2_5-7b_LoRA-Adapter\n"]},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"8e5a9c51fcb44a2e97ebcf0594a68938","version_major":2,"version_minor":0},"text/plain":["Loading checkpoint shards: 0%| | 0/8 [00:00, ?it/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"105a5fdab3534197b34cf5e4d9ed47b3","version_major":2,"version_minor":0},"text/plain":["adapter_config.json: 0%| | 0.00/679 [00:00, ?B/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"55bcfc7ea08e4b43aab7796ba33a32bd","version_major":2,"version_minor":0},"text/plain":["adapter_model.safetensors: 0%| | 0.00/37.8M [00:00, ?B/s]"]},"metadata":{},"output_type":"display_data"},{"name":"stdout","output_type":"stream","text":["CPU times: user 3.06 s, sys: 5.04 s, total: 8.11 s\n","Wall time: 31.6 s\n"]}],"source":["%%time\n","\n","model, tokenizer = load_model(model_name, adapter_name_or_path=\"inflaton-ai/InternLM_2_5-7b_LoRA-Adapter\", using_llama_factory=False)\n","# model, tokenizer = load_model(\"inflaton-ai/InternLM_2_5-7b_LoRA-Adapter\", using_llama_factory=False)"]},{"cell_type":"code","execution_count":13,"metadata":{},"outputs":[{"name":"stdout","output_type":"stream","text":["loading train/test data files\n"]},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"c01335f1a61e4b589b7c33978db1e8c6","version_major":2,"version_minor":0},"text/plain":["Map: 0%| | 0/25000 [00:00, ? examples/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"1e1c242ae44b4a65b7683e8918174989","version_major":2,"version_minor":0},"text/plain":["Map: 0%| | 0/3000 [00:00, ? 
examples/s]"]},"metadata":{},"output_type":"display_data"},{"name":"stdout","output_type":"stream","text":["DatasetDict({\n"," train: Dataset({\n"," features: ['text', 'label', 'answer', 'title', 'puzzle', 'truth', 'train_text', 'prompt'],\n"," num_rows: 25000\n"," })\n"," test: Dataset({\n"," features: ['text', 'label', 'answer', 'title', 'puzzle', 'truth', 'train_text', 'prompt'],\n"," num_rows: 3000\n"," })\n","})\n"]}],"source":["datasets = load_logical_reasoning_dataset(\n"," data_path,\n"," tokenizer=tokenizer,\n"," chinese_prompt=not use_english_datasets,\n"," using_p1=False,\n",")"]},{"cell_type":"code","execution_count":14,"metadata":{},"outputs":[{"name":"stderr","output_type":"stream","text":["/Users/inflaton/anaconda3/envs/logical-reasoning/lib/python3.11/site-packages/transformers/generation/utils.py:1513: UserWarning: The operator 'aten::isin.Tensor_Tensor_out' is not currently supported on the MPS backend and will fall back to run on the CPU. This may have performance implications. (Triggered internally at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/mps/MPSFallback.mm:13.)\n"," if eos_token_id is not None and torch.isin(elements=eos_token_id, test_elements=pad_token_id).any():\n"]},{"name":"stdout","output_type":"stream","text":["是\n","CPU times: user 579 ms, sys: 330 ms, total: 909 ms\n","Wall time: 2.86 s\n"]}],"source":["%%time\n","\n","prompt1 = datasets[\"test\"][\"prompt\"][1000]\n","\n","gen_kwargs = {\"max_length\": 4096, \"do_sample\": True, \"top_k\": 1}\n","with torch.no_grad():\n"," inputs = tokenizer(\n"," [prompt1],\n"," return_tensors=\"pt\",\n"," ).to(device)\n"," outputs = model.generate(**inputs, **gen_kwargs)\n"," outputs = outputs[:, inputs['input_ids'].shape[1]:]\n"," print(tokenizer.decode(outputs[0], skip_special_tokens=True))"]}],"metadata":{"accelerator":"GPU","application/vnd.databricks.v1+notebook":{"dashboards":[],"environmentMetadata":null,"language":"python","notebookMetadata":{"mostRecentlyExecutedCommandWithImplicitDF":{"commandId":-1,"dataframes":["_sqldf"]},"pythonIndentUnit":4},"notebookName":"10_eval-lf-medium-py3.11","widgets":{}},"colab":{"gpuType":"L4","provenance":[]},"kernelspec":{"display_name":"Python 3","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.11.9"}},"nbformat":4,"nbformat_minor":0}
+{"cells":[{"cell_type":"code","execution_count":1,"metadata":{"executionInfo":{"elapsed":476,"status":"ok","timestamp":1720679526275,"user":{"displayName":"HUANG DONGHAO _","userId":"00977795705617022768"},"user_tz":-480},"id":"uWKRSV6eZsCn"},"outputs":[],"source":["%load_ext autoreload\n","%autoreload 2"]},{"cell_type":"code","execution_count":2,"metadata":{"application/vnd.databricks.v1+cell":{"cellMetadata":{"byteLimit":2048000,"rowLimit":10000},"inputWidgets":{},"nuid":"eb33b19f-1206-41ee-84e2-e6258a12eef7","showTitle":false,"title":""},"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":2534,"status":"ok","timestamp":1720679529344,"user":{"displayName":"HUANG DONGHAO _","userId":"00977795705617022768"},"user_tz":-480},"id":"xwFh14uiZBrI","outputId":"d767799c-34c2-46a5-f052-378146a55321"},"outputs":[],"source":["from pathlib import Path\n","\n","try:\n"," from google.colab import drive\n","\n"," drive.mount(\"/content/drive\")\n"," workding_dir = \"/content/drive/MyDrive/logical-reasoning/\"\n","except ModuleNotFoundError:\n"," workding_dir = str(Path.cwd().parent)"]},{"cell_type":"code","execution_count":3,"metadata":{"application/vnd.databricks.v1+cell":{"cellMetadata":{"byteLimit":2048000,"rowLimit":10000},"inputWidgets":{},"nuid":"6d394937-6c99-4a7c-9d32-7600a280032f","showTitle":false,"title":""},"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":5,"status":"ok","timestamp":1720679529345,"user":{"displayName":"HUANG DONGHAO _","userId":"00977795705617022768"},"user_tz":-480},"id":"G5pNu3zgZBrL","outputId":"160a554f-fb08-4aa0-bc00-0422fb7c1fac"},"outputs":[{"name":"stdout","output_type":"stream","text":["workding dir: /home/inflaton/code/projects/courses/logical-reasoning\n"]}],"source":["import os\n","import sys\n","from pathlib import Path\n","\n","os.chdir(workding_dir)\n","sys.path.append(workding_dir)\n","print(\"workding dir:\", workding_dir)"]},{"cell_type":"code","execution_count":4,"metadata":{"application/vnd.databricks.v1+cell":{"cellMetadata":{"byteLimit":2048000,"rowLimit":10000},"inputWidgets":{},"nuid":"9f67ec60-2f24-411c-84eb-0dd664b44775","showTitle":false,"title":""},"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":3,"status":"ok","timestamp":1720679529345,"user":{"displayName":"HUANG DONGHAO _","userId":"00977795705617022768"},"user_tz":-480},"id":"hPCC-6m7ZBrM","outputId":"c7aa2c96-5e99-440a-c148-201d79465ff9"},"outputs":[{"name":"stdout","output_type":"stream","text":["loading env vars from: /home/inflaton/code/projects/courses/logical-reasoning/.env\n"]},{"data":{"text/plain":["True"]},"execution_count":4,"metadata":{},"output_type":"execute_result"}],"source":["from dotenv import find_dotenv, load_dotenv\n","\n","found_dotenv = find_dotenv(\".env\")\n","\n","if len(found_dotenv) == 0:\n"," found_dotenv = find_dotenv(\".env.example\")\n","print(f\"loading env vars from: {found_dotenv}\")\n","load_dotenv(found_dotenv, override=True)"]},{"cell_type":"code","execution_count":5,"metadata":{"application/vnd.databricks.v1+cell":{"cellMetadata":{"byteLimit":2048000,"rowLimit":10000},"inputWidgets":{},"nuid":"f1597656-8042-4878-9d3b-9ebfb8dd86dc","showTitle":false,"title":""},"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":3,"status":"ok","timestamp":1720679529345,"user":{"displayName":"HUANG DONGHAO 
_","userId":"00977795705617022768"},"user_tz":-480},"id":"1M3IraVtZBrM","outputId":"29ab35f6-2970-4ade-d85d-3174acf8cda0"},"outputs":[{"name":"stdout","output_type":"stream","text":["internlm/internlm2_5-7b-chat-1m llama-factory/saves/internlm2_5_7b/lora/sft_bf16_p2_full_r2/checkpoint-175 False datasets/mgtv results/mgtv-results_internlm_best.csv\n"]}],"source":["import os\n","\n","model_name = os.getenv(\"MODEL_NAME\")\n","adapter_name_or_path = os.getenv(\"ADAPTER_NAME_OR_PATH\")\n","load_in_4bit = os.getenv(\"LOAD_IN_4BIT\") == \"true\"\n","data_path = os.getenv(\"LOGICAL_REASONING_DATA_PATH\")\n","results_path = os.getenv(\"LOGICAL_REASONING_RESULTS_PATH\")\n","use_english_datasets = os.getenv(\"USE_ENGLISH_DATASETS\") == \"true\"\n","\n","print(model_name, adapter_name_or_path, load_in_4bit, data_path, results_path)"]},{"cell_type":"code","execution_count":6,"metadata":{"application/vnd.databricks.v1+cell":{"cellMetadata":{"byteLimit":2048000,"rowLimit":10000},"inputWidgets":{},"nuid":"b2a43943-9324-4839-9a47-cfa72de2244b","showTitle":false,"title":""},"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":564,"status":"ok","timestamp":1720679529907,"user":{"displayName":"HUANG DONGHAO _","userId":"00977795705617022768"},"user_tz":-480},"id":"UgMvt6dIZBrM","outputId":"ce37581c-fd26-46c2-ad87-d933d99f68f7"},"outputs":[{"name":"stdout","output_type":"stream","text":["Python 3.11.9\n","\u001b[33mWARNING: Package(s) not found: flash-attn\u001b[0m\u001b[33m\n","\u001b[0mName: transformers\n","Version: 4.41.2\n","Summary: State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow\n","Home-page: https://github.com/huggingface/transformers\n","Author: The Hugging Face team (past and future) with the help of all our contributors (https://github.com/huggingface/transformers/graphs/contributors)\n","Author-email: transformers@huggingface.co\n","License: Apache 2.0 License\n","Location: /home/inflaton/miniconda3/envs/llama-factory/lib/python3.11/site-packages\n","Requires: filelock, huggingface-hub, numpy, packaging, pyyaml, regex, requests, safetensors, tokenizers, tqdm\n","Required-by: llamafactory, peft, trl, vllm\n","---\n","Name: torch\n","Version: 2.3.0\n","Summary: Tensors and Dynamic neural networks in Python with strong GPU acceleration\n","Home-page: https://pytorch.org/\n","Author: PyTorch Team\n","Author-email: packages@pytorch.org\n","License: BSD-3\n","Location: /home/inflaton/miniconda3/envs/llama-factory/lib/python3.11/site-packages\n","Requires: filelock, fsspec, jinja2, networkx, nvidia-cublas-cu12, nvidia-cuda-cupti-cu12, nvidia-cuda-nvrtc-cu12, nvidia-cuda-runtime-cu12, nvidia-cudnn-cu12, nvidia-cufft-cu12, nvidia-curand-cu12, nvidia-cusolver-cu12, nvidia-cusparse-cu12, nvidia-nccl-cu12, nvidia-nvtx-cu12, sympy, triton, typing-extensions\n","Required-by: accelerate, bitsandbytes, peft, torchvision, trl, vllm, vllm-flash-attn, xformers\n","CPU times: user 73.4 ms, sys: 23.9 ms, total: 97.3 ms\n","Wall time: 4.31 s\n"]}],"source":["%%time\n","!python --version\n","!pip show flash-attn transformers torch"]},{"cell_type":"code","execution_count":7,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":1685,"status":"ok","timestamp":1720679531591,"user":{"displayName":"HUANG DONGHAO _","userId":"00977795705617022768"},"user_tz":-480},"id":"ZuS_FsLyZBrN","outputId":"2cba0105-c505-4395-afbd-2f2fee6581d0"},"outputs":[{"name":"stdout","output_type":"stream","text":["loading 
/home/inflaton/code/projects/courses/logical-reasoning/llm_toolkit/logical_reasoning_utils.py\n","GPU is available\n"]}],"source":["from llm_toolkit.llm_utils import *\n","from llm_toolkit.logical_reasoning_utils import *\n","\n","device = check_gpu()"]},{"cell_type":"code","execution_count":8,"metadata":{},"outputs":[{"data":{"text/plain":["'llama-factory/saves/internlm2_5_7b/lora/sft_bf16_p2_full_r2/checkpoint-175'"]},"execution_count":8,"metadata":{},"output_type":"execute_result"}],"source":["adapter_name_or_path"]},{"cell_type":"code","execution_count":9,"metadata":{},"outputs":[{"name":"stdout","output_type":"stream","text":["loading model: llama-factory/saves/internlm2_5_7b/lora/sft_bf16_p2_full_r2/checkpoint-175 with adapter: None\n"]},{"name":"stderr","output_type":"stream","text":["/home/inflaton/miniconda3/envs/llama-factory/lib/python3.11/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.\n"," warnings.warn(\n"]},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"97207088e8ac445998f38e27cbec5590","version_major":2,"version_minor":0},"text/plain":["Loading checkpoint shards: 0%| | 0/8 [00:00, ?it/s]"]},"metadata":{},"output_type":"display_data"},{"name":"stderr","output_type":"stream","text":["Some parameters are on the meta device device because they were offloaded to the cpu.\n","/home/inflaton/miniconda3/envs/llama-factory/lib/python3.11/site-packages/torch/nn/modules/module.py:2047: UserWarning: for model.layers.20.attention.wqkv.lora_A.default.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n"," warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n","[... identical UserWarning pairs for the remaining offloaded LoRA weights (wqkv, wo, w1, w2, w3, lora_A/lora_B across layers 20-28) elided ...]\n","/home/inflaton/miniconda3/envs/llama-factory/lib/python3.11/site-packages/torch/nn/modules/module.py:2047: UserWarning: for model.layers.28.attention.wqkv.lora_B.default.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. 
(Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n"," warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n","/home/inflaton/miniconda3/envs/llama-factory/lib/python3.11/site-packages/torch/nn/modules/module.py:2047: UserWarning: for model.layers.28.attention.wo.lora_A.default.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n"," warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n","/home/inflaton/miniconda3/envs/llama-factory/lib/python3.11/site-packages/torch/nn/modules/module.py:2047: UserWarning: for model.layers.28.attention.wo.lora_B.default.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n"," warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n","/home/inflaton/miniconda3/envs/llama-factory/lib/python3.11/site-packages/torch/nn/modules/module.py:2047: UserWarning: for model.layers.28.feed_forward.w1.lora_A.default.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n"," warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n","/home/inflaton/miniconda3/envs/llama-factory/lib/python3.11/site-packages/torch/nn/modules/module.py:2047: UserWarning: for model.layers.28.feed_forward.w1.lora_B.default.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n"," warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n","/home/inflaton/miniconda3/envs/llama-factory/lib/python3.11/site-packages/torch/nn/modules/module.py:2047: UserWarning: for model.layers.28.feed_forward.w3.lora_A.default.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n"," warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n","/home/inflaton/miniconda3/envs/llama-factory/lib/python3.11/site-packages/torch/nn/modules/module.py:2047: UserWarning: for model.layers.28.feed_forward.w3.lora_B.default.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. 
(Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n"," warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n","/home/inflaton/miniconda3/envs/llama-factory/lib/python3.11/site-packages/torch/nn/modules/module.py:2047: UserWarning: for model.layers.28.feed_forward.w2.lora_A.default.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n"," warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n","/home/inflaton/miniconda3/envs/llama-factory/lib/python3.11/site-packages/torch/nn/modules/module.py:2047: UserWarning: for model.layers.28.feed_forward.w2.lora_B.default.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n"," warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n","/home/inflaton/miniconda3/envs/llama-factory/lib/python3.11/site-packages/torch/nn/modules/module.py:2047: UserWarning: for model.layers.29.attention.wqkv.lora_A.default.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n"," warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n","/home/inflaton/miniconda3/envs/llama-factory/lib/python3.11/site-packages/torch/nn/modules/module.py:2047: UserWarning: for model.layers.29.attention.wqkv.lora_B.default.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n"," warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n","/home/inflaton/miniconda3/envs/llama-factory/lib/python3.11/site-packages/torch/nn/modules/module.py:2047: UserWarning: for model.layers.29.attention.wo.lora_A.default.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n"," warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n","/home/inflaton/miniconda3/envs/llama-factory/lib/python3.11/site-packages/torch/nn/modules/module.py:2047: UserWarning: for model.layers.29.attention.wo.lora_B.default.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. 
(Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n"," warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n","/home/inflaton/miniconda3/envs/llama-factory/lib/python3.11/site-packages/torch/nn/modules/module.py:2047: UserWarning: for model.layers.29.feed_forward.w1.lora_A.default.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n"," warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n","/home/inflaton/miniconda3/envs/llama-factory/lib/python3.11/site-packages/torch/nn/modules/module.py:2047: UserWarning: for model.layers.29.feed_forward.w1.lora_B.default.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n"," warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n","/home/inflaton/miniconda3/envs/llama-factory/lib/python3.11/site-packages/torch/nn/modules/module.py:2047: UserWarning: for model.layers.29.feed_forward.w3.lora_A.default.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n"," warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n","/home/inflaton/miniconda3/envs/llama-factory/lib/python3.11/site-packages/torch/nn/modules/module.py:2047: UserWarning: for model.layers.29.feed_forward.w3.lora_B.default.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n"," warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n","/home/inflaton/miniconda3/envs/llama-factory/lib/python3.11/site-packages/torch/nn/modules/module.py:2047: UserWarning: for model.layers.29.feed_forward.w2.lora_A.default.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n"," warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n","/home/inflaton/miniconda3/envs/llama-factory/lib/python3.11/site-packages/torch/nn/modules/module.py:2047: UserWarning: for model.layers.29.feed_forward.w2.lora_B.default.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. 
(Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n"," warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n","/home/inflaton/miniconda3/envs/llama-factory/lib/python3.11/site-packages/torch/nn/modules/module.py:2047: UserWarning: for model.layers.30.attention.wqkv.lora_A.default.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n"," warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n","/home/inflaton/miniconda3/envs/llama-factory/lib/python3.11/site-packages/torch/nn/modules/module.py:2047: UserWarning: for model.layers.30.attention.wqkv.lora_B.default.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n"," warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n","/home/inflaton/miniconda3/envs/llama-factory/lib/python3.11/site-packages/torch/nn/modules/module.py:2047: UserWarning: for model.layers.30.attention.wo.lora_A.default.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n"," warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n","/home/inflaton/miniconda3/envs/llama-factory/lib/python3.11/site-packages/torch/nn/modules/module.py:2047: UserWarning: for model.layers.30.attention.wo.lora_B.default.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n"," warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n","/home/inflaton/miniconda3/envs/llama-factory/lib/python3.11/site-packages/torch/nn/modules/module.py:2047: UserWarning: for model.layers.30.feed_forward.w1.lora_A.default.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n"," warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n","/home/inflaton/miniconda3/envs/llama-factory/lib/python3.11/site-packages/torch/nn/modules/module.py:2047: UserWarning: for model.layers.30.feed_forward.w1.lora_B.default.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. 
(Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n"," warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n","/home/inflaton/miniconda3/envs/llama-factory/lib/python3.11/site-packages/torch/nn/modules/module.py:2047: UserWarning: for model.layers.30.feed_forward.w3.lora_A.default.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n"," warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n","/home/inflaton/miniconda3/envs/llama-factory/lib/python3.11/site-packages/torch/nn/modules/module.py:2047: UserWarning: for model.layers.30.feed_forward.w3.lora_B.default.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n"," warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n","/home/inflaton/miniconda3/envs/llama-factory/lib/python3.11/site-packages/torch/nn/modules/module.py:2047: UserWarning: for model.layers.30.feed_forward.w2.lora_A.default.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n"," warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n","/home/inflaton/miniconda3/envs/llama-factory/lib/python3.11/site-packages/torch/nn/modules/module.py:2047: UserWarning: for model.layers.30.feed_forward.w2.lora_B.default.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n"," warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n","/home/inflaton/miniconda3/envs/llama-factory/lib/python3.11/site-packages/torch/nn/modules/module.py:2047: UserWarning: for model.layers.31.attention.wqkv.lora_A.default.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n"," warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n","/home/inflaton/miniconda3/envs/llama-factory/lib/python3.11/site-packages/torch/nn/modules/module.py:2047: UserWarning: for model.layers.31.attention.wqkv.lora_B.default.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. 
(Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n"," warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n","/home/inflaton/miniconda3/envs/llama-factory/lib/python3.11/site-packages/torch/nn/modules/module.py:2047: UserWarning: for model.layers.31.attention.wo.lora_A.default.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n"," warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n","/home/inflaton/miniconda3/envs/llama-factory/lib/python3.11/site-packages/torch/nn/modules/module.py:2047: UserWarning: for model.layers.31.attention.wo.lora_B.default.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n"," warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n","/home/inflaton/miniconda3/envs/llama-factory/lib/python3.11/site-packages/torch/nn/modules/module.py:2047: UserWarning: for model.layers.31.feed_forward.w1.lora_A.default.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n"," warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n","/home/inflaton/miniconda3/envs/llama-factory/lib/python3.11/site-packages/torch/nn/modules/module.py:2047: UserWarning: for model.layers.31.feed_forward.w1.lora_B.default.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n"," warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n","/home/inflaton/miniconda3/envs/llama-factory/lib/python3.11/site-packages/torch/nn/modules/module.py:2047: UserWarning: for model.layers.31.feed_forward.w3.lora_A.default.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n"," warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n","/home/inflaton/miniconda3/envs/llama-factory/lib/python3.11/site-packages/torch/nn/modules/module.py:2047: UserWarning: for model.layers.31.feed_forward.w3.lora_B.default.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. 
(Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n"," warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n","/home/inflaton/miniconda3/envs/llama-factory/lib/python3.11/site-packages/torch/nn/modules/module.py:2047: UserWarning: for model.layers.31.feed_forward.w2.lora_A.default.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n"," warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n","/home/inflaton/miniconda3/envs/llama-factory/lib/python3.11/site-packages/torch/nn/modules/module.py:2047: UserWarning: for model.layers.31.feed_forward.w2.lora_B.default.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n"," warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n","Some parameters are on the meta device device because they were offloaded to the cpu.\n"]},{"name":"stdout","output_type":"stream","text":["CPU times: user 32 s, sys: 1min 18s, total: 1min 50s\n","Wall time: 12min\n"]}],"source":["%%time\n","\n","model, tokenizer = load_model(adapter_name_or_path, using_llama_factory=False)"]},{"cell_type":"code","execution_count":13,"metadata":{},"outputs":[{"name":"stdout","output_type":"stream","text":["loading train/test data files\n","DatasetDict({\n"," train: Dataset({\n"," features: ['text', 'label', 'answer', 'title', 'puzzle', 'truth', 'train_text', 'prompt'],\n"," num_rows: 25000\n"," })\n"," test: Dataset({\n"," features: ['text', 'label', 'answer', 'title', 'puzzle', 'truth', 'train_text', 'prompt'],\n"," num_rows: 3000\n"," })\n","})\n"]}],"source":["datasets = load_logical_reasoning_dataset(\n"," data_path,\n"," tokenizer=tokenizer,\n"," chinese_prompt=not use_english_datasets,\n"," using_p1=False,\n",")"]},{"cell_type":"code","execution_count":15,"metadata":{},"outputs":[{"name":"stdout","output_type":"stream","text":["--------------------------------------------------\n","text: 哭泣和村庄有关系吗\n","--------------------------------------------------\n","label: 是\n","--------------------------------------------------\n","answer: nan\n","--------------------------------------------------\n","title: 甄庄哭声\n","--------------------------------------------------\n","puzzle: 在一个安静的夜晚,小村庄的湖边突然传来了阵阵哭泣声。第二天早晨,村长甄锐发现湖边的石头上放着一顶破旧的帽子,但没有人知道这顶帽子是从哪里来的,哭泣声又是为何。请还原故事真相。\n","--------------------------------------------------\n","truth: 原来,这顶破旧的帽子属于一个小男孩,他小时候与爷爷在湖边生活。爷爷教他钓鱼、游泳,还告诉他湖中的海龟是他们的朋友。后来,小男孩随父母去了城市生活,但每年夏天都会回到村子探望爷爷。然而,去年夏天,爷爷因病去世,小男孩伤心欲绝。今年夏天,他回到村子,来到湖边,想起和爷爷的美好回忆,忍不住哭泣。他将爷爷的帽子放在湖边的石头上,希望能让爷爷的在天之灵得到安慰。那晚的哭泣声正是小男孩在祭莫他亲爱的爷爷。\n","--------------------------------------------------\n","train_text: <|im_start|>system\n","You are an expert in logical reasoning.<|im_end|>\n","<|im_start|>user\n","你是一个情景猜谜游戏的主持人。游戏规则如下:\n","\n","1. 参与者会得到一个谜面,谜面会描述一个简单又难以理解的事件。\n","2. 主持人知道谜底,谜底是谜面的答案。\n","3. 参与者可以询问任何封闭式问题来找寻事件的真相。\n","4. 
对于每个问题,主持人将根据实际情况回答以下五个选项之一:是、不是、不重要、回答正确、问法错误。各回答的判断标准如下:\n"," - 若谜面和谜底能找到问题的答案,回答:是或者不是\n"," - 若谜面和谜底不能直接或者间接推断出问题的答案,回答:不重要\n"," - 若参与者提问不是一个封闭式问题或者问题难以理解,回答:问法错误\n"," - 若参与者提问基本还原了谜底真相,回答:回答正确\n","5. 回答中不能添加任何其它信息,也不能省略选项中的任何一个字。例如,不可以把“不是”省略成“不”。\n","\n","请严格按照这些规则回答参与者提出的问题。\n","\n","**谜面:** 在一个安静的夜晚,小村庄的湖边突然传来了阵阵哭泣声。第二天早晨,村长甄锐发现湖边的石头上放着一顶破旧的帽子,但没有人知道这顶帽子是从哪里来的,哭泣声又是为何。请还原故事真相。\n","\n","**谜底:** 原来,这顶破旧的帽子属于一个小男孩,他小时候与爷爷在湖边生活。爷爷教他钓鱼、游泳,还告诉他湖中的海龟是他们的朋友。后来,小男孩随父母去了城市生活,但每年夏天都会回到村子探望爷爷。然而,去年夏天,爷爷因病去世,小男孩伤心欲绝。今年夏天,他回到村子,来到湖边,想起和爷爷的美好回忆,忍不住哭泣。他将爷爷的帽子放在湖边的石头上,希望能让爷爷的在天之灵得到安慰。那晚的哭泣声正是小男孩在祭莫他亲爱的爷爷。\n","\n","**参与者提出的问题:** 哭泣和村庄有关系吗\n","<|im_end|>\n","<|im_start|>assistant\n","是\n","--------------------------------------------------\n","prompt: <|im_start|>system\n","You are an expert in logical reasoning.<|im_end|>\n","<|im_start|>user\n","你是一个情景猜谜游戏的主持人。游戏规则如下:\n","\n","1. 参与者会得到一个谜面,谜面会描述一个简单又难以理解的事件。\n","2. 主持人知道谜底,谜底是谜面的答案。\n","3. 参与者可以询问任何封闭式问题来找寻事件的真相。\n","4. 对于每个问题,主持人将根据实际情况回答以下五个选项之一:是、不是、不重要、回答正确、问法错误。各回答的判断标准如下:\n"," - 若谜面和谜底能找到问题的答案,回答:是或者不是\n"," - 若谜面和谜底不能直接或者间接推断出问题的答案,回答:不重要\n"," - 若参与者提问不是一个封闭式问题或者问题难以理解,回答:问法错误\n"," - 若参与者提问基本还原了谜底真相,回答:回答正确\n","5. 回答中不能添加任何其它信息,也不能省略选项中的任何一个字。例如,不可以把“不是”省略成“不”。\n","\n","请严格按照这些规则回答参与者提出的问题。\n","\n","**谜面:** 在一个安静的夜晚,小村庄的湖边突然传来了阵阵哭泣声。第二天早晨,村长甄锐发现湖边的石头上放着一顶破旧的帽子,但没有人知道这顶帽子是从哪里来的,哭泣声又是为何。请还原故事真相。\n","\n","**谜底:** 原来,这顶破旧的帽子属于一个小男孩,他小时候与爷爷在湖边生活。爷爷教他钓鱼、游泳,还告诉他湖中的海龟是他们的朋友。后来,小男孩随父母去了城市生活,但每年夏天都会回到村子探望爷爷。然而,去年夏天,爷爷因病去世,小男孩伤心欲绝。今年夏天,他回到村子,来到湖边,想起和爷爷的美好回忆,忍不住哭泣。他将爷爷的帽子放在湖边的石头上,希望能让爷爷的在天之灵得到安慰。那晚的哭泣声正是小男孩在祭莫他亲爱的爷爷。\n","\n","**参与者提出的问题:** 哭泣和村庄有关系吗\n","<|im_end|>\n","<|im_start|>assistant\n","\n"]}],"source":["print_row_details(datasets[\"test\"].to_pandas(), [1000])"]},{"cell_type":"code","execution_count":16,"metadata":{},"outputs":[{"ename":"NotImplementedError","evalue":"Cannot copy out of meta tensor; no data!","output_type":"error","traceback":["\u001b[0;31m---------------------------------------------------------------------------\u001b[0m","\u001b[0;31mNotImplementedError\u001b[0m Traceback (most recent call last)","File \u001b[0;32m:9\u001b[0m\n","File \u001b[0;32m~/miniconda3/envs/llama-factory/lib/python3.11/site-packages/torch/utils/_contextlib.py:115\u001b[0m, in \u001b[0;36mcontext_decorator..decorate_context\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 112\u001b[0m \u001b[38;5;129m@functools\u001b[39m\u001b[38;5;241m.\u001b[39mwraps(func)\n\u001b[1;32m 113\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mdecorate_context\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m 114\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m ctx_factory():\n\u001b[0;32m--> 115\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n","File \u001b[0;32m~/miniconda3/envs/llama-factory/lib/python3.11/site-packages/transformers/generation/utils.py:1758\u001b[0m, in \u001b[0;36mGenerationMixin.generate\u001b[0;34m(self, inputs, generation_config, logits_processor, stopping_criteria, prefix_allowed_tokens_fn, synced_gpus, assistant_model, streamer, negative_prompt_ids, negative_prompt_attention_mask, **kwargs)\u001b[0m\n\u001b[1;32m 1750\u001b[0m input_ids, model_kwargs \u001b[38;5;241m=\u001b[39m 
\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_expand_inputs_for_generation(\n\u001b[1;32m 1751\u001b[0m input_ids\u001b[38;5;241m=\u001b[39minput_ids,\n\u001b[1;32m 1752\u001b[0m expand_size\u001b[38;5;241m=\u001b[39mgeneration_config\u001b[38;5;241m.\u001b[39mnum_return_sequences,\n\u001b[1;32m 1753\u001b[0m is_encoder_decoder\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mconfig\u001b[38;5;241m.\u001b[39mis_encoder_decoder,\n\u001b[1;32m 1754\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mmodel_kwargs,\n\u001b[1;32m 1755\u001b[0m )\n\u001b[1;32m 1757\u001b[0m \u001b[38;5;66;03m# 13. run sample (it degenerates to greedy search when `generation_config.do_sample=False`)\u001b[39;00m\n\u001b[0;32m-> 1758\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_sample\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1759\u001b[0m \u001b[43m \u001b[49m\u001b[43minput_ids\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1760\u001b[0m \u001b[43m \u001b[49m\u001b[43mlogits_processor\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mprepared_logits_processor\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1761\u001b[0m \u001b[43m \u001b[49m\u001b[43mlogits_warper\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mprepared_logits_warper\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1762\u001b[0m \u001b[43m \u001b[49m\u001b[43mstopping_criteria\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mprepared_stopping_criteria\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1763\u001b[0m \u001b[43m \u001b[49m\u001b[43mgeneration_config\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mgeneration_config\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1764\u001b[0m \u001b[43m \u001b[49m\u001b[43msynced_gpus\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msynced_gpus\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1765\u001b[0m \u001b[43m \u001b[49m\u001b[43mstreamer\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstreamer\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1766\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mmodel_kwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1767\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1769\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m generation_mode \u001b[38;5;129;01min\u001b[39;00m (GenerationMode\u001b[38;5;241m.\u001b[39mBEAM_SAMPLE, GenerationMode\u001b[38;5;241m.\u001b[39mBEAM_SEARCH):\n\u001b[1;32m 1770\u001b[0m \u001b[38;5;66;03m# 11. 
prepare logits warper\u001b[39;00m\n\u001b[1;32m 1771\u001b[0m prepared_logits_warper \u001b[38;5;241m=\u001b[39m (\n\u001b[1;32m 1772\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_get_logits_warper(generation_config) \u001b[38;5;28;01mif\u001b[39;00m generation_config\u001b[38;5;241m.\u001b[39mdo_sample \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 1773\u001b[0m )\n","File \u001b[0;32m~/miniconda3/envs/llama-factory/lib/python3.11/site-packages/transformers/generation/utils.py:2397\u001b[0m, in \u001b[0;36mGenerationMixin._sample\u001b[0;34m(self, input_ids, logits_processor, stopping_criteria, generation_config, synced_gpus, streamer, logits_warper, **model_kwargs)\u001b[0m\n\u001b[1;32m 2394\u001b[0m model_inputs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mprepare_inputs_for_generation(input_ids, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mmodel_kwargs)\n\u001b[1;32m 2396\u001b[0m \u001b[38;5;66;03m# forward pass to get next token\u001b[39;00m\n\u001b[0;32m-> 2397\u001b[0m outputs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m(\u001b[49m\n\u001b[1;32m 2398\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mmodel_inputs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2399\u001b[0m \u001b[43m \u001b[49m\u001b[43mreturn_dict\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 2400\u001b[0m \u001b[43m \u001b[49m\u001b[43moutput_attentions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moutput_attentions\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2401\u001b[0m \u001b[43m \u001b[49m\u001b[43moutput_hidden_states\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moutput_hidden_states\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2402\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 2404\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m synced_gpus \u001b[38;5;129;01mand\u001b[39;00m this_peer_finished:\n\u001b[1;32m 2405\u001b[0m \u001b[38;5;28;01mcontinue\u001b[39;00m \u001b[38;5;66;03m# don't waste resources running the code we don't need\u001b[39;00m\n","File \u001b[0;32m~/miniconda3/envs/llama-factory/lib/python3.11/site-packages/torch/nn/modules/module.py:1532\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1530\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_compiled_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs) \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[1;32m 1531\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1532\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_impl\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n","File \u001b[0;32m~/miniconda3/envs/llama-factory/lib/python3.11/site-packages/torch/nn/modules/module.py:1541\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1536\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic 
in\u001b[39;00m\n\u001b[1;32m 1537\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m 1538\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m 1539\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m 1540\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1541\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1543\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 1544\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n","File \u001b[0;32m~/miniconda3/envs/llama-factory/lib/python3.11/site-packages/accelerate/hooks.py:169\u001b[0m, in \u001b[0;36madd_hook_to_module..new_forward\u001b[0;34m(module, *args, **kwargs)\u001b[0m\n\u001b[1;32m 167\u001b[0m output \u001b[38;5;241m=\u001b[39m module\u001b[38;5;241m.\u001b[39m_old_forward(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m 168\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 169\u001b[0m output \u001b[38;5;241m=\u001b[39m \u001b[43mmodule\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_old_forward\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 170\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m module\u001b[38;5;241m.\u001b[39m_hf_hook\u001b[38;5;241m.\u001b[39mpost_forward(module, output)\n","File \u001b[0;32m~/.cache/huggingface/modules/transformers_modules/internlm/internlm2_5-7b-chat-1m/8d1a709a04d71440ef3df6ebbe204672f411c8b6/modeling_internlm2.py:1204\u001b[0m, in \u001b[0;36mInternLM2ForCausalLM.forward\u001b[0;34m(self, input_ids, attention_mask, position_ids, past_key_values, inputs_embeds, labels, use_cache, output_attentions, output_hidden_states, return_dict, cache_position)\u001b[0m\n\u001b[1;32m 1201\u001b[0m return_dict \u001b[38;5;241m=\u001b[39m return_dict \u001b[38;5;28;01mif\u001b[39;00m return_dict \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mconfig\u001b[38;5;241m.\u001b[39muse_return_dict\n\u001b[1;32m 1203\u001b[0m \u001b[38;5;66;03m# decoder outputs consists of (dec_features, layer_state, dec_hidden, dec_attn)\u001b[39;00m\n\u001b[0;32m-> 1204\u001b[0m outputs \u001b[38;5;241m=\u001b[39m 
\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmodel\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1205\u001b[0m \u001b[43m \u001b[49m\u001b[43minput_ids\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minput_ids\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1206\u001b[0m \u001b[43m \u001b[49m\u001b[43mattention_mask\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mattention_mask\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1207\u001b[0m \u001b[43m \u001b[49m\u001b[43mposition_ids\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mposition_ids\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1208\u001b[0m \u001b[43m \u001b[49m\u001b[43mpast_key_values\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpast_key_values\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1209\u001b[0m \u001b[43m \u001b[49m\u001b[43minputs_embeds\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minputs_embeds\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1210\u001b[0m \u001b[43m \u001b[49m\u001b[43muse_cache\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43muse_cache\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1211\u001b[0m \u001b[43m \u001b[49m\u001b[43moutput_attentions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moutput_attentions\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1212\u001b[0m \u001b[43m \u001b[49m\u001b[43moutput_hidden_states\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moutput_hidden_states\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1213\u001b[0m \u001b[43m \u001b[49m\u001b[43mreturn_dict\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mreturn_dict\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1214\u001b[0m \u001b[43m \u001b[49m\u001b[43mcache_position\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcache_position\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1215\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1217\u001b[0m hidden_states \u001b[38;5;241m=\u001b[39m outputs[\u001b[38;5;241m0\u001b[39m]\n\u001b[1;32m 1218\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mconfig\u001b[38;5;241m.\u001b[39mpretraining_tp \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m1\u001b[39m:\n","File \u001b[0;32m~/miniconda3/envs/llama-factory/lib/python3.11/site-packages/torch/nn/modules/module.py:1532\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1530\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_compiled_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs) \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[1;32m 1531\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1532\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_impl\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n","File \u001b[0;32m~/miniconda3/envs/llama-factory/lib/python3.11/site-packages/torch/nn/modules/module.py:1541\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1536\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m 1537\u001b[0m 
\u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m 1538\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m 1539\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m 1540\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1541\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1543\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 1544\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n","File \u001b[0;32m~/.cache/huggingface/modules/transformers_modules/internlm/internlm2_5-7b-chat-1m/8d1a709a04d71440ef3df6ebbe204672f411c8b6/modeling_internlm2.py:1004\u001b[0m, in \u001b[0;36mInternLM2Model.forward\u001b[0;34m(self, input_ids, attention_mask, position_ids, past_key_values, inputs_embeds, use_cache, output_attentions, output_hidden_states, return_dict, cache_position)\u001b[0m\n\u001b[1;32m 993\u001b[0m layer_outputs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_gradient_checkpointing_func(\n\u001b[1;32m 994\u001b[0m decoder_layer\u001b[38;5;241m.\u001b[39m\u001b[38;5;21m__call__\u001b[39m,\n\u001b[1;32m 995\u001b[0m hidden_states,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1001\u001b[0m cache_position,\n\u001b[1;32m 1002\u001b[0m )\n\u001b[1;32m 1003\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1004\u001b[0m layer_outputs \u001b[38;5;241m=\u001b[39m \u001b[43mdecoder_layer\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1005\u001b[0m \u001b[43m \u001b[49m\u001b[43mhidden_states\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1006\u001b[0m \u001b[43m \u001b[49m\u001b[43mattention_mask\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcausal_mask\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1007\u001b[0m \u001b[43m \u001b[49m\u001b[43mposition_ids\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mposition_ids\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1008\u001b[0m \u001b[43m \u001b[49m\u001b[43mpast_key_value\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpast_key_values\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1009\u001b[0m \u001b[43m \u001b[49m\u001b[43moutput_attentions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moutput_attentions\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1010\u001b[0m \u001b[43m \u001b[49m\u001b[43muse_cache\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43muse_cache\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1011\u001b[0m \u001b[43m \u001b[49m\u001b[43mcache_position\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcache_position\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1012\u001b[0m \u001b[43m 
\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1014\u001b[0m hidden_states \u001b[38;5;241m=\u001b[39m layer_outputs[\u001b[38;5;241m0\u001b[39m]\n\u001b[1;32m 1016\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m use_cache:\n","File \u001b[0;32m~/miniconda3/envs/llama-factory/lib/python3.11/site-packages/torch/nn/modules/module.py:1532\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1530\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_compiled_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs) \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[1;32m 1531\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1532\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_impl\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n","File \u001b[0;32m~/miniconda3/envs/llama-factory/lib/python3.11/site-packages/torch/nn/modules/module.py:1541\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1536\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m 1537\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m 1538\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m 1539\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m 1540\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1541\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1543\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 1544\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n","File \u001b[0;32m~/miniconda3/envs/llama-factory/lib/python3.11/site-packages/accelerate/hooks.py:169\u001b[0m, in \u001b[0;36madd_hook_to_module..new_forward\u001b[0;34m(module, *args, **kwargs)\u001b[0m\n\u001b[1;32m 167\u001b[0m output \u001b[38;5;241m=\u001b[39m module\u001b[38;5;241m.\u001b[39m_old_forward(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m 168\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 169\u001b[0m output \u001b[38;5;241m=\u001b[39m 
\u001b[43mmodule\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_old_forward\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 170\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m module\u001b[38;5;241m.\u001b[39m_hf_hook\u001b[38;5;241m.\u001b[39mpost_forward(module, output)\n","File \u001b[0;32m~/.cache/huggingface/modules/transformers_modules/internlm/internlm2_5-7b-chat-1m/8d1a709a04d71440ef3df6ebbe204672f411c8b6/modeling_internlm2.py:735\u001b[0m, in \u001b[0;36mInternLM2DecoderLayer.forward\u001b[0;34m(self, hidden_states, attention_mask, position_ids, past_key_value, output_attentions, use_cache, cache_position)\u001b[0m\n\u001b[1;32m 719\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 720\u001b[0m \u001b[38;5;124;03mArgs:\u001b[39;00m\n\u001b[1;32m 721\u001b[0m \u001b[38;5;124;03m hidden_states (`torch.FloatTensor`): input to the layer of shape `(batch, seq_len, embed_dim)`\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 731\u001b[0m \u001b[38;5;124;03m past_key_value (`Tuple(torch.FloatTensor)`, *optional*): cached past key and value projection states\u001b[39;00m\n\u001b[1;32m 732\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 733\u001b[0m residual \u001b[38;5;241m=\u001b[39m hidden_states\n\u001b[0;32m--> 735\u001b[0m hidden_states \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mattention_norm\u001b[49m\u001b[43m(\u001b[49m\u001b[43mhidden_states\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 737\u001b[0m \u001b[38;5;66;03m# Self Attention\u001b[39;00m\n\u001b[1;32m 738\u001b[0m hidden_states, self_attn_weights, present_key_value \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mattention(\n\u001b[1;32m 739\u001b[0m hidden_states\u001b[38;5;241m=\u001b[39mhidden_states,\n\u001b[1;32m 740\u001b[0m attention_mask\u001b[38;5;241m=\u001b[39mattention_mask,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 745\u001b[0m cache_position\u001b[38;5;241m=\u001b[39mcache_position,\n\u001b[1;32m 746\u001b[0m )\n","File \u001b[0;32m~/miniconda3/envs/llama-factory/lib/python3.11/site-packages/torch/nn/modules/module.py:1532\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1530\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_compiled_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs) \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[1;32m 1531\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1532\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_impl\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n","File \u001b[0;32m~/miniconda3/envs/llama-factory/lib/python3.11/site-packages/torch/nn/modules/module.py:1541\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1536\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip 
the rest of the logic in this function, and just call forward. (call stack condensed; ANSI colour codes stripped for readability)\n","--> 1541     return forward_call(*args, **kwargs)\n","File ~/miniconda3/envs/llama-factory/lib/python3.11/site-packages/accelerate/hooks.py:164, in add_hook_to_module.<locals>.new_forward(module, *args, **kwargs)\n","--> 164     args, kwargs = module._hf_hook.pre_forward(module, *args, **kwargs)\n","File ~/miniconda3/envs/llama-factory/lib/python3.11/site-packages/accelerate/hooks.py:109, in SequentialHook.pre_forward(self, module, *args, **kwargs)\n","--> 109     args, kwargs = hook.pre_forward(module, *args, **kwargs)\n","File ~/miniconda3/envs/llama-factory/lib/python3.11/site-packages/accelerate/hooks.py:354, in AlignDevicesHook.pre_forward(self, module, *args, **kwargs)\n","--> 354     set_module_tensor_to_device(module, name, self.execution_device, value=value, fp16_statistics=fp16_statistics, tied_params_map=self.tied_params_map)\n","File ~/miniconda3/envs/llama-factory/lib/python3.11/site-packages/accelerate/utils/modeling.py:404, in set_module_tensor_to_device(module, tensor_name, device, value, dtype, fp16_statistics, tied_params_map)\n","--> 404     new_value = value.to(device)\n","\u001b[0;31mNotImplementedError\u001b[0m: Cannot copy out of meta tensor; no data!"]}],"source":["%%time\n","\n","prompt1 = datasets[\"test\"][\"prompt\"][1000]\n","\n","gen_kwargs = {\"max_length\": 4096, \"do_sample\": True, \"top_k\": 1}\n","with torch.no_grad():\n","    inputs = tokenizer(\n","        [prompt1],\n","        return_tensors=\"pt\",\n","    ).to(device)\n","    outputs = model.generate(**inputs, **gen_kwargs)\n","    # Keep only the newly generated tokens, dropping the echoed prompt.\n","    outputs = outputs[:, inputs[\"input_ids\"].shape[1]:]\n","    print(tokenizer.decode(outputs[0], skip_special_tokens=True))"]}
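,{"cell_type":"markdown","metadata":{},"source":["**Why the cell above fails:** `Cannot copy out of meta tensor; no data!` is raised when accelerate's `AlignDevicesHook` tries to move a parameter that exists only on the `meta` device - a shape-only placeholder, typically left behind when `device_map=\"auto\"` offloads modules - so there are no actual weights to copy. One way around it is to reload the model with an explicit device map so nothing stays on `meta`. A minimal sketch, assuming a single GPU with enough memory (`model_name` is the variable used elsewhere in this notebook):\n","\n","```python\n","from transformers import AutoModelForCausalLM\n","\n","# Reload with every module pinned to GPU 0 instead of being offloaded.\n","model = AutoModelForCausalLM.from_pretrained(\n","    model_name,\n","    torch_dtype=\"auto\",\n","    device_map={\"\": 0},\n","    trust_remote_code=True,\n",")\n","```"]}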
,{"cell_type":"code","execution_count":12,"metadata":{},"outputs":[{"name":"stderr","output_type":"stream","text":["/home/inflaton/miniconda3/envs/llama-factory/lib/python3.11/site-packages/transformers/integrations/peft.py:399: FutureWarning: The `active_adapter` method is deprecated and will be removed in a future version.\n","  warnings.warn(\n","/home/inflaton/miniconda3/envs/llama-factory/lib/python3.11/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.\n","  warnings.warn(\n"]},{"ename":"RuntimeError","evalue":"The weights trying to be saved contained shared tensors [{'base_model.model.model.layers.14.attention.wo.lora_A.weight', 'base_model.model.model.layers.26.attention.wo.lora_B.weight', ...}, {'base_model.model.model.layers.25.attention.wqkv.lora_B.weight', ...}, {'base_model.model.model.layers.0.feed_forward.w2.lora_A.weight', ...}] (three tensor groups truncated for readability; together they name the LoRA lora_A/lora_B matrices of the attention.wqkv, attention.wo and feed_forward.w1/w2/w3 projections across layers 0-31) that are mismatching the transformers base configuration. Try saving using `safe_serialization=False` or remove this tensor sharing.","output_type":"error","traceback":["---------------------------------------------------------------------------\n","RuntimeError                              Traceback (most recent call last)\n","Cell In[12], line 3\n","      1 adapter_name_or_path = \"inflaton-ai/InternLM_2_5-7b_LoRA-r2\"\n","----> 3 model.push_to_hub(adapter_name_or_path)\n","File ~/miniconda3/envs/llama-factory/lib/python3.11/site-packages/transformers/modeling_utils.py:2663, in PreTrainedModel.push_to_hub(self, *args, **kwargs)\n","File ~/miniconda3/envs/llama-factory/lib/python3.11/site-packages/transformers/utils/hub.py:891, in PushToHubMixin.push_to_hub(self, repo_id, ...)\n","File ~/miniconda3/envs/llama-factory/lib/python3.11/site-packages/transformers/modeling_utils.py:2574, in PreTrainedModel.save_pretrained(self, save_directory, ...)\n","RuntimeError: The weights trying to be saved contained shared tensors [...] that are mismatching the transformers base configuration. Try saving using `safe_serialization=False` or remove this tensor sharing. (ANSI colour codes stripped; the duplicated tensor list is omitted here - see the error value above)"]}],"source":["adapter_name_or_path = \"inflaton-ai/InternLM_2_5-7b_LoRA-r2\"\n","\n","model.push_to_hub(adapter_name_or_path)\n","tokenizer.push_to_hub(adapter_name_or_path)"]}
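,{"cell_type":"markdown","metadata":{},"source":["**Why `push_to_hub` fails:** safetensors refuses to serialize tensors that share storage, and here the LoRA `lora_A`/`lora_B` matrices of the PEFT-wrapped model report shared data pointers (most likely fallout from the meta-tensor state above), so `save_pretrained` aborts. The workaround suggested by the error message itself is to fall back to PyTorch's pickle-based format; a sketch, not necessarily how the adapter was ultimately uploaded:\n","\n","```python\n","# Pickle-based serialization tolerates shared tensors; the repo will then\n","# contain pytorch_model.bin instead of *.safetensors files.\n","model.push_to_hub(adapter_name_or_path, safe_serialization=False)\n","tokenizer.push_to_hub(adapter_name_or_path)\n","```\n","\n","Alternatively, saving just the adapter through PEFT (calling `save_pretrained` on the `PeftModel`) sidesteps serializing the tied base weights altogether."]}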
,{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[],"source":["del model, tokenizer"]}
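,{"cell_type":"markdown","metadata":{},"source":["After freeing the broken in-memory copy, the next cell reloads the base model together with the adapter previously pushed to the hub. `load_model` is this project's own helper; a generic PEFT equivalent would look roughly like the sketch below (assuming the helper performs a plain base-plus-adapter load):\n","\n","```python\n","from peft import PeftModel\n","from transformers import AutoModelForCausalLM, AutoTokenizer\n","\n","base = AutoModelForCausalLM.from_pretrained(\n","    \"internlm/internlm2_5-7b-chat-1m\", torch_dtype=\"auto\", trust_remote_code=True\n",")\n","model = PeftModel.from_pretrained(base, \"inflaton-ai/InternLM_2_5-7b_LoRA-Adapter\")\n","tokenizer = AutoTokenizer.from_pretrained(\n","    \"internlm/internlm2_5-7b-chat-1m\", trust_remote_code=True\n",")\n","```"]}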
,{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[{"name":"stdout","output_type":"stream","text":["loading model: internlm/internlm2_5-7b-chat-1m with adapter: inflaton-ai/InternLM_2_5-7b_LoRA-Adapter\n"]},{"data":{"text/plain":["Loading checkpoint shards:   0%|          | 0/8 [00:00, ?it/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"text/plain":["adapter_config.json:   0%|          | 0.00/679 [00:00, ?B/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"text/plain":["adapter_model.safetensors:   0%|          | 0.00/37.8M [00:00, ?B/s]"]},"metadata":{},"output_type":"display_data"},{"name":"stdout","output_type":"stream","text":["CPU times: user 3.06 s, sys: 5.04 s, total: 8.11 s\n","Wall time: 31.6 s\n"]}],"source":["%%time\n","\n","model, tokenizer = load_model(model_name, adapter_name_or_path=adapter_name_or_path, using_llama_factory=False)\n","# model, tokenizer = load_model(\"inflaton-ai/InternLM_2_5-7b_LoRA-Adapter\", using_llama_factory=False)"]}
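,{"cell_type":"markdown","metadata":{},"source":["The `UserWarning` emitted below is benign: on Apple's MPS backend the `aten::isin` operator falls back to the CPU, which only costs speed. `evaluate_model` chains the project's `eval_model`, `save_results` and `calc_metrics` helpers; assuming `calc_metrics` boils down to exact-match accuracy over label/prediction pairs (a hypothetical stand-in, not the project code), the core computation is simply:\n","\n","```python\n","def simple_accuracy(labels, predictions):\n","    \"\"\"Fraction of predictions that exactly match their gold labels.\"\"\"\n","    return sum(l == p for l, p in zip(labels, predictions)) / len(labels)\n","```"]}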
,{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[{"name":"stderr","output_type":"stream","text":["/Users/inflaton/anaconda3/envs/logical-reasoning/lib/python3.11/site-packages/transformers/generation/utils.py:1513: UserWarning: The operator 'aten::isin.Tensor_Tensor_out' is not currently supported on the MPS backend and will fall back to run on the CPU. This may have performance implications. (Triggered internally at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/mps/MPSFallback.mm:13.)\n","  if eos_token_id is not None and torch.isin(elements=eos_token_id, test_elements=pad_token_id).any():\n"]},{"name":"stdout","output_type":"stream","text":["是\n","CPU times: user 579 ms, sys: 330 ms, total: 909 ms\n","Wall time: 2.86 s\n"]}],"source":["def evaluate_model(model, tokenizer, model_name, dataset):\n","    print(f\"Evaluating model: {model_name} on {device}\")\n","    # Run inference over the full dataset on the current device.\n","    predictions = eval_model(model, tokenizer, dataset, device=device)\n","\n","    # Persist per-sample predictions next to earlier runs for comparison.\n","    save_results(\n","        model_name,\n","        results_path,\n","        dataset,\n","        predictions,\n","        debug=False,\n","    )\n","\n","    # Score predictions against gold labels and print the summary metrics.\n","    metrics = calc_metrics(dataset[\"label\"], predictions, debug=False)\n","    print(metrics)"]},{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[],"source":["%%time\n","\n","evaluate_model(model, tokenizer, f\"{model_name}_{adapter_name_or_path}_nv4080\", datasets[\"test\"])"]}],"metadata":{"accelerator":"GPU","application/vnd.databricks.v1+notebook":{"dashboards":[],"environmentMetadata":null,"language":"python","notebookMetadata":{"mostRecentlyExecutedCommandWithImplicitDF":{"commandId":-1,"dataframes":["_sqldf"]},"pythonIndentUnit":4},"notebookName":"10_eval-lf-medium-py3.11","widgets":{}},"colab":{"gpuType":"L4","provenance":[]},"kernelspec":{"display_name":"Python 3","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.11.9"}},"nbformat":4,"nbformat_minor":0}