In [1]:
# Enable IPython's autoreload extension in mode 2 so edited project modules are
# re-imported before each cell executes, without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
from pathlib import Path

# Resolve the project root: the mounted Google Drive folder when running on
# Colab, otherwise the parent of the current working directory.
# NOTE: a later cell calls os.chdir(workding_dir); re-running this cell after
# that point resolves the parent of the project root instead of the root.
try:
    from google.colab import drive
except ModuleNotFoundError:
    # Not on Colab: the notebook is expected to live one level below the root.
    workding_dir = str(Path.cwd().parent)
else:
    # Only the import is guarded, so a failure inside drive.mount() surfaces
    # as an error instead of silently falling back to the local path.
    drive.mount("/content/drive")
    workding_dir = "/content/drive/MyDrive/logical-reasoning/"

In [3]:
import os
import sys
from pathlib import Path

# Run everything relative to the project root so relative data/result paths resolve.
os.chdir(workding_dir)

# Make project-local packages importable; the membership check keeps sys.path
# free of duplicate entries when this cell is re-run.
if workding_dir not in sys.path:
    sys.path.append(workding_dir)

print("workding dir:", workding_dir)

workding dir: /home/inflaton/code/projects/courses/logical-reasoning


In [4]:
from dotenv import find_dotenv, load_dotenv

# Prefer a real .env file; find_dotenv returns an empty string when nothing is
# found, in which case the .env.example template is looked up instead.
found_dotenv = find_dotenv(".env") or find_dotenv(".env.example")
print(f"loading env vars from: {found_dotenv}")

# Kept as the last expression so the cell displays load_dotenv's return value.
load_dotenv(found_dotenv, override=True)

loading env vars from: /home/inflaton/code/projects/courses/logical-reasoning/.env


True

In [5]:
import os

# String-valued settings: each is None when the variable is not set.
model_name, adapter_name_or_path, data_path, results_path = map(
    os.getenv,
    (
        "MODEL_NAME",
        "ADAPTER_NAME_OR_PATH",
        "LOGICAL_REASONING_DATA_PATH",
        "LOGICAL_REASONING_RESULTS_PATH",
    ),
)

# Boolean flags: enabled only when the variable is exactly the string "true".
load_in_4bit, use_english_datasets = (
    os.getenv(flag_name) == "true"
    for flag_name in ("LOAD_IN_4BIT", "USE_ENGLISH_DATASETS")
)


print(model_name, adapter_name_or_path, load_in_4bit, data_path, results_path)

inflaton/InternLM_2_5-7b_LR None False datasets/mgtv results/mgtv-results_merged_model.csv


In [6]:
%%time
!python --version
!pip show flash-attn
!pip show transformers

Python 3.11.9
[0mName: transformers
Version: 4.43.0.dev0
Summary: State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow
Home-page: https://github.com/huggingface/transformers
Author: The Hugging Face team (past and future) with the help of all our contributors (https://github.com/huggingface/transformers/graphs/contributors)
Author-email: transformers@huggingface.co
License: Apache 2.0 License
Location: /home/inflaton/miniconda3/envs/llama-factory/lib/python3.11/site-packages
Requires: filelock, huggingface-hub, numpy, packaging, pyyaml, regex, requests, safetensors, tokenizers, tqdm
Required-by: llamafactory, peft, trl, vllm
CPU times: user 30.7 ms, sys: 1.73 ms, total: 32.4 ms
Wall time: 1.99 s


In [7]:
# NOTE(review): these star imports are presumably where check_gpu, load_model,
# load_logical_reasoning_dataset, eval_model, save_results and calc_metrics come
# from (none is defined in this notebook). Consider explicit imports once the
# owning module of each helper is confirmed.
from llm_toolkit.llm_utils import *
from llm_toolkit.logical_reasoning_utils import *

# Keep check_gpu's return value as the notebook-wide `device`; the evaluation
# helper defined later reads it as a global and passes it to eval_model.
device = check_gpu()

loading /home/inflaton/code/projects/courses/logical-reasoning/llm_toolkit/logical_reasoning_utils.py
GPU is available


In [8]:
%%time

# Load the model and tokenizer identified by MODEL_NAME. The recorded run of
# this cell took about 8.5 minutes of wall time, so avoid re-running it needlessly.
# NOTE(review): adapter_name_or_path and load_in_4bit are read from the
# environment earlier but are not passed to load_model here — confirm intended.
model, tokenizer = load_model(model_name)

loading model: inflaton/InternLM_2_5-7b_LR


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loading checkpoint shards:   0%|          | 0/8 [00:00<?, ?it/s]



CPU times: user 8.98 s, sys: 22.2 s, total: 31.2 s
Wall time: 8min 31s


In [9]:
# Prompts are built in Chinese unless USE_ENGLISH_DATASETS was set to "true".
prompt_in_chinese = not use_english_datasets

# Build the train/test splits; the tokenizer is handed to the loader along with
# the prompt options.
datasets = load_logical_reasoning_dataset(
    data_path, tokenizer=tokenizer, chinese_prompt=prompt_in_chinese, using_p1=False
)

loading train/test data files
DatasetDict({
    train: Dataset({
        features: ['text', 'label', 'answer', 'title', 'puzzle', 'truth', 'train_text', 'prompt'],
        num_rows: 25000
    })
    test: Dataset({
        features: ['text', 'label', 'answer', 'title', 'puzzle', 'truth', 'train_text', 'prompt'],
        num_rows: 3000
    })
})


In [10]:
import numpy as np


def _repetition_penalty_grid(start, end, step, decimals=10):
    """Return the penalties ``start, start + step, ...`` that do not exceed ``end``.

    Values are derived from an integer step index and rounded to ``decimals``
    places, so they stay free of binary floating-point noise (for example
    1.13 rather than 1.1300000000000001) and never overshoot ``end``.

    Raises:
        ValueError: if ``step`` is not strictly positive.
    """
    if step <= 0:
        raise ValueError(f"step_repetition_penalty must be > 0, got {step}")

    # Whole steps that fit into [start, end]; the tiny tolerance absorbs cases
    # such as (1.15 - 1.11) / 0.01 evaluating to 3.9999999999999996.
    whole_steps = int(np.floor((end - start) / step + 1e-9))

    # range() is empty for a negative count, i.e. when end < start.
    return [
        round(float(start + index * step), decimals)
        for index in range(whole_steps + 1)
    ]


def evaluate_model_with_repetition_penalty(
    model,
    tokenizer,
    model_name,
    dataset,
    start_repetition_penalty=1.11,
    end_repetition_penalty=1.15,
    step_repetition_penalty=0.01,
):
    """Evaluate ``model`` on ``dataset`` once per repetition penalty in a sweep.

    For every penalty from ``start_repetition_penalty`` to
    ``end_repetition_penalty`` (inclusive, in increments of
    ``step_repetition_penalty``) this generates predictions with ``eval_model``,
    stores them with ``save_results`` under a name that embeds the penalty, and
    prints the result of ``calc_metrics``.

    Relies on the notebook-level globals ``device`` and ``results_path`` and on
    ``eval_model``, ``save_results`` and ``calc_metrics`` being in scope.

    Args:
        model: model object forwarded to ``eval_model``.
        tokenizer: tokenizer object forwarded to ``eval_model``.
        model_name: label used as the prefix of each saved result name.
        dataset: evaluation split; its ``"label"`` entry is compared against
            the predictions.
        start_repetition_penalty: first penalty of the sweep.
        end_repetition_penalty: last penalty of the sweep (inclusive).
        step_repetition_penalty: increment between penalties; must be > 0.

    Returns:
        None. Results are saved and metrics are printed as side effects.

    Raises:
        ValueError: if ``step_repetition_penalty`` is not strictly positive.
    """
    print(f"Evaluating model: {model_name} on {device}")

    # np.arange with a fractional step accumulates rounding error, which leaked
    # into the saved result names and could add a value beyond the end of the
    # range; the grid helper avoids both.
    penalties = _repetition_penalty_grid(
        start_repetition_penalty,
        end_repetition_penalty,
        step_repetition_penalty,
    )

    for repetition_penalty in penalties:
        print(f"*** Evaluating with repetition_penalty: {repetition_penalty}")
        predictions = eval_model(
            model,
            tokenizer,
            dataset,
            device=device,
            repetition_penalty=repetition_penalty,
            max_new_tokens=8,
        )

        # The penalty is part of the saved name so each run of the sweep is
        # stored separately.
        model_name_with_rp = f"{model_name}_NV4080_rp{repetition_penalty}"

        save_results(
            model_name_with_rp,
            results_path,
            dataset,
            predictions,
            debug=True,
        )

        metrics = calc_metrics(dataset["label"], predictions, debug=False)
        print(metrics)

In [11]:
# Sweep the default repetition-penalty range over the test split. The recorded
# output shows each of the five passes over 3000 rows taking roughly 1.5-2 hours.
evaluate_model_with_repetition_penalty(
    model=model,
    tokenizer=tokenizer,
    model_name=model_name,
    dataset=datasets["test"],
)

Evaluating model: inflaton/InternLM_2_5-7b_LR on cuda
*** Evaluating with repetition_penalty: 1.11


  0%|          | 1/3000 [03:59<199:46:47, 239.82s/it]

--------
step 1: 不是</s>
--------
step 2: 不是
--------
step 3: 不是
--------
step 4: 不是
--------
step 5: 不是


100%|██████████| 3000/3000 [2:12:10<00:00,  2.64s/it]  


      text label title                                             puzzle  \
0  甄加索是自杀吗    不是  海岸之谜  在远离城市喧嚣的海边小屋，一天清晨，邻居发现甄加索僵卧在沙滩上，已无生命迹象。现场没有发现任...   

                                               truth  \
0  甄加索是一位热爱自然的画家，他每年都会来到这个海边小屋寻找灵感。在他生命的最后几天，他一直在...   

  llama-factory/merged_models/internlm2_5-7b-chat-1m_sft_bf16_p2_full_epoch_6_NV4080  \
0                                                 不是                                   

  llama-factory/merged_models/internlm2_5-7b-chat-1m_sft_bf16_p2_full_epoch_6_M3  \
0                                                 不是                               

  llama-factory/merged_models/internlm2_5-7b-chat-1m_sft_bf16_p2_full_NV4080  \
0                                                 不是                           

  llama-factory/merged_models/internlm2_5-7b-chat-1m_sft_bf16_p2_full_M3  \
0                                                 不是                       

  llama-factory/merged_models/internlm2_5-7b-chat-1m_sft_bf16_p2_full_NV

  0%|          | 1/3000 [00:02<2:14:53,  2.70s/it]

--------
step 1: 不是</s>
--------
step 2: 不是
--------
step 3: 不是
--------
step 4: 不是
--------
step 5: 不是


100%|██████████| 3000/3000 [1:52:01<00:00,  2.24s/it]  


      text label title                                             puzzle  \
0  甄加索是自杀吗    不是  海岸之谜  在远离城市喧嚣的海边小屋，一天清晨，邻居发现甄加索僵卧在沙滩上，已无生命迹象。现场没有发现任...   

                                               truth  \
0  甄加索是一位热爱自然的画家，他每年都会来到这个海边小屋寻找灵感。在他生命的最后几天，他一直在...   

  llama-factory/merged_models/internlm2_5-7b-chat-1m_sft_bf16_p2_full_epoch_6_NV4080  \
0                                                 不是                                   

  llama-factory/merged_models/internlm2_5-7b-chat-1m_sft_bf16_p2_full_epoch_6_M3  \
0                                                 不是                               

  llama-factory/merged_models/internlm2_5-7b-chat-1m_sft_bf16_p2_full_NV4080  \
0                                                 不是                           

  llama-factory/merged_models/internlm2_5-7b-chat-1m_sft_bf16_p2_full_M3  \
0                                                 不是                       

  llama-factory/merged_models/internlm2_5-7b-chat-1m_sft_bf16_p2_full_NV

  0%|          | 1/3000 [00:01<1:30:17,  1.81s/it]

--------
step 1: 不是</s>
--------
step 2: 不是
--------
step 3: 不是
--------
step 4: 不是
--------
step 5: 不是


100%|██████████| 3000/3000 [1:34:52<00:00,  1.90s/it]


      text label title                                             puzzle  \
0  甄加索是自杀吗    不是  海岸之谜  在远离城市喧嚣的海边小屋，一天清晨，邻居发现甄加索僵卧在沙滩上，已无生命迹象。现场没有发现任...   

                                               truth  \
0  甄加索是一位热爱自然的画家，他每年都会来到这个海边小屋寻找灵感。在他生命的最后几天，他一直在...   

  llama-factory/merged_models/internlm2_5-7b-chat-1m_sft_bf16_p2_full_epoch_6_NV4080  \
0                                                 不是                                   

  llama-factory/merged_models/internlm2_5-7b-chat-1m_sft_bf16_p2_full_epoch_6_M3  \
0                                                 不是                               

  llama-factory/merged_models/internlm2_5-7b-chat-1m_sft_bf16_p2_full_NV4080  \
0                                                 不是                           

  llama-factory/merged_models/internlm2_5-7b-chat-1m_sft_bf16_p2_full_M3  \
0                                                 不是                       

  llama-factory/merged_models/internlm2_5-7b-chat-1m_sft_bf16_p2_full_NV

  0%|          | 1/3000 [00:01<1:18:52,  1.58s/it]

--------
step 1: 不是</s>
--------
step 2: 不是
--------
step 3: 不是
--------
step 4: 不是
--------
step 5: 不是


100%|██████████| 3000/3000 [1:24:16<00:00,  1.69s/it]


      text label title                                             puzzle  \
0  甄加索是自杀吗    不是  海岸之谜  在远离城市喧嚣的海边小屋，一天清晨，邻居发现甄加索僵卧在沙滩上，已无生命迹象。现场没有发现任...   

                                               truth  \
0  甄加索是一位热爱自然的画家，他每年都会来到这个海边小屋寻找灵感。在他生命的最后几天，他一直在...   

  llama-factory/merged_models/internlm2_5-7b-chat-1m_sft_bf16_p2_full_epoch_6_NV4080  \
0                                                 不是                                   

  llama-factory/merged_models/internlm2_5-7b-chat-1m_sft_bf16_p2_full_epoch_6_M3  \
0                                                 不是                               

  llama-factory/merged_models/internlm2_5-7b-chat-1m_sft_bf16_p2_full_NV4080  \
0                                                 不是                           

  llama-factory/merged_models/internlm2_5-7b-chat-1m_sft_bf16_p2_full_M3  \
0                                                 不是                       

  llama-factory/merged_models/internlm2_5-7b-chat-1m_sft_bf16_p2_full_NV

  0%|          | 1/3000 [00:01<1:20:33,  1.61s/it]

--------
step 1: 不是</s>
--------
step 2: 不是
--------
step 3: 不是
--------
step 4: 不是
--------
step 5: 不是


100%|██████████| 3000/3000 [1:27:12<00:00,  1.74s/it]

      text label title                                             puzzle  \
0  甄加索是自杀吗    不是  海岸之谜  在远离城市喧嚣的海边小屋，一天清晨，邻居发现甄加索僵卧在沙滩上，已无生命迹象。现场没有发现任...   

                                               truth  \
0  甄加索是一位热爱自然的画家，他每年都会来到这个海边小屋寻找灵感。在他生命的最后几天，他一直在...   

  llama-factory/merged_models/internlm2_5-7b-chat-1m_sft_bf16_p2_full_epoch_6_NV4080  \
0                                                 不是                                   

  llama-factory/merged_models/internlm2_5-7b-chat-1m_sft_bf16_p2_full_epoch_6_M3  \
0                                                 不是                               

  llama-factory/merged_models/internlm2_5-7b-chat-1m_sft_bf16_p2_full_NV4080  \
0                                                 不是                           

  llama-factory/merged_models/internlm2_5-7b-chat-1m_sft_bf16_p2_full_M3  \
0                                                 不是                       

  llama-factory/merged_models/internlm2_5-7b-chat-1m_sft_bf16_p2_full_NV


