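# Evaluate a causal language model (optionally with a LoRA adapter and/or 4-bit
# quantization) on a logical reasoning dataset, then save predictions and metrics.
# All configuration is read from environment variables.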
import os
import sys
import torch
from dotenv import find_dotenv, load_dotenv
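
# Load environment variables from .env, falling back to .env.example if no .env is found.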
found_dotenv = find_dotenv(".env")
if len(found_dotenv) == 0:
    found_dotenv = find_dotenv(".env.example")
print(f"loading env vars from: {found_dotenv}")
load_dotenv(found_dotenv, override=False)
path = os.path.dirname(found_dotenv)
print(f"Adding {path} to sys.path")
sys.path.append(path)
from llm_toolkit.llm_utils import *
from llm_toolkit.logical_reasoning_utils import *
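
# Runtime configuration comes from environment variables. An illustrative
# sketch (placeholder values only, not taken from this repo):
#   MODEL_NAME=<hf-model-id>
#   ADAPTER_NAME_OR_PATH=<optional-lora-adapter>
#   LOAD_IN_4BIT=true
#   LOGICAL_REASONING_DATA_PATH=<dataset-path>
#   LOGICAL_REASONING_RESULTS_PATH=<results-file>
#   MAX_NEW_TOKENS=16
#   BATCH_SIZE=2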
model_name = os.getenv("MODEL_NAME")
adapter_name_or_path = os.getenv("ADAPTER_NAME_OR_PATH")
load_in_4bit = os.getenv("LOAD_IN_4BIT") == "true"
data_path = os.getenv("LOGICAL_REASONING_DATA_PATH")
results_path = os.getenv("LOGICAL_REASONING_RESULTS_PATH")
use_english_datasets = os.getenv("USE_ENGLISH_DATASETS") == "true"
using_p1 = os.getenv("USING_P1_PROMPT_TEMPLATE") == "true"
test_data = os.getenv("TEST_DATA", None)
using_llama_factory = os.getenv("USING_LLAMA_FACTORY") == "true"
max_new_tokens = int(os.getenv("MAX_NEW_TOKENS", 16))
repetition_penalty = float(os.getenv("REPETITION_PENALTY", 1.0))
batch_size = int(os.getenv("BATCH_SIZE", 2))
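
# Inference dtype preference: float32 if requested, else bfloat16 if requested, else float16.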
dtype = (
    torch.float32
    if os.getenv("USE_FLOAT32_FOR_INFERENCE") == "true"
    else (
        torch.bfloat16
        if os.getenv("USE_BF16_FOR_INFERENCE") == "true"
        else torch.float16
    )
)
print(model_name, adapter_name_or_path, load_in_4bit, data_path, results_path)
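
# (1) GPU memory snapshot before the model is loaded.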
gpu_stats = torch.cuda.get_device_properties(0)
start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)
print(f"(1) GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
print(f"{start_gpu_memory} GB of memory reserved.")
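
# Load the base model and tokenizer, optionally applying a LoRA adapter
# and 4-bit quantization.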
model, tokenizer = load_model(
    model_name,
    load_in_4bit=load_in_4bit,
    adapter_name_or_path=adapter_name_or_path,
    using_llama_factory=using_llama_factory,
    dtype=dtype,
)
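
# (2) GPU memory snapshot after the model is loaded.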
gpu_stats = torch.cuda.get_device_properties(0)
start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)
print(f"(2) GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
print(f"{start_gpu_memory} GB of memory reserved.")
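
# Load the logical reasoning dataset; Chinese prompts are used unless USE_ENGLISH_DATASETS is set.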
datasets = load_logical_reasoning_dataset(
    data_path,
    tokenizer=tokenizer,
    chinese_prompt=not use_english_datasets,
    using_p1=using_p1,
    test_data=test_data,
)
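
# Optionally evaluate only the first N test entries, where N is given as the first CLI argument.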
if len(sys.argv) > 1:
    num = int(sys.argv[1])
    if num > 0:
        print(f"--- evaluating {num} entries")
        datasets["test"] = datasets["test"].select(range(num))
print_row_details(datasets["test"].to_pandas(), indices=[0, -1])
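
# Generate predictions for the test split in batches.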
print("Evaluating model: " + model_name)
predictions = eval_model(
    model,
    tokenizer,
    datasets["test"],
    max_new_tokens=max_new_tokens,
    repetition_penalty=repetition_penalty,
    batch_size=batch_size,
)
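
# (3) GPU memory snapshot after inference.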
gpu_stats = torch.cuda.get_device_properties(0)
start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)
print(f"(3) GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
print(f"{start_gpu_memory} GB of memory reserved.")
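
# Include the adapter name in the results identifier so runs with different adapters stay distinct.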
if adapter_name_or_path is not None:
    model_name += "/" + adapter_name_or_path.split("/")[-1]
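
# Save predictions under the identifier "answer" for blind test data, otherwise under a
# name derived from the model, dtype, and quantization/LLaMA-Factory flags.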
save_results(
    (
        "answer"
        if test_data
        else f"{model_name}_{dtype}{'_4bit' if load_in_4bit else ''}{'_lf' if using_llama_factory else ''}"
    ),
    results_path,
    datasets["test"],
    predictions,
    debug=True,
)
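
# Metrics can only be computed when gold labels are available (i.e., not a blind test set).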
if not test_data:
    metrics = calc_metrics(datasets["test"]["label"], predictions, debug=True)
    print(metrics)