Spaces:

inflaton-ai
/

logical-reasoning

Build error

App Files Files Community

dh-mc commited on Jul 24, 2024

Commit

3e74e95

1 Parent(s): a128b31

Create tune_logical_reasoning.py

Browse files

Files changed (1) hide show

llm_toolkit/tune_logical_reasoning.py +162 -0

llm_toolkit/tune_logical_reasoning.py ADDED Viewed

	@@ -0,0 +1,162 @@

+import os
+import sys
+from unsloth import FastLanguageModel, is_bfloat16_supported
+import torch
+from trl import SFTTrainer
+from transformers import TrainingArguments
+from dotenv import find_dotenv, load_dotenv
+from llm_toolkit.logical_reasoning_utils import *
+from llm_toolkit.llm_utils import *
+found_dotenv = find_dotenv(".env")
+if len(found_dotenv) == 0:
+    found_dotenv = find_dotenv(".env.example")
+print(f"loading env vars from: {found_dotenv}")
+load_dotenv(found_dotenv, override=False)
+path = os.path.dirname(found_dotenv)
+print(f"Adding {path} to sys.path")
+sys.path.append(path)
+model_name = os.getenv("MODEL_NAME")
+token = os.getenv("HF_TOKEN") or None
+load_in_4bit = os.getenv("LOAD_IN_4BIT") == "true"
+local_model = os.getenv("LOCAL_MODEL")
+hub_model = os.getenv("HUB_MODEL")
+num_train_epochs = int(os.getenv("NUM_TRAIN_EPOCHS") or 0)
+data_path = os.getenv("LOGICAL_REASONING_DATA_PATH")
+results_path = os.getenv("LOGICAL_REASONING_RESULTS_PATH")
+print(model_name, load_in_4bit, data_path, results_path)
+max_seq_length = 2048  # Choose any! We auto support RoPE Scaling internally!
+dtype = (
+    None  # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
+)
+max_seq_length = 4096  # Choose any! We auto support RoPE Scaling internally!
+dtype = (
+    None  # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
+)
+gpu_stats = torch.cuda.get_device_properties(0)
+start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
+max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)
+print(f"(1) GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
+print(f"{start_gpu_memory} GB of memory reserved.")
+model, tokenizer = FastLanguageModel.from_pretrained(
+    model_name=model_name,
+    max_seq_length=max_seq_length,
+    dtype=dtype,
+    load_in_4bit=load_in_4bit,
+)
+gpu_stats = torch.cuda.get_device_properties(0)
+start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
+max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)
+print(f"(2) GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
+print(f"{start_gpu_memory} GB of memory reserved.")
+model = FastLanguageModel.get_peft_model(
+    model,
+    r=16,  # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
+    target_modules=[
+        "q_proj",
+        "k_proj",
+        "v_proj",
+        "o_proj",
+        "gate_proj",
+        "up_proj",
+        "down_proj",
+    ],
+    lora_alpha=16,
+    lora_dropout=0,  # Supports any, but = 0 is optimized
+    bias="none",  # Supports any, but = "none" is optimized
+    # [NEW] "unsloth" uses 30% less VRAM, fits 2x larger batch sizes!
+    use_gradient_checkpointing="unsloth",  # True or "unsloth" for very long context
+    random_state=3407,
+    use_rslora=False,  # We support rank stabilized LoRA
+    loftq_config=None,  # And LoftQ
+)
+gpu_stats = torch.cuda.get_device_properties(0)
+start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
+max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)
+print(f"(3) GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
+print(f"{start_gpu_memory} GB of memory reserved.")
+dataset = load_logical_reasoning_dataset(data_path, tokenizer=tokenizer, using_p1=False)
+print_row_details(dataset["train"].to_pandas())
+trainer = SFTTrainer(
+    model=model,
+    tokenizer=tokenizer,
+    train_dataset=dataset["train"],
+    dataset_text_field="train_text",
+    max_seq_length=max_seq_length,
+    dataset_num_proc=2,
+    packing=False,  # Can make training 5x faster for short sequences.
+    args=TrainingArguments(
+        per_device_train_batch_size=2,
+        gradient_accumulation_steps=4,
+        warmup_steps=5,
+        max_steps=20000,
+        learning_rate=2e-4,
+        fp16=not is_bfloat16_supported(),
+        bf16=is_bfloat16_supported(),
+        logging_steps=100,
+        optim="adamw_8bit",
+        weight_decay=0.01,
+        lr_scheduler_type="linear",
+        seed=3407,
+        output_dir="outputs",
+    ),
+)
+gpu_stats = torch.cuda.get_device_properties(0)
+start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
+max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)
+print(f"(4) GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
+print(f"{start_gpu_memory} GB of memory reserved.")
+trainer_stats = trainer.train()
+# @title Show final memory and time stats
+used_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
+used_memory_for_lora = round(used_memory - start_gpu_memory, 3)
+used_percentage = round(used_memory / max_memory * 100, 3)
+lora_percentage = round(used_memory_for_lora / max_memory * 100, 3)
+print(f"(5) GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
+print(f"{trainer_stats.metrics['train_runtime']} seconds used for training.")
+print(
+    f"{round(trainer_stats.metrics['train_runtime']/60, 2)} minutes used for training."
+)
+print(f"Peak reserved memory = {used_memory} GB.")
+print(f"Peak reserved memory for training = {used_memory_for_lora} GB.")
+print(f"Peak reserved memory % of max memory = {used_percentage} %.")
+print(f"Peak reserved memory for training % of max memory = {lora_percentage} %.")
+print("Evaluating fine-tuned model: " + model_name)
+FastLanguageModel.for_inference(model)  # Enable native 2x faster inference
+predictions = eval_model(model, tokenizer, datasets["test"])
+gpu_stats = torch.cuda.get_device_properties(0)
+start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
+max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)
+print(f"(6) GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
+print(f"{start_gpu_memory} GB of memory reserved.")
+save_results(
+    model_name + "(unsloth_finetuned)",
+    results_path,
+    datasets["test"],
+    predictions,
+    debug=True,
+)
+metrics = calc_metrics(datasets["test"]["label"], predictions, debug=True)
+print(metrics)