dh-mc committed on
Commit
54b1b8a
1 Parent(s): 3860729
.gitignore CHANGED
@@ -1,3 +1,4 @@
+*.run
 *.out
 *.log
 */outputs/

README.md CHANGED
@@ -10,4 +10,8 @@ pinned: false
 license: mit
 ---
 
-An example chatbot using [Gradio](https://gradio.app), [`huggingface_hub`](https://huggingface.co/docs/huggingface_hub/v0.22.2/en/index), and the [Hugging Face Inference API](https://huggingface.co/docs/api-inference/index).
+An example chatbot using [Gradio](https://gradio.app), [`huggingface_hub`](https://huggingface.co/docs/huggingface_hub/v0.22.2/en/index), and the [Hugging Face Inference API](https://huggingface.co/docs/api-inference/index).
+
+```
+pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu124
+```

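For context on the chatbot the README describes, a minimal sketch of a Gradio app backed by the Hugging Face Inference API might look like the following. The model id and generation settings are illustrative assumptions, not taken from this repository's code.

```python
# Minimal sketch only: the model id, max_tokens, and message handling below are
# assumptions for illustration, not this Space's actual app.py.
import gradio as gr
from huggingface_hub import InferenceClient

client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")  # hypothetical model id


def respond(message, history):
    # Rebuild the conversation in the OpenAI-style format chat_completion expects.
    messages = []
    for user_msg, assistant_msg in history:
        messages.append({"role": "user", "content": user_msg})
        messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})

    result = client.chat_completion(messages, max_tokens=512)
    return result.choices[0].message.content


gr.ChatInterface(respond).launch()
```
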
llama-factory/config/llama3_8b_lora_sft.yaml DELETED
@@ -1,46 +0,0 @@
-### model
-model_name_or_path: gradientai/Llama-3-8B-Instruct-Gradient-1048k
-
-### method
-stage: sft
-do_train: true
-finetuning_type: lora
-lora_target: all
-quantization_bit: 4 # use 4-bit QLoRA
-loraplus_lr_ratio: 16.0 # use LoRA+ with lambda=16.0
-# use_unsloth: true # use UnslothAI's LoRA optimization for 2x faster training
-
-### dataset
-dataset: alpaca_mac
-template: llama3
-cutoff_len: 1024
-max_samples: 4528
-overwrite_cache: true
-preprocessing_num_workers: 16
-
-### output
-# output_dir: saves/llama3-8b/lora/sft
-output_dir: /Workspace/Users/donghao.huang@mastercard.com/lf-saves/llama3-8b/lora/sft/
-logging_steps: 10
-save_steps: 560
-plot_loss: true
-overwrite_output_dir: true
-# resume_from_checkpoint: true
-
-### train
-per_device_train_batch_size: 1
-gradient_accumulation_steps: 8
-learning_rate: 1.0e-4
-num_train_epochs: 6.0
-lr_scheduler_type: cosine
-warmup_ratio: 0.1
-bf16: true
-ddp_timeout: 180000000
-
-### eval
-val_size: 0.01
-per_device_eval_batch_size: 1
-eval_strategy: steps
-eval_steps: 560
-
-report_to: none

llama-factory/config/qwen2_0.5b_lora_sft.yaml DELETED
@@ -1,42 +0,0 @@
-### model
-model_name_or_path: Qwen/Qwen2-0.5B-Instruct
-
-### method
-stage: sft
-do_train: true
-finetuning_type: lora
-lora_target: all
-
-### dataset
-dataset: alpaca_mac
-template: chatml
-cutoff_len: 1024
-max_samples: 4528
-overwrite_cache: true
-preprocessing_num_workers: 16
-
-### output
-output_dir: saves/qwen2-0.5b/lora/sft
-logging_steps: 10
-save_steps: 560
-plot_loss: true
-overwrite_output_dir: true
-
-### train
-per_device_train_batch_size: 1
-gradient_accumulation_steps: 8
-learning_rate: 1.0e-4
-num_train_epochs: 6.0
-lr_scheduler_type: cosine
-warmup_ratio: 0.1
-bf16: true
-ddp_timeout: 180000000
-
-### eval
-val_size: 0.01
-per_device_eval_batch_size: 1
-eval_strategy: steps
-eval_steps: 560
-
-report_to: wandb
-run_name: qwen2_0.5b_lora_sft # optional

llama-factory/config/qwen2_0.5b_lora_sft_unsloth.yaml DELETED
@@ -1,45 +0,0 @@
-### model
-model_name_or_path: Qwen/Qwen2-0.5B-Instruct
-
-### method
-stage: sft
-do_train: true
-finetuning_type: lora
-lora_target: all
-quantization_bit: 4 # use 4-bit QLoRA
-loraplus_lr_ratio: 16.0 # use LoRA+ with lambda=16.0
-use_unsloth: true # use UnslothAI's LoRA optimization for 2x faster training
-
-### dataset
-dataset: alpaca_mac
-template: chatml
-cutoff_len: 1024
-max_samples: 4528
-overwrite_cache: true
-preprocessing_num_workers: 16
-
-### output
-output_dir: saves/qwen2-0.5b/lora/sft
-logging_steps: 10
-save_steps: 560
-plot_loss: true
-overwrite_output_dir: true
-
-### train
-per_device_train_batch_size: 1
-gradient_accumulation_steps: 8
-learning_rate: 1.0e-4
-num_train_epochs: 6.0
-lr_scheduler_type: cosine
-warmup_ratio: 0.1
-bf16: true
-ddp_timeout: 180000000
-
-### eval
-val_size: 0.01
-per_device_eval_batch_size: 1
-eval_strategy: steps
-eval_steps: 560
-
-report_to: wandb
-run_name: qwen2_0.5b_lora_sft # optional

llama-factory/config/qwen2_1.5b_lora_sft.yaml DELETED
@@ -1,42 +0,0 @@
-### model
-model_name_or_path: Qwen/Qwen2-1.5B-Instruct
-
-### method
-stage: sft
-do_train: true
-finetuning_type: lora
-lora_target: all
-
-### dataset
-dataset: alpaca_mac
-template: chatml
-cutoff_len: 1024
-max_samples: 4528
-overwrite_cache: true
-preprocessing_num_workers: 16
-
-### output
-output_dir: saves/qwen2-1.5b/lora/sft
-logging_steps: 10
-save_steps: 560
-plot_loss: true
-overwrite_output_dir: true
-
-### train
-per_device_train_batch_size: 1
-gradient_accumulation_steps: 8
-learning_rate: 1.0e-4
-num_train_epochs: 6.0
-lr_scheduler_type: cosine
-warmup_ratio: 0.1
-bf16: true
-ddp_timeout: 180000000
-
-### eval
-val_size: 0.01
-per_device_eval_batch_size: 1
-eval_strategy: steps
-eval_steps: 560
-
-report_to: wandb
-run_name: qwen2_1.5b_lora_sft # optional

llama-factory/config/qwen2_1.5b_lora_sft_unsloth.yaml DELETED
@@ -1,45 +0,0 @@
-### model
-model_name_or_path: Qwen/Qwen2-1.5B-Instruct
-
-### method
-stage: sft
-do_train: true
-finetuning_type: lora
-lora_target: all
-quantization_bit: 4 # use 4-bit QLoRA
-loraplus_lr_ratio: 16.0 # use LoRA+ with lambda=16.0
-use_unsloth: true # use UnslothAI's LoRA optimization for 2x faster training
-
-### dataset
-dataset: alpaca_mac
-template: chatml
-cutoff_len: 1024
-max_samples: 4528
-overwrite_cache: true
-preprocessing_num_workers: 16
-
-### output
-output_dir: saves/qwen2-1.5b/lora/sft
-logging_steps: 10
-save_steps: 560
-plot_loss: true
-overwrite_output_dir: true
-
-### train
-per_device_train_batch_size: 1
-gradient_accumulation_steps: 8
-learning_rate: 1.0e-4
-num_train_epochs: 6.0
-lr_scheduler_type: cosine
-warmup_ratio: 0.1
-bf16: true
-ddp_timeout: 180000000
-
-### eval
-val_size: 0.01
-per_device_eval_batch_size: 1
-eval_strategy: steps
-eval_steps: 560
-
-report_to: wandb
-run_name: qwen2_1.5b_lora_sft # optional

llama-factory/config/qwen2_7b_lora_sft.yaml DELETED
@@ -1,45 +0,0 @@
-### model
-model_name_or_path: Qwen/Qwen2-7B-Instruct
-
-### method
-stage: sft
-do_train: true
-finetuning_type: lora
-lora_target: all
-quantization_bit: 4 # use 4-bit QLoRA
-loraplus_lr_ratio: 16.0 # use LoRA+ with lambda=16.0
-# use_unsloth: true # use UnslothAI's LoRA optimization for 2x faster training
-
-### dataset
-dataset: alpaca_mac
-template: chatml
-cutoff_len: 1024
-max_samples: 4528
-overwrite_cache: true
-preprocessing_num_workers: 16
-
-### output
-output_dir: saves/qwen2-7b/lora/sft
-logging_steps: 10
-save_steps: 560
-plot_loss: true
-overwrite_output_dir: true
-
-### train
-per_device_train_batch_size: 1
-gradient_accumulation_steps: 8
-learning_rate: 1.0e-4
-num_train_epochs: 6.0
-lr_scheduler_type: cosine
-warmup_ratio: 0.1
-bf16: true
-ddp_timeout: 180000000
-
-### eval
-val_size: 0.01
-per_device_eval_batch_size: 1
-eval_strategy: steps
-eval_steps: 560
-
-report_to: wandb
-run_name: qwen2_7b_lora_sft # optional

llama-factory/config/qwen2_7b_lora_sft_unsloth.yaml DELETED
@@ -1,45 +0,0 @@
-### model
-model_name_or_path: Qwen/Qwen2-7B-Instruct
-
-### method
-stage: sft
-do_train: true
-finetuning_type: lora
-lora_target: all
-quantization_bit: 4 # use 4-bit QLoRA
-loraplus_lr_ratio: 16.0 # use LoRA+ with lambda=16.0
-use_unsloth: true # use UnslothAI's LoRA optimization for 2x faster training
-
-### dataset
-dataset: alpaca_mac
-template: chatml
-cutoff_len: 1024
-max_samples: 4528
-overwrite_cache: true
-preprocessing_num_workers: 16
-
-### output
-output_dir: saves/qwen2-7b/lora/sft
-logging_steps: 10
-save_steps: 560
-plot_loss: true
-overwrite_output_dir: true
-
-### train
-per_device_train_batch_size: 1
-gradient_accumulation_steps: 8
-learning_rate: 1.0e-4
-num_train_epochs: 6.0
-lr_scheduler_type: cosine
-warmup_ratio: 0.1
-bf16: true
-ddp_timeout: 180000000
-
-### eval
-val_size: 0.01
-per_device_eval_batch_size: 1
-eval_strategy: steps
-eval_steps: 560
-
-report_to: wandb
-run_name: qwen2_7b_lora_sft # optional

llama-factory/data/alpaca_mac.json DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:6f03e62eb461c2204bbaef55f2de28ec115b1a5834b81f03b10f157551d5fe9f
-size 2240344
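The deleted alpaca_mac.json lives in Git LFS, so its records are not visible in this diff. For orientation only, the Alpaca-style datasets that LLaMA-Factory consumes (referenced as `dataset: alpaca_mac` in the configs above) conventionally use records shaped like the sketch below; the field values are placeholders, not actual data from this file.

```python
# Hypothetical record shape for an Alpaca-style dataset; the real contents of
# alpaca_mac.json are stored in Git LFS and are not shown in this commit.
example_record = {
    "instruction": "A task description for the model.",
    "input": "Optional additional context for the task.",
    "output": "The reference response the model should learn to produce.",
}
```
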
llama-factory/data/dataset_info.json DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:84bce610296ed7e729647e85d25576b6226d20ddf0bca4982fb1deb02de35911
-size 13560

llama-factory/inference/qwen2_1.5b_lora_sft.yaml DELETED
@@ -1,4 +0,0 @@
-model_name_or_path: Qwen/Qwen2-1.5B-Instruct
-adapter_name_or_path: saves/qwen2-1.5b/lora/sft/checkpoint-1680
-template: chatml
-finetuning_type: lora

llm_toolkit/llm_utils.py CHANGED
@@ -1,22 +1,39 @@
 import os
 import re
-import sys
 import torch
-from llamafactory.chat import ChatModel
-from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TextStreamer
+from transformers import (
+    AutoModelForCausalLM,
+    AutoTokenizer,
+    BitsAndBytesConfig,
+    TextStreamer,
+)
+from tqdm import tqdm
+
+
+def get_template(model_name):
+    model_name = model_name.lower()
+    if "llama" in model_name:
+        return "llama3"
+    if "internlm" in model_name:
+        return "intern2"
+    if "glm" in model_name:
+        return "glm4"
+    return "chatml"
 
 
 def load_model(
     model_name,
-    max_seq_length=2048,
     dtype=torch.bfloat16,
     load_in_4bit=False,
     adapter_name_or_path=None,
+    using_llama_factory=False,
 ):
-    print(f"loading model: {model_name}")
+    print(f"loading model: {model_name} with adapter: {adapter_name_or_path}")
 
-    if adapter_name_or_path:
-        template = "llama3" if "llama-3" in model_name.lower() else "chatml"
+    if using_llama_factory:
+        from llamafactory.chat import ChatModel
+
+        template = get_template(model_name)
 
     args = dict(
         model_name_or_path=model_name,
@@ -26,6 +43,10 @@ def load_model(
         quantization_bit=4 if load_in_4bit else None, # load 4-bit quantized model
     )
     chat_model = ChatModel(args)
+    if os.getenv("RESIZE_TOKEN_EMBEDDINGS") == "true":
+        chat_model.engine.model.resize_token_embeddings(
+            len(chat_model.engine.tokenizer), pad_to_multiple_of=32
+        )
     return chat_model.engine.model, chat_model.engine.tokenizer
 
     tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
@@ -36,26 +57,59 @@ def load_model(
         bnb_4bit_compute_dtype=dtype,
     )
 
-    model = AutoModelForCausalLM.from_pretrained(
-        model_name,
-        quantization_config=bnb_config,
-        torch_dtype=dtype,
-        trust_remote_code=True,
-        device_map="auto",
-    ) if load_in_4bit else AutoModelForCausalLM.from_pretrained(
-        model_name,
-        torch_dtype=dtype,
-        trust_remote_code=True,
-        device_map="auto",
+    model = (
+        AutoModelForCausalLM.from_pretrained(
+            model_name,
+            quantization_config=bnb_config,
+            torch_dtype=dtype,
+            trust_remote_code=True,
+            device_map="auto",
+        )
+        if load_in_4bit
+        else AutoModelForCausalLM.from_pretrained(
+            model_name,
+            torch_dtype=dtype,
+            trust_remote_code=True,
+            device_map="auto",
+        )
     )
 
+    if adapter_name_or_path:
+        adapter_name = model.load_adapter(adapter_name_or_path)
+        model.active_adapters = adapter_name
+
+    if not tokenizer.pad_token:
+        print("Adding pad token to tokenizer for model: ", model_name)
+        tokenizer.add_special_tokens({"pad_token": "<pad>"})
+        model.resize_token_embeddings(len(tokenizer), pad_to_multiple_of=32)
+
     return model, tokenizer
 
-def test_model(model, tokenizer, prompt):
+
+def check_gpu():
+    # torch.cuda.is_available() checks and returns a Boolean True if a GPU is available, else it'll return False
+    is_cuda = torch.cuda.is_available()
+
+    # If we have a GPU available, we'll set our device to GPU. We'll use this device variable later in our code.
+    if is_cuda:
+        device = torch.device("cuda")
+        print("CUDA is available, we have found ", torch.cuda.device_count(), " GPU(s)")
+        print(torch.cuda.get_device_name(0))
+        print("CUDA version: " + torch.version.cuda)
+    elif torch.backends.mps.is_available():
+        device = torch.device("mps")
+        print("MPS is available")
+    else:
+        device = torch.device("cpu")
+        print("GPU/MPS not available, CPU used")
+    return device
+
+
+def test_model(model, tokenizer, prompt, device="cuda"):
     inputs = tokenizer(
         [prompt],
         return_tensors="pt",
-    ).to("cuda")
+    ).to(device)
 
     text_streamer = TextStreamer(tokenizer)
 
@@ -68,7 +122,10 @@ def extract_answer(text, debug=False):
     if text:
         # Remove the begin and end tokens
         text = re.sub(
-            r".*?(assistant|\[/INST\]).+?\b", "", text, flags=re.DOTALL | re.MULTILINE
+            r".*?(assistant|\[/INST\]).+?\b",
+            "",
+            text,
+            flags=re.DOTALL | re.MULTILINE,
         )
         if debug:
             print("--------\nstep 1:", text)
@@ -83,27 +140,63 @@
         if debug:
             print("--------\nstep 3:", text)
 
+    text = text.split("。")[0].strip()
+    if debug:
+        print("--------\nstep 4:", text)
+
+    text = re.sub(
+        r"^Response:.+?\b",
+        "",
+        text,
+        flags=re.DOTALL | re.MULTILINE,
+    )
+    if debug:
+        print("--------\nstep 5:", text)
+
     return text
 
-def eval_model(model, tokenizer, eval_dataset):
+
+def eval_model(
+    model,
+    tokenizer,
+    eval_dataset,
+    device="cuda",
+    max_new_tokens=4096,
+    repetition_penalty=1.0,
+    batch_size=1,
+):
     total = len(eval_dataset)
     predictions = []
-    for i in tqdm(range(total)):
-        inputs = tokenizer(
-            eval_dataset["prompt"][i : i + 1],
-            return_tensors="pt",
-        ).to("cuda")
-
-        outputs = model.generate(**inputs, max_new_tokens=4096, use_cache=False)
-        decoded_output = tokenizer.batch_decode(outputs)
-        debug = i == 0
-        decoded_output = [
-            extract_answer(output, debug=debug) for output in decoded_output
-        ]
-        predictions.extend(decoded_output)
+
+    model.eval()
+
+    with torch.no_grad():
+        for i in tqdm(range(0, total, batch_size)):  # Iterate in batches
+            batch_end = min(i + batch_size, total)  # Ensure not to exceed dataset
+            batch_prompts = eval_dataset["prompt"][i:batch_end]
+            inputs = tokenizer(
+                batch_prompts,
+                return_tensors="pt",
+                padding=True,  # Ensure all inputs in the batch have the same length
+            ).to(device)
+
+            outputs = model.generate(
+                **inputs,
+                max_new_tokens=max_new_tokens,
+                repetition_penalty=repetition_penalty,
+                use_cache=False,
+            )
+            outputs = outputs[:, inputs["input_ids"].shape[1] :]
+            decoded_output = tokenizer.batch_decode(
+                outputs, skip_special_tokens=True
+            )  # Skip special tokens for clean output
+            if i == 0:
+                print("Batch output:", decoded_output)
+            predictions.extend(decoded_output)
 
     return predictions
 
+
 def save_model(
     model,
     tokenizer,
@@ -163,3 +256,10 @@ def save_model(
         )
     except Exception as e:
         print(e)
+
+
+def print_row_details(df, indices=[0]):
+    for index in indices:
+        for col in df.columns:
+            print("-" * 50)
+            print(f"{col}: {df[col].iloc[index]}")

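To show how the refactored helpers fit together, here is a hedged usage sketch. The model id, dataset file, and generation settings are illustrative assumptions and do not come from this commit.

```python
# Illustrative driver for the refactored llm_utils helpers; the paths, model id,
# and parameters below are placeholders, not values from this repository.
from datasets import load_dataset

# Assumes the repository root is on PYTHONPATH so llm_toolkit is importable.
from llm_toolkit.llm_utils import check_gpu, eval_model, load_model

device = check_gpu()  # picks cuda, mps, or cpu

# load_model returns a (model, tokenizer) pair; 4-bit loading is optional.
model, tokenizer = load_model(
    "Qwen/Qwen2-1.5B-Instruct",
    load_in_4bit=True,
    adapter_name_or_path=None,  # e.g. a local LoRA checkpoint directory
)

# eval_model expects a dataset with a "prompt" column.
eval_dataset = load_dataset("json", data_files="data/eval.json", split="train")
predictions = eval_model(
    model,
    tokenizer,
    eval_dataset,
    device=device,
    max_new_tokens=256,
    batch_size=4,
)
print(predictions[:3])
```
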
requirements.txt CHANGED
@@ -1,3 +1,4 @@
+huggingface_hub==0.24.2
 nltk==3.8.1
 python-dotenv==1.0.1
 black==24.4.0
@@ -9,7 +10,10 @@ scikit-learn==1.5.0
 jupyter
 ipywidgets
 packaging
-# triton
-# xformers
 langchain_openai==0.1.13
-wandb==0.17.4
+wandb==0.17.4
+transformers==4.43.3
+sentencepiece==0.2.0
+einops==0.8.0
+accelerate==0.32.1
+peft==0.11.1

scripts/lf-api.sh DELETED
@@ -1,8 +0,0 @@
-#!/bin/sh
-
-BASEDIR=$(dirname "$0")
-cd $BASEDIR/../llama-factory
-echo Current Directory:
-pwd
-
-API_PORT=8000 llamafactory-cli api $1

scripts/tune-large.sh DELETED
@@ -1,24 +0,0 @@
-#!/bin/sh
-
-BASEDIR=$(dirname "$0")
-cd $BASEDIR
-echo Current Directory:
-pwd
-
-nvidia-smi
-uname -a
-cat /etc/os-release
-lscpu
-grep MemTotal /proc/meminfo
-
-# pip install -r requirements.txt
-# FLASH_ATTENTION_FORCE_BUILD=TRUE pip install --upgrade flash-attn
-
-# export MODEL_NAME=unsloth/Qwen2-72B-Instruct-bnb-4bit
-# echo Tuning $MODEL_NAME
-# python tune.py
-
-export MODEL_NAME=unsloth/llama-3-70b-Instruct-bnb-4bit
-echo Tuning $MODEL_NAME
-python tune.py
-

scripts/tune-lf.sh DELETED
@@ -1,9 +0,0 @@
-#!/bin/sh
-
-BASEDIR=$(dirname "$0")
-cd $BASEDIR/../llama-factory
-echo Current Directory:
-pwd
-
-YAML=$1 python -c 'import os, json, sys, yaml; filename=os.getenv("YAML"); y=yaml.safe_load(open(filename)) ; print(f"{filename}:\n", json.dumps(y, indent=2))'
-llamafactory-cli train $1

scripts/tune-medium.sh DELETED
@@ -1,27 +0,0 @@
-#!/bin/sh
-
-BASEDIR=$(dirname "$0")
-cd $BASEDIR
-echo Current Directory:
-pwd
-
-nvidia-smi
-uname -a
-cat /etc/os-release
-lscpu
-grep MemTotal /proc/meminfo
-
-# pip install -r requirements.txt
-# FLASH_ATTENTION_FORCE_BUILD=TRUE pip install --upgrade flash-attn
-
-export MODEL_NAME=unsloth/Qwen2-7B-Instruct
-echo Tuning $MODEL_NAME
-python llm_toolkit/tune.py
-
-export MODEL_NAME=unsloth/mistral-7b-instruct-v0.3
-echo Tuning $MODEL_NAME
-python llm_toolkit/tune.py
-
-export MODEL_NAME=gradientai/Llama-3-8B-Instruct-Gradient-1048k
-echo Tuning $MODEL_NAME
-python llm_toolkit/tune.py

scripts/tune-small-2.sh DELETED
@@ -1,14 +0,0 @@
-#!/bin/sh
-
-BASEDIR=$(dirname "$0")
-cd $BASEDIR/..
-echo Current Directory:
-pwd
-
-export MODEL_NAME=unsloth/Qwen2-0.5B-Instruct
-echo Tuning $MODEL_NAME
-python llm_toolkit/tune.py
-
-export MODEL_NAME=unsloth/Qwen2-1.5B-Instruct
-echo Tuning $MODEL_NAME
-python llm_toolkit/tune.py

scripts/tune-small.sh DELETED
@@ -1,14 +0,0 @@
-#!/bin/sh
-
-BASEDIR=$(dirname "$0")
-cd $BASEDIR/..
-echo Current Directory:
-pwd
-
-export MODEL_NAME=unsloth/Qwen2-0.5B-Instruct-bnb-4bit
-echo Tuning $MODEL_NAME
-python llm_toolkit/tune.py
-
-export MODEL_NAME=unsloth/Qwen2-1.5B-Instruct-bnb-4bit
-echo Tuning $MODEL_NAME
-python llm_toolkit/tune.py