menouar committed · Commit af04de4 · Parent(s): 3caa08b

First Commit

Changed files:
- .gitignore (+60, -0)
- app.py (+297, -0)
- utils/__init__.py (+87, -0)
- utils/components_creator.py (+256, -0)
- utils/notebook_generator.py (+393, -0)
.gitignore
ADDED
@@ -0,0 +1,60 @@
# Byte-compiled / optimized files
*.py[cod]
__pycache__/
*.py[cod]?

# C extensions
*.so

# Distribution / packaging
dist/
build/
eggs/
*.egg-info/
.svn/
*.swp
*.tar.gz
*.tgz
*.zip
*.rar

# Development
*.bak
*.tmp

# IDE specific files
.vscode/
.idea/

# Jupyter Notebook
.ipynb_checkpoints/

# Environment
.env
env/
venv/
ENV/
env.bak/
venv.bak/

# Compiled Python modules
*.pyd

# Coverage
.coverage
.coverage.*
htmlcov/

# Type checking
.mypy_cache/
.dmypy.json

# Sphinx documentation
docs/_build/

# Ignore .pkl file
*.pkl


*.html
*.ipynb
app.py
ADDED
@@ -0,0 +1,297 @@
from typing import Any, Set

import gradio as gr
import nbformat as nbf
from gradio.components import Component
from nbconvert import HTMLExporter

from utils.notebook_generator import *
from utils.components_creator import *

finetuning_notebook = "Finetuning_NoteBook"

css = """
.container {
    align-items: center;
    justify-content: center;
}
.center_text {
    text-align: center;
}

.a_custom {
    border-radius: var(--button-large-radius);
    padding: var(--button-large-padding);
    font-weight: var(--button-large-text-weight);
    font-size: var(--button-large-text-size);
    border: var(--button-border-width) solid var(--button-primary-border-color);
    background: var(--button-primary-background-fill);
    color: var(--button-primary-text-color);
    justify-content: center;
    align-items: center;
    transition: var(--button-transition);
    box-shadow: var(--button-shadow);
    text-align: center
}
.a_custom a {
    text-decoration: none;
    color: white;
}
"""


def centered_column():
    return gr.Column(elem_classes=["container"])


def change_model_selection(model_id):
    if model_id == gemma.name:
        gr.Warning("""
        Access Gemma:

        To load Gemma from Hugging Face, you’re required to review and agree to Google’s usage license.
        """)
    if model_id == llama.name:
        gr.Warning("""
        Access Llama 2:

        To load Llama 2 from Hugging Face, you’re required to review and agree to Meta’s usage license.
        """)

    for m in models:
        if m.name == model_id:
            return gr.Dropdown(choices=m.versions, interactive=True,
                               visible=True, info=f"Select the version of the model {m.name} you wish to use.")
    return None


def check_valid_input(value):
    if isinstance(value, str):
        return value and value.strip()
    if isinstance(value, list):
        return len(value) > 0
    return value is not None


def get_dataset(dataset_path):
    for d in ft_datasets:
        if d.path == dataset_path:
            return d
    return None


def get_value(components: dict[Component, Any], elem_id: str) -> Any:
    for component, val in components.items():
        if component.elem_id == elem_id:
            return val
    return None


def preview_notebook():
    html_exporter = HTMLExporter()
    (body, resources) = html_exporter.from_notebook_node(notebook)

    html_path = f"{finetuning_notebook}.html"
    with open(html_path, 'w') as f:
        f.write(body)
    return f'<iframe src="file={html_path}" width="100%" height="250px"></iframe>'


def generate_code(components: dict[Component, Any]):
    create_install_libraries_cells(notebook['cells'])
    flash_attention_value = get_value(components, FLASH_ATTENTION_ID)
    if flash_attention_value:
        create_install_flash_attention(notebook['cells'])

    push_to_hub = get_value(components, PUSH_TO_HUB_ID)
    if push_to_hub:
        create_login_hf_cells(notebook['cells'])

    dataset_value = get_value(components, DATASET_SELECTION_ID)
    seed_value = get_value(components, DATASET_SHUFFLING_SEED)
    if not check_valid_input(dataset_value):
        gr.Warning("No dataset is selected")
    else:
        create_datasets_cells(notebook['cells'], get_dataset(dataset_value), seed_value)

    model_value = get_value(components, MODEL_SELECTION_ID)
    if not check_valid_input(model_value):
        gr.Warning("No model is selected!")
    else:
        version_value = get_value(components, MODEL_VERSION_SELECTION_ID)
        if not check_valid_input(version_value):
            gr.Warning("No version of the model is selected")
        else:
            load_in_4bit = get_value(components, LOAD_IN_4_BIT_ID)
            bnb_4bit_use_double_quant = get_value(components, BNB_4BIT_USE_DOUBLE_QUANT)
            bnb_4bit_quant_type = get_value(components, BNB_4BIT_QUANT_TYPE)
            bnb_4bit_compute_dtype = get_value(components, BNB_4BIT_COMPUTE_DTYPE)
            pad_side = get_value(components, PAD_SIDE_ID)
            pad_value = get_value(components, PAD_VALUE_ID)
            create_model_cells(notebook['cells'], model_id=model_value, version=version_value,
                               flash_attention=flash_attention_value, pad_value=pad_value,
                               pad_side=pad_side, load_in_4bit=load_in_4bit,
                               bnb_4bit_use_double_quant=bnb_4bit_use_double_quant,
                               bnb_4bit_quant_type=bnb_4bit_quant_type, bnb_4bit_compute_dtype=bnb_4bit_compute_dtype)

    r_value = get_value(components, LORA_R_ID)
    alpha_value = get_value(components, LORA_ALPHA_ID)
    dropout_value = get_value(components, LORA_DROPOUT_ID)
    bias_value = get_value(components, LORA_BIAS_ID)
    create_lora_config_cells(notebook['cells'], r_value, alpha_value, dropout_value, bias_value)

    epochs = get_value(components, NUM_TRAIN_EPOCHS_ID)
    max_steps = get_value(components, MAX_STEPS_ID)
    logging_steps = get_value(components, LOGGING_STEPS_ID)
    per_device_train_batch_size = get_value(components, PER_DEVICE_TRAIN_BATCH_SIZE)
    save_strategy = get_value(components, SAVE_STRATEGY_ID)
    gradient_accumulation_steps = get_value(components, GRADIENT_ACCUMULATION_STEPS_ID)
    gradient_checkpointing = get_value(components, GRADIENT_CHECKPOINTING_ID)
    learning_rate = get_value(components, LEARNING_RATE_ID)
    max_grad_norm = get_value(components, MAX_GRAD_NORM_ID)
    warmup_ratio = get_value(components, WARMUP_RATIO_ID)
    lr_scheduler_type = get_value(components, LR_SCHEDULER_TYPE_ID)
    output_dir = get_value(components, OUTPUT_DIR_ID)
    report_to = get_value(components, REPORT_TO_ID)

    if not check_valid_input(output_dir):
        gr.Warning("No output_dir is given")

    create_training_args_cells(notebook['cells'], epochs=epochs, max_steps=max_steps, logging_steps=logging_steps,
                               per_device_train_batch_size=per_device_train_batch_size, save_strategy=save_strategy,
                               gradient_accumulation_steps=gradient_accumulation_steps,
                               gradient_checkpointing=gradient_checkpointing, learning_rate=learning_rate,
                               max_grad_norm=max_grad_norm, warmup_ratio=warmup_ratio,
                               lr_scheduler_type=lr_scheduler_type, output_dir=output_dir, report_to=report_to,
                               seed=seed_value)

    max_seq_length = get_value(components, MAX_SEQ_LENGTH_ID)
    packing = get_value(components, PACKING_ID)
    create_sft_trainer_cells(notebook['cells'], max_seq_length, packing)

    create_start_training_cells(notebook['cells'], epochs, max_steps, push_to_hub, output_dir)

    create_free_gpu_cells(notebook['cells'])

    create_merge_lora_cells(notebook['cells'], output_dir)

    if push_to_hub:
        push_merged_model_cells(notebook['cells'], output_dir)

    file_name = f"{finetuning_notebook}.ipynb"

    with open(file_name, 'w') as f:
        nbf.write(notebook, f)

    return gr.Button(
        visible=True), f'''<div class="a_custom"><a href="file={file_name}" download={file_name}>
        💾️ Download {finetuning_notebook}.ipynb</a> </div> '''


with gr.Blocks(css=css, theme=gr.themes.Soft(text_size='lg', font=["monospace"],
                                             primary_hue=gr.themes.colors.blue)) as demo:
    gr.Label("UI-Guided LLM FineTuning Jupyter Notebook Generator 🛠️🧠", show_label=False)
    gr.Markdown(
        'Generating a **Jupyter Notebook file (.ipynb)** 📔⚙️ for **finetuning** a Large Language Model (**LLM**) '
        '🎚️🧠 on a chosen dataset and configured parameters, guided by an intuitive User Interface (UI) 👆💻.',
        elem_classes=["center_text"])

    all_components: Set[Component] = set()

    gr.HTML("<h2 style='text-align: center;'>LLM 🧠</h2>")
    with gr.Row():
        model_selection = gr.Dropdown(
            [model.name for model in models],
            elem_id=MODEL_SELECTION_ID,
            label="Select a Large Language Model (LLM)",
            info="Select a Large Language Model (LLM) to finetune using the SFTTrainer."
        )
        version_selection = gr.Dropdown(
            choices=[], label="Select a Model Version 🔄", info="", visible=False, elem_id=MODEL_VERSION_SELECTION_ID
        )
    all_components.add(model_selection)
    all_components.add(version_selection)

    gr.HTML("<h2 style='text-align: center;'>Dataset 📊</h2>")
    with gr.Row():
        all_components.update(add_dataset_components())

    gr.HTML("<h2 style='text-align: center;'>⚡ Flash Attention ⚡</h2>")
    with gr.Row():
        flash_attention = gr.Checkbox(value=True, label="Enable Flash Attention", interactive=True,
                                      elem_id=FLASH_ATTENTION_ID,
                                      info="Flash Attention is a technique that reduces the memory and runtime costs "
                                           "associated with the attention layer in a model. For more details, please "
                                           "refer to the Flash Attention repository on GitHub.")
    all_components.add(flash_attention)

    gr.HTML("<h2 style='text-align: center;'>Quantization</h2>")
    with gr.Row():
        with centered_column():
            all_components.update(add_quantization_components())
        with centered_column():
            all_components.update(add_quantization_components1())

    gr.HTML("<h2 style='text-align: center;'>Tokenizer Configuration</h2>")
    with gr.Row():
        all_components.update(add_pad_tokens())

    gr.HTML("<h2 style='text-align: center;'>Lora Configuration</h2>")
    with gr.Row():
        with centered_column():
            all_components.update(add_lora_components1())
        with centered_column():
            all_components.update(add_lora_components())

    gr.HTML("<h2 style='text-align: center;'>⚙️ Training Arguments ⚙️</h2>")
    with gr.Row():
        with centered_column():
            all_components.update(add_training_args_1())
            all_components.update(add_training_args_1_bis())
        with centered_column():
            all_components.update(add_training_args_3())

    gr.HTML("<h2 style='text-align: center;'>Optimizer Arguments</h2>")
    with gr.Row():
        with centered_column():
            optimizer1 = add_optimizer1()
            all_components.update(optimizer1)

        with centered_column():
            optimizer = add_optimizer()
            all_components.update(optimizer)

    gr.HTML("<h2 style='text-align: center;'>Outputs</h2>")
    with gr.Row():
        with centered_column():
            all_components.update(add_outputs())
        with centered_column():
            all_components.update(add_outputs1())

    gr.HTML("<h2 style='text-align: center;'>SFTTrainer Arguments</h2>")
    with gr.Row():
        sft_args = add_sft_trainer_args()
        all_components.update(sft_args)

    with gr.Row():
        iframe = gr.HTML(show_label=False, visible=True)

    with gr.Row():
        greet_btn = gr.Button("Generate 🛠️", variant="primary")

    with gr.Row():
        preview_btn = gr.Button(f"👀 Preview {finetuning_notebook}.ipynb", variant="primary", visible=False)
        download_btn = gr.HTML(show_label=False, visible=True)

    notebook = nbf.v4.new_notebook()
    greet_btn.click(fn=generate_code, inputs=all_components, outputs=[preview_btn, download_btn])

    preview_btn.click(fn=preview_notebook, inputs=None, outputs=iframe)

    model_selection.change(
        fn=change_model_selection,
        inputs=model_selection,
        outputs=version_selection
    )

demo.launch(allowed_paths=["/"])
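app.py relies on a Gradio convention that is easy to miss: because `all_components` is a Python set rather than a list, Gradio invokes `generate_code` with a single dict mapping each component to its current value, which is why values are retrieved by scanning for a matching `elem_id`. A minimal sketch of the same pattern, using hypothetical components:

import gradio as gr

with gr.Blocks() as sketch:
    # Hypothetical components, for illustration only
    name = gr.Textbox(label="name")
    excited = gr.Checkbox(label="excited")
    out = gr.Textbox(label="out")

    def greet(values: dict):
        # With a set of inputs, Gradio passes {component: value}
        return values[name] + ("!" if values[excited] else ".")

    gr.Button("Go").click(fn=greet, inputs={name, excited}, outputs=out)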
utils/__init__.py
ADDED
@@ -0,0 +1,87 @@
from typing import List, Optional

MODEL_SELECTION_ID: str = "model_selection"
MODEL_VERSION_SELECTION_ID: str = "model_version_selection"

LOAD_IN_4_BIT_ID: str = "load_in_4bit"
BNB_4BIT_QUANT_TYPE: str = "bnb_4bit_quant_type"
BNB_4BIT_COMPUTE_DTYPE: str = "bnb_4bit_compute_dtype"
BNB_4BIT_USE_DOUBLE_QUANT: str = "bnb_4bit_use_double_quant"

DATASET_SELECTION_ID = "dataset_selection"
DATASET_SHUFFLING_SEED = "dataset_seed"

FLASH_ATTENTION_ID = "flash_attention"

PAD_SIDE_ID = "pad_side"
PAD_VALUE_ID = "pad_value"

LORA_R_ID = "lora_r"
LORA_ALPHA_ID = "lora_alpha"
LORA_DROPOUT_ID = "lora_dropout"
LORA_BIAS_ID = 'lora_bias'

NUM_TRAIN_EPOCHS_ID = "num_train_epochs"
MAX_STEPS_ID = "max_steps_id"
LOGGING_STEPS_ID = "logging_steps"
PER_DEVICE_TRAIN_BATCH_SIZE = "per_device_train_batch_size"
SAVE_STRATEGY_ID = "save_strategy"
GRADIENT_ACCUMULATION_STEPS_ID = "gradient_accumulation_steps"
GRADIENT_CHECKPOINTING_ID = "gradient_checkpointing"
LEARNING_RATE_ID = "learning_rate"
MAX_GRAD_NORM_ID = "max_grad_norm"
WARMUP_RATIO_ID = "warmup_ratio"
LR_SCHEDULER_TYPE_ID = "lr_scheduler_type"
OUTPUT_DIR_ID = "output_dir"
PUSH_TO_HUB_ID = "push_to_hub"
REPORT_TO_ID = "report_to"

MAX_SEQ_LENGTH_ID = "max_seq_length"
PACKING_ID = "packing"

OPTIMIZER_ID = "optim"
BETA1_ID = "adam_beta1"
BETA2_ID = "adam_beta2"
EPSILON_ID = "adam_epsilon"
WEIGHT_DECAY_ID = "weight_decay"


class FTDataSet:
    def __init__(self, path: str, dataset_split: Optional[str] = None):
        self.path = path
        self.dataset_split = dataset_split

    def __str__(self):
        return self.path


deita_dataset = FTDataSet(path="HuggingFaceH4/deita-10k-v0-sft", dataset_split="train_sft")
dolly = FTDataSet(path="philschmid/dolly-15k-oai-style", dataset_split="train")
ultrachat_200k = FTDataSet(path="HuggingFaceH4/ultrachat_200k", dataset_split="train_sft")
ft_datasets = [deita_dataset, dolly, ultrachat_200k]


class Model:
    def __init__(self, name: str, versions: List[str]):
        self.name = name
        self.versions = versions

    def __str__(self):
        return self.name


models: List[Model] = []
gemma = Model(name="google/gemma", versions=["7b", "2b"])
models.append(gemma)
falcon = Model(name="tiiuae/falcon", versions=["7b"])  # "7b-instruct"
models.append(falcon)
phi = Model(name="microsoft/phi", versions=["1_5", "1", "2"])
models.append(phi)
llama = Model(name="meta-llama/Llama-2", versions=["7b", "7b-hf"])  # "7b-chat", "7b-chat-hf"
models.append(llama)
mistral = Model(name="mistralai/Mistral", versions=["7B-v0.1"])  # "7B-Instruct-v0.1"
models.append(mistral)
tinyLlama = Model(name="TinyLlama/TinyLlama-1.1B",
                  versions=['intermediate-step-1431k-3T', 'step-50K-105b', 'intermediate-step-240k-503b',
                            'intermediate-step-715k-1.5T', 'intermediate-step-1195k-token-2.5T'])
models.append(tinyLlama)
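A detail worth noting: `Model.name` holds the Hub namespace plus base name, and the notebook generator later joins it with a version using a hyphen, so these entries expand to full repo ids. An illustrative check:

from utils import models

# create_model_cells emits model_id = f"{model.name}-{version}",
# e.g. "google/gemma-7b" or "mistralai/Mistral-7B-v0.1"
for m in models:
    for v in m.versions:
        print(f"{m.name}-{v}")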
utils/components_creator.py
ADDED
@@ -0,0 +1,256 @@
from typing import Set

import gradio as gr
from gradio.components import Component

from utils import *


def add_quantization_components() -> Set[Component]:
    q_components: Set[Component] = set()
    load_in_4bit = gr.Radio(["load_in_4bit", "load_in_8bit"], value="load_in_4bit",
                            label="Quantization",
                            info="This flag is used to enable 4/8-bit quantization.",
                            interactive=True,
                            elem_id=LOAD_IN_4_BIT_ID)
    bnb_4bit_quant_type = gr.Radio(["fp4", "nf4"], label="bnb_4bit_quant_type",
                                   value="nf4",
                                   elem_id=BNB_4BIT_QUANT_TYPE,
                                   interactive=True,
                                   info="This sets the quantization data type in the bnb.nn.Linear4Bit layers.")
    q_components.add(load_in_4bit)
    q_components.add(bnb_4bit_quant_type)
    return q_components


def add_quantization_components1() -> Set[Component]:
    q_components: Set[Component] = set()
    bnb_4bit_compute_dtype = gr.Radio(
        ["torch.float32", "torch.bfloat16", "torch.float16"],
        label="bnb_4bit_compute_dtype",
        info="This sets the computational type, which might be different than the input type.",
        elem_id=BNB_4BIT_COMPUTE_DTYPE,
        interactive=True, value="torch.bfloat16")
    bnb_4bit_use_double_quant = gr.Checkbox(label="bnb_4bit_use_double_quant",
                                            value=True,
                                            interactive=True,
                                            elem_id=BNB_4BIT_USE_DOUBLE_QUANT,
                                            info="This flag is used for nested quantization, where the "
                                                 "quantization constants from the first quantization are "
                                                 "quantized again.")
    q_components.add(bnb_4bit_compute_dtype)
    q_components.add(bnb_4bit_use_double_quant)
    return q_components


def add_dataset_components() -> Set[Component]:
    dataset_selection = gr.Dropdown(
        [dt.path for dt in ft_datasets],
        elem_id=DATASET_SELECTION_ID,
        label="Select a Dataset",
        info="Select a dataset for finetuning the model."
    )
    seed = gr.Slider(0, 256, step=1, value=42, elem_id=DATASET_SHUFFLING_SEED, label="Random Seed",
                     info="Set a random seed for shuffling the dataset.", interactive=True)

    d_components: Set[Component] = set()
    d_components.add(dataset_selection)
    d_components.add(seed)
    return d_components


def add_pad_tokens() -> Set[Component]:
    pad_token_side = gr.Radio(["right", "left"], label="Tokenizer: padding_side",
                              info="The side on which the model should have padding applied.",
                              interactive=True, value="right", elem_id=PAD_SIDE_ID)
    pad_token_value = gr.Radio([None, "eos_token"], label="Tokenizer: pad_token",
                               info="A special token used to make arrays of tokens the same size for batching "
                                    "purposes; it will then be ignored by attention mechanisms or loss computation.",
                               interactive=True, value=None, elem_id=PAD_VALUE_ID)
    pad_components: Set[Component] = set()
    pad_components.add(pad_token_side)
    pad_components.add(pad_token_value)
    return pad_components


def add_lora_components() -> Set[Component]:
    r = gr.Slider(1, 2048, step=1, value=6, label="r", info="Lora attention dimension (the 'rank').",
                  interactive=True, elem_id=LORA_R_ID)
    alpha = gr.Slider(1, 512, step=1, value=8, label="lora_alpha", info="The alpha parameter for Lora scaling.",
                      interactive=True, elem_id=LORA_ALPHA_ID)

    out_components: Set[Component] = set()
    out_components.add(r)
    out_components.add(alpha)
    return out_components


def add_lora_components1() -> Set[Component]:
    dropout = gr.Slider(0, 1, step=0.01, value=0.05, label="lora_dropout",
                        info="The dropout probability for Lora layers.",
                        interactive=True, elem_id=LORA_DROPOUT_ID)
    bias = gr.Radio(['none', 'all', 'lora_only'], label="bias",
                    info="Bias type for LoRA. If 'all' or 'lora_only', the corresponding biases will be updated "
                         "during training.",
                    interactive=True, value="none", elem_id=LORA_BIAS_ID)

    out_components: Set[Component] = set()
    out_components.add(dropout)
    out_components.add(bias)
    return out_components


def add_training_args_1() -> Set[Component]:
    epochs = gr.Slider(1, 100, step=1, value=3, label="num_train_epochs",
                       info="Total number of training epochs to perform.",
                       interactive=True, elem_id=NUM_TRAIN_EPOCHS_ID)
    max_steps = gr.Slider(-1, 100, step=1, value=-1, label="max_steps",
                          info="Total number of training steps to perform. If set to a positive number, "
                               "it overrides num_train_epochs (-1 disables it).",
                          interactive=True, elem_id=MAX_STEPS_ID)
    out_components: Set[Component] = set()
    out_components.add(epochs)
    out_components.add(max_steps)
    return out_components


def add_training_args_1_bis() -> Set[Component]:
    logging_steps = gr.Slider(1, 100, step=1, value=10, label="logging_steps",
                              info="Number of update steps between two logs if logging_strategy='steps'.",
                              interactive=True, elem_id=LOGGING_STEPS_ID)
    per_device_train_batch_size = gr.Slider(1, 64, step=1, value=4, label="per_device_train_batch_size",
                                            info="Batch size per device during training.",
                                            interactive=True, elem_id=PER_DEVICE_TRAIN_BATCH_SIZE)
    save_strategy = gr.Radio(['no', 'epoch', 'steps'], label="save_strategy",
                             info="The checkpoint save strategy to adopt during training.",
                             interactive=True, value="epoch", elem_id=SAVE_STRATEGY_ID)
    out_components: Set[Component] = set()
    out_components.add(save_strategy)
    out_components.add(logging_steps)
    out_components.add(per_device_train_batch_size)
    return out_components


def add_training_args_3() -> Set[Component]:
    max_grad_norm = gr.Slider(0.01, 1, value=0.3, label="max_grad_norm",
                              info="Maximum gradient norm (for gradient clipping).",
                              interactive=True, elem_id=MAX_GRAD_NORM_ID)
    warmup_ratio = gr.Slider(0, 1, value=0.1, label="warmup_ratio",
                             info="Ratio of total training steps used for a linear warmup from 0 to learning_rate.",
                             interactive=True, elem_id=WARMUP_RATIO_ID)
    gradient_accumulation_steps = gr.Slider(1, 64, step=1, value=2, label="gradient_accumulation_steps",
                                            info="Number of update steps to accumulate the gradients for before "
                                                 "performing a backward/update pass.",
                                            interactive=True, elem_id=GRADIENT_ACCUMULATION_STEPS_ID)
    gradient_checkpointing = gr.Checkbox(label="gradient_checkpointing", value=True, interactive=True,
                                         info="Use gradient checkpointing to save memory at the expense of a slower "
                                              "backward pass.", elem_id=GRADIENT_CHECKPOINTING_ID)
    lr_scheduler_type = gr.Radio(['linear', 'constant', 'cosine'], label="lr_scheduler_type",
                                 info="The learning rate scheduler type to use.",
                                 interactive=True, value="cosine", elem_id=LR_SCHEDULER_TYPE_ID)

    out_components: Set[Component] = set()
    out_components.add(max_grad_norm)
    out_components.add(warmup_ratio)
    out_components.add(gradient_accumulation_steps)
    out_components.add(gradient_checkpointing)
    out_components.add(lr_scheduler_type)
    return out_components


def add_outputs() -> Set[Component]:
    output_dir = gr.Textbox(interactive=True,
                            label="output_dir",
                            info='The output directory where the model predictions and checkpoints will be written.',
                            elem_id=OUTPUT_DIR_ID)

    push_to_hub = gr.Checkbox(label="push_to_hub", value=False, interactive=True,
                              info="Whether or not to upload the trained model to the hub after training. If this is "
                                   "True, you must specify 'HF_TOKEN'.",
                              elem_id=PUSH_TO_HUB_ID)

    out_components: Set[Component] = set()
    out_components.add(output_dir)
    out_components.add(push_to_hub)
    return out_components


def add_outputs1() -> Set[Component]:
    report_to = gr.Dropdown(
        ["azure_ml", "comet_ml", "mlflow", "tensorboard", "wandb", "all", 'none'],
        value="tensorboard",
        elem_id=REPORT_TO_ID,
        label="report_to",
        info="The list of integrations to report the results and logs to. Supported platforms are 'azure_ml', "
             "'comet_ml', 'mlflow', 'tensorboard' and 'wandb'. Use 'all' to report to all integrations installed, "
             "'none' for no integrations."
    )
    out_components: Set[Component] = set()
    out_components.add(report_to)
    return out_components


def add_optimizer() -> Set[Component]:
    adam_beta1 = gr.Slider(0.00001, 1, value=0.9, label="adam_beta1",
                           info="The beta1 hyperparameter for the [`AdamW`] optimizer.",
                           interactive=True, elem_id=BETA1_ID)
    adam_beta2 = gr.Slider(0.00001, 1, value=0.999, label="adam_beta2",
                           info="The beta2 hyperparameter for the [`AdamW`] optimizer.",
                           interactive=True, elem_id=BETA2_ID)
    adam_epsilon = gr.Slider(1e-9, 1, value=1e-8, label="adam_epsilon",
                             info="The epsilon hyperparameter for the [`AdamW`] optimizer.",
                             interactive=True, elem_id=EPSILON_ID)
    out_components: Set[Component] = set()
    out_components.add(adam_beta1)
    out_components.add(adam_beta2)
    out_components.add(adam_epsilon)
    return out_components


def add_optimizer1() -> Set[Component]:
    optimizer = gr.Dropdown(
        ["adamw_hf", "adamw_torch", "adamw_torch_fused", "adamw_apex_fused", "adamw_anyprecision", "adafactor"],
        value="adamw_torch_fused",
        elem_id=OPTIMIZER_ID,
        label="optimizer",
        info="The optimizer to use: 'adamw_hf', 'adamw_torch', 'adamw_torch_fused', 'adamw_apex_fused', "
             "'adamw_anyprecision' or 'adafactor'."
    )
    learning_rate = gr.Slider(1e-6, 1, step=0.001, value=2.0e-05, label="learning_rate",
                              info="The initial learning rate for AdamW.",
                              interactive=True, elem_id=LEARNING_RATE_ID)
    weight_decay = gr.Slider(0, 1, value=0, label="weight_decay",
                             info="The weight decay to apply (if not zero) to all layers except all bias and "
                                  "LayerNorm weights in the [`AdamW`] optimizer.",
                             interactive=True, elem_id=WEIGHT_DECAY_ID)
    out_components: Set[Component] = set()
    out_components.add(optimizer)
    out_components.add(learning_rate)
    out_components.add(weight_decay)
    return out_components


def add_sft_trainer_args() -> Set[Component]:
    max_seq_length = gr.Slider(512, 3072, value=2048, label="max_seq_length",
                               info="The maximum sequence length to use for the `ConstantLengthDataset` and for "
                                    "automatically creating the Dataset.",
                               interactive=True, elem_id=MAX_SEQ_LENGTH_ID)
    packing = gr.Checkbox(label="packing", value=True, interactive=True, elem_id=PACKING_ID,
                          info="This argument is used by the `ConstantLengthDataset` to pack the sequences of the "
                               "dataset.")

    out_components: Set[Component] = set()
    out_components.add(max_seq_length)
    out_components.add(packing)
    return out_components
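Each `add_*` helper both instantiates its widgets (attaching them to whatever Blocks context is active at call time) and returns them as a `Set[Component]`, so callers control the layout while still collecting everything into one flat set for event wiring. A short usage sketch mirroring how app.py consumes these helpers:

import gradio as gr
from utils.components_creator import add_lora_components, add_lora_components1

with gr.Blocks() as demo:
    collected = set()
    with gr.Row():
        collected.update(add_lora_components1())  # lora_dropout + bias
        collected.update(add_lora_components())   # r + lora_alpha
    # `collected` can now be passed as the `inputs` set of a click handler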
utils/notebook_generator.py
ADDED
@@ -0,0 +1,393 @@
import nbformat as nbf

from utils import FTDataSet


def create_install_libraries_cells(cells: list):
    text_cell = nbf.v4.new_markdown_cell("### Installing Required Libraries!")
    text_cell1 = nbf.v4.new_markdown_cell(
        "Installing required libraries, including trl, transformers, accelerate, peft, datasets, "
        "and bitsandbytes.")
    code = """
!pip install -q --upgrade "transformers==4.38.2"
!pip install -q --upgrade "datasets==2.16.1"
!pip install -q --upgrade "accelerate==0.26.1"
!pip install -q --upgrade "evaluate==0.4.1"
!pip install -q --upgrade "bitsandbytes==0.42.0"
!pip install -q --upgrade "trl==0.7.11"
!pip install -q --upgrade "peft==0.8.2"
"""
    code_pytorch = """
# Checks if PyTorch is installed and installs it if not.
try:
    import torch
    print("PyTorch is installed!")
except ImportError:
    print("PyTorch is not installed.")
    !pip install -q torch
"""
    code_cell = nbf.v4.new_code_cell(code)
    cells.append(text_cell)
    cells.append(text_cell1)
    cells.append(nbf.v4.new_code_cell(code_pytorch))
    cells.append(code_cell)


def create_install_flash_attention(cells: list):
    text_cell = nbf.v4.new_markdown_cell(
        "### Installing Flash Attention")
    text_cell1 = nbf.v4.new_markdown_cell(
        "Installing Flash Attention to reduce the memory and runtime cost of the attention layer, and to "
        "improve the performance of the model training. Learn more at "
        "[FlashAttention](https://github.com/Dao-AILab/flash-attention/tree/main). "
        "Installing flash attention from source can take quite a bit of time (10-45 minutes).")
    code = """
import torch; assert torch.cuda.get_device_capability()[0] >= 8, 'Hardware not supported for Flash Attention'

!pip install ninja packaging
!MAX_JOBS=4 pip install flash-attn --no-build-isolation --upgrade
"""
    code_cell = nbf.v4.new_code_cell(code)
    cells.append(text_cell)
    cells.append(text_cell1)
    cells.append(code_cell)


def create_login_hf_cells(cells: list):
    text_cell = nbf.v4.new_markdown_cell(
        "### Login to HF")
    text_cell1 = nbf.v4.new_markdown_cell(
        "Installing **huggingface_hub** to use as a remote model versioning service. This means that your "
        "model, logs, and information will be automatically pushed to the Hub during training. You should "
        "have your 'HF_TOKEN' available.")
    code = """
# Install huggingface_hub
!pip install -q huggingface_hub

from google.colab import userdata  # Colab secrets helper; adjust if not running on Colab
from huggingface_hub import login

login(
    token=userdata.get('Your_HF_TOKEN'),
    add_to_git_credential=True
)
"""
    code_cell = nbf.v4.new_code_cell(code)
    cells.append(text_cell)
    cells.append(text_cell1)
    cells.append(code_cell)


def create_datasets_cells(cells: list, dataset: FTDataSet, seed: int):
    text_cell = nbf.v4.new_markdown_cell("### Load and prepare the dataset")
    text = 'The dataset is already formatted in a conversational format, which is supported by ' \
           '[trl](https://huggingface.co/docs/trl/index/).'
    text_format = """
**Conversational format:**

```json
{"messages": [{"role": "system", "content": "You are..."}, {"role": "user", "content": "..."}, {"role": "assistant", "content": "..."}]}
{"messages": [{"role": "system", "content": "You are..."}, {"role": "user", "content": "..."}, {"role": "assistant", "content": "..."}]}
{"messages": [{"role": "system", "content": "You are..."}, {"role": "user", "content": "..."}, {"role": "assistant", "content": "..."}]}
```
"""
    text_cell1 = nbf.v4.new_markdown_cell(text)
    text_cell2 = nbf.v4.new_markdown_cell(text_format)
    code = f"""
from datasets import load_dataset

# Load dataset from the hub
dataset = load_dataset("{dataset.path}", split="{dataset.dataset_split}")

dataset = dataset.shuffle(seed={seed})
"""

    code_cell = nbf.v4.new_code_cell(code)
    cells.append(text_cell)
    cells.append(text_cell1)
    cells.append(text_cell2)
    cells.append(code_cell)


def create_model_cells(cells: list, model_id: str, version: str, flash_attention: bool, pad_side: str, pad_value: str,
                       load_in_4bit: str, bnb_4bit_use_double_quant: bool, bnb_4bit_quant_type: str,
                       bnb_4bit_compute_dtype: str):
    text_cell = nbf.v4.new_markdown_cell(f"### Load {model_id}-{version} for Finetuning")
    load_in_4bit_str = f"{load_in_4bit}=True"

    flash_attention_str = "attn_implementation='flash_attention_2',"
    if not flash_attention:
        flash_attention_str = ''

    pad_value_str = "tokenizer.pad_token = tokenizer.eos_token"
    if pad_value is None:
        pad_value_str = ""

    code = f"""
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from trl import setup_chat_format

# Hugging Face model id
model_id = "{model_id}-{version}"

# BitsAndBytesConfig
bnb_config = BitsAndBytesConfig(
    {load_in_4bit_str}, bnb_4bit_use_double_quant={bnb_4bit_use_double_quant},
    bnb_4bit_quant_type="{bnb_4bit_quant_type}", bnb_4bit_compute_dtype={bnb_4bit_compute_dtype}
)

# Load model and tokenizer
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    {flash_attention_str}
    torch_dtype=torch.bfloat16,
    quantization_config=bnb_config
)

tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.padding_side = "{pad_side}"
{pad_value_str}

# Set chat template to OAI chatML
model, tokenizer = setup_chat_format(model, tokenizer)
"""

    text_1 = """
This process involves two key steps:

1. **LLM Quantization:**
   - We first load the selected large language model (LLM).
   - We then use the "bitsandbytes" library to quantize the model, which can significantly reduce its memory footprint.

> **Note:** The memory requirements of the model scale with its size. For instance, a 7B parameter model may require
a 24GB GPU for fine-tuning.

2. **Chat Model Preparation:**
   - To train a model for chat/conversational tasks, we need to prepare both the model and its tokenizer.
   - This involves adding special tokens to the tokenizer and the model itself. These tokens help the model
     understand the different roles within a conversation.
   - The **trl** library provides a convenient method called `setup_chat_format` for this purpose. This method
     performs the following actions:
     * Adds special tokens to the tokenizer, such as `<|im_start|>` and `<|im_end|>`, to mark the beginning and
       ending of a conversation.
     * Resizes the model's embedding layer to accommodate the new tokens.
     * Sets the tokenizer's chat template, which defines the format used to convert input data into a chat-like
       structure. The default template is `chatml` from OpenAI.
"""

    code_cell = nbf.v4.new_code_cell(code)
    text_cell1 = nbf.v4.new_markdown_cell(text_1)
    cells.append(text_cell)
    cells.append(text_cell1)
    cells.append(code_cell)


def create_lora_config_cells(cells: list, r: int, alpha: int, dropout: float, bias: str):
    text_cell = nbf.v4.new_markdown_cell("### LoraConfig")
    code = f"""
from peft import LoraConfig

peft_config = LoraConfig(
    lora_alpha={alpha},
    lora_dropout={dropout},
    r={r},
    bias="{bias}",
    target_modules="all-linear",
    task_type="CAUSAL_LM"
)
"""

    text = """The `SFTTrainer` provides native integration with `peft`, simplifying the process of efficiently tuning
Language Models (LLMs) using techniques such as
[LoRA](https://magazine.sebastianraschka.com/p/practical-tips-for-finetuning-llms). The only requirement is to create
our `LoraConfig` and pass it to the `SFTTrainer`.
"""

    code_cell = nbf.v4.new_code_cell(code)
    cells.append(text_cell)
    cells.append(nbf.v4.new_markdown_cell(text))
    cells.append(code_cell)


def create_training_args_cells(cells: list, epochs, max_steps, logging_steps, per_device_train_batch_size,
                               save_strategy, gradient_accumulation_steps, gradient_checkpointing,
                               learning_rate, max_grad_norm, warmup_ratio, lr_scheduler_type, output_dir,
                               report_to, seed):
    text_cell = nbf.v4.new_markdown_cell("### TrainingArguments")
    to_install = None
    if report_to == "all":
        to_install = "azure_ml comet_ml mlflow tensorboard wandb"
    elif report_to != "none":
        to_install = report_to

    code_report = f"""
# Installing {to_install} to report the metrics

!pip install -q {to_install}
"""

    code = f"""
from transformers import TrainingArguments

args = TrainingArguments(
    output_dir="{output_dir}",
    num_train_epochs={epochs},
    per_device_train_batch_size={per_device_train_batch_size},
    gradient_accumulation_steps={gradient_accumulation_steps},
    gradient_checkpointing={gradient_checkpointing},
    optim="adamw_torch_fused",
    logging_steps={logging_steps},
    save_strategy='{save_strategy}',
    learning_rate={learning_rate},
    bf16=True,
    tf32=True,
    max_grad_norm={max_grad_norm},
    warmup_ratio={warmup_ratio},
    lr_scheduler_type='{lr_scheduler_type}',
    report_to='{report_to}',
    max_steps={max_steps},
    seed={seed},
    overwrite_output_dir=True,
    remove_unused_columns=True
)
"""

    code_cell = nbf.v4.new_code_cell(code)
    cells.append(text_cell)
    if to_install is not None:
        cells.append(nbf.v4.new_code_cell(code_report))
    cells.append(code_cell)


def create_sft_trainer_cells(cells: list, max_seq_length, packing):
    text_cell = nbf.v4.new_markdown_cell(
        """### Supervised Finetuning Trainer (SFT Trainer)

This `SFTTrainer` is a wrapper around the `transformers.Trainer` class and inherits all of its attributes and methods.
The trainer takes care of properly initializing the `PeftModel`.
""")
    dataset_kwargs = {
        "add_special_tokens": False,  # We template with special tokens
        "append_concat_token": False,  # No need to add additional separator token
    }
    code = f"""
from trl import SFTTrainer

trainer = SFTTrainer(
    model=model,
    args=args,
    train_dataset=dataset,
    peft_config=peft_config,
    max_seq_length={max_seq_length},
    tokenizer=tokenizer,
    packing={packing},
    dataset_kwargs={dataset_kwargs}
)
"""
    code_cell = nbf.v4.new_code_cell(code)
    cells.append(text_cell)
    cells.append(code_cell)


def create_start_training_cells(cells: list, epochs, max_steps, push_to_hub, output_dir):
    if push_to_hub:
        save_txt = " and to the hub."
    else:
        save_txt = "."

    epoch_str = f"{epochs} epochs"
    if max_steps > 0:
        epoch_str = f"{max_steps} steps"

    text_cell = nbf.v4.new_markdown_cell(
        f"""### Starting Training and Saving Model/Tokenizer

We start training the model by calling the `train()` method on the trainer instance. This will start the training
loop and train the model for `{epoch_str}`. The model will be automatically saved to the output directory
({output_dir}){save_txt}
""")

    code = f"""
# start training
trainer.train()

# save the model
trainer.save_model()

# save tokenizer
tokenizer.save_pretrained("{output_dir}")
"""
    code_cell = nbf.v4.new_code_cell(code)
    cells.append(text_cell)
    cells.append(code_cell)


def create_free_gpu_cells(cells: list):
    text_cell = nbf.v4.new_markdown_cell(
        """### Free the GPU Memory for Merging `PeftModel`""")

    code = """
# Free the GPU memory
del model
del trainer
torch.cuda.empty_cache()
"""
    code_cell = nbf.v4.new_code_cell(code)
    cells.append(text_cell)
    cells.append(code_cell)


def create_merge_lora_cells(cells: list, output_dir):
    text_cell = nbf.v4.new_markdown_cell(
        """### Merge LoRA Adapter into the Original Model

While utilizing `LoRA`, we focus on training the adapters rather than the entire model. Consequently, during the
model saving process, only the `adapter weights` are preserved, not the complete model. If you wish to save the
entire model for easier usage with Text Generation Inference, you can incorporate the adapter weights into the model
weights. This can be achieved using the `merge_and_unload` method. Following this, the model can be saved using the
`save_pretrained` method. The result is a default model that is ready for inference.
""")

    code = f"""
import torch
from peft import AutoPeftModelForCausalLM

# Load Peft model on CPU
model = AutoPeftModelForCausalLM.from_pretrained(
    "{output_dir}",
    torch_dtype=torch.float16,
    low_cpu_mem_usage=True
)

# Merge LoRA and base model and save
merged_model = model.merge_and_unload()
merged_model.save_pretrained("{output_dir}", safe_serialization=True, max_shard_size="2GB")
"""
    code_cell = nbf.v4.new_code_cell(code)
    cells.append(text_cell)
    cells.append(code_cell)


def push_merged_model_cells(cells: list, output_dir):
    text_cell = nbf.v4.new_markdown_cell(
        """### Push the Merged Model as well as the Tokenizer to the HF Hub""")

    code = f"""
merged_model.push_to_hub("{output_dir}", use_temp_dir=False)

tokenizer.push_to_hub("{output_dir}", use_temp_dir=False)
"""
    code_cell = nbf.v4.new_code_cell(code)
    cells.append(text_cell)
    cells.append(code_cell)
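All of the `create_*_cells` helpers follow one nbformat pattern: construct cells with `nbf.v4.new_markdown_cell` / `nbf.v4.new_code_cell`, append them to the notebook's `cells` list, and let the caller serialize with `nbf.write` (as app.py does). A minimal self-contained sketch of that flow; the output file name here is illustrative:

import nbformat as nbf

notebook = nbf.v4.new_notebook()

# Same shape as the helpers above: a markdown header followed by a code cell
notebook['cells'].append(nbf.v4.new_markdown_cell("### Hello"))
notebook['cells'].append(nbf.v4.new_code_cell("print('hello from a generated cell')"))

with open("example.ipynb", "w") as f:  # illustrative output path
    nbf.write(notebook, f)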