menouar committed on
Commit
af04de4
1 Parent(s): 3caa08b

First Commit

Files changed (5)
  1. .gitignore +60 -0
  2. app.py +297 -0
  3. utils/__init__.py +87 -0
  4. utils/components_creator.py +256 -0
  5. utils/notebook_generator.py +393 -0
.gitignore ADDED
@@ -0,0 +1,60 @@
+ # Byte-compiled / optimized files
+ *.py[cod]
+ __pycache__/
+
+ # C extensions
+ *.so
+
+ # Distribution / packaging
+ dist/
+ build/
+ eggs/
+ *.egg-info/
+ .svn/
+ *.swp
+ *.tar.gz
+ *.tgz
+ *.zip
+ *.rar
+
+ # Development
+ *.bak
+ *.tmp
+
+ # IDE specific files
+ .vscode/
+ .idea/
+
+ # Jupyter Notebook
+ .ipynb_checkpoints/
+
+ # Environment
+ .env
+ env/
+ venv/
+ ENV/
+ env.bak/
+ venv.bak/
+
+ # Compiled Python modules
+ *.pyd
+
+ # Coverage
+ .coverage
+ .coverage.*
+ htmlcov/
+
+ # Type checking
+ .mypy_cache/
+ .dmypy.json
+
+ # Sphinx documentation
+ docs/_build/
+
+ # Ignore generated artifacts (.pkl files plus the notebook/HTML the app writes)
+ *.pkl
+ *.html
+ *.ipynb
app.py ADDED
@@ -0,0 +1,297 @@
+ from typing import Any
+
+ from nbconvert import HTMLExporter
+
+ # The wildcard imports bring in gr, nbf, Component, the *_ID constants,
+ # the cell-builder functions, and the model/dataset registries.
+ from utils.notebook_generator import *
+ from utils.components_creator import *
+
+ finetuning_notebook = "Finetuning_NoteBook"
+
+ css = """
+ .container {
+     align-items: center;
+     justify-content: center;
+ }
+ .center_text {
+     text-align: center;
+ }
+
+ .a_custom {
+     border-radius: var(--button-large-radius);
+     padding: var(--button-large-padding);
+     font-weight: var(--button-large-text-weight);
+     font-size: var(--button-large-text-size);
+     border: var(--button-border-width) solid var(--button-primary-border-color);
+     background: var(--button-primary-background-fill);
+     color: var(--button-primary-text-color);
+     justify-content: center;
+     align-items: center;
+     transition: var(--button-transition);
+     box-shadow: var(--button-shadow);
+     text-align: center;
+ }
+ .a_custom a {
+     text-decoration: none;
+     color: white;
+ }
+ """
+
+
+ def centered_column():
+     return gr.Column(elem_classes=["container"])
+
+
+ def change_model_selection(model_id):
+     if model_id == gemma.name:
+         gr.Warning("""
+         Access Gemma:
+
+         To load Gemma from Hugging Face, you’re required to review and agree to Google’s usage license.
+         """)
+     if model_id == llama.name:
+         gr.Warning("""
+         Access Llama 2:
+
+         To load Llama 2 from Hugging Face, you’re required to review and agree to Meta’s usage license.
+         """)
+
+     for m in models:
+         if m.name == model_id:
+             return gr.Dropdown(choices=m.versions, interactive=True, visible=True,
+                                info=f"Select the version of the model {m.name} you wish to use.")
+     return None
+
+
+ def check_valid_input(value):
+     if isinstance(value, str):
+         return value and value.strip()
+     if isinstance(value, list):
+         return len(value) > 0
+     return value is not None
+
+
+ def get_dataset(dataset_path):
+     for d in ft_datasets:
+         if d.path == dataset_path:
+             return d
+     return None
+
+
+ def get_value(components: dict[Component, Any], elem_id: str) -> Any:
+     # Look a component's current value up by its elem_id.
+     for component, val in components.items():
+         if component.elem_id == elem_id:
+             return val
+     return None
+
+
+ def preview_notebook():
+     html_exporter = HTMLExporter()
+     (body, resources) = html_exporter.from_notebook_node(notebook)
+
+     html_path = f"{finetuning_notebook}.html"
+     with open(html_path, 'w') as f:
+         f.write(body)
+     return f'<iframe src="file={html_path}" width="100%" height="250px"></iframe>'
+
+
+ def generate_code(components: dict[Component, Any]):
+     create_install_libraries_cells(notebook['cells'])
+     flash_attention_value = get_value(components, FLASH_ATTENTION_ID)
+     if flash_attention_value:
+         create_install_flash_attention(notebook['cells'])
+
+     push_to_hub = get_value(components, PUSH_TO_HUB_ID)
+     if push_to_hub:
+         create_login_hf_cells(notebook['cells'])
+
+     dataset_value = get_value(components, DATASET_SELECTION_ID)
+     seed_value = get_value(components, DATASET_SHUFFLING_SEED)
+     if not check_valid_input(dataset_value):
+         gr.Warning("No dataset is selected")
+     else:
+         create_datasets_cells(notebook['cells'], get_dataset(dataset_value), seed_value)
+
+     model_value = get_value(components, MODEL_SELECTION_ID)
+     if not check_valid_input(model_value):
+         gr.Warning("No model is selected!")
+     else:
+         version_value = get_value(components, MODEL_VERSION_SELECTION_ID)
+         if not check_valid_input(version_value):
+             gr.Warning("No version of the model is selected")
+         else:
+             load_in_4bit = get_value(components, LOAD_IN_4_BIT_ID)
+             bnb_4bit_use_double_quant = get_value(components, BNB_4BIT_USE_DOUBLE_QUANT)
+             bnb_4bit_quant_type = get_value(components, BNB_4BIT_QUANT_TYPE)
+             bnb_4bit_compute_dtype = get_value(components, BNB_4BIT_COMPUTE_DTYPE)
+             pad_side = get_value(components, PAD_SIDE_ID)
+             pad_value = get_value(components, PAD_VALUE_ID)
+             create_model_cells(notebook['cells'], model_id=model_value, version=version_value,
+                                flash_attention=flash_attention_value, pad_value=pad_value,
+                                pad_side=pad_side, load_in_4bit=load_in_4bit,
+                                bnb_4bit_use_double_quant=bnb_4bit_use_double_quant,
+                                bnb_4bit_quant_type=bnb_4bit_quant_type,
+                                bnb_4bit_compute_dtype=bnb_4bit_compute_dtype)
+
+     r_value = get_value(components, LORA_R_ID)
+     alpha_value = get_value(components, LORA_ALPHA_ID)
+     dropout_value = get_value(components, LORA_DROPOUT_ID)
+     bias_value = get_value(components, LORA_BIAS_ID)
+     create_lora_config_cells(notebook['cells'], r_value, alpha_value, dropout_value, bias_value)
+
+     epochs = get_value(components, NUM_TRAIN_EPOCHS_ID)
+     max_steps = get_value(components, MAX_STEPS_ID)
+     logging_steps = get_value(components, LOGGING_STEPS_ID)
+     per_device_train_batch_size = get_value(components, PER_DEVICE_TRAIN_BATCH_SIZE)
+     save_strategy = get_value(components, SAVE_STRATEGY_ID)
+     gradient_accumulation_steps = get_value(components, GRADIENT_ACCUMULATION_STEPS_ID)
+     gradient_checkpointing = get_value(components, GRADIENT_CHECKPOINTING_ID)
+     learning_rate = get_value(components, LEARNING_RATE_ID)
+     max_grad_norm = get_value(components, MAX_GRAD_NORM_ID)
+     warmup_ratio = get_value(components, WARMUP_RATIO_ID)
+     lr_scheduler_type = get_value(components, LR_SCHEDULER_TYPE_ID)
+     output_dir = get_value(components, OUTPUT_DIR_ID)
+     report_to = get_value(components, REPORT_TO_ID)
+
+     if not check_valid_input(output_dir):
+         gr.Warning("No output_dir is given")
+
+     create_training_args_cells(notebook['cells'], epochs=epochs, max_steps=max_steps, logging_steps=logging_steps,
+                                per_device_train_batch_size=per_device_train_batch_size, save_strategy=save_strategy,
+                                gradient_accumulation_steps=gradient_accumulation_steps,
+                                gradient_checkpointing=gradient_checkpointing, learning_rate=learning_rate,
+                                max_grad_norm=max_grad_norm, warmup_ratio=warmup_ratio,
+                                lr_scheduler_type=lr_scheduler_type, output_dir=output_dir, report_to=report_to,
+                                seed=seed_value)
+
+     max_seq_length = get_value(components, MAX_SEQ_LENGTH_ID)
+     packing = get_value(components, PACKING_ID)
+     create_sft_trainer_cells(notebook['cells'], max_seq_length, packing)
+
+     create_start_training_cells(notebook['cells'], epochs, max_steps, push_to_hub, output_dir)
+
+     create_free_gpu_cells(notebook['cells'])
+
+     create_merge_lora_cells(notebook['cells'], output_dir)
+
+     if push_to_hub:
+         push_merged_model_cells(notebook['cells'], output_dir)
+
+     file_name = f"{finetuning_notebook}.ipynb"
+
+     with open(file_name, 'w') as f:
+         nbf.write(notebook, f)
+
+     return gr.Button(visible=True), f'''<div class="a_custom"><a href="file={file_name}" download={file_name}>
+     💾️ Download {finetuning_notebook}.ipynb</a></div>'''
+
+
+ with gr.Blocks(css=css, theme=gr.themes.Soft(text_size='lg', font=["monospace"],
+                                              primary_hue=gr.themes.colors.blue)) as demo:
+     gr.Label("UI-Guided LLM FineTuning Jupyter Notebook Generator 🛠️🧠", show_label=False)
+     gr.Markdown(
+         'Generate a **Jupyter Notebook file (.ipynb)** 📔⚙️ for **finetuning** a Large Language Model (**LLM**) '
+         '🎚️🧠 on a chosen dataset with configured parameters, guided by an intuitive User Interface (UI) 👆💻.',
+         elem_classes=["center_text"])
+
+     all_components: Set[Component] = set()
+
+     gr.HTML("<h2 style='text-align: center;'>LLM 🧠</h2>")
+     with gr.Row():
+         model_selection = gr.Dropdown(
+             [model.name for model in models],
+             elem_id=MODEL_SELECTION_ID,
+             label="Select a Large Language Model (LLM)",
+             info="Select a Large Language Model (LLM) to finetune using the SFTTrainer."
+         )
+         version_selection = gr.Dropdown(
+             choices=[], label="Select a Model Version 🔄", info="", visible=False, elem_id=MODEL_VERSION_SELECTION_ID
+         )
+     all_components.add(model_selection)
+     all_components.add(version_selection)
+
+     gr.HTML("<h2 style='text-align: center;'>Dataset 📊</h2>")
+     with gr.Row():
+         all_components.update(add_dataset_components())
+
+     gr.HTML("<h2 style='text-align: center;'>⚡ Flash Attention ⚡</h2>")
+     with gr.Row():
+         flash_attention = gr.Checkbox(value=True, label="Enable Flash Attention", interactive=True,
+                                       elem_id=FLASH_ATTENTION_ID,
+                                       info="Flash Attention is a technique that reduces the memory and runtime costs "
+                                            "associated with the attention layer in a model. For more details, please "
+                                            "refer to the Flash Attention repository on GitHub.")
+     all_components.add(flash_attention)
+
+     gr.HTML("<h2 style='text-align: center;'>Quantization</h2>")
+     with gr.Row():
+         with centered_column():
+             all_components.update(add_quantization_components())
+         with centered_column():
+             all_components.update(add_quantization_components1())
+
+     gr.HTML("<h2 style='text-align: center;'>Tokenizer Configuration</h2>")
+     with gr.Row():
+         all_components.update(add_pad_tokens())
+
+     gr.HTML("<h2 style='text-align: center;'>LoRA Configuration</h2>")
+     with gr.Row():
+         with centered_column():
+             all_components.update(add_lora_components1())
+         with centered_column():
+             all_components.update(add_lora_components())
+
+     gr.HTML("<h2 style='text-align: center;'>⚙️ Training Arguments ⚙️</h2>")
+     with gr.Row():
+         with centered_column():
+             all_components.update(add_training_args_1())
+             all_components.update(add_training_args_1_bis())
+         with centered_column():
+             all_components.update(add_training_args_3())
+
+     gr.HTML("<h2 style='text-align: center;'>Optimizer Arguments</h2>")
+     with gr.Row():
+         with centered_column():
+             all_components.update(add_optimizer1())
+         with centered_column():
+             all_components.update(add_optimizer())
+
+     gr.HTML("<h2 style='text-align: center;'>Outputs</h2>")
+     with gr.Row():
+         with centered_column():
+             all_components.update(add_outputs())
+         with centered_column():
+             all_components.update(add_outputs1())
+
+     gr.HTML("<h2 style='text-align: center;'>SFTTrainer Arguments</h2>")
+     with gr.Row():
+         all_components.update(add_sft_trainer_args())
+
+     with gr.Row():
+         iframe = gr.HTML(show_label=False, visible=True)
+
+     with gr.Row():
+         greet_btn = gr.Button("Generate 🛠️", variant="primary")
+
+     with gr.Row():
+         preview_btn = gr.Button(f"👀 Preview {finetuning_notebook}.ipynb", variant="primary", visible=False)
+         download_btn = gr.HTML(show_label=False, visible=True)
+
+     notebook = nbf.v4.new_notebook()
+     greet_btn.click(fn=generate_code, inputs=all_components, outputs=[preview_btn, download_btn])
+
+     preview_btn.click(fn=preview_notebook, inputs=None, outputs=iframe)
+
+     model_selection.change(
+         fn=change_model_selection,
+         inputs=model_selection,
+         outputs=version_selection
+     )
+
+ demo.launch(allowed_paths=["/"])
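
A note on the wiring above: because `greet_btn.click` receives `all_components` as a set, Gradio calls `generate_code` with a single dict mapping each Component to its current value, which `get_value` then searches by `elem_id`. A minimal, self-contained sketch of that pattern (the component names here are illustrative, not from this app):

import gradio as gr

with gr.Blocks() as sketch:
    packing = gr.Checkbox(value=True, label="packing", elem_id="packing")
    btn = gr.Button("Generate")
    out = gr.Textbox()

    def handler(components: dict):
        # Mirrors get_value(): look a component's value up by elem_id.
        for comp, val in components.items():
            if comp.elem_id == "packing":
                return f"packing={val}"
        return "not found"

    # Passing a *set* of components as `inputs` makes Gradio hand the
    # handler one {Component: value} dict instead of positional args.
    btn.click(fn=handler, inputs={packing}, outputs=out)

# sketch.launch()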
utils/__init__.py ADDED
@@ -0,0 +1,87 @@
+ from typing import List, Optional
+
+ # elem_id constants shared between the UI components and generate_code()
+ MODEL_SELECTION_ID: str = "model_selection"
+ MODEL_VERSION_SELECTION_ID: str = "model_version_selection"
+
+ LOAD_IN_4_BIT_ID: str = "load_in_4bit"
+ BNB_4BIT_QUANT_TYPE: str = "bnb_4bit_quant_type"
+ BNB_4BIT_COMPUTE_DTYPE: str = "bnb_4bit_compute_dtype"
+ BNB_4BIT_USE_DOUBLE_QUANT: str = "bnb_4bit_use_double_quant"
+
+ DATASET_SELECTION_ID = "dataset_selection"
+ DATASET_SHUFFLING_SEED = "dataset_seed"
+
+ FLASH_ATTENTION_ID = "flash_attention"
+
+ PAD_SIDE_ID = "pad_side"
+ PAD_VALUE_ID = "pad_value"
+
+ LORA_R_ID = "lora_r"
+ LORA_ALPHA_ID = "lora_alpha"
+ LORA_DROPOUT_ID = "lora_dropout"
+ LORA_BIAS_ID = 'lora_bias'
+
+ NUM_TRAIN_EPOCHS_ID = "num_train_epochs"
+ MAX_STEPS_ID = "max_steps_id"
+ LOGGING_STEPS_ID = "logging_steps"
+ PER_DEVICE_TRAIN_BATCH_SIZE = "per_device_train_batch_size"
+ SAVE_STRATEGY_ID = "save_strategy"
+ GRADIENT_ACCUMULATION_STEPS_ID = "gradient_accumulation_steps"
+ GRADIENT_CHECKPOINTING_ID = "gradient_checkpointing"
+ LEARNING_RATE_ID = "learning_rate"
+ MAX_GRAD_NORM_ID = "max_grad_norm"
+ WARMUP_RATIO_ID = "warmup_ratio"
+ LR_SCHEDULER_TYPE_ID = "lr_scheduler_type"
+ OUTPUT_DIR_ID = "output_dir"
+ PUSH_TO_HUB_ID = "push_to_hub"
+ REPORT_TO_ID = "report_to"
+
+ MAX_SEQ_LENGTH_ID = "max_seq_length"
+ PACKING_ID = "packing"
+
+ OPTIMIZER_ID = "optim"
+ BETA1_ID = "adam_beta1"
+ BETA2_ID = "adam_beta2"
+ EPSILON_ID = "adam_epsilon"
+ WEIGHT_DECAY_ID = "weight_decay"
+
+
+ class FTDataSet:
+     """A finetuning dataset: its Hub path and the split to load."""
+
+     def __init__(self, path: str, dataset_split: Optional[str] = None):
+         self.path = path
+         self.dataset_split = dataset_split
+
+     def __str__(self):
+         return self.path
+
+
+ deita_dataset = FTDataSet(path="HuggingFaceH4/deita-10k-v0-sft", dataset_split="train_sft")
+ dolly = FTDataSet(path="philschmid/dolly-15k-oai-style", dataset_split="train")
+ ultrachat_200k = FTDataSet(path="HuggingFaceH4/ultrachat_200k", dataset_split="train_sft")
+ ft_datasets = [deita_dataset, dolly, ultrachat_200k]
+
+
+ class Model:
+     """A base model family: its Hub name prefix and the available versions."""
+
+     def __init__(self, name: str, versions: List[str]):
+         self.name = name
+         self.versions = versions
+
+     def __str__(self):
+         return self.name
+
+
+ models: List[Model] = []
+ gemma = Model(name="google/gemma", versions=["7b", "2b"])
+ models.append(gemma)
+ falcon = Model(name="tiiuae/falcon", versions=["7b"])  # "7b-instruct"
+ models.append(falcon)
+ phi = Model(name="microsoft/phi", versions=["1_5", "1", "2"])
+ models.append(phi)
+ llama = Model(name="meta-llama/Llama-2", versions=["7b", "7b-hf"])  # "7b-chat", "7b-chat-hf"
+ models.append(llama)
+ mistral = Model(name="mistralai/Mistral", versions=["7B-v0.1"])  # "7B-Instruct-v0.1"
+ models.append(mistral)
+ tinyLlama = Model(name="TinyLlama/TinyLlama-1.1B",
+                   versions=['intermediate-step-1431k-3T', 'step-50K-105b', 'intermediate-step-240k-503b',
+                             'intermediate-step-715k-1.5T', 'intermediate-step-1195k-token-2.5T'])
+ models.append(tinyLlama)
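
These registries drive the two dropdowns in app.py; a minimal sketch of resolving a UI selection back to its available versions, mirroring change_model_selection:

from utils import models, gemma

choices = [m.name for m in models]  # dropdown labels, e.g. "google/gemma"
selected = gemma.name
versions = next((m.versions for m in models if m.name == selected), [])
print(versions)  # ["7b", "2b"] for google/gemma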
utils/components_creator.py ADDED
@@ -0,0 +1,256 @@
+ from typing import Set
+
+ import gradio as gr
+ from gradio.components import Component
+
+ from utils import *
+
+
+ def add_quantization_components() -> Set[Component]:
+     q_components: Set[Component] = set()
+     load_in_4bit = gr.Radio(["load_in_4bit", "load_in_8bit"], value="load_in_4bit",
+                             label="Quantization",
+                             info="This flag is used to enable 4/8-bit quantization.",
+                             interactive=True,
+                             elem_id=LOAD_IN_4_BIT_ID)
+     bnb_4bit_quant_type = gr.Radio(["fp4", "nf4"], label="bnb_4bit_quant_type",
+                                    value="nf4",
+                                    elem_id=BNB_4BIT_QUANT_TYPE,
+                                    interactive=True,
+                                    info="This sets the quantization data type in "
+                                         "the bnb.nn.Linear4bit layers.")
+     q_components.add(load_in_4bit)
+     q_components.add(bnb_4bit_quant_type)
+     return q_components
+
+
+ def add_quantization_components1() -> Set[Component]:
+     q_components: Set[Component] = set()
+     bnb_4bit_compute_dtype = gr.Radio(
+         ["torch.float32", "torch.bfloat16", "torch.float16"],
+         label="bnb_4bit_compute_dtype",
+         info="This sets the computational type, which might be different "
+              "from the input type.",
+         elem_id=BNB_4BIT_COMPUTE_DTYPE,
+         interactive=True, value="torch.bfloat16")
+     bnb_4bit_use_double_quant = gr.Checkbox(label="bnb_4bit_use_double_quant",
+                                             value=True,
+                                             interactive=True,
+                                             elem_id=BNB_4BIT_USE_DOUBLE_QUANT,
+                                             info="This flag is used for nested quantization, where the "
+                                                  "quantization constants from the first "
+                                                  "quantization are quantized again.")
+     q_components.add(bnb_4bit_compute_dtype)
+     q_components.add(bnb_4bit_use_double_quant)
+     return q_components
+
+
+ def add_dataset_components() -> Set[Component]:
+     dataset_selection = gr.Dropdown(
+         [dt.path for dt in ft_datasets],
+         elem_id=DATASET_SELECTION_ID,
+         label="Select a Dataset",
+         info="Select a dataset for finetuning the model."
+     )
+     seed = gr.Slider(0, 256, step=1, value=42, elem_id=DATASET_SHUFFLING_SEED, label="Random Seed",
+                      info="Set a random seed for shuffling the dataset.", interactive=True)
+
+     d_components: Set[Component] = set()
+     d_components.add(dataset_selection)
+     d_components.add(seed)
+     return d_components
+
+
+ def add_pad_tokens() -> Set[Component]:
+     pad_token_side = gr.Radio(["right", "left"], label="Tokenizer: padding_side",
+                               info="The side on which the model should have padding applied.",
+                               interactive=True, value="right", elem_id=PAD_SIDE_ID)
+     pad_token_value = gr.Radio([None, "eos_token"], label="Tokenizer: pad_token",
+                                info="A special token used to make arrays of tokens the same size for batching "
+                                     "purposes; it is then ignored by attention mechanisms and loss computation.",
+                                interactive=True, value=None, elem_id=PAD_VALUE_ID)
+     pad_components: Set[Component] = set()
+     pad_components.add(pad_token_side)
+     pad_components.add(pad_token_value)
+     return pad_components
+
+
+ def add_lora_components() -> Set[Component]:
+     r = gr.Slider(1, 2048, step=1, value=6, label="r", info="LoRA attention dimension (the 'rank').",
+                   interactive=True, elem_id=LORA_R_ID)
+     alpha = gr.Slider(1, 512, step=1, value=8, label="lora_alpha", info="The alpha parameter for LoRA scaling.",
+                       interactive=True, elem_id=LORA_ALPHA_ID)
+
+     out_components: Set[Component] = set()
+     out_components.add(r)
+     out_components.add(alpha)
+     return out_components
+
+
+ def add_lora_components1() -> Set[Component]:
+     dropout = gr.Slider(0, 1, step=0.01, value=0.05, label="lora_dropout",
+                         info="The dropout probability for LoRA layers.",
+                         interactive=True, elem_id=LORA_DROPOUT_ID)
+     bias = gr.Radio(['none', 'all', 'lora_only'], label="bias",
+                     info="Bias type for LoRA. If 'all' or 'lora_only', the corresponding biases will be updated "
+                          "during training.",
+                     interactive=True, value="none", elem_id=LORA_BIAS_ID)
+
+     out_components: Set[Component] = set()
+     out_components.add(dropout)
+     out_components.add(bias)
+     return out_components
+
+
+ def add_training_args_1() -> Set[Component]:
+     epochs = gr.Slider(1, 100, step=1, value=3, label="num_train_epochs",
+                        info="Total number of training epochs to perform.",
+                        interactive=True, elem_id=NUM_TRAIN_EPOCHS_ID)
+     max_steps = gr.Slider(-1, 100, step=1, value=-1, label="max_steps",
+                           info="Total number of training steps to perform. If set to a positive value, it "
+                                "overrides num_train_epochs; -1 disables it.",
+                           interactive=True, elem_id=MAX_STEPS_ID)
+     out_components: Set[Component] = set()
+     out_components.add(epochs)
+     out_components.add(max_steps)
+     return out_components
+
+
+ def add_training_args_1_bis() -> Set[Component]:
+     logging_steps = gr.Slider(1, 100, step=1, value=10, label="logging_steps",
+                               info="Number of update steps between two logs if logging_strategy='steps'.",
+                               interactive=True, elem_id=LOGGING_STEPS_ID)
+     per_device_train_batch_size = gr.Slider(1, 64, step=1, value=4, label="per_device_train_batch_size",
+                                             info="Batch size per device during training.",
+                                             interactive=True, elem_id=PER_DEVICE_TRAIN_BATCH_SIZE)
+     save_strategy = gr.Radio(['no', 'epoch', 'steps'], label="save_strategy",
+                              info="The checkpoint save strategy to adopt during training.",
+                              interactive=True, value="epoch", elem_id=SAVE_STRATEGY_ID)
+     out_components: Set[Component] = set()
+     out_components.add(save_strategy)
+     out_components.add(logging_steps)
+     out_components.add(per_device_train_batch_size)
+     return out_components
+
+
+ def add_training_args_3() -> Set[Component]:
+     max_grad_norm = gr.Slider(0.01, 1, value=0.3, label="max_grad_norm",
+                               info="Maximum gradient norm (for gradient clipping).",
+                               interactive=True, elem_id=MAX_GRAD_NORM_ID)
+     warmup_ratio = gr.Slider(0, 1, value=0.1, label="warmup_ratio",
+                              info="Ratio of total training steps used for a linear warmup from 0 to learning_rate.",
+                              interactive=True, elem_id=WARMUP_RATIO_ID)
+     gradient_accumulation_steps = gr.Slider(1, 64, step=1, value=2, label="gradient_accumulation_steps",
+                                             info="Number of update steps to accumulate the gradients for, before "
+                                                  "performing a backward/update pass.",
+                                             interactive=True, elem_id=GRADIENT_ACCUMULATION_STEPS_ID)
+     gradient_checkpointing = gr.Checkbox(label="gradient_checkpointing", value=True, interactive=True,
+                                          info="Use gradient checkpointing to save memory at the expense of a slower "
+                                               "backward pass.", elem_id=GRADIENT_CHECKPOINTING_ID)
+     lr_scheduler_type = gr.Radio(['linear', 'constant', 'cosine'], label="lr_scheduler_type",
+                                  info="The learning rate scheduler type to use.",
+                                  interactive=True, value="cosine", elem_id=LR_SCHEDULER_TYPE_ID)
+
+     out_components: Set[Component] = set()
+     out_components.add(max_grad_norm)
+     out_components.add(warmup_ratio)
+     out_components.add(gradient_accumulation_steps)
+     out_components.add(gradient_checkpointing)
+     out_components.add(lr_scheduler_type)
+     return out_components
+
+
+ def add_outputs() -> Set[Component]:
+     output_dir = gr.Textbox(interactive=True,
+                             label="output_dir",
+                             info='The output directory where the model predictions and checkpoints will be written.',
+                             elem_id=OUTPUT_DIR_ID)
+
+     push_to_hub = gr.Checkbox(label="push_to_hub", value=False, interactive=True,
+                               info="Whether or not to upload the trained model to the hub after training. If this is "
+                                    "True, you must specify 'HF_TOKEN'.",
+                               elem_id=PUSH_TO_HUB_ID)
+
+     out_components: Set[Component] = set()
+     out_components.add(output_dir)
+     out_components.add(push_to_hub)
+     return out_components
+
+
+ def add_outputs1() -> Set[Component]:
+     report_to = gr.Dropdown(
+         ["azure_ml", "comet_ml", "mlflow", "tensorboard", "wandb", "all", 'none'],
+         value="tensorboard",
+         elem_id=REPORT_TO_ID,
+         label="report_to",
+         info="The list of integrations to report the results and logs to. Supported platforms are 'azure_ml', "
+              "'comet_ml', 'mlflow', 'tensorboard' and 'wandb'. Use 'all' to report to all integrations installed, "
+              "'none' for no integrations."
+     )
+     out_components: Set[Component] = set()
+     out_components.add(report_to)
+     return out_components
+
+
+ def add_optimizer() -> Set[Component]:
+     adam_beta1 = gr.Slider(0.00001, 1, value=0.9, label="adam_beta1",
+                            info="The beta1 hyperparameter for the [`AdamW`] optimizer.",
+                            interactive=True, elem_id=BETA1_ID)
+     adam_beta2 = gr.Slider(0.00001, 1, value=0.999, label="adam_beta2",
+                            info="The beta2 hyperparameter for the [`AdamW`] optimizer.",
+                            interactive=True, elem_id=BETA2_ID)
+     adam_epsilon = gr.Slider(1e-9, 1, value=1e-8, label="adam_epsilon",
+                              info="The epsilon hyperparameter for the [`AdamW`] optimizer.",
+                              interactive=True, elem_id=EPSILON_ID)
+     out_components: Set[Component] = set()
+     out_components.add(adam_beta1)
+     out_components.add(adam_beta2)
+     out_components.add(adam_epsilon)
+     return out_components
+
+
+ def add_optimizer1() -> Set[Component]:
+     optimizer = gr.Dropdown(
+         ["adamw_hf", "adamw_torch", "adamw_torch_fused", "adamw_apex_fused", "adamw_anyprecision", "adafactor"],
+         value="adamw_torch_fused",
+         elem_id=OPTIMIZER_ID,
+         label="optimizer",
+         info="The optimizer to use: 'adamw_hf', 'adamw_torch', 'adamw_torch_fused', 'adamw_apex_fused', "
+              "'adamw_anyprecision' or 'adafactor'."
+     )
+     learning_rate = gr.Slider(1e-6, 1, step=0.001, value=2.0e-05, label="learning_rate",
+                               info="The initial learning rate for AdamW.",
+                               interactive=True, elem_id=LEARNING_RATE_ID)
+     weight_decay = gr.Slider(0, 1, value=0, label="weight_decay",
+                              info="The weight decay to apply (if not zero) to all layers except all bias and "
+                                   "LayerNorm weights in the [`AdamW`] optimizer.",
+                              interactive=True, elem_id=WEIGHT_DECAY_ID)
+     out_components: Set[Component] = set()
+     out_components.add(optimizer)
+     out_components.add(learning_rate)
+     out_components.add(weight_decay)
+     return out_components
+
+
+ def add_sft_trainer_args() -> Set[Component]:
+     max_seq_length = gr.Slider(512, 3072, value=2048, label="max_seq_length",
+                                info="The maximum sequence length to use for the `ConstantLengthDataset` and for "
+                                     "automatically creating the Dataset.",
+                                interactive=True, elem_id=MAX_SEQ_LENGTH_ID)
+     packing = gr.Checkbox(label="packing", value=True, interactive=True, elem_id=PACKING_ID,
+                           info="This argument is used by the `ConstantLengthDataset` to pack the sequences of the "
+                                "dataset.")
+
+     out_components: Set[Component] = set()
+     out_components.add(max_seq_length)
+     out_components.add(packing)
+     return out_components
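
For reference, the quantization defaults selected above map onto a BitsAndBytesConfig like the one emitted by create_model_cells in utils/notebook_generator.py; a minimal sketch using those default values:

import torch
from transformers import BitsAndBytesConfig

# Defaults from add_quantization_components()/add_quantization_components1():
# 4-bit loading, nf4 quant type, bfloat16 compute, nested quantization.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)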
utils/notebook_generator.py ADDED
@@ -0,0 +1,393 @@
+ import nbformat as nbf
+
+ from utils import FTDataSet
+
+
+ def create_install_libraries_cells(cells: list):
+     text_cell = nbf.v4.new_markdown_cell("### Installing Required Libraries!")
+     text_cell1 = nbf.v4.new_markdown_cell(
+         "Installing required libraries, including trl, transformers, accelerate, peft, datasets, "
+         "and bitsandbytes.")
+     code = """
+ !pip install -q --upgrade "transformers==4.38.2"
+ !pip install -q --upgrade "datasets==2.16.1"
+ !pip install -q --upgrade "accelerate==0.26.1"
+ !pip install -q --upgrade "evaluate==0.4.1"
+ !pip install -q --upgrade "bitsandbytes==0.42.0"
+ !pip install -q --upgrade "trl==0.7.11"
+ !pip install -q --upgrade "peft==0.8.2"
+ """
+     code_pytorch = """
+ # Checks if PyTorch is installed and installs it if not.
+ try:
+     import torch
+     print("PyTorch is installed!")
+ except ImportError:
+     print("PyTorch is not installed.")
+     !pip install -q torch
+ """
+     code_cell = nbf.v4.new_code_cell(code)
+     cells.append(text_cell)
+     cells.append(text_cell1)
+     cells.append(nbf.v4.new_code_cell(code_pytorch))
+     cells.append(code_cell)
+
+
+ def create_install_flash_attention(cells: list):
+     text_cell = nbf.v4.new_markdown_cell(
+         "### Installing Flash Attention")
+     text_cell1 = nbf.v4.new_markdown_cell("Installing Flash Attention to reduce the memory "
+                                           "and runtime cost of the attention layer, and improve the performance of "
+                                           "the model training. Learn more at [FlashAttention]("
+                                           "https://github.com/Dao-AILab/flash-attention/tree/main). "
+                                           "Installing flash attention from source can take quite a bit of time "
+                                           "(10-45 minutes).")
+     code = """
+ import torch; assert torch.cuda.get_device_capability()[0] >= 8, 'Hardware not supported for Flash Attention'
+
+ !pip install ninja packaging
+ !MAX_JOBS=4 pip install flash-attn --no-build-isolation --upgrade
+ """
+     code_cell = nbf.v4.new_code_cell(code)
+     cells.append(text_cell)
+     cells.append(text_cell1)
+     cells.append(code_cell)
+
+ def create_login_hf_cells(cells: list):
59
+ text_cell = nbf.v4.new_markdown_cell(
60
+ "### Login to HF")
61
+ text_cell1 = nbf.v4.new_markdown_cell("Installing **huggingface_hub** to use as a remote "
62
+ "model versioning service. This means that your model, logs, and information "
63
+ "will be automatically pushed to the Hub during training. You should have "
64
+ "'HF_TOKEN'")
65
+ code = """
66
+ # Install huggingface_hub
67
+ !pip install -q huggingface_hub
68
+
69
+ from huggingface_hub import login
70
+
71
+ login(
72
+ token=userdata.get('Your_HF_TOKEN')
73
+ add_to_git_credential=True
74
+ )
75
+ """
76
+ code_cell = nbf.v4.new_code_cell(code)
77
+ cells.append(text_cell)
78
+ cells.append(text_cell1)
79
+ cells.append(code_cell)
80
+
81
+
82
+ def create_datasets_cells(cells: list, dataset: FTDataSet, seed: int):
83
+ text_cell = nbf.v4.new_markdown_cell("### Load and prepare the dataset")
84
+ text = 'The dataset is already formatted in a conversational format, which is supported by [trl](' \
85
+ 'https://huggingface.co/docs/trl/index/). '
86
+ text_format = """
87
+ **Conversational format:**
88
+
89
+
90
+ ```python {"messages": [{"role": "system", "content": "You are..."}, {"role": "user", "content": "..."},
91
+ {"role": "assistant", "content": "..."}]} {"messages": [{"role": "system", "content": "You are..."}, {"role": "user",
92
+ "content": "..."}, {"role": "assistant", "content": "..."}]} {"messages": [{"role": "system", "content": "You
93
+ are..."}, {"role": "user", "content": "..."}, {"role": "assistant", "content": "..."}]} """
94
+ text_cell1 = nbf.v4.new_markdown_cell(text)
95
+ text_cell2 = nbf.v4.new_markdown_cell(text_format)
96
+ code = f"""
97
+ from datasets import load_dataset
98
+
99
+ # Load dataset from the hub
100
+ dataset = load_dataset("{dataset.path}", split="{dataset.dataset_split}")
101
+
102
+ dataset = dataset.shuffle(seed={seed})
103
+ """
104
+
105
+ code_cell = nbf.v4.new_code_cell(code)
106
+ cells.append(text_cell)
107
+ cells.append(text_cell1)
108
+ cells.append(text_cell2)
109
+ cells.append(code_cell)
110
+
111
+
+
+ def create_model_cells(cells: list, model_id: str, version: str, flash_attention: bool, pad_side: str, pad_value: str,
+                        load_in_4bit: str, bnb_4bit_use_double_quant: bool, bnb_4bit_quant_type: str,
+                        bnb_4bit_compute_dtype: str):
+     text_cell = nbf.v4.new_markdown_cell(f"### Load {model_id}-{version} for Finetuning")
+     load_in_4bit_str = f"{load_in_4bit}=True"  # "load_in_4bit" or "load_in_8bit"
+
+     flash_attention_str = "attn_implementation='flash_attention_2',"
+     if not flash_attention:
+         flash_attention_str = ''
+
+     pad_value_str = "tokenizer.pad_token = tokenizer.eos_token"
+     if pad_value is None:
+         pad_value_str = ""
+
+     code = f"""
+ import torch
+ from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
+ from trl import setup_chat_format
+
+ # Hugging Face model id
+ model_id = "{model_id}-{version}"
+
+ # BitsAndBytesConfig
+ bnb_config = BitsAndBytesConfig(
+     {load_in_4bit_str}, bnb_4bit_use_double_quant={bnb_4bit_use_double_quant},
+     bnb_4bit_quant_type="{bnb_4bit_quant_type}", bnb_4bit_compute_dtype={bnb_4bit_compute_dtype}
+ )
+
+ # Load model and tokenizer
+ model = AutoModelForCausalLM.from_pretrained(
+     model_id,
+     device_map="auto",
+     {flash_attention_str}
+     torch_dtype=torch.bfloat16,
+     quantization_config=bnb_config
+ )
+
+ tokenizer = AutoTokenizer.from_pretrained(model_id)
+ tokenizer.padding_side = "{pad_side}"
+ {pad_value_str}
+
+ # Set chat template to OAI chatML
+ model, tokenizer = setup_chat_format(model, tokenizer)
+ """
+
+     text_1 = """
+ This process involves two key steps:
+
+ 1. **LLM Quantization:**
+    - We first load the selected large language model (LLM).
+    - We then use the "bitsandbytes" library to quantize the model, which can significantly reduce its memory footprint.
+
+ > **Note:** The memory requirements of the model scale with its size. For instance, a 7B parameter model may require
+ a 24GB GPU for fine-tuning.
+
+ 2. **Chat Model Preparation:**
+    - To train a model for chat/conversational tasks, we need to prepare both the model and its tokenizer.
+    - This involves adding special tokens to the tokenizer and the model itself. These tokens help the model
+ understand the different roles within a conversation.
+    - The **trl** library provides a convenient method called `setup_chat_format` for this purpose. This method
+ performs the following actions:
+      * Adds special tokens to the tokenizer, such as `<|im_start|>` and `<|im_end|>`, to mark the beginning and
+ end of a conversation.
+      * Resizes the model's embedding layer to accommodate the new tokens.
+      * Sets the tokenizer's chat template, which defines the format used to convert input data into a chat-like
+ structure. The default template is `chatml` from OpenAI.
+ """
+
+     code_cell = nbf.v4.new_code_cell(code)
+     text_cell1 = nbf.v4.new_markdown_cell(text_1)
+     cells.append(text_cell)
+     cells.append(text_cell1)
+     cells.append(code_cell)
+
+
+ def create_lora_config_cells(cells: list, r: int, alpha: int, dropout: float, bias: str):
+     text_cell = nbf.v4.new_markdown_cell("### LoraConfig")
+     code = f"""
+ from peft import LoraConfig
+
+ peft_config = LoraConfig(
+     lora_alpha={alpha},
+     lora_dropout={dropout},
+     r={r},
+     bias="{bias}",
+     target_modules="all-linear",
+     task_type="CAUSAL_LM"
+ )
+ """
+
+     text = """The `SFTTrainer` provides native integration with `peft`, simplifying the process of efficiently tuning
+ Language Models (LLMs) using techniques such as [LoRA](
+ https://magazine.sebastianraschka.com/p/practical-tips-for-finetuning-llms). The only requirement is to create
+ our `LoraConfig` and pass it to the `SFTTrainer`.
+ """
+
+     code_cell = nbf.v4.new_code_cell(code)
+     cells.append(text_cell)
+     cells.append(nbf.v4.new_markdown_cell(text))
+     cells.append(code_cell)
+
+
+ def create_training_args_cells(cells: list, epochs, max_steps, logging_steps, per_device_train_batch_size,
+                                save_strategy, gradient_accumulation_steps, gradient_checkpointing,
+                                learning_rate, max_grad_norm, warmup_ratio, lr_scheduler_type, output_dir,
+                                report_to, seed):
+     text_cell = nbf.v4.new_markdown_cell("### TrainingArguments")
+     to_install = None
+     if report_to == "all":
+         to_install = "azure_ml comet_ml mlflow tensorboard wandb"
+     elif report_to != "none":
+         to_install = report_to
+
+     code_report = f"""
+ # Installing {to_install} to report the metrics
+ !pip install -q {to_install}
+ """
+
+     code = f"""
+ from transformers import TrainingArguments
+
+ args = TrainingArguments(
+     output_dir="{output_dir}",
+     num_train_epochs={epochs},
+     per_device_train_batch_size={per_device_train_batch_size},
+     gradient_accumulation_steps={gradient_accumulation_steps},
+     gradient_checkpointing={gradient_checkpointing},
+     optim="adamw_torch_fused",
+     logging_steps={logging_steps},
+     save_strategy='{save_strategy}',
+     learning_rate={learning_rate},
+     bf16=True,
+     tf32=True,
+     max_grad_norm={max_grad_norm},
+     warmup_ratio={warmup_ratio},
+     lr_scheduler_type='{lr_scheduler_type}',
+     report_to='{report_to}',
+     max_steps={max_steps},
+     seed={seed},
+     overwrite_output_dir=True,
+     remove_unused_columns=True
+ )
+ """
+
+     code_cell = nbf.v4.new_code_cell(code)
+     cells.append(text_cell)
+     if to_install is not None:
+         cells.append(nbf.v4.new_code_cell(code_report))
+     cells.append(code_cell)
+
+
+ def create_sft_trainer_cells(cells: list, max_seq_length, packing):
+     text_cell = nbf.v4.new_markdown_cell(
+         """### Supervised Finetuning Trainer (SFT Trainer)
+
+ This `SFTTrainer` is a wrapper around the `transformers.Trainer` class and inherits all of its attributes and methods.
+ The trainer takes care of properly initializing the `PeftModel`.
+ """)
+     dataset_kwargs = {
+         "add_special_tokens": False,  # We template with special tokens
+         "append_concat_token": False,  # No need to add additional separator token
+     }
+     code = f"""
+ from trl import SFTTrainer
+
+ trainer = SFTTrainer(
+     model=model,
+     args=args,
+     train_dataset=dataset,
+     peft_config=peft_config,
+     max_seq_length={max_seq_length},
+     tokenizer=tokenizer,
+     packing={packing},
+     dataset_kwargs={dataset_kwargs}
+ )
+ """
+     code_cell = nbf.v4.new_code_cell(code)
+     cells.append(text_cell)
+     cells.append(code_cell)
+
+
+ def create_start_training_cells(cells: list, epochs, max_steps, push_to_hub, output_dir):
+     if push_to_hub:
+         save_txt = "and to the hub."
+     else:
+         save_txt = "."
+
+     epoch_str = f"{epochs} epochs"
+     if max_steps > 0:
+         epoch_str = f"{max_steps} steps"
+
+     text_cell = nbf.v4.new_markdown_cell(
+         f"""### Starting Training and Saving Model/Tokenizer
+
+ We start training the model by calling the `train()` method on the trainer instance. This will start the training
+ loop and train the model for `{epoch_str}`. The model will be automatically saved to the output directory
+ ({output_dir}) {save_txt}
+ """)
+
+     code = f"""
+ # start training
+ trainer.train()
+
+ # save the model
+ trainer.save_model()
+
+ # save tokenizer
+ tokenizer.save_pretrained("{output_dir}")
+ """
+     code_cell = nbf.v4.new_code_cell(code)
+     cells.append(text_cell)
+     cells.append(code_cell)
+
+
+ def create_free_gpu_cells(cells: list):
+     text_cell = nbf.v4.new_markdown_cell(
+         """### Free the GPU Memory for Merging `PeftModel`""")
+
+     code = """
+ # Free the GPU memory
+ del model
+ del trainer
+ torch.cuda.empty_cache()
+ """
+     code_cell = nbf.v4.new_code_cell(code)
+     cells.append(text_cell)
+     cells.append(code_cell)
+
+
+ def create_merge_lora_cells(cells: list, output_dir):
+     text_cell = nbf.v4.new_markdown_cell(
+         """### Merge LoRA Adapter into the Original Model
+
+ While utilizing `LoRA`, we train only the adapters rather than the entire model. Consequently, during the
+ model saving process, only the `adapter weights` are preserved, not the complete model. If we wish to save the
+ entire model for easier usage with Text Generation Inference, we can merge the adapter weights into the model
+ weights using the `merge_and_unload` method and then save the model with the `save_pretrained` method. The result
+ is a default model that is ready for inference.
+ """)
+
+     code = f"""
+ import torch
+ from peft import AutoPeftModelForCausalLM
+
+ # Load Peft model on CPU
+ model = AutoPeftModelForCausalLM.from_pretrained(
+     "{output_dir}",
+     torch_dtype=torch.float16,
+     low_cpu_mem_usage=True
+ )
+
+ # Merge LoRA and base model and save
+ merged_model = model.merge_and_unload()
+ merged_model.save_pretrained("{output_dir}", safe_serialization=True, max_shard_size="2GB")
+ """
+     code_cell = nbf.v4.new_code_cell(code)
+     cells.append(text_cell)
+     cells.append(code_cell)
+
+
+ def push_merged_model_cells(cells: list, output_dir):
+     text_cell = nbf.v4.new_markdown_cell(
+         """### Push the Merged Model and the Tokenizer to the HF Hub""")
+
+     code = f"""
+ merged_model.push_to_hub("{output_dir}", use_temp_dir=False)
+ tokenizer.push_to_hub("{output_dir}", use_temp_dir=False)
+ """
+     code_cell = nbf.v4.new_code_cell(code)
+     cells.append(text_cell)
+     cells.append(code_cell)
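
The generator functions can also be exercised without the UI; a minimal sketch (the parameter values here are illustrative, not app defaults):

import nbformat as nbf

from utils.notebook_generator import create_install_libraries_cells, create_lora_config_cells

# Build a notebook from individual cell-builder calls, then write it out.
nb = nbf.v4.new_notebook()
create_install_libraries_cells(nb['cells'])
create_lora_config_cells(nb['cells'], r=6, alpha=8, dropout=0.05, bias="none")

with open("Finetuning_NoteBook.ipynb", "w") as f:
    nbf.write(nb, f)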