CultriX committed on
Commit
adbba5d
·
1 Parent(s): 23f7316

Upload MistralTrix.ipynb

Browse files

Thanks to

@mlabonne
(as it's a direct copy of his code, which can be found on his amazing GitHub page: https://github.com/mlabonne/llm-course)

The exact notebook used to train MistralTrix.

Files changed (1) hide show
  1. MistralTrix.ipynb +751 -0
MistralTrix.ipynb ADDED
@@ -0,0 +1,751 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "nbformat": 4,
3
+ "nbformat_minor": 0,
4
+ "metadata": {
5
+ "colab": {
6
+ "provenance": [],
7
+ "machine_shape": "hm",
8
+ "gpuType": "A100"
9
+ },
10
+ "kernelspec": {
11
+ "name": "python3",
12
+ "display_name": "Python 3"
13
+ },
14
+ "language_info": {
15
+ "name": "python"
16
+ },
17
+ "widgets": {
18
+ "application/vnd.jupyter.widget-state+json": {
19
+ "22773c721a7c4221a9c14cd388461d4c": {
20
+ "model_module": "@jupyter-widgets/controls",
21
+ "model_name": "HBoxModel",
22
+ "model_module_version": "1.5.0",
23
+ "state": {
24
+ "_dom_classes": [],
25
+ "_model_module": "@jupyter-widgets/controls",
26
+ "_model_module_version": "1.5.0",
27
+ "_model_name": "HBoxModel",
28
+ "_view_count": null,
29
+ "_view_module": "@jupyter-widgets/controls",
30
+ "_view_module_version": "1.5.0",
31
+ "_view_name": "HBoxView",
32
+ "box_style": "",
33
+ "children": [
34
+ "IPY_MODEL_6b54841f5de1482694c360095dae3039",
35
+ "IPY_MODEL_448ccbc85e624ec3b3e71931a7ee4ff6",
36
+ "IPY_MODEL_173769f6f465485f8848a11bf269850b"
37
+ ],
38
+ "layout": "IPY_MODEL_60978b9b4e8348f0a71ce3e35c73bcff"
39
+ }
40
+ },
41
+ "6b54841f5de1482694c360095dae3039": {
42
+ "model_module": "@jupyter-widgets/controls",
43
+ "model_name": "HTMLModel",
44
+ "model_module_version": "1.5.0",
45
+ "state": {
46
+ "_dom_classes": [],
47
+ "_model_module": "@jupyter-widgets/controls",
48
+ "_model_module_version": "1.5.0",
49
+ "_model_name": "HTMLModel",
50
+ "_view_count": null,
51
+ "_view_module": "@jupyter-widgets/controls",
52
+ "_view_module_version": "1.5.0",
53
+ "_view_name": "HTMLView",
54
+ "description": "",
55
+ "description_tooltip": null,
56
+ "layout": "IPY_MODEL_6a38dcbaf4674b448329ac0a16587d2a",
57
+ "placeholder": "​",
58
+ "style": "IPY_MODEL_7eaeada2158e493189449af91f643553",
59
+ "value": "Loading checkpoint shards: 100%"
60
+ }
61
+ },
62
+ "448ccbc85e624ec3b3e71931a7ee4ff6": {
63
+ "model_module": "@jupyter-widgets/controls",
64
+ "model_name": "FloatProgressModel",
65
+ "model_module_version": "1.5.0",
66
+ "state": {
67
+ "_dom_classes": [],
68
+ "_model_module": "@jupyter-widgets/controls",
69
+ "_model_module_version": "1.5.0",
70
+ "_model_name": "FloatProgressModel",
71
+ "_view_count": null,
72
+ "_view_module": "@jupyter-widgets/controls",
73
+ "_view_module_version": "1.5.0",
74
+ "_view_name": "ProgressView",
75
+ "bar_style": "success",
76
+ "description": "",
77
+ "description_tooltip": null,
78
+ "layout": "IPY_MODEL_6e32854952b340008edca0139d3471d6",
79
+ "max": 3,
80
+ "min": 0,
81
+ "orientation": "horizontal",
82
+ "style": "IPY_MODEL_db6d7cfcdade4b4baa213a5d0abc07d7",
83
+ "value": 3
84
+ }
85
+ },
86
+ "173769f6f465485f8848a11bf269850b": {
87
+ "model_module": "@jupyter-widgets/controls",
88
+ "model_name": "HTMLModel",
89
+ "model_module_version": "1.5.0",
90
+ "state": {
91
+ "_dom_classes": [],
92
+ "_model_module": "@jupyter-widgets/controls",
93
+ "_model_module_version": "1.5.0",
94
+ "_model_name": "HTMLModel",
95
+ "_view_count": null,
96
+ "_view_module": "@jupyter-widgets/controls",
97
+ "_view_module_version": "1.5.0",
98
+ "_view_name": "HTMLView",
99
+ "description": "",
100
+ "description_tooltip": null,
101
+ "layout": "IPY_MODEL_9083029642744c43b7705532cbe0cf79",
102
+ "placeholder": "​",
103
+ "style": "IPY_MODEL_d028a98caa13425b907ceb513119006e",
104
+ "value": " 3/3 [00:11<00:00, 2.89s/it]"
105
+ }
106
+ },
107
+ "60978b9b4e8348f0a71ce3e35c73bcff": {
108
+ "model_module": "@jupyter-widgets/base",
109
+ "model_name": "LayoutModel",
110
+ "model_module_version": "1.2.0",
111
+ "state": {
112
+ "_model_module": "@jupyter-widgets/base",
113
+ "_model_module_version": "1.2.0",
114
+ "_model_name": "LayoutModel",
115
+ "_view_count": null,
116
+ "_view_module": "@jupyter-widgets/base",
117
+ "_view_module_version": "1.2.0",
118
+ "_view_name": "LayoutView",
119
+ "align_content": null,
120
+ "align_items": null,
121
+ "align_self": null,
122
+ "border": null,
123
+ "bottom": null,
124
+ "display": null,
125
+ "flex": null,
126
+ "flex_flow": null,
127
+ "grid_area": null,
128
+ "grid_auto_columns": null,
129
+ "grid_auto_flow": null,
130
+ "grid_auto_rows": null,
131
+ "grid_column": null,
132
+ "grid_gap": null,
133
+ "grid_row": null,
134
+ "grid_template_areas": null,
135
+ "grid_template_columns": null,
136
+ "grid_template_rows": null,
137
+ "height": null,
138
+ "justify_content": null,
139
+ "justify_items": null,
140
+ "left": null,
141
+ "margin": null,
142
+ "max_height": null,
143
+ "max_width": null,
144
+ "min_height": null,
145
+ "min_width": null,
146
+ "object_fit": null,
147
+ "object_position": null,
148
+ "order": null,
149
+ "overflow": null,
150
+ "overflow_x": null,
151
+ "overflow_y": null,
152
+ "padding": null,
153
+ "right": null,
154
+ "top": null,
155
+ "visibility": null,
156
+ "width": null
157
+ }
158
+ },
159
+ "6a38dcbaf4674b448329ac0a16587d2a": {
160
+ "model_module": "@jupyter-widgets/base",
161
+ "model_name": "LayoutModel",
162
+ "model_module_version": "1.2.0",
163
+ "state": {
164
+ "_model_module": "@jupyter-widgets/base",
165
+ "_model_module_version": "1.2.0",
166
+ "_model_name": "LayoutModel",
167
+ "_view_count": null,
168
+ "_view_module": "@jupyter-widgets/base",
169
+ "_view_module_version": "1.2.0",
170
+ "_view_name": "LayoutView",
171
+ "align_content": null,
172
+ "align_items": null,
173
+ "align_self": null,
174
+ "border": null,
175
+ "bottom": null,
176
+ "display": null,
177
+ "flex": null,
178
+ "flex_flow": null,
179
+ "grid_area": null,
180
+ "grid_auto_columns": null,
181
+ "grid_auto_flow": null,
182
+ "grid_auto_rows": null,
183
+ "grid_column": null,
184
+ "grid_gap": null,
185
+ "grid_row": null,
186
+ "grid_template_areas": null,
187
+ "grid_template_columns": null,
188
+ "grid_template_rows": null,
189
+ "height": null,
190
+ "justify_content": null,
191
+ "justify_items": null,
192
+ "left": null,
193
+ "margin": null,
194
+ "max_height": null,
195
+ "max_width": null,
196
+ "min_height": null,
197
+ "min_width": null,
198
+ "object_fit": null,
199
+ "object_position": null,
200
+ "order": null,
201
+ "overflow": null,
202
+ "overflow_x": null,
203
+ "overflow_y": null,
204
+ "padding": null,
205
+ "right": null,
206
+ "top": null,
207
+ "visibility": null,
208
+ "width": null
209
+ }
210
+ },
211
+ "7eaeada2158e493189449af91f643553": {
212
+ "model_module": "@jupyter-widgets/controls",
213
+ "model_name": "DescriptionStyleModel",
214
+ "model_module_version": "1.5.0",
215
+ "state": {
216
+ "_model_module": "@jupyter-widgets/controls",
217
+ "_model_module_version": "1.5.0",
218
+ "_model_name": "DescriptionStyleModel",
219
+ "_view_count": null,
220
+ "_view_module": "@jupyter-widgets/base",
221
+ "_view_module_version": "1.2.0",
222
+ "_view_name": "StyleView",
223
+ "description_width": ""
224
+ }
225
+ },
226
+ "6e32854952b340008edca0139d3471d6": {
227
+ "model_module": "@jupyter-widgets/base",
228
+ "model_name": "LayoutModel",
229
+ "model_module_version": "1.2.0",
230
+ "state": {
231
+ "_model_module": "@jupyter-widgets/base",
232
+ "_model_module_version": "1.2.0",
233
+ "_model_name": "LayoutModel",
234
+ "_view_count": null,
235
+ "_view_module": "@jupyter-widgets/base",
236
+ "_view_module_version": "1.2.0",
237
+ "_view_name": "LayoutView",
238
+ "align_content": null,
239
+ "align_items": null,
240
+ "align_self": null,
241
+ "border": null,
242
+ "bottom": null,
243
+ "display": null,
244
+ "flex": null,
245
+ "flex_flow": null,
246
+ "grid_area": null,
247
+ "grid_auto_columns": null,
248
+ "grid_auto_flow": null,
249
+ "grid_auto_rows": null,
250
+ "grid_column": null,
251
+ "grid_gap": null,
252
+ "grid_row": null,
253
+ "grid_template_areas": null,
254
+ "grid_template_columns": null,
255
+ "grid_template_rows": null,
256
+ "height": null,
257
+ "justify_content": null,
258
+ "justify_items": null,
259
+ "left": null,
260
+ "margin": null,
261
+ "max_height": null,
262
+ "max_width": null,
263
+ "min_height": null,
264
+ "min_width": null,
265
+ "object_fit": null,
266
+ "object_position": null,
267
+ "order": null,
268
+ "overflow": null,
269
+ "overflow_x": null,
270
+ "overflow_y": null,
271
+ "padding": null,
272
+ "right": null,
273
+ "top": null,
274
+ "visibility": null,
275
+ "width": null
276
+ }
277
+ },
278
+ "db6d7cfcdade4b4baa213a5d0abc07d7": {
279
+ "model_module": "@jupyter-widgets/controls",
280
+ "model_name": "ProgressStyleModel",
281
+ "model_module_version": "1.5.0",
282
+ "state": {
283
+ "_model_module": "@jupyter-widgets/controls",
284
+ "_model_module_version": "1.5.0",
285
+ "_model_name": "ProgressStyleModel",
286
+ "_view_count": null,
287
+ "_view_module": "@jupyter-widgets/base",
288
+ "_view_module_version": "1.2.0",
289
+ "_view_name": "StyleView",
290
+ "bar_color": null,
291
+ "description_width": ""
292
+ }
293
+ },
294
+ "9083029642744c43b7705532cbe0cf79": {
295
+ "model_module": "@jupyter-widgets/base",
296
+ "model_name": "LayoutModel",
297
+ "model_module_version": "1.2.0",
298
+ "state": {
299
+ "_model_module": "@jupyter-widgets/base",
300
+ "_model_module_version": "1.2.0",
301
+ "_model_name": "LayoutModel",
302
+ "_view_count": null,
303
+ "_view_module": "@jupyter-widgets/base",
304
+ "_view_module_version": "1.2.0",
305
+ "_view_name": "LayoutView",
306
+ "align_content": null,
307
+ "align_items": null,
308
+ "align_self": null,
309
+ "border": null,
310
+ "bottom": null,
311
+ "display": null,
312
+ "flex": null,
313
+ "flex_flow": null,
314
+ "grid_area": null,
315
+ "grid_auto_columns": null,
316
+ "grid_auto_flow": null,
317
+ "grid_auto_rows": null,
318
+ "grid_column": null,
319
+ "grid_gap": null,
320
+ "grid_row": null,
321
+ "grid_template_areas": null,
322
+ "grid_template_columns": null,
323
+ "grid_template_rows": null,
324
+ "height": null,
325
+ "justify_content": null,
326
+ "justify_items": null,
327
+ "left": null,
328
+ "margin": null,
329
+ "max_height": null,
330
+ "max_width": null,
331
+ "min_height": null,
332
+ "min_width": null,
333
+ "object_fit": null,
334
+ "object_position": null,
335
+ "order": null,
336
+ "overflow": null,
337
+ "overflow_x": null,
338
+ "overflow_y": null,
339
+ "padding": null,
340
+ "right": null,
341
+ "top": null,
342
+ "visibility": null,
343
+ "width": null
344
+ }
345
+ },
346
+ "d028a98caa13425b907ceb513119006e": {
347
+ "model_module": "@jupyter-widgets/controls",
348
+ "model_name": "DescriptionStyleModel",
349
+ "model_module_version": "1.5.0",
350
+ "state": {
351
+ "_model_module": "@jupyter-widgets/controls",
352
+ "_model_module_version": "1.5.0",
353
+ "_model_name": "DescriptionStyleModel",
354
+ "_view_count": null,
355
+ "_view_module": "@jupyter-widgets/base",
356
+ "_view_module_version": "1.2.0",
357
+ "_view_name": "StyleView",
358
+ "description_width": ""
359
+ }
360
+ }
361
+ }
362
+ },
363
+ "accelerator": "GPU"
364
+ },
365
+ "cells": [
366
+ {
367
+ "cell_type": "markdown",
368
+ "source": [
369
+ "# MistralTrix\n",
370
+ "\n",
371
+ "❤️ Created by [@maximelabonne](https://twitter.com/maximelabonne)."
372
+ ],
373
+ "metadata": {
374
+ "id": "Pa8905-YsHAn"
375
+ }
376
+ },
377
+ {
378
+ "cell_type": "code",
379
+ "execution_count": null,
380
+ "metadata": {
381
+ "id": "_zIBL8IssExG"
382
+ },
383
+ "outputs": [],
384
+ "source": [
385
+ "!pip install -q datasets trl peft bitsandbytes sentencepiece wandb"
386
+ ]
387
+ },
388
+ {
389
+ "cell_type": "code",
390
+ "source": [
391
+ "import os\n",
392
+ "import gc\n",
393
+ "import torch\n",
394
+ "\n",
395
+ "import transformers\n",
396
+ "from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, BitsAndBytesConfig\n",
397
+ "from datasets import load_dataset\n",
398
+ "from peft import LoraConfig, PeftModel, get_peft_model, prepare_model_for_kbit_training\n",
399
+ "from trl import DPOTrainer\n",
400
+ "import bitsandbytes as bnb\n",
401
+ "from google.colab import userdata\n",
402
+ "import wandb\n",
403
+ "\n",
404
+ "# Defined in the secrets tab in Google Colab\n",
405
+ "hf_token = userdata.get('huggingface')\n",
406
+ "wb_token = userdata.get('wandb')\n",
407
+ "wandb.login(key=wb_token)\n",
408
+ "\n",
409
+ "model_name = \"zyh3826/GML-Mistral-merged-v1\"\n",
410
+ "new_model = \"MistralTrix-v1\""
411
+ ],
412
+ "metadata": {
413
+ "colab": {
414
+ "base_uri": "https://localhost:8080/"
415
+ },
416
+ "id": "YpdkZsMNylvp",
417
+ "outputId": "6c2df234-1ce7-4cd2-a7e3-567e7536319f"
418
+ },
419
+ "execution_count": null,
420
+ "outputs": [
421
+ {
422
+ "output_type": "stream",
423
+ "name": "stderr",
424
+ "text": [
425
+ "/usr/local/lib/python3.10/dist-packages/trl/trainer/ppo_config.py:141: UserWarning: The `optimize_cuda_cache` arguement will be deprecated soon, please use `optimize_device_cache` instead.\n",
426
+ " warnings.warn(\n",
427
+ "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mmlabonne\u001b[0m. Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n",
428
+ "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m If you're specifying your api key in code, ensure this code is not shared publicly.\n",
429
+ "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m Consider setting the WANDB_API_KEY environment variable, or running `wandb login` from the command line.\n",
430
+ "\u001b[34m\u001b[1mwandb\u001b[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc\n"
431
+ ]
432
+ }
433
+ ]
434
+ },
435
+ {
436
+ "cell_type": "markdown",
437
+ "source": [
438
+ "## Format dataset"
439
+ ],
440
+ "metadata": {
441
+ "id": "d8CvUgROUDw-"
442
+ }
443
+ },
444
+ {
445
+ "cell_type": "code",
446
+ "source": [
447
+ "def chatml_format(example):\n",
448
+ " # Format system\n",
449
+ " if len(example['system']) > 0:\n",
450
+ " message = {\"role\": \"system\", \"content\": example['system']}\n",
451
+ " system = tokenizer.apply_chat_template([message], tokenize=False)\n",
452
+ " else:\n",
453
+ " system = \"\"\n",
454
+ "\n",
455
+ " # Format instruction\n",
456
+ " message = {\"role\": \"user\", \"content\": example['question']}\n",
457
+ " prompt = tokenizer.apply_chat_template([message], tokenize=False, add_generation_prompt=True)\n",
458
+ "\n",
459
+ " # Format chosen answer\n",
460
+ " chosen = example['chosen'] + \"<|im_end|>\\n\"\n",
461
+ "\n",
462
+ " # Format rejected answer\n",
463
+ " rejected = example['rejected'] + \"<|im_end|>\\n\"\n",
464
+ "\n",
465
+ " return {\n",
466
+ " \"prompt\": system + prompt,\n",
467
+ " \"chosen\": chosen,\n",
468
+ " \"rejected\": rejected,\n",
469
+ " }\n",
470
+ "\n",
471
+ "# Load dataset\n",
472
+ "dataset = load_dataset(\"Intel/orca_dpo_pairs\")['train']\n",
473
+ "\n",
474
+ "# Save columns\n",
475
+ "original_columns = dataset.column_names\n",
476
+ "\n",
477
+ "# Tokenizer\n",
478
+ "tokenizer = AutoTokenizer.from_pretrained(model_name)\n",
479
+ "tokenizer.pad_token = tokenizer.eos_token\n",
480
+ "tokenizer.padding_side = \"left\"\n",
481
+ "\n",
482
+ "# Format dataset\n",
483
+ "dataset = dataset.map(\n",
484
+ " chatml_format,\n",
485
+ " remove_columns=original_columns\n",
486
+ ")\n",
487
+ "\n",
488
+ "# Print sample\n",
489
+ "dataset[1]"
490
+ ],
491
+ "metadata": {
492
+ "colab": {
493
+ "base_uri": "https://localhost:8080/"
494
+ },
495
+ "id": "MCD77GZ60DOT",
496
+ "outputId": "c7c6773c-5545-4fee-bfa3-6fa6d69c0f3f"
497
+ },
498
+ "execution_count": null,
499
+ "outputs": [
500
+ {
501
+ "output_type": "stream",
502
+ "name": "stderr",
503
+ "text": [
504
+ "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
505
+ "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n"
506
+ ]
507
+ },
508
+ {
509
+ "output_type": "execute_result",
510
+ "data": {
511
+ "text/plain": [
512
+ "{'prompt': '<|im_start|>system\\nYou are an AI assistant. You will be given a task. You must generate a detailed and long answer.<|im_end|>\\n<|im_start|>user\\nGenerate an approximately fifteen-word sentence that describes all this data: Midsummer House eatType restaurant; Midsummer House food Chinese; Midsummer House priceRange moderate; Midsummer House customer rating 3 out of 5; Midsummer House near All Bar One<|im_end|>\\n<|im_start|>assistant\\n',\n",
513
+ " 'chosen': 'Midsummer House is a moderately priced Chinese restaurant with a 3/5 customer rating, located near All Bar One.<|im_end|>\\n',\n",
514
+ " 'rejected': ' Sure! Here\\'s a sentence that describes all the data you provided:\\n\\n\"Midsummer House is a moderately priced Chinese restaurant with a customer rating of 3 out of 5, located near All Bar One, offering a variety of delicious dishes.\"<|im_end|>\\n'}"
515
+ ]
516
+ },
517
+ "metadata": {},
518
+ "execution_count": 3
519
+ }
520
+ ]
521
+ },
522
+ {
523
+ "cell_type": "markdown",
524
+ "source": [
525
+ "## Train model with DPO"
526
+ ],
527
+ "metadata": {
528
+ "id": "DeT5eUK_UJgK"
529
+ }
530
+ },
531
+ {
532
+ "cell_type": "code",
533
+ "source": [
534
+ "# LoRA configuration\n",
535
+ "peft_config = LoraConfig(\n",
536
+ " r=16,\n",
537
+ " lora_alpha=16,\n",
538
+ " lora_dropout=0.05,\n",
539
+ " bias=\"none\",\n",
540
+ " task_type=\"CAUSAL_LM\",\n",
541
+ " target_modules=['k_proj', 'gate_proj', 'v_proj', 'up_proj', 'q_proj', 'o_proj', 'down_proj']\n",
542
+ ")\n",
543
+ "\n",
544
+ "# Model to fine-tune\n",
545
+ "model = AutoModelForCausalLM.from_pretrained(\n",
546
+ " model_name,\n",
547
+ " torch_dtype=torch.float16,\n",
548
+ " load_in_4bit=True\n",
549
+ ")\n",
550
+ "model.config.use_cache = False\n",
551
+ "\n",
552
+ "# Reference model\n",
553
+ "ref_model = AutoModelForCausalLM.from_pretrained(\n",
554
+ " model_name,\n",
555
+ " torch_dtype=torch.float16,\n",
556
+ " load_in_4bit=True\n",
557
+ ")\n",
558
+ "\n",
559
+ "# Training arguments\n",
560
+ "training_args = TrainingArguments(\n",
561
+ " per_device_train_batch_size=4,\n",
562
+ " gradient_accumulation_steps=4,\n",
563
+ " gradient_checkpointing=True,\n",
564
+ " learning_rate=5e-5,\n",
565
+ " lr_scheduler_type=\"cosine\",\n",
566
+ " max_steps=200,\n",
567
+ " save_strategy=\"no\",\n",
568
+ " logging_steps=1,\n",
569
+ " output_dir=new_model,\n",
570
+ " optim=\"paged_adamw_32bit\",\n",
571
+ " warmup_steps=100,\n",
572
+ " bf16=True,\n",
573
+ " report_to=\"wandb\",\n",
574
+ ")\n",
575
+ "\n",
576
+ "# Create DPO trainer\n",
577
+ "dpo_trainer = DPOTrainer(\n",
578
+ " model,\n",
579
+ " ref_model,\n",
580
+ " args=training_args,\n",
581
+ " train_dataset=dataset,\n",
582
+ " tokenizer=tokenizer,\n",
583
+ " peft_config=peft_config,\n",
584
+ " beta=0.1,\n",
585
+ " max_prompt_length=1024,\n",
586
+ " max_length=1536,\n",
587
+ ")\n",
588
+ "\n",
589
+ "# Fine-tune model with DPO\n",
590
+ "dpo_trainer.train()"
591
+ ],
592
+ "metadata": {
593
+ "id": "rKPILNOLR-aK"
594
+ },
595
+ "execution_count": null,
596
+ "outputs": []
597
+ },
598
+ {
599
+ "cell_type": "markdown",
600
+ "source": [
601
+ "## Upload model"
602
+ ],
603
+ "metadata": {
604
+ "id": "3LdhPpcrUM3H"
605
+ }
606
+ },
607
+ {
608
+ "cell_type": "code",
609
+ "source": [
610
+ "# Save artifacts\n",
611
+ "dpo_trainer.model.save_pretrained(\"final_checkpoint\")\n",
612
+ "tokenizer.save_pretrained(\"final_checkpoint\")\n",
613
+ "\n",
614
+ "# Flush memory\n",
615
+ "del dpo_trainer, model, ref_model\n",
616
+ "gc.collect()\n",
617
+ "torch.cuda.empty_cache()\n",
618
+ "\n",
619
+ "# Reload model in FP16 (instead of NF4)\n",
620
+ "base_model = AutoModelForCausalLM.from_pretrained(\n",
621
+ " model_name,\n",
622
+ " return_dict=True,\n",
623
+ " torch_dtype=torch.float16,\n",
624
+ ")\n",
625
+ "tokenizer = AutoTokenizer.from_pretrained(model_name)\n",
626
+ "\n",
627
+ "# Merge base model with the adapter\n",
628
+ "model = PeftModel.from_pretrained(base_model, \"final_checkpoint\")\n",
629
+ "model = model.merge_and_unload()\n",
630
+ "\n",
631
+ "# Save model and tokenizer\n",
632
+ "model.save_pretrained(new_model)\n",
633
+ "tokenizer.save_pretrained(new_model)\n",
634
+ "\n",
635
+ "# Push them to the HF Hub\n",
636
+ "model.push_to_hub(new_model, use_temp_dir=False, token=hf_token)\n",
637
+ "tokenizer.push_to_hub(new_model, use_temp_dir=False, token=hf_token)"
638
+ ],
639
+ "metadata": {
640
+ "id": "h7cIvxcTfBC4"
641
+ },
642
+ "execution_count": null,
643
+ "outputs": []
644
+ },
645
+ {
646
+ "cell_type": "markdown",
647
+ "source": [
648
+ "## Inference"
649
+ ],
650
+ "metadata": {
651
+ "id": "G6EFsmS4UOgV"
652
+ }
653
+ },
654
+ {
655
+ "cell_type": "code",
656
+ "source": [
657
+ "# Format prompt\n",
658
+ "message = [\n",
659
+ " {\"role\": \"system\", \"content\": \"You are a helpful assistant chatbot.\"},\n",
660
+ " {\"role\": \"user\", \"content\": \"What is a Large Language Model?\"}\n",
661
+ "]\n",
662
+ "tokenizer = AutoTokenizer.from_pretrained(new_model)\n",
663
+ "prompt = tokenizer.apply_chat_template(message, add_generation_prompt=True, tokenize=False)\n",
664
+ "\n",
665
+ "# Create pipeline\n",
666
+ "pipeline = transformers.pipeline(\n",
667
+ " \"text-generation\",\n",
668
+ " model=new_model,\n",
669
+ " tokenizer=tokenizer\n",
670
+ ")\n",
671
+ "\n",
672
+ "# Generate text\n",
673
+ "sequences = pipeline(\n",
674
+ " prompt,\n",
675
+ " do_sample=True,\n",
676
+ " temperature=0.7,\n",
677
+ " top_p=0.9,\n",
678
+ " num_return_sequences=1,\n",
679
+ " max_length=200,\n",
680
+ ")\n",
681
+ "print(sequences[0]['generated_text'])"
682
+ ],
683
+ "metadata": {
684
+ "colab": {
685
+ "base_uri": "https://localhost:8080/",
686
+ "height": 251,
687
+ "referenced_widgets": [
688
+ "22773c721a7c4221a9c14cd388461d4c",
689
+ "6b54841f5de1482694c360095dae3039",
690
+ "448ccbc85e624ec3b3e71931a7ee4ff6",
691
+ "173769f6f465485f8848a11bf269850b",
692
+ "60978b9b4e8348f0a71ce3e35c73bcff",
693
+ "6a38dcbaf4674b448329ac0a16587d2a",
694
+ "7eaeada2158e493189449af91f643553",
695
+ "6e32854952b340008edca0139d3471d6",
696
+ "db6d7cfcdade4b4baa213a5d0abc07d7",
697
+ "9083029642744c43b7705532cbe0cf79",
698
+ "d028a98caa13425b907ceb513119006e"
699
+ ]
700
+ },
701
+ "id": "LAEUZFjvlJOv",
702
+ "outputId": "9b5720c7-49ef-45c7-e5a7-f38d64899b1e"
703
+ },
704
+ "execution_count": null,
705
+ "outputs": [
706
+ {
707
+ "output_type": "stream",
708
+ "name": "stderr",
709
+ "text": [
710
+ "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n"
711
+ ]
712
+ },
713
+ {
714
+ "output_type": "display_data",
715
+ "data": {
716
+ "text/plain": [
717
+ "Loading checkpoint shards: 0%| | 0/3 [00:00<?, ?it/s]"
718
+ ],
719
+ "application/vnd.jupyter.widget-view+json": {
720
+ "version_major": 2,
721
+ "version_minor": 0,
722
+ "model_id": "22773c721a7c4221a9c14cd388461d4c"
723
+ }
724
+ },
725
+ "metadata": {}
726
+ },
727
+ {
728
+ "output_type": "stream",
729
+ "name": "stderr",
730
+ "text": [
731
+ "/usr/local/lib/python3.10/dist-packages/transformers/generation/utils.py:1473: UserWarning: You have modified the pretrained model configuration to control generation. This is a deprecated strategy to control generation and will be removed soon, in a future version. Please use and modify the model generation configuration (see https://huggingface.co/docs/transformers/generation_strategies#default-text-generation-configuration )\n",
732
+ " warnings.warn(\n",
733
+ "Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.\n"
734
+ ]
735
+ },
736
+ {
737
+ "output_type": "stream",
738
+ "name": "stdout",
739
+ "text": [
740
+ "<|im_start|>system\n",
741
+ "You are a helpful assistant chatbot.<|im_end|>\n",
742
+ "<|im_start|>user\n",
743
+ "What is a Large Language Model?<|im_end|>\n",
744
+ "<|im_start|>assistant\n",
745
+ "A large language model is a type of artificial intelligence (AI) system that has been trained on vast amounts of text data. These models are designed to understand and generate human language, allowing them to perform various natural language processing tasks, such as text generation, language translation, and question answering. Large language models typically use deep learning techniques, like recurrent neural networks (RNNs) or transformers, to learn patterns and relationships in the data, enabling them to generate coherent and contextually relevant responses. The size of these models, in terms of the number of parameters and the volume of data they are trained on, plays a significant role in their ability to comprehend and produce complex language structures.\n"
746
+ ]
747
+ }
748
+ ]
749
+ }
750
+ ]
751
+ }