File size: 133,240 Bytes
e0444cd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Requirement already satisfied: huggingface_hub in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (0.23.3)\n",
      "Requirement already satisfied: filelock in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from huggingface_hub) (3.14.0)\n",
      "Requirement already satisfied: fsspec>=2023.5.0 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from huggingface_hub) (2024.3.1)\n",
      "Requirement already satisfied: packaging>=20.9 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from huggingface_hub) (23.2)\n",
      "Requirement already satisfied: pyyaml>=5.1 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from huggingface_hub) (6.0.1)\n",
      "Requirement already satisfied: requests in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from huggingface_hub) (2.32.3)\n",
      "Requirement already satisfied: tqdm>=4.42.1 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from huggingface_hub) (4.66.4)\n",
      "Requirement already satisfied: typing-extensions>=3.7.4.3 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from huggingface_hub) (4.12.0rc1)\n",
      "Requirement already satisfied: charset-normalizer<4,>=2 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from requests->huggingface_hub) (3.3.2)\n",
      "Requirement already satisfied: idna<4,>=2.5 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from requests->huggingface_hub) (3.7)\n",
      "Requirement already satisfied: urllib3<3,>=1.21.1 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from requests->huggingface_hub) (2.2.1)\n",
      "Requirement already satisfied: certifi>=2017.4.17 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from requests->huggingface_hub) (2024.6.2)\n",
      "Requirement already satisfied: datasets in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (2.19.2)\n",
      "Requirement already satisfied: peft in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (0.11.1)\n",
      "Requirement already satisfied: transformers[torch] in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (4.41.2)\n",
      "Requirement already satisfied: filelock in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from datasets) (3.14.0)\n",
      "Requirement already satisfied: numpy>=1.17 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from datasets) (1.25.0)\n",
      "Requirement already satisfied: pyarrow>=12.0.0 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from datasets) (16.1.0)\n",
      "Requirement already satisfied: pyarrow-hotfix in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from datasets) (0.6)\n",
      "Requirement already satisfied: dill<0.3.9,>=0.3.0 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from datasets) (0.3.8)\n",
      "Requirement already satisfied: pandas in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from datasets) (2.2.2)\n",
      "Requirement already satisfied: requests>=2.32.1 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from datasets) (2.32.3)\n",
      "Requirement already satisfied: tqdm>=4.62.1 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from datasets) (4.66.4)\n",
      "Requirement already satisfied: xxhash in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from datasets) (3.4.1)\n",
      "Requirement already satisfied: multiprocess in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from datasets) (0.70.16)\n",
      "Requirement already satisfied: fsspec<=2024.3.1,>=2023.1.0 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from fsspec[http]<=2024.3.1,>=2023.1.0->datasets) (2024.3.1)\n",
      "Requirement already satisfied: aiohttp in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from datasets) (3.9.5)\n",
      "Requirement already satisfied: huggingface-hub>=0.21.2 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from datasets) (0.23.3)\n",
      "Requirement already satisfied: packaging in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from datasets) (23.2)\n",
      "Requirement already satisfied: pyyaml>=5.1 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from datasets) (6.0.1)\n",
      "Requirement already satisfied: psutil in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from peft) (5.9.0)\n",
      "Requirement already satisfied: torch>=1.13.0 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from peft) (1.13.1)\n",
      "Requirement already satisfied: accelerate>=0.21.0 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from peft) (0.30.1)\n",
      "Requirement already satisfied: safetensors in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from peft) (0.4.3)\n",
      "Requirement already satisfied: regex!=2019.12.17 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from transformers[torch]) (2024.5.15)\n",
      "Requirement already satisfied: tokenizers<0.20,>=0.19 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from transformers[torch]) (0.19.1)\n",
      "Requirement already satisfied: aiosignal>=1.1.2 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from aiohttp->datasets) (1.3.1)\n",
      "Requirement already satisfied: attrs>=17.3.0 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from aiohttp->datasets) (23.2.0)\n",
      "Requirement already satisfied: frozenlist>=1.1.1 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from aiohttp->datasets) (1.4.1)\n",
      "Requirement already satisfied: multidict<7.0,>=4.5 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from aiohttp->datasets) (6.0.5)\n",
      "Requirement already satisfied: yarl<2.0,>=1.0 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from aiohttp->datasets) (1.9.4)\n",
      "Requirement already satisfied: async-timeout<5.0,>=4.0 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from aiohttp->datasets) (4.0.3)\n",
      "Requirement already satisfied: typing-extensions>=3.7.4.3 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from huggingface-hub>=0.21.2->datasets) (4.12.0rc1)\n",
      "Requirement already satisfied: charset-normalizer<4,>=2 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from requests>=2.32.1->datasets) (3.3.2)\n",
      "Requirement already satisfied: idna<4,>=2.5 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from requests>=2.32.1->datasets) (3.7)\n",
      "Requirement already satisfied: urllib3<3,>=1.21.1 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from requests>=2.32.1->datasets) (2.2.1)\n",
      "Requirement already satisfied: certifi>=2017.4.17 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from requests>=2.32.1->datasets) (2024.6.2)\n",
      "Requirement already satisfied: nvidia-cuda-runtime-cu11==11.7.99 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from torch>=1.13.0->peft) (11.7.99)\n",
      "Requirement already satisfied: nvidia-cudnn-cu11==8.5.0.96 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from torch>=1.13.0->peft) (8.5.0.96)\n",
      "Requirement already satisfied: nvidia-cublas-cu11==11.10.3.66 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from torch>=1.13.0->peft) (11.10.3.66)\n",
      "Requirement already satisfied: nvidia-cuda-nvrtc-cu11==11.7.99 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from torch>=1.13.0->peft) (11.7.99)\n",
      "Requirement already satisfied: setuptools in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from nvidia-cublas-cu11==11.10.3.66->torch>=1.13.0->peft) (69.5.1)\n",
      "Requirement already satisfied: wheel in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from nvidia-cublas-cu11==11.10.3.66->torch>=1.13.0->peft) (0.43.0)\n",
      "Requirement already satisfied: python-dateutil>=2.8.2 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from pandas->datasets) (2.9.0.post0)\n",
      "Requirement already satisfied: pytz>=2020.1 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from pandas->datasets) (2024.1)\n",
      "Requirement already satisfied: tzdata>=2022.7 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from pandas->datasets) (2024.1)\n",
      "Requirement already satisfied: six>=1.5 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from python-dateutil>=2.8.2->pandas->datasets) (1.16.0)\n",
      "Requirement already satisfied: flash-attn in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (2.5.9.post1)\n",
      "Requirement already satisfied: torch in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from flash-attn) (1.13.1)\n",
      "Requirement already satisfied: einops in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from flash-attn) (0.8.0)\n",
      "Requirement already satisfied: typing-extensions in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from torch->flash-attn) (4.12.0rc1)\n",
      "Requirement already satisfied: nvidia-cuda-runtime-cu11==11.7.99 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from torch->flash-attn) (11.7.99)\n",
      "Requirement already satisfied: nvidia-cudnn-cu11==8.5.0.96 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from torch->flash-attn) (8.5.0.96)\n",
      "Requirement already satisfied: nvidia-cublas-cu11==11.10.3.66 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from torch->flash-attn) (11.10.3.66)\n",
      "Requirement already satisfied: nvidia-cuda-nvrtc-cu11==11.7.99 in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from torch->flash-attn) (11.7.99)\n",
      "Requirement already satisfied: setuptools in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from nvidia-cublas-cu11==11.10.3.66->torch->flash-attn) (69.5.1)\n",
      "Requirement already satisfied: wheel in /home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages (from nvidia-cublas-cu11==11.10.3.66->torch->flash-attn) (0.43.0)\n"
     ]
    }
   ],
   "source": [
    "!pip install huggingface_hub\n",
    "!pip install -U datasets peft transformers[torch]\n",
    "!pip install -q bitsandbytes trl accelerate\n",
    "!pip install flash-attn --no-build-isolation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "import re\n",
    "from pprint import pprint\n",
    " \n",
    "import pandas as pd\n",
    "import torch\n",
    "from datasets import Dataset, load_dataset\n",
    "from huggingface_hub import notebook_login\n",
    "from peft import LoraConfig, PeftModel\n",
    "from transformers import (\n",
    "    AutoModelForCausalLM,\n",
    "    AutoTokenizer,\n",
    "    BitsAndBytesConfig,\n",
    "    TrainingArguments,\n",
    ")\n",
    "from trl import SFTTrainer\n",
    "import re"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "torch.cuda.set_per_process_memory_fraction(0.8) "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "66d43524d1a04d309785e243f6f016a8",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "VBox(children=(HTML(value='<center> <img\\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "notebook_login()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "MODEL_NAME = \"meta-llama/Meta-Llama-3-8B-Instruct\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "data = load_dataset(\"AI-4-Health/merged-datasets\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# data['train']['filename']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "def create_model_and_tokenizer():\n",
    "    \"\"\"Load MODEL_NAME as a 4-bit NF4-quantized causal LM plus its tokenizer.\n",
    "\n",
    "    Reads the module-level MODEL_NAME constant for both the model and the\n",
    "    tokenizer, so the two always come from the same Hub repository.\n",
    "\n",
    "    Returns:\n",
    "        tuple: ``(model, tokenizer)``. The tokenizer uses its EOS token as\n",
    "        the pad token and pads on the right.\n",
    "    \"\"\"\n",
    "    # 4-bit NF4 weights with float16 compute.\n",
    "    bnb_config = BitsAndBytesConfig(\n",
    "        load_in_4bit=True,\n",
    "        bnb_4bit_quant_type=\"nf4\",\n",
    "        bnb_4bit_compute_dtype=torch.float16,\n",
    "    )\n",
    "\n",
    "    # trust_remote_code is deliberately left at its default (False): the\n",
    "    # Llama architecture is implemented inside transformers, so no\n",
    "    # Hub-hosted modelling code has to be executed to load this checkpoint.\n",
    "    model = AutoModelForCausalLM.from_pretrained(\n",
    "        MODEL_NAME,\n",
    "        use_safetensors=True,\n",
    "        quantization_config=bnb_config,\n",
    "        device_map=\"auto\",\n",
    "    )\n",
    "\n",
    "    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True)\n",
    "    # Reuse EOS for padding (presumably because the checkpoint defines no\n",
    "    # dedicated pad token - confirm against the tokenizer config).\n",
    "    tokenizer.pad_token = tokenizer.eos_token\n",
    "    tokenizer.padding_side = \"right\"\n",
    "\n",
    "    return model, tokenizer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "1ef29ad31f1d4e4b83ccce696acd4819",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "We've detected an older driver with an RTX 4000 series GPU. These drivers have issues with P2P. This can affect the multi-gpu inference when using accelerate device_map.Please make sure to update your driver to the latest version which resolves this.\n",
      "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n"
     ]
    }
   ],
   "source": [
    "model, tokenizer = create_model_and_tokenizer()\n",
    "model.config.use_cache = False"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "# model.gradient_checkpointing_enable()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "\"\\n==== Front\\nKnee Surg Sports Traumatol Arthrosc\\nKnee Surg Sports Traumatol Arthrosc\\nKnee Surgery, Sports Traumatology, Arthroscopy\\n0942-2056\\n1433-7347\\nSpringer Berlin Heidelberg Berlin/Heidelberg\\n\\n32968845\\n6290\\n10.1007/s00167-020-06290-0\\nSports Traumatology\\nBilateral Looser zones or pseudofractures in the anteromedial tibia as a component of medial tibial stress syndrome in athletes\\nStürznickel Julian 1\\nJandl Nico Maximilian 12\\nDelsmann Maximilian M. 1\\nvon Vopelius Emil 1\\nBarvencik Florian 1\\nhttp://orcid.org/0000-0003-2382-8348\\nAmling Michael amling@uke.de\\n\\n1\\nUeblacker Peter 13\\nRolvien Tim 12\\nOheim Ralf 1\\n1 grid.13648.38 0000 0001 2180 3484 Department of Osteology and Biomechanics, University Medical Center Hamburg-Eppendorf, Lottestraße 59, 22529 Hamburg, Germany\\n2 grid.13648.38 0000 0001 2180 3484 Department of Orthopedics, University Medical Center Hamburg-Eppendorf, Hamburg, Germany\\n3 Orthopedics and Sports Medicine, Munich, Germany\\n23 9 2020\\n23 9 2020\\n2021\\n29 5 16441650\\n5 6 2020\\n14 9 2020\\n© The Author(s) 2020\\nhttps://creativecommons.org/licenses/by/4.0/ Open AccessThis article is licensed under a Creative Commons Attribution 4.0 International License, which permits use, sharing, adaptation, distribution and reproduction in any medium or format, as long as you give appropriate credit to the original author(s) and the source, provide a link to the Creative Commons licence, and indicate if changes were made. The images or other third party material in this article are included in the article's Creative Commons licence, unless indicated otherwise in a credit line to the material. If material is not included in the article's Creative Commons licence and your intended use is not permitted by statutory regulation or exceeds the permitted use, you will need to obtain permission directly from the copyright holder. 
To view a copy of this licence, visit http://creativecommons.org/licenses/by/4.0/.\\nPurpose\\n\\nMedial tibial stress syndrome (MTSS) represents a common diagnosis in individuals exposed to repetitive high-stress loads affecting the lower limb, e.g., high-performance athletes. However, the diagnostic approach and therapeutic regimens are not well established.\\n\\nMethods\\n\\nNine patients, diagnosed as MTSS, were analyzed by a comprehensive skeletal analysis including laboratory bone turnover parameters, dual-energy X-Ray absorptiometry (DXA), and high-resolution peripheral quantitative computed tomography (HR-pQCT).\\n\\nResults\\n\\nIn 4/9 patients, bilateral pseudofractures were detected in the mid-shaft tibia. These patients had significantly lower levels of 25-hydroxycholecalciferol compared to patients with MTSS but similar levels of bone turnover parameters. Interestingly, the skeletal assessment revealed significantly higher bone mineral density (BMD) Z-scores at the hip (1.3\\u2009±\\u20090.6 vs. −\\u20090.7\\u2009±\\u20090.5, p\\u2009=\\u20090.013) in patients with pseudofractures and a trend towards higher bone microarchitecture parameters measured by HR-pQCT at the distal tibia. Vitamin D supplementation restored the calcium-homeostasis in all patients. Combined with weight-bearing as tolerated, pseudofractures healed in all patients and return to competition was achieved.\\n\\nConclusion\\n\\nIn conclusion, deficient vitamin D levels may lead to pseudofractures due to localized deterioration of mineralization, representing a pivotal component of MTSS in athletes with increased repetitive mechanical loading of the lower limbs. Moreover, the manifestation of pseudofractures is not a consequence of an altered BMD nor microarchitecture but appears in patients with exercise-induced BMD increase in combination with reduced 25-OH-D levels. 
The screening of MTSS patients for pseudofractures is crucial for the initiation of an appropriate treatment such as vitamin D supplementation to prevent a prolonged course of healing or recurrence.\\n\\nLevel of evidence\\n\\nIII.\\n\\nElectronic supplementary material\\n\\nThe online version of this article (10.1007/s00167-020-06290-0) contains supplementary material, which is available to authorized users.\\n\\nKeyword\\n\\nPseudofracture\\nVitamin D\\nLooser zone\\nAthlete\\nMedial tibial stress syndrome (MTSS)\\nUniversitätsklinikum Hamburg-Eppendorf (UKE) (5411)Open Access funding enabled and organized by Projekt DEAL.\\n\\nissue-copyright-statement© European Society of Sports Traumatology, Knee Surgery, Arthroscopy (ESSKA) 2021\\n==== Body\\nIntroduction\\n\\nMedial tibial stress syndrome (MTSS) or shin splints are stress-induced injuries described by diffuse (≥\\u20095\\xa0cm) pain of the (postero-)medial tibia [19, 32]. It is a common injury especially in athletes exposed to a repetitive load of the lower limbs [1, 19, 28] and may display osseous signal alterations in magnetic resonance imaging (MRI) [2, 9] or translucent bone structures in radiographs and/or computed tomography (CT) [10]. There are a clinical overlap and ongoing variability of terminology in stress injuries, MTSS, pseudofractures, and stress fractures. Whereas stress injuries serve as an umbrella term for all load-induced lesions, MTSS, pseudofractures, and stress fractures represent distinct entities [25].\\n\\nThe pathophysiology of MTSS is still under debate with recent results suggesting biomechanical factors [3] and a disbalance of local bone remodeling with subsequent failure to repair load-induced microdamage [1, 11, 31, 32]. 
In most cases, clinical examination provides sufficient information to diagnose MTSS but especially when symptoms are prolonged or not characteristic, more advanced lesions and differential diagnoses (i.e., pseudofractures or stress fractures) need to be evaluated in more detail by the use of conventional imaging such as MRI/CT [30].\\n\\nPseudofractures are defined as local, radiolucent cortical defects found in patients with severe osteomalacia, caused by hereditary (e.g., X-linked hypophosphatemia) [4, 12] or in patients with severe vitamin D deficiency [13] which may occur bilateral or multilocular [15]. Supplementation of vitamin D is crucial and leads to normalization of clinical symptoms in most cases [7, 13]. In the context of repetitive high impact in athletes with underlying vitamin D deficiency, pseudofractures represent an important differential diagnosis in athletes with prolonged tibial pain [27]. As the treatment consists predominantly of establishing sufficient vitamin D levels, the detection of such lesions is of paramount importance to optimize patient outcomes and sustain physical activity in the long-term.\\n\\nNine patients presented, initially diagnosed as bilateral MTSS, of whom four had additional bilateral anterior mid-shaft pseudofractures. The aim of this study was to evaluate the specific differences regarding bone density, microstructure and turnover in MTSS patients with and without pseudofractures. The assessment included biochemical analysis (n\\u2009=\\u20099), bone densitometry via dual-energy X-ray absorptiometry (DXA; n\\u2009=\\u20099), and high-resolution peripheral quantitative computer tomography (HR-pQCT; n\\u2009=\\u20093).\\n\\nMaterials and methods\\n\\nWritten informed consent of the patients or the respective legal representatives was obtained in all cases. 
This anonymized, retrospective chart review was performed in line with the rules of the local ethics committee (Ärztekammer Hamburg, Germany) and is in accordance with the Declaration of Helsinki. Nine patients presented who were diagnosed as bilateral MTSS but resistant to therapy. All had a history of intense and competitive physical activity. We examined these patients in our specialized outpatient clinic for musculoskeletal disorders. Diagnosis of additional pseudofractures was established by radiographs, CT and/or MRI. Patients with detected bilateral pseudofractures were compared to patients with bilateral MTSS and no (uni- or bi-lateral) pseudofractures. Body mass and height were measured in all patients before DXA measurement.\\n\\nBlood samples were collected at the initial presentation to analyze markers of bone formation (bone-specific alkaline phosphatase (BAP) and osteocalcin) and bone resorption (deoxypyridinoline/crea (Dpd)). Furthermore, serum parameters of calcium metabolism (calcium, phosphate, parathyroid hormone (PTH), alkaline phosphatase (ALP), 25-hydroxycholecalciferol-levels (25-OH-D)) were assessed.\\n\\nBone mineral density (BMD) was assessed via dual-energy X-ray absorptiometry (DXA; Lunar iDXA, GE Healthcare, Madison, WI, USA) at both spine and hip. Bone microarchitecture and volumetric BMD (vBMD) was analyzed at both distal tibia and radius according to our published protocol [18] by high-resolution peripheral quantitative computer tomography (HR-pQCT; XtremeCT, Scanco Medical, Brütisellen, Switzerland) and compared to an age- and sex-matched reference values [5].\\n\\nStatistical analysis\\n\\nThe used software for statistical analysis was SPSS® 26 (version 26.0, IBM, Armonk, New York, USA) and GraphPad Prism® (version 7.0, GraphPad Software, La Jolla, CA). If not specified, the presented values are mean\\u2009±\\u2009standard deviation (SD). Evaluation of normality of data distribution was performed by the Shapiro–Wilk test. 
Data of the two groups were tested for significance using Student’s t test for normality distributed data and Mann–Whitney U test for non-normally distributed data. p values of\\u2009<\\u20090.05 were considered as statistically significant.\\n\\nResults\\n\\nPatient characteristics are reported in Table 1. Nine patients, mostly female (7/9, Fig.\\xa01a) and in early adulthood (22.3\\u2009±\\u20097.5\\xa0years, Table 1), were included suffering from bilateral pain of the tibia. In 4/9 cases, analyzes of radiographs revealed bilateral pseudofractures of the tibia (Fig.\\xa01b), whereas bilateral MTSS without pseudofracture was apparent in the remaining 5/9 patients. Between patients with or without pseudofractures, a trend towards higher body mass index values was revealed in those with pseudofractures. Regarding the performed sports disciplines, track and field were the most prevalent (6/9). According to patients’ reports and available files of medical history, 4/9 patients had a history of fractures. Time from onset of symptoms to diagnosis was 18.2\\u2009±\\u200912.8\\xa0months and did not differ significantly between groups (Fig.\\xa01c). Detection of pseudofractures was achieved by different imaging techniques, e.g., radiographs (Fig.\\xa01d), as well as CT and MRI (Fig.\\xa01e), which were performed in athletes with prolonged symptoms (Fig.\\xa02). After balancing bone metabolism, the establishment of sufficient vitamin D levels (i. e., 25-OH-D\\u2009≥\\u200930\\xa0µg/L), additive oral calcium supplementation (Suppl. 
Figure 1), non-elastic horizontal tape above the ankle distal to the muscle-to-tendon interface and transient reduction of physical activity, clinical symptoms were absent, and all patients returned to competition.Table 1 Group characteristics of patients\\n\\nVariable\\tPseudofractures (n\\u2009=\\u20094)\\tMTSS (n\\u2009=\\u20095)\\tp\\t\\nSex (f/m)\\t3/1\\t4/1\\t–\\t\\nAge (years)\\t21.0\\u2009±\\u20095.0\\t23.4\\u2009±\\u20099.6\\tn.s\\t\\nHeight (cm)\\t173.9\\u2009±\\u20092.2\\t172.9\\u2009±\\u20094.4\\tn.s\\t\\nWeight (kg)\\t79.4\\u2009±\\u20099.3\\t60.1\\u2009±\\u20095.1\\tn.s\\t\\nBMI (kg/m2)\\t26.2\\u2009±\\u20092.9\\t20.0\\u2009±\\u20090.9\\tn.s\\t\\nTime to diagnosis (months)\\t21.8\\u2009±\\u20098.3\\t15.4\\u2009±\\u20095.4\\tn.s\\t\\nHistory of fractures\\t1/4\\t3/5\\t–\\t\\nVitamin D (µg/L)\\t20.4\\u2009±\\u200912.4\\t40.6\\u2009±\\u20096.9\\t0.017\\t\\nZ-score spine\\t0.4\\u2009±\\u20091.2\\t−\\u20090.7\\u2009±\\u20091.8\\tn.s\\t\\nZ-score hip\\t1.3\\u2009±\\u20090.6\\t−\\u20090.7\\u2009±\\u20090.5\\t0.013\\t\\nIndividuals with MTSS and pseudofractures (Pseudofractures) and MTSS without pseudofractures (MTSS) were compared according to sex, age, morphometrics (height, weight, BMI), time from onset of clinical symptoms until diagnosis, history of fractures, and vitamin D levels at baseline. Significant values defined as p\\u2009<\\u20090.05 indicated in bold\\n\\nMTSS medial tibial stress syndrome, f female, m male, BMI body mass index\\n\\nFig. 1 Patient characteristics and representative radiographs of bilateral pseudofractures/Looser zones at anteromedial tibiae. a Sex distribution of patients presenting with pain at bilateral tibiae. b Distribution of MTSS with and without pseudofractures in the presented patients. c Time from onset (in months) of clinical symptoms until diagnosis was made did not differ between the two groups. d Lateral view of radiographs of Patient 1 revealing bilateral Looser zones at anterior tibiae. 
e Sagittal CT and MRI images of right (left panel) and left (right panel) tibiae of Patient 4 with Looser zones at anterior tibiae\\n\\nFig. 2 Diagnostic approach in patients with exercise-induced pain of the lower limbs. a Patients presenting with pain at the distal third of the tibia can be diagnosed as medial tibial stress syndrome (MTSS) if the criteria are met (green arrows). In other cases, differential diagnoses (see asterisk) should be evaluated by appropriate diagnostic approaches and treated accordingly, if applicable (red arrows). b In patients with suspected MTSS and prolonged symptoms despite receiving treatment, magnetic resonance imaging (MRI) scan should be obtained. After confirmation of MTSS (green arrow), MRI images should be evaluated for additional cortical lesions like pseudofractures. If results from MRI show additional (cortical) lesions not compatible to pseudofractures (e.g., stress fractures) or other signal alterations not fulfilling criteria of MTSS, underlying pathologies need to be addressed separately (red arrows). *Potential differential diagnoses include such as exertional compartment syndrome, infections (skin infections or osteomyelitis) or stress fractures. MTSS medial tibial stress syndrome; MRI magnetic resonance imaging, CBCT cone-beam computed tomography, CT computed tomography\\n\\nBiochemical analysis of patients revealed significantly reduced levels of 25-OH-D in patients with pseudofractures compared to those with MTSS and no pseudofractures (Table 1). There was no significant difference in bone formation (BAP and osteocalcin) or of bone resorption markers between the two patient groups. Interestingly, 50% of the patients with pseudofractures had values above the reference range, indicating an increased bone resorption.\\n\\nAssessment of BMD via DXA in patients with pseudofractures compared to patients without revealed significantly higher Z-scores at the hip (Fig.\\xa03b). 
Parameters of bone microarchitecture assessed by HR-pQCT at the distal tibia in patient 4 with pseudofractures revealed a minor decrease in cortical volumetric BMD (Ct.BMD) and trabecular thickness (Tb.Th), whereas trabecular number (Tb.N), trabecular volumetric BMD (Tb.BMD) and cortical thickness (Ct.Th) were above reference values (Fig.\\xa03c). In contrast, two patients with MTSS and no pseudofractures had comparably higher values of Ct.BMD, but lower values of Tb.Th, Tb.N, Tb.BMD as well as Ct.Th (Fig.\\xa03c). Similar patterns were observed at the distal radius (Fig.\\xa03d). Furthermore, we performed cone-beam computed tomography (CBCT) in this patient to evaluate the lesion status, revealing a nearly completed consolidation after 8\\xa0weeks of intensified vitamin D supplementation and oral calcium gluconate supplementation, correlating to improved clinical symptoms (Fig.\\xa04).Fig. 3 Skeletal assessment of MTSS patients presenting with or without pseudofractures. a Assessment of bone mineral density (BMD) via dual-energy X-ray (DXA) at both spine and hip. Interestingly, patients with pseudofractures had no impairment of BMD but significantly higher Z-scores at the hip compared to patients missing pseudofractures. b Representative image of high-resolution peripheral quantitative CT (HR-pQCT) analysis of distal tibia of Patient 4. c Analysis of bone microarchitecture at both tibia and radius via HR-pQCT in Patients 4, 9 and 10, revealing higher values in 4/5 parameters in the patient with pseudofractures compared to MTSS. Values are given as percent of the reference median [5]\\n\\nFig. 4 Course of lesion healing in Patient 4 assessed by cone beam computed tomography (CBCT). a Assessment of skeletal status at an initial presentation by CBCT revealed bilateral Looser zones at anteromedial tibiae. b Follow-up of radiograph after 8\\xa0weeks of intensified vitamin D supplementation (i.e., 14\\xa0days of 20,000 I.U. per day followed by 20,000 I.U. 
per week) and oral calcium gluconate supplementation (i.e., 1000\\xa0mg per day for three months) showed nearly complete consolidation in CBCT correlating to improved clinical symptoms\\n\\nDiscussion\\n\\nThe most important finding of the present study was that bilateral pseudofractures pose a relevant component or comorbidity of MTSS in athletes with deficient vitamin D levels and (physiologically) increased BMD which is clinically relevant and should not be missed. It was further demonstrated that pseudofractures are not the result of systemically attenuated skeletal status. Of note, it was revealed by our skeletal assessment that BMD and bone microarchitecture parameters were even higher in patients with pseudofractures compared to those of patients missing pseudofractures. Based on these collective findings, we stress the pivotal role of sufficient vitamin D supplementation in athletes to prevent the development of or to improve the healing of pseudofractures of the tibia.\\n\\nIn line with previous studies, female athletes represented the majority of our study cohort [1, 20, 32] and the time from the initial onset of clinical symptoms until diagnosis was prolonged [9, 30]. Furthermore, patients with pseudofractures showed a trend towards higher BMI values, resulting in a greater mechanical impact on the anteromedial tibia and facilitating the development of pseudofractures. This effect has, to the best of our knowledge, not been described in patients suffering from pseudofractures, but was previously reported as a relevant factor for MTSS [20].\\n\\nThe underlying pathomechanism of pseudofractures is an insufficient mineralization of stress-induced microdamage due to osteomalacia [15, 22]. 
Though available histological studies of pseudofractures and MTSS are scarce and of limited quality, described characteristics of these biopsies underline an increase in remodeling with osteoid seams [11] and especially no complete fracture in these lesions [26].\\n\\nPatients with diagnosed pseudofractures had significantly lower levels of vitamin D compared to patients with MTSS and no pseudofractures, posing a risk factor for the development of insufficient mineralization with subsequent osteomalacia [14, 21] as well as an increased risk for the development of pseudofractures [7, 13, 27] and MTSS [24]. Moreover, insufficient vitamin D levels favor the development of stress injuries [29], stress fractures [23], as well as fractures in general [6].\\n\\nAssessment of BMD revealed no systemic reduction but significantly higher values in patients with pseudofractures compared to those with MTSS without pseudofractures. To our best knowledge, this finding has not been stated before and indicates that systemically intact bone status does not protect individuals from the development of local bone lesions, i.e., pseudofractures or MTSS. Furthermore, elevated BMD as a physiological response to increased mechanical load may even increase the risk for the development of pseudofractures in case of simultaneous vitamin D deficiency due to higher demand for mineral supply within the context of increased bone remodeling.\\n\\nPrevious studies of patients with MTSS have reported locally reduced BMD in affected tibiae [17], indicating increased remodeling taking place at these sites, as DXA measures mineralized tissue only. Consequently, the skeleton is more vulnerable to the development of pseudofractures in states of chronic vitamin D deficiency. 
Importantly, after normalization of clinical symptoms, BMD increased again and was re-established [16].\\n\\nThe assessment of bone microarchitecture in the patient with pseudofractures revealed superior parameters compared to patients’ with MTSS and no pseudofracture. Supporting the results of BMD analysis, no generally impaired bone microarchitecture compared to references was detected. Interestingly, a recent study described an impairment of trabecular bone microarchitecture in soldiers affected by bone stress injuries, yet no differentiation regarding the type of lesion or comparison to reference values was conducted [25]. However, in our study, Ct.BMD was the only reduced value in the patient with pseudofracture, indicating aggravated mineralization deterioration at the cortical site correlating to the development of Looser’s zones and corresponding to the reported decrease of mineralization at the lesions’ sites [17].\\n\\nIn these patients, the treatment consisted of vitamin D supplementation [13, 27], additional oral calcium supplementation in cases of diagnosed pseudofractures or secondary hyperparathyroidism, and weight-bearing as tolerated followed by a gradual increase with a return to training. By applying non-elastic horizontal tape above the ankle, a redirection of forces at the tendon-to-bone insertion is achieved with greater potential potential for rehabilitation of the bone. Clinical symptoms disappeared allowing a return to competition in all patients. As this causative treatment approach is easily accessible and cost-effective, it should not be missed, especially in athletes with prolonged pain at the (anteromedial) tibia. As in general, a high rate of recurrence [32] and, in particular, tibial stress fractures [8], is known for MTSS, vitamin D supplementation should be maintained according to the individual’s needs. 
However, further understanding of the underlying pathomechanisms is needed to elaborate, whether higher BMD values impose an additional risk factor under simultaneous conditions of vitamin D deficiency for the development of pseudofractures and not MTSS.\\n\\nConclusion\\n\\nIn conclusion, pseudofractures pose a relevant component of MTSS in athletes with prolonged pain at bilateral tibiae. The skeletal assessment revealed significantly lower values of vitamin D, significantly higher Z-scores at the hip as well as superior microarchitecture parameters in MTSS patients with pseudofractures compared to those without. The paramount importance of calcium homeostasis was highlighted, as supplementation of vitamin D and oral calcium gluconate led to the disappearance of clinical symptoms and pseudofracture consolidation. Consequently, a sufficient supplementation of vitamin D is highly recommended, especially in elite athletes, to prevent MTSS and/or pseudofractures.\\n\\nElectronic supplementary material\\n\\nBelow is the link to the electronic supplementary material.Supplemental Figure 1: Therapeutic approach for the establishment of calcium and bone turnover homeostasis to promote healing of MTSS and/or pseudofractures. Patients with no comorbidity for hypercalcemia receive vitamin D (25-OH-D) supplementation according to their current serum levels. All patients are advised to pay attention to adequate dietary calcium intake. 
Furthermore, in patients with detected pseudofractures or biochemical signs of secondary hyperparathyroidism, additional calcium supplementation is prescribed for 3\\xa0months with an analysis of serum calcium levels to avoid iatrogenic hypercalcemia (PDF 14 kb)\\n\\nAbbreviations\\n\\nMTSS Medial tibial stress syndrome\\n\\nDXA Dual-energy X-Ray absorptiometry\\n\\nHR-pQCT High-resolution peripheral quantitative computed tomography\\n\\n25-OH-D 25-Hydroxycholecalciferol\\n\\nBMD Bone mineral density\\n\\nMRI Magnetic resonance imaging\\n\\nCT Computed tomography\\n\\nBAP Bone-specific alkaline phosphatase\\n\\nDpd Deoxypyridinoline/crea\\n\\nPTH Parathyroid hormone\\n\\nALP Alkaline phosphatase\\n\\nvBMD Volumetric bone mineral density\\n\\nBMI Body mass index\\n\\nCt.BMD Cortical bone mineral density\\n\\nTb.Th Trabecular thickness\\n\\nTb.N Trabecular number\\n\\nTb.BMD Trabecular bone mineral density\\n\\nCt.Th Cortical thickness\\n\\nCBCT Cone beam computed tomography\\n\\nAcknowledgements\\n\\nThe authors have no further acknowledgements.\\n\\nAuthor contributions\\n\\nJS, MA, TR, RO participated in the study design. JS, RO conducted the study. Data was collected by JS, MMD, EV and NMJ. JS, MMD, TR, RO performed data analysis, which was interpreted by JS, MA, PU, TR, RO. Drafting of the manuscript was done by JS and TR and FB, MA, PU, RO further revised the manuscript content. All the authors read and approved the final manuscript.\\n\\nFunding\\n\\nOpen Access funding enabled and organized by Projekt DEAL. 
The authors received no specific funding for this work.\\n\\nCompliance with ethical standards\\n\\nConflict of interest\\n\\nAll authors declare that there is no conflict of interest.\\n\\nEthical approval\\n\\nThe authors declare that all procedures were in line with the local ethics committee and all procedures involving human participants were in accordance with the 1964 Helsinki Declaration and its later amendments.\\n\\nInformed consent\\n\\nThe authors declare that informed consent was obtained in all patients or from the family members.\\n\\nPublisher's Note\\n\\nSpringer Nature remains neutral with regard to jurisdictional claims in published maps and institutional affiliations.\\n==== Refs\\nReferences\\n\\n1. Arendt E Agel J Heikes C Griffiths H Stress injuries to bone in college athletes: a retrospective review of experience at a single institution Am J Sports Med 2003 31 959 968 10.1177/03635465030310063601 14623664\\n2. Arendt EA Griffiths HJ The use of MR imaging in the assessment and clinical management of stress reactions of bone in high-performance athletes Clin Sports Med 1997 16 291 306 10.1016/S0278-5919(05)70023-5 9238311\\n3. Becker J Nakajima M Wu WFW Factors contributing to medial tibial stress syndrome in runners: a prospective study Med Sci Sports Exerc 2018 50 2092 2100 10.1249/MSS.0000000000001674 29787473\\n4. Bhadada SK Pal R Dhiman V Alonso N Ralston SH Kaur S Adult hypophosphatasia with a novel ALPL mutation: report of an Indian kindred Bone Rep 2020 12 100247 10.1016/j.bonr.2020.100247 32025537\\n5. Burt LA Liang Z Sajobi TT Hanley DA Boyd SK Sex- and site-specific normative data curves for HR-pQCT J Bone Miner Res 2016 31 2041 2047 10.1002/jbmr.2873 27192388\\n6. Busse B Bale HA Zimmermann EA Panganiban B Barth HD Carriero A Vitamin D deficiency induces early signs of aging in human bone, increasing the risk of fracture Sci Transl Med 2013 5 193ra188 10.1126/scitranslmed.3006286\\n7. 
Clark F Simpson W Young JR Osteomalacia in immigrants from the Indian subcontinent in Newcastle upon Tyne Proc R Soc Med 1972 65 478 480 5083685\\n8. Ekstrand J Torstveit MK Stress fractures in elite male football players Scand J Med Sci Sports 2012 22 341 346 10.1111/j.1600-0838.2010.01171.x 20807388\\n9. Fredericson M Bergman AG Hoffman KL Dillingham MS Tibial stress reaction in runners. Correlation of clinical symptoms and scintigraphy with a new magnetic resonance imaging grading system Am J Sports Med 1995 23 472 481 10.1177/036354659502300418 7573660\\n10. Gaeta M Minutoli F Vinci S Salamone I D'Andrea L Bitto L High-resolution CT grading of tibial stress reactions in distance runners AJR Am J Roentgenol 2006 187 789 793 10.2214/AJR.05.0303 16928946\\n11. Johnell O Rausing A Wendeberg B Westlin N Morphological bone changes in shin splints Clin Orthop Relat Res 1982 167 180 184\\n12. Lecoq AL Brandi ML Linglart A Kamenicky P Management of X-linked hypophosphatemia in adults Metabolism 2020 103S 154049 10.1016/j.metabol.2019.154049 31863781\\n13. Lee C Lashari S Pseudofracture of the neck of femur secondary to osteomalacia J Bone Joint Surg Br 2007 89 956 958 10.1302/0301-620X.89B7.18425 17673594\\n14. Lips P Vitamin D deficiency and secondary hyperparathyroidism in the elderly: consequences for bone loss and fractures and therapeutic implications Endocr Rev 2001 22 477 501 10.1210/edrv.22.4.0437 11493580\\n15. Looser E Uber pathologische von Infraktionen und Callusbildungen bei Rachits und Osteomalcie und Knochenerkrankungen Zbl Chir 1920 47 1470\\n16. Magnusson HI Ahlborg HG Karlsson C Nyquist F Karlsson MK Low regional tibial bone density in athletes with medial tibial stress syndrome normalizes after recovery from symptoms Am J Sports Med 2003 31 596 600 10.1177/03635465030310042001 12860551\\n17. 
Magnusson HI Westlin NE Nyqvist F Gardsell P Seeman E Karlsson MK Abnormally decreased regional bone density in athletes with medial tibial stress syndrome Am J Sports Med 2001 29 712 715 10.1177/03635465010290060701 11734482\\n18. Milovanovic P Adamu U Simon MJ Rolvien T Djuric M Amling M Age- and sex-specific bone structure patterns portend bone fragility in radii and tibiae in relation to osteodensitometry: a high-resolution peripheral quantitative computed tomography study in 385 individuals J Gerontol A Biol Sci Med Sci 2015 70 1269 1275 10.1093/gerona/glv052 25934995\\n19. Moen MH Tol JL Weir A Steunebrink M De Winter TC Medial tibial stress syndrome: a critical review Sports Med 2009 39 523 546 10.2165/00007256-200939070-00002 19530750\\n20. Plisky MS Rauh MJ Heiderscheit B Underwood FB Tank RT Medial tibial stress syndrome in high school cross-country runners: incidence and risk factors J Orthop Sports Phys Ther 2007 37 40 47 10.2519/jospt.2007.2343 17366958\\n21. Priemel M von Domarus C Klatte TO Kessler S Schlie J Meier S Bone mineralization defects and vitamin D deficiency: histomorphometric analysis of iliac crest bone biopsies and circulating 25-hydroxyvitamin D in 675 patients J Bone Miner Res 2010 25 305 312 10.1359/jbmr.090728 19594303\\n22. Roberts SM Vogt EC Pseudofracture of the Tibia JBJS 1939 21 891 901\\n23. Ruohola JP Laaksi I Ylikomi T Haataja R Mattila VM Sahi T Association between serum 25(OH)D concentrations and bone stress fractures in Finnish young men J Bone Miner Res 2006 21 1483 1488 10.1359/jbmr.060607 16939407\\n24. Saxena A Fullem B Gerdesmeyer L Treatment of medial tibial stress syndrome with radial soundwave therapy in elite athletes: current evidence, report on two cases, and proposed treatment regimen J Foot Ankle Surg 2017 56 985 989 10.1053/j.jfas.2017.06.013 28842109\\n25. 
Schanda JE Kocijan R Resch H Baierl A Feichtinger X Mittermayr R Bone stress injuries are associated with differences in bone microarchitecture in male professional soldiers J Orthop Res 2019 37 2516 2523 10.1002/jor.24442 31410876\\n26. Schilcher J Bernhardsson M Aspenberg P Chronic anterior tibial stress fractures in athletes: no crack but intense remodeling Scand J Med Sci Sports 2019 29 1521 1528 10.1111/sms.13466 31102562\\n27. Simpson W Young JR Clark F Pseudofractures resembling stress fractures in Punjabi immigrants with osteomalacia Clin Radiol 1973 24 83 89 10.1016/S0009-9260(73)80122-9 4723500\\n28. Taunton JE Ryan MB Clement DB McKenzie DC Lloyd-Smith DR Zumbo BD A retrospective case-control analysis of 2002 running injuries Br J Sports Med 2002 36 95 101 10.1136/bjsm.36.2.95 11916889\\n29. Tenforde AS Kraus E Fredericson M Bone stress injuries in runners Phys Med Rehabil Clin N Am 2016 27 139 149 10.1016/j.pmr.2015.08.008 26616181\\n30. Winters M The diagnosis and management of medial tibial stress syndrome: an evidence update Unfallchirurg 2020 123 15 19 10.1007/s00113-019-0667-z 31098646\\n31. Winters M Burr DB van der Hoeven H Condon KW Bellemans J Moen MH Microcrack-associated bone remodeling is rarely observed in biopsies from athletes with medial tibial stress syndrome J Bone Miner Metab 2019 37 496 502 10.1007/s00774-018-0945-9 30066165\\n32. Yates B White S The incidence and risk factors in the development of medial tibial stress syndrome among naval recruits Am J Sports Med 2004 32 772 780 10.1177/0095399703258776 15090396\\n\\n\""
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data['train']['text'][0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "# data preprocessing\n",
    "def clean_text(text):\n",
    "    text = re.sub(r'http\\S+|www.\\S+', '', text)\n",
    "    text = text.replace('====', ' ')\n",
    "    text = text.replace('\\n', ' ') # replace newline with space\n",
    "    text = re.sub(r'\\[[0-9]*\\]', ' ', text)\n",
    "    text = re.sub(r'\\s+', ' ', text)\n",
    "    return text\n",
    "\n",
    "def preprocess_function(examples):\n",
    "    \"\"\"Clean every entry under the 'text' key and return the same object.\n",
    "\n",
    "    `examples` is indexed by 'text' and that value is iterated; it is\n",
    "    presumably the batch passed in by map(..., batched=True) -- confirm.\n",
    "    The 'text' entry is overwritten in place with the cleaned strings.\n",
    "    \"\"\"\n",
    "    cleaned = []\n",
    "    for raw in examples['text']:\n",
    "        cleaned.append(clean_text(raw))\n",
    "    examples['text'] = cleaned\n",
    "    return examples\n",
    "\n",
    "# Apply preprocess_function to data['train'] through map (called with\n",
    "# batched=True) and keep the mapped result as `dataset`.\n",
    "dataset = data['train'].map(preprocess_function, batched=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "\" Front Knee Surg Sports Traumatol Arthrosc Knee Surg Sports Traumatol Arthrosc Knee Surgery, Sports Traumatology, Arthroscopy 0942-2056 1433-7347 Springer Berlin Heidelberg Berlin/Heidelberg 32968845 6290 10.1007/s00167-020-06290-0 Sports Traumatology Bilateral Looser zones or pseudofractures in the anteromedial tibia as a component of medial tibial stress syndrome in athletes Stürznickel Julian 1 Jandl Nico Maximilian 12 Delsmann Maximilian M. 1 von Vopelius Emil 1 Barvencik Florian 1 Amling Michael amling@uke.de 1 Ueblacker Peter 13 Rolvien Tim 12 Oheim Ralf 1 1 grid.13648.38 0000 0001 2180 3484 Department of Osteology and Biomechanics, University Medical Center Hamburg-Eppendorf, Lottestraße 59, 22529 Hamburg, Germany 2 grid.13648.38 0000 0001 2180 3484 Department of Orthopedics, University Medical Center Hamburg-Eppendorf, Hamburg, Germany 3 Orthopedics and Sports Medicine, Munich, Germany 23 9 2020 23 9 2020 2021 29 5 16441650 5 6 2020 14 9 2020 © The Author(s) 2020 Open AccessThis article is licensed under a Creative Commons Attribution 4.0 International License, which permits use, sharing, adaptation, distribution and reproduction in any medium or format, as long as you give appropriate credit to the original author(s) and the source, provide a link to the Creative Commons licence, and indicate if changes were made. The images or other third party material in this article are included in the article's Creative Commons licence, unless indicated otherwise in a credit line to the material. If material is not included in the article's Creative Commons licence and your intended use is not permitted by statutory regulation or exceeds the permitted use, you will need to obtain permission directly from the copyright holder. To view a copy of this licence, visit Purpose Medial tibial stress syndrome (MTSS) represents a common diagnosis in individuals exposed to repetitive high-stress loads affecting the lower limb, e.g., high-performance athletes. 
However, the diagnostic approach and therapeutic regimens are not well established. Methods Nine patients, diagnosed as MTSS, were analyzed by a comprehensive skeletal analysis including laboratory bone turnover parameters, dual-energy X-Ray absorptiometry (DXA), and high-resolution peripheral quantitative computed tomography (HR-pQCT). Results In 4/9 patients, bilateral pseudofractures were detected in the mid-shaft tibia. These patients had significantly lower levels of 25-hydroxycholecalciferol compared to patients with MTSS but similar levels of bone turnover parameters. Interestingly, the skeletal assessment revealed significantly higher bone mineral density (BMD) Z-scores at the hip (1.3 ± 0.6 vs. − 0.7 ± 0.5, p = 0.013) in patients with pseudofractures and a trend towards higher bone microarchitecture parameters measured by HR-pQCT at the distal tibia. Vitamin D supplementation restored the calcium-homeostasis in all patients. Combined with weight-bearing as tolerated, pseudofractures healed in all patients and return to competition was achieved. Conclusion In conclusion, deficient vitamin D levels may lead to pseudofractures due to localized deterioration of mineralization, representing a pivotal component of MTSS in athletes with increased repetitive mechanical loading of the lower limbs. Moreover, the manifestation of pseudofractures is not a consequence of an altered BMD nor microarchitecture but appears in patients with exercise-induced BMD increase in combination with reduced 25-OH-D levels. The screening of MTSS patients for pseudofractures is crucial for the initiation of an appropriate treatment such as vitamin D supplementation to prevent a prolonged course of healing or recurrence. Level of evidence III. Electronic supplementary material The online version of this article (10.1007/s00167-020-06290-0) contains supplementary material, which is available to authorized users. 
Keyword Pseudofracture Vitamin D Looser zone Athlete Medial tibial stress syndrome (MTSS) Universitätsklinikum Hamburg-Eppendorf (UKE) (5411)Open Access funding enabled and organized by Projekt DEAL. issue-copyright-statement© European Society of Sports Traumatology, Knee Surgery, Arthroscopy (ESSKA) 2021 Body Introduction Medial tibial stress syndrome (MTSS) or shin splints are stress-induced injuries described by diffuse (≥ 5 cm) pain of the (postero-)medial tibia [19, 32]. It is a common injury especially in athletes exposed to a repetitive load of the lower limbs [1, 19, 28] and may display osseous signal alterations in magnetic resonance imaging (MRI) [2, 9] or translucent bone structures in radiographs and/or computed tomography (CT) . There are a clinical overlap and ongoing variability of terminology in stress injuries, MTSS, pseudofractures, and stress fractures. Whereas stress injuries serve as an umbrella term for all load-induced lesions, MTSS, pseudofractures, and stress fractures represent distinct entities . The pathophysiology of MTSS is still under debate with recent results suggesting biomechanical factors and a disbalance of local bone remodeling with subsequent failure to repair load-induced microdamage [1, 11, 31, 32]. In most cases, clinical examination provides sufficient information to diagnose MTSS but especially when symptoms are prolonged or not characteristic, more advanced lesions and differential diagnoses (i.e., pseudofractures or stress fractures) need to be evaluated in more detail by the use of conventional imaging such as MRI/CT . Pseudofractures are defined as local, radiolucent cortical defects found in patients with severe osteomalacia, caused by hereditary (e.g., X-linked hypophosphatemia) [4, 12] or in patients with severe vitamin D deficiency which may occur bilateral or multilocular . Supplementation of vitamin D is crucial and leads to normalization of clinical symptoms in most cases [7, 13]. 
In the context of repetitive high impact in athletes with underlying vitamin D deficiency, pseudofractures represent an important differential diagnosis in athletes with prolonged tibial pain . As the treatment consists predominantly of establishing sufficient vitamin D levels, the detection of such lesions is of paramount importance to optimize patient outcomes and sustain physical activity in the long-term. Nine patients presented, initially diagnosed as bilateral MTSS, of whom four had additional bilateral anterior mid-shaft pseudofractures. The aim of this study was to evaluate the specific differences regarding bone density, microstructure and turnover in MTSS patients with and without pseudofractures. The assessment included biochemical analysis (n = 9), bone densitometry via dual-energy X-ray absorptiometry (DXA; n = 9), and high-resolution peripheral quantitative computer tomography (HR-pQCT; n = 3). Materials and methods Written informed consent of the patients or the respective legal representatives was obtained in all cases. This anonymized, retrospective chart review was performed in line with the rules of the local ethics committee (Ärztekammer Hamburg, Germany) and is in accordance with the Declaration of Helsinki. Nine patients presented who were diagnosed as bilateral MTSS but resistant to therapy. All had a history of intense and competitive physical activity. We examined these patients in our specialized outpatient clinic for musculoskeletal disorders. Diagnosis of additional pseudofractures was established by radiographs, CT and/or MRI. Patients with detected bilateral pseudofractures were compared to patients with bilateral MTSS and no (uni- or bi-lateral) pseudofractures. Body mass and height were measured in all patients before DXA measurement. 
Blood samples were collected at the initial presentation to analyze markers of bone formation (bone-specific alkaline phosphatase (BAP) and osteocalcin) and bone resorption (deoxypyridinoline/crea (Dpd)). Furthermore, serum parameters of calcium metabolism (calcium, phosphate, parathyroid hormone (PTH), alkaline phosphatase (ALP), 25-hydroxycholecalciferol-levels (25-OH-D)) were assessed. Bone mineral density (BMD) was assessed via dual-energy X-ray absorptiometry (DXA; Lunar iDXA, GE Healthcare, Madison, WI, USA) at both spine and hip. Bone microarchitecture and volumetric BMD (vBMD) was analyzed at both distal tibia and radius according to our published protocol by high-resolution peripheral quantitative computer tomography (HR-pQCT; XtremeCT, Scanco Medical, Brütisellen, Switzerland) and compared to an age- and sex-matched reference values . Statistical analysis The used software for statistical analysis was SPSS® 26 (version 26.0, IBM, Armonk, New York, USA) and GraphPad Prism® (version 7.0, GraphPad Software, La Jolla, CA). If not specified, the presented values are mean ± standard deviation (SD). Evaluation of normality of data distribution was performed by the Shapiro–Wilk test. Data of the two groups were tested for significance using Student’s t test for normality distributed data and Mann–Whitney U test for non-normally distributed data. p values of < 0.05 were considered as statistically significant. Results Patient characteristics are reported in Table 1. Nine patients, mostly female (7/9, Fig. 1a) and in early adulthood (22.3 ± 7.5 years, Table 1), were included suffering from bilateral pain of the tibia. In 4/9 cases, analyzes of radiographs revealed bilateral pseudofractures of the tibia (Fig. 1b), whereas bilateral MTSS without pseudofracture was apparent in the remaining 5/9 patients. Between patients with or without pseudofractures, a trend towards higher body mass index values was revealed in those with pseudofractures. 
Regarding the performed sports disciplines, track and field were the most prevalent (6/9). According to patients’ reports and available files of medical history, 4/9 patients had a history of fractures. Time from onset of symptoms to diagnosis was 18.2 ± 12.8 months and did not differ significantly between groups (Fig. 1c). Detection of pseudofractures was achieved by different imaging techniques, e.g., radiographs (Fig. 1d), as well as CT and MRI (Fig. 1e), which were performed in athletes with prolonged symptoms (Fig. 2). After balancing bone metabolism, the establishment of sufficient vitamin D levels (i. e., 25-OH-D ≥ 30 µg/L), additive oral calcium supplementation (Suppl. Figure 1), non-elastic horizontal tape above the ankle distal to the muscle-to-tendon interface and transient reduction of physical activity, clinical symptoms were absent, and all patients returned to competition.Table 1 Group characteristics of patients Variable Pseudofractures (n = 4) MTSS (n = 5) p Sex (f/m) 3/1 4/1 – Age (years) 21.0 ± 5.0 23.4 ± 9.6 n.s Height (cm) 173.9 ± 2.2 172.9 ± 4.4 n.s Weight (kg) 79.4 ± 9.3 60.1 ± 5.1 n.s BMI (kg/m2) 26.2 ± 2.9 20.0 ± 0.9 n.s Time to diagnosis (months) 21.8 ± 8.3 15.4 ± 5.4 n.s History of fractures 1/4 3/5 – Vitamin D (µg/L) 20.4 ± 12.4 40.6 ± 6.9 0.017 Z-score spine 0.4 ± 1.2 − 0.7 ± 1.8 n.s Z-score hip 1.3 ± 0.6 − 0.7 ± 0.5 0.013 Individuals with MTSS and pseudofractures (Pseudofractures) and MTSS without pseudofractures (MTSS) were compared according to sex, age, morphometrics (height, weight, BMI), time from onset of clinical symptoms until diagnosis, history of fractures, and vitamin D levels at baseline. Significant values defined as p < 0.05 indicated in bold MTSS medial tibial stress syndrome, f female, m male, BMI body mass index Fig. 1 Patient characteristics and representative radiographs of bilateral pseudofractures/Looser zones at anteromedial tibiae. a Sex distribution of patients presenting with pain at bilateral tibiae. 
b Distribution of MTSS with and without pseudofractures in the presented patients. c Time from onset (in months) of clinical symptoms until diagnosis was made did not differ between the two groups. d Lateral view of radiographs of Patient 1 revealing bilateral Looser zones at anterior tibiae. e Sagittal CT and MRI images of right (left panel) and left (right panel) tibiae of Patient 4 with Looser zones at anterior tibiae Fig. 2 Diagnostic approach in patients with exercise-induced pain of the lower limbs. a Patients presenting with pain at the distal third of the tibia can be diagnosed as medial tibial stress syndrome (MTSS) if the criteria are met (green arrows). In other cases, differential diagnoses (see asterisk) should be evaluated by appropriate diagnostic approaches and treated accordingly, if applicable (red arrows). b In patients with suspected MTSS and prolonged symptoms despite receiving treatment, magnetic resonance imaging (MRI) scan should be obtained. After confirmation of MTSS (green arrow), MRI images should be evaluated for additional cortical lesions like pseudofractures. If results from MRI show additional (cortical) lesions not compatible to pseudofractures (e.g., stress fractures) or other signal alterations not fulfilling criteria of MTSS, underlying pathologies need to be addressed separately (red arrows). *Potential differential diagnoses include such as exertional compartment syndrome, infections (skin infections or osteomyelitis) or stress fractures. MTSS medial tibial stress syndrome; MRI magnetic resonance imaging, CBCT cone-beam computed tomography, CT computed tomography Biochemical analysis of patients revealed significantly reduced levels of 25-OH-D in patients with pseudofractures compared to those with MTSS and no pseudofractures (Table 1). There was no significant difference in bone formation (BAP and osteocalcin) or of bone resorption markers between the two patient groups. 
Interestingly, 50% of the patients with pseudofractures had values above the reference range, indicating an increased bone resorption. Assessment of BMD via DXA in patients with pseudofractures compared to patients without revealed significantly higher Z-scores at the hip (Fig. 3b). Parameters of bone microarchitecture assessed by HR-pQCT at the distal tibia in patient 4 with pseudofractures revealed a minor decrease in cortical volumetric BMD (Ct.BMD) and trabecular thickness (Tb.Th), whereas trabecular number (Tb.N), trabecular volumetric BMD (Tb.BMD) and cortical thickness (Ct.Th) were above reference values (Fig. 3c). In contrast, two patients with MTSS and no pseudofractures had comparably higher values of Ct.BMD, but lower values of Tb.Th, Tb.N, Tb.BMD as well as Ct.Th (Fig. 3c). Similar patterns were observed at the distal radius (Fig. 3d). Furthermore, we performed cone-beam computed tomography (CBCT) in this patient to evaluate the lesion status, revealing a nearly completed consolidation after 8 weeks of intensified vitamin D supplementation and oral calcium gluconate supplementation, correlating to improved clinical symptoms (Fig. 4).Fig. 3 Skeletal assessment of MTSS patients presenting with or without pseudofractures. a Assessment of bone mineral density (BMD) via dual-energy X-ray (DXA) at both spine and hip. Interestingly, patients with pseudofractures had no impairment of BMD but significantly higher Z-scores at the hip compared to patients missing pseudofractures. b Representative image of high-resolution peripheral quantitative CT (HR-pQCT) analysis of distal tibia of Patient 4. c Analysis of bone microarchitecture at both tibia and radius via HR-pQCT in Patients 4, 9 and 10, revealing higher values in 4/5 parameters in the patient with pseudofractures compared to MTSS. Values are given as percent of the reference median Fig. 4 Course of lesion healing in Patient 4 assessed by cone beam computed tomography (CBCT). 
a Assessment of skeletal status at an initial presentation by CBCT revealed bilateral Looser zones at anteromedial tibiae. b Follow-up of radiograph after 8 weeks of intensified vitamin D supplementation (i.e., 14 days of 20,000 I.U. per day followed by 20,000 I.U. per week) and oral calcium gluconate supplementation (i.e., 1000 mg per day for three months) showed nearly complete consolidation in CBCT correlating to improved clinical symptoms Discussion The most important finding of the present study was that bilateral pseudofractures pose a relevant component or comorbidity of MTSS in athletes with deficient vitamin D levels and (physiologically) increased BMD which is clinically relevant and should not be missed. It was further demonstrated that pseudofractures are not the result of systemically attenuated skeletal status. Of note, it was revealed by our skeletal assessment that BMD and bone microarchitecture parameters were even higher in patients with pseudofractures compared to those of patients missing pseudofractures. Based on these collective findings, we stress the pivotal role of sufficient vitamin D supplementation in athletes to prevent the development of or to improve the healing of pseudofractures of the tibia. In line with previous studies, female athletes represented the majority of our study cohort [1, 20, 32] and the time from the initial onset of clinical symptoms until diagnosis was prolonged [9, 30]. Furthermore, patients with pseudofractures showed a trend towards higher BMI values, resulting in a greater mechanical impact on the anteromedial tibia and facilitating the development of pseudofractures. This effect has, to the best of our knowledge, not been described in patients suffering from pseudofractures, but was previously reported as a relevant factor for MTSS . The underlying pathomechanism of pseudofractures is an insufficient mineralization of stress-induced microdamage due to osteomalacia [15, 22]. 
Though available histological studies of pseudofractures and MTSS are scarce and of limited quality, described characteristics of these biopsies underline an increase in remodeling with osteoid seams and especially no complete fracture in these lesions . Patients with diagnosed pseudofractures had significantly lower levels of vitamin D compared to patients with MTSS and no pseudofractures, posing a risk factor for the development of insufficient mineralization with subsequent osteomalacia [14, 21] as well as an increased risk for the development of pseudofractures [7, 13, 27] and MTSS . Moreover, insufficient vitamin D levels favor the development of stress injuries , stress fractures , as well as fractures in general . Assessment of BMD revealed no systemic reduction but significantly higher values in patients with pseudofractures compared to those with MTSS without pseudofractures. To our best knowledge, this finding has not been stated before and indicates that systemically intact bone status does not protect individuals from the development of local bone lesions, i.e., pseudofractures or MTSS. Furthermore, elevated BMD as a physiological response to increased mechanical load may even increase the risk for the development of pseudofractures in case of simultaneous vitamin D deficiency due to higher demand for mineral supply within the context of increased bone remodeling. Previous studies of patients with MTSS have reported locally reduced BMD in affected tibiae , indicating increased remodeling taking place at these sites, as DXA measures mineralized tissue only. Consequently, the skeleton is more vulnerable to the development of pseudofractures in states of chronic vitamin D deficiency. Importantly, after normalization of clinical symptoms, BMD increased again and was re-established . The assessment of bone microarchitecture in the patient with pseudofractures revealed superior parameters compared to patients’ with MTSS and no pseudofracture. 
Supporting the results of BMD analysis, no generally impaired bone microarchitecture compared to references was detected. Interestingly, a recent study described an impairment of trabecular bone microarchitecture in soldiers affected by bone stress injuries, yet no differentiation regarding the type of lesion or comparison to reference values was conducted . However, in our study, Ct.BMD was the only reduced value in the patient with pseudofracture, indicating aggravated mineralization deterioration at the cortical site correlating to the development of Looser’s zones and corresponding to the reported decrease of mineralization at the lesions’ sites . In these patients, the treatment consisted of vitamin D supplementation [13, 27], additional oral calcium supplementation in cases of diagnosed pseudofractures or secondary hyperparathyroidism, and weight-bearing as tolerated followed by a gradual increase with a return to training. By applying non-elastic horizontal tape above the ankle, a redirection of forces at the tendon-to-bone insertion is achieved with greater potential potential for rehabilitation of the bone. Clinical symptoms disappeared allowing a return to competition in all patients. As this causative treatment approach is easily accessible and cost-effective, it should not be missed, especially in athletes with prolonged pain at the (anteromedial) tibia. As in general, a high rate of recurrence and, in particular, tibial stress fractures , is known for MTSS, vitamin D supplementation should be maintained according to the individual’s needs. However, further understanding of the underlying pathomechanisms is needed to elaborate, whether higher BMD values impose an additional risk factor under simultaneous conditions of vitamin D deficiency for the development of pseudofractures and not MTSS. Conclusion In conclusion, pseudofractures pose a relevant component of MTSS in athletes with prolonged pain at bilateral tibiae. 
The skeletal assessment revealed significantly lower values of vitamin D, significantly higher Z-scores at the hip as well as superior microarchitecture parameters in MTSS patients with pseudofractures compared to those without. The paramount importance of calcium homeostasis was highlighted, as supplementation of vitamin D and oral calcium gluconate led to the disappearance of clinical symptoms and pseudofracture consolidation. Consequently, a sufficient supplementation of vitamin D is highly recommended, especially in elite athletes, to prevent MTSS and/or pseudofractures. Electronic supplementary material Below is the link to the electronic supplementary material.Supplemental Figure 1: Therapeutic approach for the establishment of calcium and bone turnover homeostasis to promote healing of MTSS and/or pseudofractures. Patients with no comorbidity for hypercalcemia receive vitamin D (25-OH-D) supplementation according to their current serum levels. All patients are advised to pay attention to adequate dietary calcium intake. Furthermore, in patients with detected pseudofractures or biochemical signs of secondary hyperparathyroidism, additional calcium supplementation is prescribed for 3 months with an analysis of serum calcium levels to avoid iatrogenic hypercalcemia (PDF 14 kb) Abbreviations MTSS Medial tibial stress syndrome DXA Dual-energy X-Ray absorptiometry HR-pQCT High-resolution peripheral quantitative computed tomography 25-OH-D 25-Hydroxycholecalciferol BMD Bone mineral density MRI Magnetic resonance imaging CT Computed tomography BAP Bone-specific alkaline phosphatase Dpd Deoxypyridinoline/crea PTH Parathyroid hormone ALP Alkaline phosphatase vBMD Volumetric bone mineral density BMI Body mass index Ct.BMD Cortical bone mineral density Tb.Th Trabecular thickness Tb.N Trabecular number Tb.BMD Trabecular bone mineral density Ct.Th Cortical thickness CBCT Cone beam computed tomography Acknowledgements The authors have no further acknowledgements. 
Author contributions JS, MA, TR, RO participated in the study design. JS, RO conducted the study. Data was collected by JS, MMD, EV and NMJ. JS, MMD, TR, RO performed data analysis, which was interpreted by JS, MA, PU, TR, RO. Drafting of the manuscript was done by JS and TR and FB, MA, PU, RO further revised the manuscript content. All the authors read and approved the final manuscript. Funding Open Access funding enabled and organized by Projekt DEAL. The authors received no specific funding for this work. Compliance with ethical standards Conflict of interest All authors declare that there is no conflict of interest. Ethical approval The authors declare that all procedures were in line with the local ethics committee and all procedures involving human participants were in accordance with the 1964 Helsinki Declaration and its later amendments. Informed consent The authors declare that informed consent was obtained in all patients or from the family members. Publisher's Note Springer Nature remains neutral with regard to jurisdictional claims in published maps and institutional affiliations. Refs References 1. Arendt E Agel J Heikes C Griffiths H Stress injuries to bone in college athletes: a retrospective review of experience at a single institution Am J Sports Med 2003 31 959 968 10.1177/03635465030310063601 14623664 2. Arendt EA Griffiths HJ The use of MR imaging in the assessment and clinical management of stress reactions of bone in high-performance athletes Clin Sports Med 1997 16 291 306 10.1016/S0278-5919(05)70023-5 9238311 3. Becker J Nakajima M Wu WFW Factors contributing to medial tibial stress syndrome in runners: a prospective study Med Sci Sports Exerc 2018 50 2092 2100 10.1249/MSS.0000000000001674 29787473 4. Bhadada SK Pal R Dhiman V Alonso N Ralston SH Kaur S Adult hypophosphatasia with a novel ALPL mutation: report of an Indian kindred Bone Rep 2020 12 100247 10.1016/j.bonr.2020.100247 32025537 5. 
Burt LA Liang Z Sajobi TT Hanley DA Boyd SK Sex- and site-specific normative data curves for HR-pQCT J Bone Miner Res 2016 31 2041 2047 10.1002/jbmr.2873 27192388 6. Busse B Bale HA Zimmermann EA Panganiban B Barth HD Carriero A Vitamin D deficiency induces early signs of aging in human bone, increasing the risk of fracture Sci Transl Med 2013 5 193ra188 10.1126/scitranslmed.3006286 7. Clark F Simpson W Young JR Osteomalacia in immigrants from the Indian subcontinent in Newcastle upon Tyne Proc R Soc Med 1972 65 478 480 5083685 8. Ekstrand J Torstveit MK Stress fractures in elite male football players Scand J Med Sci Sports 2012 22 341 346 10.1111/j.1600-0838.2010.01171.x 20807388 9. Fredericson M Bergman AG Hoffman KL Dillingham MS Tibial stress reaction in runners. Correlation of clinical symptoms and scintigraphy with a new magnetic resonance imaging grading system Am J Sports Med 1995 23 472 481 10.1177/036354659502300418 7573660 10. Gaeta M Minutoli F Vinci S Salamone I D'Andrea L Bitto L High-resolution CT grading of tibial stress reactions in distance runners AJR Am J Roentgenol 2006 187 789 793 10.2214/AJR.05.0303 16928946 11. Johnell O Rausing A Wendeberg B Westlin N Morphological bone changes in shin splints Clin Orthop Relat Res 1982 167 180 184 12. Lecoq AL Brandi ML Linglart A Kamenicky P Management of X-linked hypophosphatemia in adults Metabolism 2020 103S 154049 10.1016/j.metabol.2019.154049 31863781 13. Lee C Lashari S Pseudofracture of the neck of femur secondary to osteomalacia J Bone Joint Surg Br 2007 89 956 958 10.1302/0301-620X.89B7.18425 17673594 14. Lips P Vitamin D deficiency and secondary hyperparathyroidism in the elderly: consequences for bone loss and fractures and therapeutic implications Endocr Rev 2001 22 477 501 10.1210/edrv.22.4.0437 11493580 15. Looser E Uber pathologische von Infraktionen und Callusbildungen bei Rachits und Osteomalcie und Knochenerkrankungen Zbl Chir 1920 47 1470 16. 
Magnusson HI Ahlborg HG Karlsson C Nyquist F Karlsson MK Low regional tibial bone density in athletes with medial tibial stress syndrome normalizes after recovery from symptoms Am J Sports Med 2003 31 596 600 10.1177/03635465030310042001 12860551 17. Magnusson HI Westlin NE Nyqvist F Gardsell P Seeman E Karlsson MK Abnormally decreased regional bone density in athletes with medial tibial stress syndrome Am J Sports Med 2001 29 712 715 10.1177/03635465010290060701 11734482 18. Milovanovic P Adamu U Simon MJ Rolvien T Djuric M Amling M Age- and sex-specific bone structure patterns portend bone fragility in radii and tibiae in relation to osteodensitometry: a high-resolution peripheral quantitative computed tomography study in 385 individuals J Gerontol A Biol Sci Med Sci 2015 70 1269 1275 10.1093/gerona/glv052 25934995 19. Moen MH Tol JL Weir A Steunebrink M De Winter TC Medial tibial stress syndrome: a critical review Sports Med 2009 39 523 546 10.2165/00007256-200939070-00002 19530750 20. Plisky MS Rauh MJ Heiderscheit B Underwood FB Tank RT Medial tibial stress syndrome in high school cross-country runners: incidence and risk factors J Orthop Sports Phys Ther 2007 37 40 47 10.2519/jospt.2007.2343 17366958 21. Priemel M von Domarus C Klatte TO Kessler S Schlie J Meier S Bone mineralization defects and vitamin D deficiency: histomorphometric analysis of iliac crest bone biopsies and circulating 25-hydroxyvitamin D in 675 patients J Bone Miner Res 2010 25 305 312 10.1359/jbmr.090728 19594303 22. Roberts SM Vogt EC Pseudofracture of the Tibia JBJS 1939 21 891 901 23. Ruohola JP Laaksi I Ylikomi T Haataja R Mattila VM Sahi T Association between serum 25(OH)D concentrations and bone stress fractures in Finnish young men J Bone Miner Res 2006 21 1483 1488 10.1359/jbmr.060607 16939407 24. 
Saxena A Fullem B Gerdesmeyer L Treatment of medial tibial stress syndrome with radial soundwave therapy in elite athletes: current evidence, report on two cases, and proposed treatment regimen J Foot Ankle Surg 2017 56 985 989 10.1053/j.jfas.2017.06.013 28842109 25. Schanda JE Kocijan R Resch H Baierl A Feichtinger X Mittermayr R Bone stress injuries are associated with differences in bone microarchitecture in male professional soldiers J Orthop Res 2019 37 2516 2523 10.1002/jor.24442 31410876 26. Schilcher J Bernhardsson M Aspenberg P Chronic anterior tibial stress fractures in athletes: no crack but intense remodeling Scand J Med Sci Sports 2019 29 1521 1528 10.1111/sms.13466 31102562 27. Simpson W Young JR Clark F Pseudofractures resembling stress fractures in Punjabi immigrants with osteomalacia Clin Radiol 1973 24 83 89 10.1016/S0009-9260(73)80122-9 4723500 28. Taunton JE Ryan MB Clement DB McKenzie DC Lloyd-Smith DR Zumbo BD A retrospective case-control analysis of 2002 running injuries Br J Sports Med 2002 36 95 101 10.1136/bjsm.36.2.95 11916889 29. Tenforde AS Kraus E Fredericson M Bone stress injuries in runners Phys Med Rehabil Clin N Am 2016 27 139 149 10.1016/j.pmr.2015.08.008 26616181 30. Winters M The diagnosis and management of medial tibial stress syndrome: an evidence update Unfallchirurg 2020 123 15 19 10.1007/s00113-019-0667-z 31098646 31. Winters M Burr DB van der Hoeven H Condon KW Bellemans J Moen MH Microcrack-associated bone remodeling is rarely observed in biopsies from athletes with medial tibial stress syndrome J Bone Miner Metab 2019 37 496 502 10.1007/s00774-018-0945-9 30066165 32. Yates B White S The incidence and risk factors in the development of medial tibial stress syndrome among naval recruits Am J Sports Med 2004 32 772 780 10.1177/0095399703258776 15090396 \""
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dataset['text'][0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [],
   "source": [
    "# QLoRA config\n",
    "# Hyperparameters for the LoRA adapters; they are collected into `peft_config` below.\n",
    "# Rank of the adapter matrices (passed as `r`).\n",
    "lora_r = 16\n",
    "# Passed as `lora_alpha`; in standard LoRA the adapter update is scaled by lora_alpha / r.\n",
    "lora_alpha = 64\n",
    "# Dropout probability applied inside the LoRA layers.\n",
    "lora_dropout = 0.1\n",
    "# Module names that receive adapters. Only q_proj and v_proj are active;\n",
    "# the other candidate projections are left commented out.\n",
    "lora_target_modules = [\n",
    "    \"q_proj\",\n",
    "    # \"up_proj\",\n",
    "    # \"o_proj\",\n",
    "    # \"k_proj\",\n",
    "    # \"down_proj\",\n",
    "    # \"gate_proj\",\n",
    "    \"v_proj\",\n",
    "]\n",
    " \n",
    " \n",
    "# bias=\"none\" leaves bias terms out of the adapter config; task_type selects causal LM.\n",
    "peft_config = LoraConfig(\n",
    "    r=lora_r,\n",
    "    lora_alpha=lora_alpha,\n",
    "    lora_dropout=lora_dropout,\n",
    "    target_modules=lora_target_modules,\n",
    "    bias=\"none\",\n",
    "    task_type=\"CAUSAL_LM\",\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The tensorboard extension is already loaded. To reload it, use:\n",
      "  %reload_ext tensorboard\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "Reusing TensorBoard on port 6006 (pid 361373), started 9:35:12 ago. (Use '!kill 361373' to kill it.)"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "\n",
       "      <iframe id=\"tensorboard-frame-bdd640fb06671ad1\" width=\"100%\" height=\"800\" frameborder=\"0\">\n",
       "      </iframe>\n",
       "      <script>\n",
       "        (function() {\n",
       "          const frame = document.getElementById(\"tensorboard-frame-bdd640fb06671ad1\");\n",
       "          const url = new URL(\"/\", window.location);\n",
       "          const port = 6006;\n",
       "          if (port) {\n",
       "            url.port = port;\n",
       "          }\n",
       "          frame.src = url;\n",
       "        })();\n",
       "      </script>\n",
       "    "
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Directory used as the trainer's `output_dir` later in the notebook.\n",
    "OUTPUT_DIR = \"experiments\"\n",
    " \n",
    "%load_ext tensorboard\n",
    "# IPython expands {OUTPUT_DIR} before the magic runs, so the dashboard keeps\n",
    "# following the `runs` sub-directory even if OUTPUT_DIR is changed above.\n",
    "%tensorboard --logdir {OUTPUT_DIR}/runs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [],
   "source": [
    "from peft import get_peft_model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [],
   "source": [
    "model = get_peft_model(model, peft_config)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "trainable params: 6,815,744 || all params: 8,037,076,992 || trainable%: 0.0848\n"
     ]
    }
   ],
   "source": [
    "model.print_trainable_parameters()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [],
   "source": [
    "from transformers import DataCollatorForLanguageModeling\n",
    "from trl import SFTTrainer, SFTConfig"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "# Set NCCL environment switches for this kernel process: the first disables the\n",
    "# peer-to-peer transport, the second disables the InfiniBand transport.\n",
    "# NOTE(review): presumably a workaround for multi-GPU hangs on this machine, and it\n",
    "# likely has to run before NCCL is initialised to have any effect -- confirm.\n",
    "os.environ[\"NCCL_P2P_DISABLE\"] = \"1\"\n",
    "os.environ[\"NCCL_IB_DISABLE\"] = \"1\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Training hyperparameters for the supervised fine-tuning run.\n",
    "training_arguments = SFTConfig(\n",
    "    per_device_train_batch_size=1,\n",
    "    # Gradients are accumulated over 8 micro-batches before each optimizer step.\n",
    "    gradient_accumulation_steps=8,\n",
    "    optim=\"paged_adamw_32bit\",\n",
    "    logging_steps=1,\n",
    "    learning_rate=1e-4,\n",
    "    fp16=True,\n",
    "    max_grad_norm=0.3,\n",
    "    num_train_epochs=3,\n",
    "    # `evaluation_strategy` is deprecated (removed in transformers 4.46);\n",
    "    # `eval_strategy` is the replacement named by the FutureWarning.\n",
    "    eval_strategy=\"steps\",\n",
    "    # A float below 1 is a fraction of the total number of training steps.\n",
    "    eval_steps=0.01,\n",
    "    warmup_ratio=0.05,\n",
    "    # Early stopping needs `load_best_model_at_end=True`, which in turn requires the\n",
    "    # save strategy to match the eval strategy and save_steps to be a multiple of\n",
    "    # eval_steps. Checkpoints are therefore written on the evaluation cadence.\n",
    "    save_strategy=\"steps\",\n",
    "    save_steps=0.01,\n",
    "    # Bound disk usage now that checkpoints are frequent; the best checkpoint is kept.\n",
    "    save_total_limit=2,\n",
    "    load_best_model_at_end=True,\n",
    "    metric_for_best_model=\"eval_loss\",\n",
    "    greater_is_better=False,\n",
    "    group_by_length=True,\n",
    "    output_dir=OUTPUT_DIR,\n",
    "    report_to=\"tensorboard\",\n",
    "    save_safetensors=True,\n",
    "    lr_scheduler_type=\"cosine\",\n",
    "    seed=42,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Hold out 10% of `dataset` for evaluation; the fixed seed makes the split repeatable.\n",
    "# The result is indexed as splitted_dataset['train'] / splitted_dataset['test'] below.\n",
    "splitted_dataset = dataset.train_test_split(test_size=0.1, seed=42)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# EarlyStoppingCallback was used without being imported (NameError).\n",
    "from transformers import EarlyStoppingCallback\n",
    "\n",
    "# The callback asserts `load_best_model_at_end=True` when training starts, so it is\n",
    "# attached only if the training arguments enable that; otherwise say so and train\n",
    "# without early stopping instead of failing inside trainer.train().\n",
    "callbacks = []\n",
    "if training_arguments.load_best_model_at_end:\n",
    "    # Stop after 3 consecutive evaluations without improvement of the tracked metric.\n",
    "    callbacks.append(EarlyStoppingCallback(early_stopping_patience=3))\n",
    "else:\n",
    "    print(\"Early stopping is disabled: set load_best_model_at_end=True in SFTConfig to enable it.\")\n",
    "\n",
    "# Fine-tune on the 'train' split and evaluate on the held-out 'test' split,\n",
    "# reading raw text from the \"text\" column truncated to 512 tokens.\n",
    "trainer = SFTTrainer(\n",
    "    model=model,\n",
    "    train_dataset=splitted_dataset['train'],\n",
    "    eval_dataset=splitted_dataset['test'],\n",
    "    peft_config=peft_config,\n",
    "    dataset_text_field=\"text\",\n",
    "    max_seq_length=512,\n",
    "    tokenizer=tokenizer,\n",
    "    args=training_arguments,\n",
    "    callbacks=callbacks,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "# Debugging aid: CUDA_LAUNCH_BLOCKING=1 asks CUDA to run kernel launches synchronously,\n",
    "# so a device-side error is reported at the call that caused it.\n",
    "# NOTE(review): this slows training and presumably only takes effect if set before\n",
    "# CUDA is initialised in this kernel -- confirm it is still wanted here.\n",
    "os.environ['CUDA_LAUNCH_BLOCKING'] = '1'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {},
   "outputs": [],
   "source": [
    "# torch.cuda.synchronize()\n",
    "# torch.cuda.empty_cache()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "\n",
       "    <div>\n",
       "      \n",
       "      <progress value='205' max='360' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
       "      [205/360 15:44 < 12:01, 0.21 it/s, Epoch 1.70/3]\n",
       "    </div>\n",
       "    <table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       " <tr style=\"text-align: left;\">\n",
       "      <th>Step</th>\n",
       "      <th>Training Loss</th>\n",
       "      <th>Validation Loss</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>1.981200</td>\n",
       "      <td>1.774238</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>8</td>\n",
       "      <td>1.753500</td>\n",
       "      <td>1.754978</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>12</td>\n",
       "      <td>1.772600</td>\n",
       "      <td>1.726179</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>16</td>\n",
       "      <td>1.557500</td>\n",
       "      <td>1.692598</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>20</td>\n",
       "      <td>1.790200</td>\n",
       "      <td>1.662171</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>24</td>\n",
       "      <td>1.598900</td>\n",
       "      <td>1.633827</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>28</td>\n",
       "      <td>1.526500</td>\n",
       "      <td>1.615106</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>32</td>\n",
       "      <td>1.560600</td>\n",
       "      <td>1.600001</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>36</td>\n",
       "      <td>1.618500</td>\n",
       "      <td>1.588000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>40</td>\n",
       "      <td>1.646600</td>\n",
       "      <td>1.579654</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>44</td>\n",
       "      <td>1.657100</td>\n",
       "      <td>1.567710</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>48</td>\n",
       "      <td>1.587900</td>\n",
       "      <td>1.558632</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>52</td>\n",
       "      <td>1.397700</td>\n",
       "      <td>1.550794</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>56</td>\n",
       "      <td>1.704600</td>\n",
       "      <td>1.543783</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>60</td>\n",
       "      <td>1.456500</td>\n",
       "      <td>1.538500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>64</td>\n",
       "      <td>1.901600</td>\n",
       "      <td>1.532189</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>68</td>\n",
       "      <td>1.774300</td>\n",
       "      <td>1.528996</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>72</td>\n",
       "      <td>1.390000</td>\n",
       "      <td>1.524170</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>76</td>\n",
       "      <td>1.558800</td>\n",
       "      <td>1.519067</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>80</td>\n",
       "      <td>1.627400</td>\n",
       "      <td>1.517108</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>84</td>\n",
       "      <td>1.504100</td>\n",
       "      <td>1.512800</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>88</td>\n",
       "      <td>1.668200</td>\n",
       "      <td>1.509464</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>92</td>\n",
       "      <td>1.526700</td>\n",
       "      <td>1.505236</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>96</td>\n",
       "      <td>1.618400</td>\n",
       "      <td>1.503344</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>100</td>\n",
       "      <td>1.451900</td>\n",
       "      <td>1.499353</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>104</td>\n",
       "      <td>1.644900</td>\n",
       "      <td>1.496035</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>108</td>\n",
       "      <td>1.469000</td>\n",
       "      <td>1.492282</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>112</td>\n",
       "      <td>1.614600</td>\n",
       "      <td>1.489366</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>116</td>\n",
       "      <td>1.591700</td>\n",
       "      <td>1.487346</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>120</td>\n",
       "      <td>1.487500</td>\n",
       "      <td>1.482805</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>124</td>\n",
       "      <td>1.416000</td>\n",
       "      <td>1.480361</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>128</td>\n",
       "      <td>1.313600</td>\n",
       "      <td>1.481161</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>132</td>\n",
       "      <td>1.334400</td>\n",
       "      <td>1.479421</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>136</td>\n",
       "      <td>1.471800</td>\n",
       "      <td>1.476773</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>140</td>\n",
       "      <td>1.540500</td>\n",
       "      <td>1.474109</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>144</td>\n",
       "      <td>1.452700</td>\n",
       "      <td>1.473360</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>148</td>\n",
       "      <td>1.323000</td>\n",
       "      <td>1.472112</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>152</td>\n",
       "      <td>1.527600</td>\n",
       "      <td>1.470621</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>156</td>\n",
       "      <td>1.535100</td>\n",
       "      <td>1.469403</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>160</td>\n",
       "      <td>1.356000</td>\n",
       "      <td>1.467490</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>164</td>\n",
       "      <td>1.492700</td>\n",
       "      <td>1.465348</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>168</td>\n",
       "      <td>1.371600</td>\n",
       "      <td>1.464317</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>172</td>\n",
       "      <td>1.628700</td>\n",
       "      <td>1.463003</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>176</td>\n",
       "      <td>1.242100</td>\n",
       "      <td>1.462533</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>180</td>\n",
       "      <td>1.284400</td>\n",
       "      <td>1.461138</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>184</td>\n",
       "      <td>1.563000</td>\n",
       "      <td>1.459591</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>188</td>\n",
       "      <td>1.421000</td>\n",
       "      <td>1.457585</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>192</td>\n",
       "      <td>1.208200</td>\n",
       "      <td>1.456179</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>196</td>\n",
       "      <td>1.350800</td>\n",
       "      <td>1.454647</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>200</td>\n",
       "      <td>1.602600</td>\n",
       "      <td>1.454009</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table><p>\n",
       "    <div>\n",
       "      \n",
       "      <progress value='2' max='14' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
       "      [ 2/14 00:00 < 00:08, 1.40 it/s]\n",
       "    </div>\n",
       "    "
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/xinzheng/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.\n",
      "  warnings.warn(\n"
     ]
    },
    {
     "ename": "KeyboardInterrupt",
     "evalue": "",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
      "Cell \u001b[0;32mIn[55], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mtrainer\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtrain\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n",
      "File \u001b[0;32m~/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages/trl/trainer/sft_trainer.py:440\u001b[0m, in \u001b[0;36mSFTTrainer.train\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m    437\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mneftune_noise_alpha \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_trainer_supports_neftune:\n\u001b[1;32m    438\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodel \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_trl_activate_neftune(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodel)\n\u001b[0;32m--> 440\u001b[0m output \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtrain\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    442\u001b[0m \u001b[38;5;66;03m# After training we make sure to retrieve back the original forward pass method\u001b[39;00m\n\u001b[1;32m    443\u001b[0m \u001b[38;5;66;03m# for the embedding layer by removing the forward post hook.\u001b[39;00m\n\u001b[1;32m    444\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mneftune_noise_alpha \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_trainer_supports_neftune:\n",
      "File \u001b[0;32m~/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages/transformers/trainer.py:1885\u001b[0m, in \u001b[0;36mTrainer.train\u001b[0;34m(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)\u001b[0m\n\u001b[1;32m   1883\u001b[0m         hf_hub_utils\u001b[38;5;241m.\u001b[39menable_progress_bars()\n\u001b[1;32m   1884\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1885\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43minner_training_loop\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m   1886\u001b[0m \u001b[43m        \u001b[49m\u001b[43margs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1887\u001b[0m \u001b[43m        \u001b[49m\u001b[43mresume_from_checkpoint\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mresume_from_checkpoint\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1888\u001b[0m \u001b[43m        \u001b[49m\u001b[43mtrial\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtrial\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1889\u001b[0m \u001b[43m        \u001b[49m\u001b[43mignore_keys_for_eval\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mignore_keys_for_eval\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1890\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n",
      "File \u001b[0;32m~/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages/transformers/trainer.py:2291\u001b[0m, in \u001b[0;36mTrainer._inner_training_loop\u001b[0;34m(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval)\u001b[0m\n\u001b[1;32m   2288\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstate\u001b[38;5;241m.\u001b[39mepoch \u001b[38;5;241m=\u001b[39m epoch \u001b[38;5;241m+\u001b[39m (step \u001b[38;5;241m+\u001b[39m \u001b[38;5;241m1\u001b[39m \u001b[38;5;241m+\u001b[39m steps_skipped) \u001b[38;5;241m/\u001b[39m steps_in_epoch\n\u001b[1;32m   2289\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcontrol \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcallback_handler\u001b[38;5;241m.\u001b[39mon_step_end(args, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstate, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcontrol)\n\u001b[0;32m-> 2291\u001b[0m     \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_maybe_log_save_evaluate\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtr_loss\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mgrad_norm\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtrial\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mepoch\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mignore_keys_for_eval\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   2292\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m   2293\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcontrol \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcallback_handler\u001b[38;5;241m.\u001b[39mon_substep_end(args, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstate, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcontrol)\n",
      "File \u001b[0;32m~/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages/transformers/trainer.py:2721\u001b[0m, in \u001b[0;36mTrainer._maybe_log_save_evaluate\u001b[0;34m(self, tr_loss, grad_norm, model, trial, epoch, ignore_keys_for_eval)\u001b[0m\n\u001b[1;32m   2719\u001b[0m metrics \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m   2720\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcontrol\u001b[38;5;241m.\u001b[39mshould_evaluate:\n\u001b[0;32m-> 2721\u001b[0m     metrics \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mevaluate\u001b[49m\u001b[43m(\u001b[49m\u001b[43mignore_keys\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mignore_keys_for_eval\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   2722\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_report_to_hp_search(trial, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstate\u001b[38;5;241m.\u001b[39mglobal_step, metrics)\n\u001b[1;32m   2724\u001b[0m     \u001b[38;5;66;03m# Run delayed LR scheduler now that metrics are populated\u001b[39;00m\n",
      "File \u001b[0;32m~/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages/transformers/trainer.py:3572\u001b[0m, in \u001b[0;36mTrainer.evaluate\u001b[0;34m(self, eval_dataset, ignore_keys, metric_key_prefix)\u001b[0m\n\u001b[1;32m   3569\u001b[0m start_time \u001b[38;5;241m=\u001b[39m time\u001b[38;5;241m.\u001b[39mtime()\n\u001b[1;32m   3571\u001b[0m eval_loop \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mprediction_loop \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39margs\u001b[38;5;241m.\u001b[39muse_legacy_prediction_loop \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mevaluation_loop\n\u001b[0;32m-> 3572\u001b[0m output \u001b[38;5;241m=\u001b[39m \u001b[43meval_loop\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m   3573\u001b[0m \u001b[43m    \u001b[49m\u001b[43meval_dataloader\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   3574\u001b[0m \u001b[43m    \u001b[49m\u001b[43mdescription\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mEvaluation\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m   3575\u001b[0m \u001b[43m    \u001b[49m\u001b[38;5;66;43;03m# No point gathering the predictions if there are no metrics, otherwise we defer to\u001b[39;49;00m\n\u001b[1;32m   3576\u001b[0m \u001b[43m    \u001b[49m\u001b[38;5;66;43;03m# self.args.prediction_loss_only\u001b[39;49;00m\n\u001b[1;32m   3577\u001b[0m \u001b[43m    \u001b[49m\u001b[43mprediction_loss_only\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mif\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcompute_metrics\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mis\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m 
\u001b[49m\u001b[38;5;28;43;01melse\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m   3578\u001b[0m \u001b[43m    \u001b[49m\u001b[43mignore_keys\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mignore_keys\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   3579\u001b[0m \u001b[43m    \u001b[49m\u001b[43mmetric_key_prefix\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmetric_key_prefix\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   3580\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   3582\u001b[0m total_batch_size \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39margs\u001b[38;5;241m.\u001b[39meval_batch_size \u001b[38;5;241m*\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39margs\u001b[38;5;241m.\u001b[39mworld_size\n\u001b[1;32m   3583\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mmetric_key_prefix\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m_jit_compilation_time\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m output\u001b[38;5;241m.\u001b[39mmetrics:\n",
      "File \u001b[0;32m~/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages/transformers/trainer.py:3757\u001b[0m, in \u001b[0;36mTrainer.evaluation_loop\u001b[0;34m(self, dataloader, description, prediction_loss_only, ignore_keys, metric_key_prefix)\u001b[0m\n\u001b[1;32m   3754\u001b[0m         batch_size \u001b[38;5;241m=\u001b[39m observed_batch_size\n\u001b[1;32m   3756\u001b[0m \u001b[38;5;66;03m# Prediction step\u001b[39;00m\n\u001b[0;32m-> 3757\u001b[0m loss, logits, labels \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mprediction_step\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43minputs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mprediction_loss_only\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mignore_keys\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mignore_keys\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   3758\u001b[0m main_input_name \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mgetattr\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodel, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmain_input_name\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124minput_ids\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m   3759\u001b[0m inputs_decode \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_prepare_input(inputs[main_input_name]) \u001b[38;5;28;01mif\u001b[39;00m args\u001b[38;5;241m.\u001b[39minclude_inputs_for_metrics \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n",
      "File \u001b[0;32m~/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages/transformers/trainer.py:3971\u001b[0m, in \u001b[0;36mTrainer.prediction_step\u001b[0;34m(self, model, inputs, prediction_loss_only, ignore_keys)\u001b[0m\n\u001b[1;32m   3969\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m has_labels \u001b[38;5;129;01mor\u001b[39;00m loss_without_labels:\n\u001b[1;32m   3970\u001b[0m     \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcompute_loss_context_manager():\n\u001b[0;32m-> 3971\u001b[0m         loss, outputs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcompute_loss\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43minputs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mreturn_outputs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m)\u001b[49m\n\u001b[1;32m   3972\u001b[0m     loss \u001b[38;5;241m=\u001b[39m loss\u001b[38;5;241m.\u001b[39mmean()\u001b[38;5;241m.\u001b[39mdetach()\n\u001b[1;32m   3974\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(outputs, \u001b[38;5;28mdict\u001b[39m):\n",
      "File \u001b[0;32m~/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages/transformers/trainer.py:3264\u001b[0m, in \u001b[0;36mTrainer.compute_loss\u001b[0;34m(self, model, inputs, return_outputs)\u001b[0m\n\u001b[1;32m   3262\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m   3263\u001b[0m     labels \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m-> 3264\u001b[0m outputs \u001b[38;5;241m=\u001b[39m \u001b[43mmodel\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43minputs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   3265\u001b[0m \u001b[38;5;66;03m# Save past state if it exists\u001b[39;00m\n\u001b[1;32m   3266\u001b[0m \u001b[38;5;66;03m# TODO: this needs to be fixed and made cleaner later.\u001b[39;00m\n\u001b[1;32m   3267\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39margs\u001b[38;5;241m.\u001b[39mpast_index \u001b[38;5;241m>\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;241m0\u001b[39m:\n",
      "File \u001b[0;32m~/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages/torch/nn/modules/module.py:1194\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *input, **kwargs)\u001b[0m\n\u001b[1;32m   1190\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m   1191\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m   1192\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m   1193\u001b[0m         \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1194\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;28;43minput\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   1195\u001b[0m \u001b[38;5;66;03m# Do not call functions when jit is used\u001b[39;00m\n\u001b[1;32m   1196\u001b[0m full_backward_hooks, non_full_backward_hooks \u001b[38;5;241m=\u001b[39m [], []\n",
      "File \u001b[0;32m~/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages/accelerate/utils/operations.py:822\u001b[0m, in \u001b[0;36mconvert_outputs_to_fp32.<locals>.forward\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m    821\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mforward\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[0;32m--> 822\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mmodel_forward\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
      "File \u001b[0;32m~/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages/accelerate/utils/operations.py:810\u001b[0m, in \u001b[0;36mConvertOutputsToFp32.__call__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m    809\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__call__\u001b[39m(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[0;32m--> 810\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m convert_to_fp32(\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmodel_forward\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m)\n",
      "File \u001b[0;32m~/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages/torch/amp/autocast_mode.py:14\u001b[0m, in \u001b[0;36mautocast_decorator.<locals>.decorate_autocast\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m     11\u001b[0m \u001b[38;5;129m@functools\u001b[39m\u001b[38;5;241m.\u001b[39mwraps(func)\n\u001b[1;32m     12\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mdecorate_autocast\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m     13\u001b[0m     \u001b[38;5;28;01mwith\u001b[39;00m autocast_instance:\n\u001b[0;32m---> 14\u001b[0m         \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
      "File \u001b[0;32m~/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages/accelerate/utils/operations.py:822\u001b[0m, in \u001b[0;36mconvert_outputs_to_fp32.<locals>.forward\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m    821\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mforward\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[0;32m--> 822\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mmodel_forward\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
      "File \u001b[0;32m~/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages/accelerate/utils/operations.py:810\u001b[0m, in \u001b[0;36mConvertOutputsToFp32.__call__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m    809\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__call__\u001b[39m(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[0;32m--> 810\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m convert_to_fp32(\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmodel_forward\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m)\n",
      "File \u001b[0;32m~/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages/torch/amp/autocast_mode.py:14\u001b[0m, in \u001b[0;36mautocast_decorator.<locals>.decorate_autocast\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m     11\u001b[0m \u001b[38;5;129m@functools\u001b[39m\u001b[38;5;241m.\u001b[39mwraps(func)\n\u001b[1;32m     12\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mdecorate_autocast\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m     13\u001b[0m     \u001b[38;5;28;01mwith\u001b[39;00m autocast_instance:\n\u001b[0;32m---> 14\u001b[0m         \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
      "File \u001b[0;32m~/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages/peft/peft_model.py:1430\u001b[0m, in \u001b[0;36mPeftModelForCausalLM.forward\u001b[0;34m(self, input_ids, attention_mask, inputs_embeds, labels, output_attentions, output_hidden_states, return_dict, task_ids, **kwargs)\u001b[0m\n\u001b[1;32m   1428\u001b[0m     \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_enable_peft_forward_hooks(\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m   1429\u001b[0m         kwargs \u001b[38;5;241m=\u001b[39m {k: v \u001b[38;5;28;01mfor\u001b[39;00m k, v \u001b[38;5;129;01min\u001b[39;00m kwargs\u001b[38;5;241m.\u001b[39mitems() \u001b[38;5;28;01mif\u001b[39;00m k \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mspecial_peft_forward_args}\n\u001b[0;32m-> 1430\u001b[0m         \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbase_model\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m   1431\u001b[0m \u001b[43m            \u001b[49m\u001b[43minput_ids\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minput_ids\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1432\u001b[0m \u001b[43m            \u001b[49m\u001b[43mattention_mask\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mattention_mask\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1433\u001b[0m \u001b[43m            \u001b[49m\u001b[43minputs_embeds\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minputs_embeds\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1434\u001b[0m \u001b[43m            \u001b[49m\u001b[43mlabels\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlabels\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1435\u001b[0m \u001b[43m            
\u001b[49m\u001b[43moutput_attentions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moutput_attentions\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1436\u001b[0m \u001b[43m            \u001b[49m\u001b[43moutput_hidden_states\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moutput_hidden_states\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1437\u001b[0m \u001b[43m            \u001b[49m\u001b[43mreturn_dict\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mreturn_dict\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1438\u001b[0m \u001b[43m            \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1439\u001b[0m \u001b[43m        \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   1441\u001b[0m batch_size \u001b[38;5;241m=\u001b[39m _get_batch_size(input_ids, inputs_embeds)\n\u001b[1;32m   1442\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m attention_mask \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m   1443\u001b[0m     \u001b[38;5;66;03m# concat prompt attention mask\u001b[39;00m\n",
      "File \u001b[0;32m~/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages/torch/nn/modules/module.py:1194\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *input, **kwargs)\u001b[0m\n\u001b[1;32m   1190\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m   1191\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m   1192\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m   1193\u001b[0m         \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1194\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;28;43minput\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   1195\u001b[0m \u001b[38;5;66;03m# Do not call functions when jit is used\u001b[39;00m\n\u001b[1;32m   1196\u001b[0m full_backward_hooks, non_full_backward_hooks \u001b[38;5;241m=\u001b[39m [], []\n",
      "File \u001b[0;32m~/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages/peft/tuners/tuners_utils.py:179\u001b[0m, in \u001b[0;36mBaseTuner.forward\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m    178\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mforward\u001b[39m(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;241m*\u001b[39margs: Any, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs: Any):\n\u001b[0;32m--> 179\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmodel\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mforward\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
      "File \u001b[0;32m~/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages/accelerate/hooks.py:167\u001b[0m, in \u001b[0;36madd_hook_to_module.<locals>.new_forward\u001b[0;34m(module, *args, **kwargs)\u001b[0m\n\u001b[1;32m    165\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m    166\u001b[0m     output \u001b[38;5;241m=\u001b[39m module\u001b[38;5;241m.\u001b[39m_old_forward(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[0;32m--> 167\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mmodule\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_hf_hook\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpost_forward\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodule\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43moutput\u001b[49m\u001b[43m)\u001b[49m\n",
      "File \u001b[0;32m~/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages/accelerate/hooks.py:380\u001b[0m, in \u001b[0;36mAlignDevicesHook.post_forward\u001b[0;34m(self, module, output)\u001b[0m\n\u001b[1;32m    377\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtied_pointers_to_remove \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mset\u001b[39m()\n\u001b[1;32m    379\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mio_same_device \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39minput_device \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m--> 380\u001b[0m     output \u001b[38;5;241m=\u001b[39m \u001b[43msend_to_device\u001b[49m\u001b[43m(\u001b[49m\u001b[43moutput\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43minput_device\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mskip_keys\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mskip_keys\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    382\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m output\n",
      "File \u001b[0;32m~/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages/accelerate/utils/operations.py:186\u001b[0m, in \u001b[0;36msend_to_device\u001b[0;34m(tensor, device, non_blocking, skip_keys)\u001b[0m\n\u001b[1;32m    183\u001b[0m     \u001b[38;5;28;01melif\u001b[39;00m skip_keys \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m    184\u001b[0m         skip_keys \u001b[38;5;241m=\u001b[39m []\n\u001b[1;32m    185\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mtype\u001b[39m(tensor)(\n\u001b[0;32m--> 186\u001b[0m         {\n\u001b[1;32m    187\u001b[0m             k: t \u001b[38;5;28;01mif\u001b[39;00m k \u001b[38;5;129;01min\u001b[39;00m skip_keys \u001b[38;5;28;01melse\u001b[39;00m send_to_device(t, device, non_blocking\u001b[38;5;241m=\u001b[39mnon_blocking, skip_keys\u001b[38;5;241m=\u001b[39mskip_keys)\n\u001b[1;32m    188\u001b[0m             \u001b[38;5;28;01mfor\u001b[39;00m k, t \u001b[38;5;129;01min\u001b[39;00m tensor\u001b[38;5;241m.\u001b[39mitems()\n\u001b[1;32m    189\u001b[0m         }\n\u001b[1;32m    190\u001b[0m     )\n\u001b[1;32m    191\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m    192\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m tensor\n",
      "File \u001b[0;32m~/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages/accelerate/utils/operations.py:187\u001b[0m, in \u001b[0;36m<dictcomp>\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m    183\u001b[0m     \u001b[38;5;28;01melif\u001b[39;00m skip_keys \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m    184\u001b[0m         skip_keys \u001b[38;5;241m=\u001b[39m []\n\u001b[1;32m    185\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mtype\u001b[39m(tensor)(\n\u001b[1;32m    186\u001b[0m         {\n\u001b[0;32m--> 187\u001b[0m             k: t \u001b[38;5;28;01mif\u001b[39;00m k \u001b[38;5;129;01min\u001b[39;00m skip_keys \u001b[38;5;28;01melse\u001b[39;00m \u001b[43msend_to_device\u001b[49m\u001b[43m(\u001b[49m\u001b[43mt\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdevice\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mnon_blocking\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mnon_blocking\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mskip_keys\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mskip_keys\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    188\u001b[0m             \u001b[38;5;28;01mfor\u001b[39;00m k, t \u001b[38;5;129;01min\u001b[39;00m tensor\u001b[38;5;241m.\u001b[39mitems()\n\u001b[1;32m    189\u001b[0m         }\n\u001b[1;32m    190\u001b[0m     )\n\u001b[1;32m    191\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m    192\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m tensor\n",
      "File \u001b[0;32m~/anaconda3/envs/speech-BCI-new/lib/python3.10/site-packages/accelerate/utils/operations.py:158\u001b[0m, in \u001b[0;36msend_to_device\u001b[0;34m(tensor, device, non_blocking, skip_keys)\u001b[0m\n\u001b[1;32m    156\u001b[0m     tensor \u001b[38;5;241m=\u001b[39m tensor\u001b[38;5;241m.\u001b[39mcpu()\n\u001b[1;32m    157\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 158\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mtensor\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mto\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdevice\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mnon_blocking\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mnon_blocking\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    159\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m:  \u001b[38;5;66;03m# .to() doesn't accept non_blocking as kwarg\u001b[39;00m\n\u001b[1;32m    160\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m tensor\u001b[38;5;241m.\u001b[39mto(device)\n",
      "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
     ]
    }
   ],
   "source": [
    "trainer.train()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Trainer.push_to_hub() treats its first positional argument as the commit message,\n",
    "# not as a repo id, so the upload would go to the Trainer's default repo instead.\n",
    "# Push through the model so the target repo id is honoured.\n",
    "trainer.model.push_to_hub(\"AI-4-Health/HPP-FINETUNED-Meta-Llama-3-8B-Instruct\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}