pszemraj commited on
Commit
e849018
1 Parent(s): 874e8e9

step 10800

Browse files
Files changed (2) hide show
  1. model.safetensors +1 -1
  2. trainer_state.json +600 -4
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1395c5c36e20564cc37941ccb71e32f9fbf1039cc0aa1f5550d04df07dcab4f0
3
  size 352324400
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c4cb33bf85bff9330bbdf1b1dc24b34597251a190543a6a00cf8fe1845f36985
3
  size 352324400
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.9605806103224412,
3
- "best_model_checkpoint": "checkpoints/BEE-spoke-data-bert-plus-L8-v1.0-allNLI_matryoshka-synthetic-text-similarity-Mar-07_22-56/checkpoint-9600",
4
- "epoch": 0.8361809045226131,
5
  "eval_steps": 300,
6
- "global_step": 10400,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -15179,6 +15179,602 @@
15179
  "learning_rate": 3.6397748592870546e-06,
15180
  "loss": 0.0043,
15181
  "step": 10400
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15182
  }
15183
  ],
15184
  "logging_steps": 5,
 
1
  {
2
+ "best_metric": 0.9642074088296352,
3
+ "best_model_checkpoint": "checkpoints/BEE-spoke-data-bert-plus-L8-v1.0-allNLI_matryoshka-synthetic-text-similarity-Mar-07_22-56/checkpoint-10800",
4
+ "epoch": 0.8683417085427135,
5
  "eval_steps": 300,
6
+ "global_step": 10800,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
15179
  "learning_rate": 3.6397748592870546e-06,
15180
  "loss": 0.0043,
15181
  "step": 10400
15182
+ },
15183
+ {
15184
+ "epoch": 0.84,
15185
+ "grad_norm": 0.4659731984138489,
15186
+ "learning_rate": 3.630840704011436e-06,
15187
+ "loss": 0.0064,
15188
+ "step": 10405
15189
+ },
15190
+ {
15191
+ "epoch": 0.84,
15192
+ "grad_norm": 0.8237358331680298,
15193
+ "learning_rate": 3.6219065487358176e-06,
15194
+ "loss": 0.0089,
15195
+ "step": 10410
15196
+ },
15197
+ {
15198
+ "epoch": 0.84,
15199
+ "grad_norm": 0.2651444971561432,
15200
+ "learning_rate": 3.6129723934601986e-06,
15201
+ "loss": 0.0088,
15202
+ "step": 10415
15203
+ },
15204
+ {
15205
+ "epoch": 0.84,
15206
+ "grad_norm": 0.2811392545700073,
15207
+ "learning_rate": 3.60403823818458e-06,
15208
+ "loss": 0.0045,
15209
+ "step": 10420
15210
+ },
15211
+ {
15212
+ "epoch": 0.84,
15213
+ "grad_norm": 0.26526081562042236,
15214
+ "learning_rate": 3.595104082908961e-06,
15215
+ "loss": 0.0077,
15216
+ "step": 10425
15217
+ },
15218
+ {
15219
+ "epoch": 0.84,
15220
+ "grad_norm": 0.31172770261764526,
15221
+ "learning_rate": 3.586169927633343e-06,
15222
+ "loss": 0.0066,
15223
+ "step": 10430
15224
+ },
15225
+ {
15226
+ "epoch": 0.84,
15227
+ "grad_norm": 0.24446207284927368,
15228
+ "learning_rate": 3.577235772357724e-06,
15229
+ "loss": 0.0095,
15230
+ "step": 10435
15231
+ },
15232
+ {
15233
+ "epoch": 0.84,
15234
+ "grad_norm": 0.36921826004981995,
15235
+ "learning_rate": 3.568301617082105e-06,
15236
+ "loss": 0.0092,
15237
+ "step": 10440
15238
+ },
15239
+ {
15240
+ "epoch": 0.84,
15241
+ "grad_norm": 0.37674766778945923,
15242
+ "learning_rate": 3.5593674618064865e-06,
15243
+ "loss": 0.0068,
15244
+ "step": 10445
15245
+ },
15246
+ {
15247
+ "epoch": 0.84,
15248
+ "grad_norm": 0.3865291178226471,
15249
+ "learning_rate": 3.5504333065308676e-06,
15250
+ "loss": 0.0101,
15251
+ "step": 10450
15252
+ },
15253
+ {
15254
+ "epoch": 0.84,
15255
+ "grad_norm": 0.27692753076553345,
15256
+ "learning_rate": 3.5414991512552494e-06,
15257
+ "loss": 0.0118,
15258
+ "step": 10455
15259
+ },
15260
+ {
15261
+ "epoch": 0.84,
15262
+ "grad_norm": 0.3851732611656189,
15263
+ "learning_rate": 3.5325649959796305e-06,
15264
+ "loss": 0.0064,
15265
+ "step": 10460
15266
+ },
15267
+ {
15268
+ "epoch": 0.84,
15269
+ "grad_norm": 0.3045642673969269,
15270
+ "learning_rate": 3.5236308407040115e-06,
15271
+ "loss": 0.006,
15272
+ "step": 10465
15273
+ },
15274
+ {
15275
+ "epoch": 0.84,
15276
+ "grad_norm": 0.33417442440986633,
15277
+ "learning_rate": 3.514696685428393e-06,
15278
+ "loss": 0.0053,
15279
+ "step": 10470
15280
+ },
15281
+ {
15282
+ "epoch": 0.84,
15283
+ "grad_norm": 0.25378555059432983,
15284
+ "learning_rate": 3.505762530152774e-06,
15285
+ "loss": 0.0054,
15286
+ "step": 10475
15287
+ },
15288
+ {
15289
+ "epoch": 0.84,
15290
+ "grad_norm": 0.30482515692710876,
15291
+ "learning_rate": 3.496828374877156e-06,
15292
+ "loss": 0.006,
15293
+ "step": 10480
15294
+ },
15295
+ {
15296
+ "epoch": 0.84,
15297
+ "grad_norm": 0.3786448836326599,
15298
+ "learning_rate": 3.487894219601537e-06,
15299
+ "loss": 0.0069,
15300
+ "step": 10485
15301
+ },
15302
+ {
15303
+ "epoch": 0.84,
15304
+ "grad_norm": 0.4568743407726288,
15305
+ "learning_rate": 3.478960064325918e-06,
15306
+ "loss": 0.007,
15307
+ "step": 10490
15308
+ },
15309
+ {
15310
+ "epoch": 0.84,
15311
+ "grad_norm": 0.4057961702346802,
15312
+ "learning_rate": 3.4700259090503e-06,
15313
+ "loss": 0.0072,
15314
+ "step": 10495
15315
+ },
15316
+ {
15317
+ "epoch": 0.84,
15318
+ "grad_norm": 0.4396592080593109,
15319
+ "learning_rate": 3.461091753774681e-06,
15320
+ "loss": 0.0068,
15321
+ "step": 10500
15322
+ },
15323
+ {
15324
+ "epoch": 0.84,
15325
+ "eval_loss": 0.0074067204259335995,
15326
+ "eval_pearson_cosine": 0.9673687223458171,
15327
+ "eval_pearson_dot": 0.9653915362220337,
15328
+ "eval_pearson_euclidean": 0.9564555475438925,
15329
+ "eval_pearson_manhattan": 0.9551556309301102,
15330
+ "eval_pearson_max": 0.9673687223458171,
15331
+ "eval_runtime": 426.0441,
15332
+ "eval_samples_per_second": 1.174,
15333
+ "eval_spearman_cosine": 0.9609645158580634,
15334
+ "eval_spearman_dot": 0.955628814515258,
15335
+ "eval_spearman_euclidean": 0.9613639734558937,
15336
+ "eval_spearman_manhattan": 0.9601420805683222,
15337
+ "eval_spearman_max": 0.9613639734558937,
15338
+ "eval_steps_per_second": 1.174,
15339
+ "step": 10500
15340
+ },
15341
+ {
15342
+ "epoch": 0.84,
15343
+ "grad_norm": 0.33185890316963196,
15344
+ "learning_rate": 3.4521575984990624e-06,
15345
+ "loss": 0.0108,
15346
+ "step": 10505
15347
+ },
15348
+ {
15349
+ "epoch": 0.85,
15350
+ "grad_norm": 0.42153021693229675,
15351
+ "learning_rate": 3.4432234432234434e-06,
15352
+ "loss": 0.0111,
15353
+ "step": 10510
15354
+ },
15355
+ {
15356
+ "epoch": 0.85,
15357
+ "grad_norm": 0.3933052122592926,
15358
+ "learning_rate": 3.4342892879478245e-06,
15359
+ "loss": 0.0053,
15360
+ "step": 10515
15361
+ },
15362
+ {
15363
+ "epoch": 0.85,
15364
+ "grad_norm": 0.37213289737701416,
15365
+ "learning_rate": 3.4253551326722063e-06,
15366
+ "loss": 0.0052,
15367
+ "step": 10520
15368
+ },
15369
+ {
15370
+ "epoch": 0.85,
15371
+ "grad_norm": 0.3832128643989563,
15372
+ "learning_rate": 3.4164209773965874e-06,
15373
+ "loss": 0.0083,
15374
+ "step": 10525
15375
+ },
15376
+ {
15377
+ "epoch": 0.85,
15378
+ "grad_norm": 0.2269567996263504,
15379
+ "learning_rate": 3.407486822120969e-06,
15380
+ "loss": 0.0046,
15381
+ "step": 10530
15382
+ },
15383
+ {
15384
+ "epoch": 0.85,
15385
+ "grad_norm": 0.3588921129703522,
15386
+ "learning_rate": 3.39855266684535e-06,
15387
+ "loss": 0.0085,
15388
+ "step": 10535
15389
+ },
15390
+ {
15391
+ "epoch": 0.85,
15392
+ "grad_norm": 0.2792292833328247,
15393
+ "learning_rate": 3.389618511569731e-06,
15394
+ "loss": 0.0064,
15395
+ "step": 10540
15396
+ },
15397
+ {
15398
+ "epoch": 0.85,
15399
+ "grad_norm": 0.30247360467910767,
15400
+ "learning_rate": 3.380684356294113e-06,
15401
+ "loss": 0.009,
15402
+ "step": 10545
15403
+ },
15404
+ {
15405
+ "epoch": 0.85,
15406
+ "grad_norm": 0.33265256881713867,
15407
+ "learning_rate": 3.371750201018494e-06,
15408
+ "loss": 0.0087,
15409
+ "step": 10550
15410
+ },
15411
+ {
15412
+ "epoch": 0.85,
15413
+ "grad_norm": 0.41412991285324097,
15414
+ "learning_rate": 3.3628160457428753e-06,
15415
+ "loss": 0.0059,
15416
+ "step": 10555
15417
+ },
15418
+ {
15419
+ "epoch": 0.85,
15420
+ "grad_norm": 0.31760454177856445,
15421
+ "learning_rate": 3.3538818904672563e-06,
15422
+ "loss": 0.0062,
15423
+ "step": 10560
15424
+ },
15425
+ {
15426
+ "epoch": 0.85,
15427
+ "grad_norm": 0.3177451193332672,
15428
+ "learning_rate": 3.3449477351916382e-06,
15429
+ "loss": 0.0072,
15430
+ "step": 10565
15431
+ },
15432
+ {
15433
+ "epoch": 0.85,
15434
+ "grad_norm": 0.45792147517204285,
15435
+ "learning_rate": 3.3360135799160193e-06,
15436
+ "loss": 0.0075,
15437
+ "step": 10570
15438
+ },
15439
+ {
15440
+ "epoch": 0.85,
15441
+ "grad_norm": 0.29566365480422974,
15442
+ "learning_rate": 3.3270794246404003e-06,
15443
+ "loss": 0.0059,
15444
+ "step": 10575
15445
+ },
15446
+ {
15447
+ "epoch": 0.85,
15448
+ "grad_norm": 0.32841619849205017,
15449
+ "learning_rate": 3.318145269364782e-06,
15450
+ "loss": 0.0108,
15451
+ "step": 10580
15452
+ },
15453
+ {
15454
+ "epoch": 0.85,
15455
+ "grad_norm": 0.341621458530426,
15456
+ "learning_rate": 3.3092111140891632e-06,
15457
+ "loss": 0.0128,
15458
+ "step": 10585
15459
+ },
15460
+ {
15461
+ "epoch": 0.85,
15462
+ "grad_norm": 0.423700213432312,
15463
+ "learning_rate": 3.3002769588135447e-06,
15464
+ "loss": 0.0075,
15465
+ "step": 10590
15466
+ },
15467
+ {
15468
+ "epoch": 0.85,
15469
+ "grad_norm": 0.3334985673427582,
15470
+ "learning_rate": 3.2913428035379257e-06,
15471
+ "loss": 0.0085,
15472
+ "step": 10595
15473
+ },
15474
+ {
15475
+ "epoch": 0.85,
15476
+ "grad_norm": 0.4427613317966461,
15477
+ "learning_rate": 3.2824086482623068e-06,
15478
+ "loss": 0.0098,
15479
+ "step": 10600
15480
+ },
15481
+ {
15482
+ "epoch": 0.85,
15483
+ "grad_norm": 0.510867714881897,
15484
+ "learning_rate": 3.2734744929866887e-06,
15485
+ "loss": 0.007,
15486
+ "step": 10605
15487
+ },
15488
+ {
15489
+ "epoch": 0.85,
15490
+ "grad_norm": 0.2945081293582916,
15491
+ "learning_rate": 3.2645403377110697e-06,
15492
+ "loss": 0.0095,
15493
+ "step": 10610
15494
+ },
15495
+ {
15496
+ "epoch": 0.85,
15497
+ "grad_norm": 0.3742575943470001,
15498
+ "learning_rate": 3.255606182435451e-06,
15499
+ "loss": 0.0052,
15500
+ "step": 10615
15501
+ },
15502
+ {
15503
+ "epoch": 0.85,
15504
+ "grad_norm": 0.3119674026966095,
15505
+ "learning_rate": 3.246672027159832e-06,
15506
+ "loss": 0.009,
15507
+ "step": 10620
15508
+ },
15509
+ {
15510
+ "epoch": 0.85,
15511
+ "grad_norm": 0.2969549894332886,
15512
+ "learning_rate": 3.2377378718842132e-06,
15513
+ "loss": 0.007,
15514
+ "step": 10625
15515
+ },
15516
+ {
15517
+ "epoch": 0.85,
15518
+ "grad_norm": 0.3154788315296173,
15519
+ "learning_rate": 3.228803716608595e-06,
15520
+ "loss": 0.0083,
15521
+ "step": 10630
15522
+ },
15523
+ {
15524
+ "epoch": 0.86,
15525
+ "grad_norm": 0.33745089173316956,
15526
+ "learning_rate": 3.219869561332976e-06,
15527
+ "loss": 0.0056,
15528
+ "step": 10635
15529
+ },
15530
+ {
15531
+ "epoch": 0.86,
15532
+ "grad_norm": 0.36266443133354187,
15533
+ "learning_rate": 3.2109354060573576e-06,
15534
+ "loss": 0.0087,
15535
+ "step": 10640
15536
+ },
15537
+ {
15538
+ "epoch": 0.86,
15539
+ "grad_norm": 0.40266337990760803,
15540
+ "learning_rate": 3.2020012507817387e-06,
15541
+ "loss": 0.0055,
15542
+ "step": 10645
15543
+ },
15544
+ {
15545
+ "epoch": 0.86,
15546
+ "grad_norm": 0.3595188558101654,
15547
+ "learning_rate": 3.19306709550612e-06,
15548
+ "loss": 0.0199,
15549
+ "step": 10650
15550
+ },
15551
+ {
15552
+ "epoch": 0.86,
15553
+ "grad_norm": 0.3521510064601898,
15554
+ "learning_rate": 3.1841329402305016e-06,
15555
+ "loss": 0.0082,
15556
+ "step": 10655
15557
+ },
15558
+ {
15559
+ "epoch": 0.86,
15560
+ "grad_norm": 0.3168518841266632,
15561
+ "learning_rate": 3.1751987849548826e-06,
15562
+ "loss": 0.009,
15563
+ "step": 10660
15564
+ },
15565
+ {
15566
+ "epoch": 0.86,
15567
+ "grad_norm": 0.4278966188430786,
15568
+ "learning_rate": 3.166264629679264e-06,
15569
+ "loss": 0.0075,
15570
+ "step": 10665
15571
+ },
15572
+ {
15573
+ "epoch": 0.86,
15574
+ "grad_norm": 0.379189670085907,
15575
+ "learning_rate": 3.1573304744036455e-06,
15576
+ "loss": 0.008,
15577
+ "step": 10670
15578
+ },
15579
+ {
15580
+ "epoch": 0.86,
15581
+ "grad_norm": 0.361432820558548,
15582
+ "learning_rate": 3.1483963191280266e-06,
15583
+ "loss": 0.0069,
15584
+ "step": 10675
15585
+ },
15586
+ {
15587
+ "epoch": 0.86,
15588
+ "grad_norm": 0.20192596316337585,
15589
+ "learning_rate": 3.139462163852408e-06,
15590
+ "loss": 0.0049,
15591
+ "step": 10680
15592
+ },
15593
+ {
15594
+ "epoch": 0.86,
15595
+ "grad_norm": 0.4057570695877075,
15596
+ "learning_rate": 3.130528008576789e-06,
15597
+ "loss": 0.0058,
15598
+ "step": 10685
15599
+ },
15600
+ {
15601
+ "epoch": 0.86,
15602
+ "grad_norm": 0.2621855139732361,
15603
+ "learning_rate": 3.121593853301171e-06,
15604
+ "loss": 0.007,
15605
+ "step": 10690
15606
+ },
15607
+ {
15608
+ "epoch": 0.86,
15609
+ "grad_norm": 0.4035142958164215,
15610
+ "learning_rate": 3.112659698025552e-06,
15611
+ "loss": 0.0077,
15612
+ "step": 10695
15613
+ },
15614
+ {
15615
+ "epoch": 0.86,
15616
+ "grad_norm": 0.35792276263237,
15617
+ "learning_rate": 3.103725542749933e-06,
15618
+ "loss": 0.0127,
15619
+ "step": 10700
15620
+ },
15621
+ {
15622
+ "epoch": 0.86,
15623
+ "grad_norm": 0.48556092381477356,
15624
+ "learning_rate": 3.0947913874743145e-06,
15625
+ "loss": 0.008,
15626
+ "step": 10705
15627
+ },
15628
+ {
15629
+ "epoch": 0.86,
15630
+ "grad_norm": 0.33445674180984497,
15631
+ "learning_rate": 3.0858572321986955e-06,
15632
+ "loss": 0.0059,
15633
+ "step": 10710
15634
+ },
15635
+ {
15636
+ "epoch": 0.86,
15637
+ "grad_norm": 0.4826265871524811,
15638
+ "learning_rate": 3.0769230769230774e-06,
15639
+ "loss": 0.0089,
15640
+ "step": 10715
15641
+ },
15642
+ {
15643
+ "epoch": 0.86,
15644
+ "grad_norm": 0.42223629355430603,
15645
+ "learning_rate": 3.0679889216474585e-06,
15646
+ "loss": 0.0066,
15647
+ "step": 10720
15648
+ },
15649
+ {
15650
+ "epoch": 0.86,
15651
+ "grad_norm": 0.45981764793395996,
15652
+ "learning_rate": 3.05905476637184e-06,
15653
+ "loss": 0.0085,
15654
+ "step": 10725
15655
+ },
15656
+ {
15657
+ "epoch": 0.86,
15658
+ "grad_norm": 0.2595252990722656,
15659
+ "learning_rate": 3.050120611096221e-06,
15660
+ "loss": 0.0063,
15661
+ "step": 10730
15662
+ },
15663
+ {
15664
+ "epoch": 0.86,
15665
+ "grad_norm": 0.5080291032791138,
15666
+ "learning_rate": 3.0411864558206024e-06,
15667
+ "loss": 0.0094,
15668
+ "step": 10735
15669
+ },
15670
+ {
15671
+ "epoch": 0.86,
15672
+ "grad_norm": 0.32294949889183044,
15673
+ "learning_rate": 3.032252300544984e-06,
15674
+ "loss": 0.0058,
15675
+ "step": 10740
15676
+ },
15677
+ {
15678
+ "epoch": 0.86,
15679
+ "grad_norm": 0.4228246808052063,
15680
+ "learning_rate": 3.023318145269365e-06,
15681
+ "loss": 0.0059,
15682
+ "step": 10745
15683
+ },
15684
+ {
15685
+ "epoch": 0.86,
15686
+ "grad_norm": 0.3359188139438629,
15687
+ "learning_rate": 3.0143839899937464e-06,
15688
+ "loss": 0.0062,
15689
+ "step": 10750
15690
+ },
15691
+ {
15692
+ "epoch": 0.86,
15693
+ "grad_norm": 0.2471199929714203,
15694
+ "learning_rate": 3.005449834718128e-06,
15695
+ "loss": 0.0066,
15696
+ "step": 10755
15697
+ },
15698
+ {
15699
+ "epoch": 0.87,
15700
+ "grad_norm": 0.8795719742774963,
15701
+ "learning_rate": 2.996515679442509e-06,
15702
+ "loss": 0.0129,
15703
+ "step": 10760
15704
+ },
15705
+ {
15706
+ "epoch": 0.87,
15707
+ "grad_norm": 1.3051950931549072,
15708
+ "learning_rate": 2.9875815241668904e-06,
15709
+ "loss": 0.0128,
15710
+ "step": 10765
15711
+ },
15712
+ {
15713
+ "epoch": 0.87,
15714
+ "grad_norm": 0.3568212389945984,
15715
+ "learning_rate": 2.9786473688912714e-06,
15716
+ "loss": 0.0061,
15717
+ "step": 10770
15718
+ },
15719
+ {
15720
+ "epoch": 0.87,
15721
+ "grad_norm": 0.3010600805282593,
15722
+ "learning_rate": 2.9697132136156533e-06,
15723
+ "loss": 0.0065,
15724
+ "step": 10775
15725
+ },
15726
+ {
15727
+ "epoch": 0.87,
15728
+ "grad_norm": 0.38424891233444214,
15729
+ "learning_rate": 2.9607790583400343e-06,
15730
+ "loss": 0.0078,
15731
+ "step": 10780
15732
+ },
15733
+ {
15734
+ "epoch": 0.87,
15735
+ "grad_norm": 0.309994101524353,
15736
+ "learning_rate": 2.9518449030644154e-06,
15737
+ "loss": 0.0086,
15738
+ "step": 10785
15739
+ },
15740
+ {
15741
+ "epoch": 0.87,
15742
+ "grad_norm": 0.37481045722961426,
15743
+ "learning_rate": 2.942910747788797e-06,
15744
+ "loss": 0.0122,
15745
+ "step": 10790
15746
+ },
15747
+ {
15748
+ "epoch": 0.87,
15749
+ "grad_norm": 0.432425856590271,
15750
+ "learning_rate": 2.933976592513178e-06,
15751
+ "loss": 0.0065,
15752
+ "step": 10795
15753
+ },
15754
+ {
15755
+ "epoch": 0.87,
15756
+ "grad_norm": 0.27843984961509705,
15757
+ "learning_rate": 2.9250424372375598e-06,
15758
+ "loss": 0.0076,
15759
+ "step": 10800
15760
+ },
15761
+ {
15762
+ "epoch": 0.87,
15763
+ "eval_loss": 0.007469375152140856,
15764
+ "eval_pearson_cosine": 0.9681370387245068,
15765
+ "eval_pearson_dot": 0.9657977526847813,
15766
+ "eval_pearson_euclidean": 0.957258084356729,
15767
+ "eval_pearson_manhattan": 0.956023768146456,
15768
+ "eval_pearson_max": 0.9681370387245068,
15769
+ "eval_runtime": 426.2283,
15770
+ "eval_samples_per_second": 1.173,
15771
+ "eval_spearman_cosine": 0.9642074088296352,
15772
+ "eval_spearman_dot": 0.9586846987387948,
15773
+ "eval_spearman_euclidean": 0.9637806871227486,
15774
+ "eval_spearman_manhattan": 0.9624153696614787,
15775
+ "eval_spearman_max": 0.9642074088296352,
15776
+ "eval_steps_per_second": 1.173,
15777
+ "step": 10800
15778
  }
15779
  ],
15780
  "logging_steps": 5,