File size: 2,071 Bytes
35fe4a2
4f14fea
35fe4a2
4f14fea
35fe4a2
 
 
 
 
 
 
4f14fea
35fe4a2
4f14fea
35fe4a2
 
 
4f14fea
35fe4a2
4f14fea
35fe4a2
 
 
4f14fea
 
 
 
 
 
35fe4a2
 
 
4f14fea
35fe4a2
4f14fea
35fe4a2
 
 
4f14fea
 
 
 
 
 
35fe4a2
 
 
4f14fea
35fe4a2
4f14fea
35fe4a2
 
 
4f14fea
 
 
 
 
 
35fe4a2
 
 
4f14fea
35fe4a2
4f14fea
35fe4a2
 
 
4f14fea
 
 
 
 
 
35fe4a2
 
 
 
 
4f14fea
35fe4a2
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
{
  "best_metric": 1.1014840602874756,
  "best_model_checkpoint": "/kaggle/output/checkpoint-4000",
  "epoch": 0.16297262059973924,
  "eval_steps": 1000,
  "global_step": 4000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0,
      "learning_rate": 2.7777777777777777e-11,
      "loss": 1.1383,
      "step": 1
    },
    {
      "epoch": 0.04,
      "learning_rate": 2.7750000000000004e-08,
      "loss": 1.1424,
      "step": 1000
    },
    {
      "epoch": 0.04,
      "eval_accuracy": 0.32375249500998005,
      "eval_loss": 1.1077626943588257,
      "eval_runtime": 54.8633,
      "eval_samples_per_second": 91.318,
      "eval_steps_per_second": 11.428,
      "step": 1000
    },
    {
      "epoch": 0.08,
      "learning_rate": 5.5527777777777784e-08,
      "loss": 1.1244,
      "step": 2000
    },
    {
      "epoch": 0.08,
      "eval_accuracy": 0.33652694610778444,
      "eval_loss": 1.1080161333084106,
      "eval_runtime": 54.7384,
      "eval_samples_per_second": 91.526,
      "eval_steps_per_second": 11.454,
      "step": 2000
    },
    {
      "epoch": 0.12,
      "learning_rate": 8.327777777777778e-08,
      "loss": 1.1228,
      "step": 3000
    },
    {
      "epoch": 0.12,
      "eval_accuracy": 0.34331337325349304,
      "eval_loss": 1.1084064245224,
      "eval_runtime": 54.7948,
      "eval_samples_per_second": 91.432,
      "eval_steps_per_second": 11.443,
      "step": 3000
    },
    {
      "epoch": 0.16,
      "learning_rate": 1.1105555555555557e-07,
      "loss": 1.1216,
      "step": 4000
    },
    {
      "epoch": 0.16,
      "eval_accuracy": 0.3385229540918164,
      "eval_loss": 1.1014840602874756,
      "eval_runtime": 54.8508,
      "eval_samples_per_second": 91.339,
      "eval_steps_per_second": 11.431,
      "step": 4000
    }
  ],
  "logging_steps": 1000,
  "max_steps": 10000000,
  "num_train_epochs": 408,
  "save_steps": 1000,
  "total_flos": 8361420521472000.0,
  "trial_name": null,
  "trial_params": null
}