{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 4.0,
  "eval_steps": 500,
  "global_step": 16,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.25,
      "grad_norm": 6.53125,
      "learning_rate": 0.0001,
      "loss": 6.4013,
      "step": 1
    },
    {
      "epoch": 1.25,
      "grad_norm": 3.84375,
      "learning_rate": 0.000178183148246803,
      "loss": 6.2911,
      "step": 5
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.765625,
      "learning_rate": 7.774790660436858e-05,
      "loss": 6.0613,
      "step": 10
    },
    {
      "epoch": 3.75,
      "grad_norm": 2.5625,
      "learning_rate": 2.5072087818176382e-06,
      "loss": 5.9747,
      "step": 15
    },
    {
      "epoch": 4.0,
      "step": 16,
      "total_flos": 535126081536000.0,
      "train_loss": 6.083620756864548,
      "train_runtime": 28.0153,
      "train_samples_per_second": 35.838,
      "train_steps_per_second": 0.571
    }
  ],
  "logging_steps": 5,
  "max_steps": 16,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 4,
  "save_steps": 100,
  "total_flos": 535126081536000.0,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}