File size: 3,216 Bytes
8b6ab0a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
{
  "best_metric": 19.7665,
  "best_model_checkpoint": "bin/liputan6-pt-pl50/checkpoint-252",
  "epoch": 5.0,
  "eval_steps": 500,
  "global_step": 315,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.0,
      "grad_norm": 1.0756858587265015,
      "learning_rate": 0.0008,
      "loss": 4.7245,
      "step": 63
    },
    {
      "epoch": 1.0,
      "eval_gen_len": 30.652,
      "eval_loss": 3.9912047386169434,
      "eval_rouge1": 16.8276,
      "eval_rouge2": 3.6927,
      "eval_rougeL": 14.367,
      "eval_rougeLsum": 15.3151,
      "eval_runtime": 761.8947,
      "eval_samples_per_second": 1.313,
      "eval_steps_per_second": 0.042,
      "step": 63
    },
    {
      "epoch": 2.0,
      "grad_norm": 1.0456533432006836,
      "learning_rate": 0.0006,
      "loss": 3.9104,
      "step": 126
    },
    {
      "epoch": 2.0,
      "eval_gen_len": 35.104,
      "eval_loss": 3.8609139919281006,
      "eval_rouge1": 17.712,
      "eval_rouge2": 4.2061,
      "eval_rougeL": 14.9465,
      "eval_rougeLsum": 15.9818,
      "eval_runtime": 1158.8796,
      "eval_samples_per_second": 0.863,
      "eval_steps_per_second": 0.028,
      "step": 126
    },
    {
      "epoch": 3.0,
      "grad_norm": 1.1262755393981934,
      "learning_rate": 0.0004,
      "loss": 3.6651,
      "step": 189
    },
    {
      "epoch": 3.0,
      "eval_gen_len": 30.749,
      "eval_loss": 3.8036019802093506,
      "eval_rouge1": 18.8508,
      "eval_rouge2": 4.6943,
      "eval_rougeL": 15.8363,
      "eval_rougeLsum": 17.0134,
      "eval_runtime": 740.0114,
      "eval_samples_per_second": 1.351,
      "eval_steps_per_second": 0.043,
      "step": 189
    },
    {
      "epoch": 4.0,
      "grad_norm": 1.2107006311416626,
      "learning_rate": 0.0002,
      "loss": 3.4442,
      "step": 252
    },
    {
      "epoch": 4.0,
      "eval_gen_len": 28.31,
      "eval_loss": 3.7532596588134766,
      "eval_rouge1": 19.7665,
      "eval_rouge2": 5.1425,
      "eval_rougeL": 16.7615,
      "eval_rougeLsum": 18.1456,
      "eval_runtime": 520.3377,
      "eval_samples_per_second": 1.922,
      "eval_steps_per_second": 0.061,
      "step": 252
    },
    {
      "epoch": 5.0,
      "grad_norm": 1.1439872980117798,
      "learning_rate": 0.0,
      "loss": 3.2664,
      "step": 315
    },
    {
      "epoch": 5.0,
      "eval_gen_len": 29.142,
      "eval_loss": 3.7381248474121094,
      "eval_rouge1": 19.5385,
      "eval_rouge2": 5.1106,
      "eval_rougeL": 16.7601,
      "eval_rougeLsum": 17.9271,
      "eval_runtime": 664.3792,
      "eval_samples_per_second": 1.505,
      "eval_steps_per_second": 0.048,
      "step": 315
    },
    {
      "epoch": 5.0,
      "step": 315,
      "total_flos": 3877644533760000.0,
      "train_loss": 3.802120681036086,
      "train_runtime": 4251.699,
      "train_samples_per_second": 1.176,
      "train_steps_per_second": 0.074
    }
  ],
  "logging_steps": 500,
  "max_steps": 315,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 5,
  "save_steps": 500,
  "total_flos": 3877644533760000.0,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}