unza committed
Commit a55ff4d · parent: c2dcea1

End of training
all_results.json CHANGED
@@ -1,14 +1,14 @@
 {
     "epoch": 2.0,
-    "eval_loss": 0.2534329295158386,
-    "eval_runtime": 132.1673,
-    "eval_samples": 1549,
-    "eval_samples_per_second": 11.72,
-    "eval_steps_per_second": 1.468,
-    "eval_wer": 0.8469980632666236,
-    "train_loss": 2.1141159319460474,
-    "train_runtime": 3966.7132,
-    "train_samples": 11892,
-    "train_samples_per_second": 5.996,
-    "train_steps_per_second": 0.375
+    "eval_loss": 0.6070845127105713,
+    "eval_runtime": 277.1948,
+    "eval_samples": 1684,
+    "eval_samples_per_second": 6.075,
+    "eval_steps_per_second": 0.761,
+    "eval_wer": 0.9916864608076009,
+    "train_loss": 3.814326035825512,
+    "train_runtime": 2899.9163,
+    "train_samples": 5053,
+    "train_samples_per_second": 3.485,
+    "train_steps_per_second": 0.218
 }
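
The summary above is replaced wholesale, so the old run's numbers only survive in the diff. A minimal sketch of reading the new summary back and comparing it with the previous values, assuming all_results.json sits at the repository root (the old numbers are hard-coded here purely for illustration):

```python
import json

# Load the summary the Trainer wrote at the end of this run.
with open("all_results.json") as f:
    current = json.load(f)

# Previous values, copied from the left-hand side of the diff above.
previous = {"eval_loss": 0.2534329295158386, "eval_wer": 0.8469980632666236}

for key, old in previous.items():
    new = current[key]
    print(f"{key}: {old:.4f} -> {new:.4f} ({new - old:+.4f})")
```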
eval_results.json CHANGED
@@ -1,9 +1,9 @@
 {
     "epoch": 2.0,
-    "eval_loss": 0.2534329295158386,
-    "eval_runtime": 132.1673,
-    "eval_samples": 1549,
-    "eval_samples_per_second": 11.72,
-    "eval_steps_per_second": 1.468,
-    "eval_wer": 0.8469980632666236
+    "eval_loss": 0.6070845127105713,
+    "eval_runtime": 277.1948,
+    "eval_samples": 1684,
+    "eval_samples_per_second": 6.075,
+    "eval_steps_per_second": 0.761,
+    "eval_wer": 0.9916864608076009
 }
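
For context on the eval_wer field: word error rate is the word-level edit distance between the reference and the hypothesis transcript, normalised by the reference length, so a value near 0.99 as in the new run means almost every reference word needs a correction:

```latex
\mathrm{WER} = \frac{S + D + I}{N}
```

where $S$, $D$ and $I$ are the substitutions, deletions and insertions in the alignment and $N$ is the number of words in the reference.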
runs/Nov22_08-17-11_14e70937d4dd/events.out.tfevents.1669112076.14e70937d4dd.926.2 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f8e1dfc26fa3b815ae8a17d44372657af37d32dd4aed690329fc8b7833c6da2c
+size 358
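
The three added lines are a Git LFS pointer, not the TensorBoard event file itself; the binary is stored in LFS and materialised by `git lfs pull`. A small sketch of reading the pointer fields, assuming the file has not yet been smudged into the real object:

```python
from pathlib import Path

# Pointer file added in this commit (path as shown in the diff header).
pointer = Path(
    "runs/Nov22_08-17-11_14e70937d4dd/"
    "events.out.tfevents.1669112076.14e70937d4dd.926.2"
)

# A Git LFS pointer is plain text with "key value" pairs, one per line.
fields = dict(line.split(" ", 1) for line in pointer.read_text().splitlines() if line)
print(fields["oid"])   # sha256:f8e1dfc2...
print(fields["size"])  # 358 (bytes of the real event file)
```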
train_results.json CHANGED
@@ -1,8 +1,8 @@
 {
     "epoch": 2.0,
-    "train_loss": 2.1141159319460474,
-    "train_runtime": 3966.7132,
-    "train_samples": 11892,
-    "train_samples_per_second": 5.996,
-    "train_steps_per_second": 0.375
+    "train_loss": 3.814326035825512,
+    "train_runtime": 2899.9163,
+    "train_samples": 5053,
+    "train_samples_per_second": 3.485,
+    "train_steps_per_second": 0.218
 }
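
The throughput fields in the new run are internally consistent: 5053 samples over 2 epochs in 2899.9163 seconds implies the reported 3.485 samples per second. A quick arithmetic check (the Trainer's own bookkeeping may round slightly differently):

```python
# Reproduce train_samples_per_second from the other fields in the new file.
train_samples = 5053
num_epochs = 2.0
train_runtime = 2899.9163  # seconds

samples_per_second = train_samples * num_epochs / train_runtime
print(round(samples_per_second, 3))  # 3.485, matching the value in the diff
```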
trainer_state.json CHANGED
@@ -1,127 +1,70 @@
 {
-    "best_metric": 0.2534329295158386,
-    "best_model_checkpoint": "./xls-r-300m-bemba-fullset/checkpoint-1000",
-    "epoch": 1.9993275050437123,
-    "global_step": 1486,
+    "best_metric": 0.6070845127105713,
+    "best_model_checkpoint": "./xls-r-300m-bemba-fullset/checkpoint-500",
+    "epoch": 2.0,
+    "global_step": 632,
     "is_hyper_param_search": false,
     "is_local_process_zero": true,
     "is_world_process_zero": true,
     "log_history": [
         {
-            "epoch": 0.13,
-            "learning_rate": 1.47e-05,
-            "loss": 11.7638,
+            "epoch": 0.32,
+            "learning_rate": 1.9599999999999995e-05,
+            "loss": 11.9405,
             "step": 100
         },
         {
-            "epoch": 0.27,
-            "learning_rate": 2.97e-05,
-            "loss": 3.7497,
+            "epoch": 0.63,
+            "learning_rate": 3.96e-05,
+            "loss": 3.6915,
             "step": 200
         },
         {
-            "epoch": 0.4,
-            "learning_rate": 4.4699999999999996e-05,
-            "loss": 3.0287,
+            "epoch": 0.95,
+            "learning_rate": 5.959999999999999e-05,
+            "loss": 2.984,
             "step": 300
         },
         {
-            "epoch": 0.54,
-            "learning_rate": 5.97e-05,
-            "loss": 2.8222,
+            "epoch": 1.27,
+            "learning_rate": 7.959999999999998e-05,
+            "loss": 2.7834,
             "step": 400
         },
         {
-            "epoch": 0.67,
-            "learning_rate": 7.47e-05,
-            "loss": 2.268,
+            "epoch": 1.58,
+            "learning_rate": 9.96e-05,
+            "loss": 1.6564,
             "step": 500
         },
         {
-            "epoch": 0.67,
-            "eval_loss": 1.0355395078659058,
-            "eval_runtime": 140.0953,
-            "eval_samples_per_second": 11.057,
-            "eval_steps_per_second": 1.385,
-            "eval_wer": 0.9928986442866365,
+            "epoch": 1.58,
+            "eval_loss": 0.6070845127105713,
+            "eval_runtime": 282.4761,
+            "eval_samples_per_second": 5.962,
+            "eval_steps_per_second": 0.747,
+            "eval_wer": 0.9916864608076009,
             "step": 500
         },
         {
-            "epoch": 0.81,
-            "learning_rate": 8.969999999999998e-05,
-            "loss": 1.1597,
+            "epoch": 1.9,
+            "learning_rate": 0.0001196,
+            "loss": 0.8314,
             "step": 600
         },
-        {
-            "epoch": 0.94,
-            "learning_rate": 0.00010469999999999998,
-            "loss": 0.9172,
-            "step": 700
-        },
-        {
-            "epoch": 1.08,
-            "learning_rate": 0.0001197,
-            "loss": 0.8284,
-            "step": 800
-        },
-        {
-            "epoch": 1.21,
-            "learning_rate": 0.0001347,
-            "loss": 0.7667,
-            "step": 900
-        },
-        {
-            "epoch": 1.35,
-            "learning_rate": 0.00014969999999999998,
-            "loss": 0.7365,
-            "step": 1000
-        },
-        {
-            "epoch": 1.35,
-            "eval_loss": 0.2534329295158386,
-            "eval_runtime": 132.8836,
-            "eval_samples_per_second": 11.657,
-            "eval_steps_per_second": 1.46,
-            "eval_wer": 0.8469980632666236,
-            "step": 1000
-        },
-        {
-            "epoch": 1.48,
-            "learning_rate": 0.0001647,
-            "loss": 0.7227,
-            "step": 1100
-        },
-        {
-            "epoch": 1.61,
-            "learning_rate": 0.00017969999999999998,
-            "loss": 0.7068,
-            "step": 1200
-        },
-        {
-            "epoch": 1.75,
-            "learning_rate": 0.0001947,
-            "loss": 0.6875,
-            "step": 1300
-        },
-        {
-            "epoch": 1.88,
-            "learning_rate": 0.00020969999999999997,
-            "loss": 0.6798,
-            "step": 1400
-        },
         {
             "epoch": 2.0,
-            "step": 1486,
-            "total_flos": 4.4337322315078743e+18,
-            "train_loss": 2.1141159319460474,
-            "train_runtime": 3966.7132,
-            "train_samples_per_second": 5.996,
-            "train_steps_per_second": 0.375
+            "step": 632,
+            "total_flos": 2.9733883621978947e+18,
+            "train_loss": 3.814326035825512,
+            "train_runtime": 2899.9163,
+            "train_samples_per_second": 3.485,
+            "train_steps_per_second": 0.218
         }
     ],
-    "max_steps": 1486,
+    "max_steps": 632,
     "num_train_epochs": 2,
-    "total_flos": 4.4337322315078743e+18,
+    "total_flos": 2.9733883621978947e+18,
     "trial_name": null,
     "trial_params": null
 }
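
The log_history array above is what loss-curve tooling reads; a minimal sketch of extracting the training losses and evaluation points from it, assuming trainer_state.json at the repository root:

```python
import json

# Load the trainer state saved alongside the model checkpoint.
with open("trainer_state.json") as f:
    state = json.load(f)

# Training-loss records carry "loss"; evaluation records carry "eval_loss" instead.
for record in state["log_history"]:
    step = record["step"]
    if "loss" in record:
        print(f"step {step:>4}  loss {record['loss']:.4f}")
    elif "eval_loss" in record:
        print(f"step {step:>4}  eval_loss {record['eval_loss']:.4f}  "
              f"wer {record['eval_wer']:.4f}")
```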