MHGanainy commited on
Commit
96cec99
1 Parent(s): 893f595

Training in progress, epoch 1

Browse files
all_results.json CHANGED
@@ -1,23 +1,23 @@
1
  {
2
- "epoch": 4.0,
3
- "eval_loss": 0.41252821683883667,
4
- "eval_macro-f1": 0.029029793735676088,
5
- "eval_micro-f1": 0.16521739130434782,
6
- "eval_runtime": 0.9629,
7
- "eval_samples": 100,
8
- "eval_samples_per_second": 103.853,
9
- "eval_steps_per_second": 4.154,
10
- "predict_loss": 0.39896854758262634,
11
- "predict_macro-f1": 0.01948051948051948,
12
- "predict_micro-f1": 0.1095890410958904,
13
- "predict_runtime": 0.9583,
14
- "predict_samples": 100,
15
- "predict_samples_per_second": 104.354,
16
- "predict_steps_per_second": 4.174,
17
- "total_flos": 2512343535714304.0,
18
- "train_loss": 0.3421715199947357,
19
- "train_runtime": 21.559,
20
- "train_samples": 100,
21
- "train_samples_per_second": 92.769,
22
- "train_steps_per_second": 3.711
23
  }
 
1
  {
2
+ "epoch": 7.0,
3
+ "eval_loss": 0.17048782110214233,
4
+ "eval_macro-f1": 0.6382923768808028,
5
+ "eval_micro-f1": 0.6991622239146992,
6
+ "eval_runtime": 8.0772,
7
+ "eval_samples": 1000,
8
+ "eval_samples_per_second": 123.805,
9
+ "eval_steps_per_second": 3.962,
10
+ "predict_loss": 0.17293496429920197,
11
+ "predict_macro-f1": 0.6196372035945414,
12
+ "predict_micro-f1": 0.7044410413476263,
13
+ "predict_runtime": 8.1514,
14
+ "predict_samples": 1000,
15
+ "predict_samples_per_second": 122.678,
16
+ "predict_steps_per_second": 3.926,
17
+ "total_flos": 3.099603884499272e+17,
18
+ "train_loss": 0.1036064192396046,
19
+ "train_runtime": 1162.1568,
20
+ "train_samples": 9000,
21
+ "train_samples_per_second": 154.884,
22
+ "train_steps_per_second": 4.853
23
  }
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "MHGanainy/roberta-base-legal-multi",
3
  "architectures": [
4
  "RobertaForSequenceClassification"
5
  ],
 
1
  {
2
+ "_name_or_path": "roberta-base",
3
  "architectures": [
4
  "RobertaForSequenceClassification"
5
  ],
eval_results.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "epoch": 4.0,
3
- "eval_loss": 0.41252821683883667,
4
- "eval_macro-f1": 0.029029793735676088,
5
- "eval_micro-f1": 0.16521739130434782,
6
- "eval_runtime": 0.9629,
7
- "eval_samples": 100,
8
- "eval_samples_per_second": 103.853,
9
- "eval_steps_per_second": 4.154
10
  }
 
1
  {
2
+ "epoch": 7.0,
3
+ "eval_loss": 0.17048782110214233,
4
+ "eval_macro-f1": 0.6382923768808028,
5
+ "eval_micro-f1": 0.6991622239146992,
6
+ "eval_runtime": 8.0772,
7
+ "eval_samples": 1000,
8
+ "eval_samples_per_second": 123.805,
9
+ "eval_steps_per_second": 3.962
10
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4d7c8091f761aa3c28089731f10012011c36f00fab5143295d6870ddb95aeb37
3
  size 555550888
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b1bef6202866743eb9ff9f0eb39a9bb1b791b83ef1fe2ffdcae3439df470d8fd
3
  size 555550888
predict_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "predict_loss": 0.39896854758262634,
3
- "predict_macro-f1": 0.01948051948051948,
4
- "predict_micro-f1": 0.1095890410958904,
5
- "predict_runtime": 0.9583,
6
- "predict_samples": 100,
7
- "predict_samples_per_second": 104.354,
8
- "predict_steps_per_second": 4.174
9
  }
 
1
  {
2
+ "predict_loss": 0.17293496429920197,
3
+ "predict_macro-f1": 0.6196372035945414,
4
+ "predict_micro-f1": 0.7044410413476263,
5
+ "predict_runtime": 8.1514,
6
+ "predict_samples": 1000,
7
+ "predict_samples_per_second": 122.678,
8
+ "predict_steps_per_second": 3.926
9
  }
special_tokens_map.json CHANGED
@@ -1,25 +1,7 @@
1
  {
2
- "bos_token": {
3
- "content": "<s>",
4
- "lstrip": false,
5
- "normalized": true,
6
- "rstrip": false,
7
- "single_word": false
8
- },
9
- "cls_token": {
10
- "content": "<s>",
11
- "lstrip": false,
12
- "normalized": true,
13
- "rstrip": false,
14
- "single_word": false
15
- },
16
- "eos_token": {
17
- "content": "</s>",
18
- "lstrip": false,
19
- "normalized": true,
20
- "rstrip": false,
21
- "single_word": false
22
- },
23
  "mask_token": {
24
  "content": "<mask>",
25
  "lstrip": true,
@@ -27,25 +9,7 @@
27
  "rstrip": false,
28
  "single_word": false
29
  },
30
- "pad_token": {
31
- "content": "<pad>",
32
- "lstrip": false,
33
- "normalized": true,
34
- "rstrip": false,
35
- "single_word": false
36
- },
37
- "sep_token": {
38
- "content": "</s>",
39
- "lstrip": false,
40
- "normalized": true,
41
- "rstrip": false,
42
- "single_word": false
43
- },
44
- "unk_token": {
45
- "content": "<unk>",
46
- "lstrip": false,
47
- "normalized": true,
48
- "rstrip": false,
49
- "single_word": false
50
- }
51
  }
 
1
  {
2
+ "bos_token": "<s>",
3
+ "cls_token": "<s>",
4
+ "eos_token": "</s>",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  "mask_token": {
6
  "content": "<mask>",
7
  "lstrip": true,
 
9
  "rstrip": false,
10
  "single_word": false
11
  },
12
+ "pad_token": "<pad>",
13
+ "sep_token": "</s>",
14
+ "unk_token": "<unk>"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  }
test_predictions.csv CHANGED
The diff for this file is too large to render. See raw diff
 
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 4.0,
3
- "total_flos": 2512343535714304.0,
4
- "train_loss": 0.3421715199947357,
5
- "train_runtime": 21.559,
6
- "train_samples": 100,
7
- "train_samples_per_second": 92.769,
8
- "train_steps_per_second": 3.711
9
  }
 
1
  {
2
+ "epoch": 7.0,
3
+ "total_flos": 3.099603884499272e+17,
4
+ "train_loss": 0.1036064192396046,
5
+ "train_runtime": 1162.1568,
6
+ "train_samples": 9000,
7
+ "train_samples_per_second": 154.884,
8
+ "train_steps_per_second": 4.853
9
  }
trainer_state.json CHANGED
@@ -1,65 +1,116 @@
1
  {
2
- "best_metric": 0.16521739130434782,
3
- "best_model_checkpoint": "logs/ecthr_a/MHGanainy/roberta-base-legal-multi/seed_1/checkpoint-4",
4
- "epoch": 4.0,
5
  "eval_steps": 500,
6
- "global_step": 16,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "eval_loss": 0.41245660185813904,
14
- "eval_macro-f1": 0.029029793735676088,
15
- "eval_micro-f1": 0.16521739130434782,
16
- "eval_runtime": 1.0722,
17
- "eval_samples_per_second": 93.268,
18
- "eval_steps_per_second": 3.731,
19
- "step": 4
 
 
 
 
 
 
 
20
  },
21
  {
22
  "epoch": 2.0,
23
- "eval_loss": 0.3445906341075897,
24
- "eval_macro-f1": 0.029029793735676088,
25
- "eval_micro-f1": 0.16521739130434782,
26
- "eval_runtime": 1.4747,
27
- "eval_samples_per_second": 67.809,
28
- "eval_steps_per_second": 2.712,
29
- "step": 8
30
  },
31
  {
32
  "epoch": 3.0,
33
- "eval_loss": 0.3343995213508606,
34
- "eval_macro-f1": 0.029029793735676088,
35
- "eval_micro-f1": 0.16521739130434782,
36
- "eval_runtime": 1.6766,
37
- "eval_samples_per_second": 59.646,
38
- "eval_steps_per_second": 2.386,
39
- "step": 12
40
  },
41
  {
42
- "epoch": 4.0,
43
- "eval_loss": 0.333324134349823,
44
- "eval_macro-f1": 0.029029793735676088,
45
- "eval_micro-f1": 0.16521739130434782,
46
- "eval_runtime": 1.0723,
47
- "eval_samples_per_second": 93.253,
48
- "eval_steps_per_second": 3.73,
49
- "step": 16
50
  },
51
  {
52
  "epoch": 4.0,
53
- "step": 16,
54
- "total_flos": 2512343535714304.0,
55
- "train_loss": 0.3421715199947357,
56
- "train_runtime": 21.559,
57
- "train_samples_per_second": 92.769,
58
- "train_steps_per_second": 3.711
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
  }
60
  ],
61
  "logging_steps": 500,
62
- "max_steps": 80,
63
  "num_input_tokens_seen": 0,
64
  "num_train_epochs": 20,
65
  "save_steps": 500,
@@ -84,7 +135,7 @@
84
  "attributes": {}
85
  }
86
  },
87
- "total_flos": 2512343535714304.0,
88
  "train_batch_size": 16,
89
  "trial_name": null,
90
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.6991622239146992,
3
+ "best_model_checkpoint": "logs/ecthr_a/roberta-base/seed_1/checkpoint-1128",
4
+ "epoch": 7.0,
5
  "eval_steps": 500,
6
+ "global_step": 1974,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "eval_loss": 0.17883636057376862,
14
+ "eval_macro-f1": 0.5361152463526995,
15
+ "eval_micro-f1": 0.6690590111642744,
16
+ "eval_runtime": 8.3244,
17
+ "eval_samples_per_second": 120.128,
18
+ "eval_steps_per_second": 3.844,
19
+ "step": 282
20
+ },
21
+ {
22
+ "epoch": 1.773049645390071,
23
+ "grad_norm": 1.5047844648361206,
24
+ "learning_rate": 2.7340425531914897e-05,
25
+ "loss": 0.1598,
26
+ "step": 500
27
  },
28
  {
29
  "epoch": 2.0,
30
+ "eval_loss": 0.165725439786911,
31
+ "eval_macro-f1": 0.5864998053589437,
32
+ "eval_micro-f1": 0.6876456876456877,
33
+ "eval_runtime": 8.9929,
34
+ "eval_samples_per_second": 111.199,
35
+ "eval_steps_per_second": 3.558,
36
+ "step": 564
37
  },
38
  {
39
  "epoch": 3.0,
40
+ "eval_loss": 0.1847357153892517,
41
+ "eval_macro-f1": 0.619725405380703,
42
+ "eval_micro-f1": 0.6802973977695167,
43
+ "eval_runtime": 9.0294,
44
+ "eval_samples_per_second": 110.749,
45
+ "eval_steps_per_second": 3.544,
46
+ "step": 846
47
  },
48
  {
49
+ "epoch": 3.546099290780142,
50
+ "grad_norm": 1.266696572303772,
51
+ "learning_rate": 2.4680851063829786e-05,
52
+ "loss": 0.1038,
53
+ "step": 1000
 
 
 
54
  },
55
  {
56
  "epoch": 4.0,
57
+ "eval_loss": 0.17048540711402893,
58
+ "eval_macro-f1": 0.6382923768808028,
59
+ "eval_micro-f1": 0.6991622239146992,
60
+ "eval_runtime": 8.9765,
61
+ "eval_samples_per_second": 111.402,
62
+ "eval_steps_per_second": 3.565,
63
+ "step": 1128
64
+ },
65
+ {
66
+ "epoch": 5.0,
67
+ "eval_loss": 0.18128305673599243,
68
+ "eval_macro-f1": 0.6483583317279754,
69
+ "eval_micro-f1": 0.6948249619482496,
70
+ "eval_runtime": 8.4919,
71
+ "eval_samples_per_second": 117.76,
72
+ "eval_steps_per_second": 3.768,
73
+ "step": 1410
74
+ },
75
+ {
76
+ "epoch": 5.319148936170213,
77
+ "grad_norm": 1.974063754081726,
78
+ "learning_rate": 2.2026595744680854e-05,
79
+ "loss": 0.0835,
80
+ "step": 1500
81
+ },
82
+ {
83
+ "epoch": 6.0,
84
+ "eval_loss": 0.1945626437664032,
85
+ "eval_macro-f1": 0.6427139982243243,
86
+ "eval_micro-f1": 0.6928838951310862,
87
+ "eval_runtime": 8.3849,
88
+ "eval_samples_per_second": 119.262,
89
+ "eval_steps_per_second": 3.816,
90
+ "step": 1692
91
+ },
92
+ {
93
+ "epoch": 7.0,
94
+ "eval_loss": 0.20862846076488495,
95
+ "eval_macro-f1": 0.6248697036429669,
96
+ "eval_micro-f1": 0.6922798115259152,
97
+ "eval_runtime": 8.6109,
98
+ "eval_samples_per_second": 116.132,
99
+ "eval_steps_per_second": 3.716,
100
+ "step": 1974
101
+ },
102
+ {
103
+ "epoch": 7.0,
104
+ "step": 1974,
105
+ "total_flos": 3.099603884499272e+17,
106
+ "train_loss": 0.1036064192396046,
107
+ "train_runtime": 1162.1568,
108
+ "train_samples_per_second": 154.884,
109
+ "train_steps_per_second": 4.853
110
  }
111
  ],
112
  "logging_steps": 500,
113
+ "max_steps": 5640,
114
  "num_input_tokens_seen": 0,
115
  "num_train_epochs": 20,
116
  "save_steps": 500,
 
135
  "attributes": {}
136
  }
137
  },
138
+ "total_flos": 3.099603884499272e+17,
139
  "train_batch_size": 16,
140
  "trial_name": null,
141
  "trial_params": null
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b87e2f8ca7182c2e8e7414bb9ba5315ef8689f78814540dd7ecf7ffc055dc6c0
3
- size 5368
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:008de9f90937118ff4ab764fae92bf7cffb170775d5feb1b19e47bea4ef14eeb
3
+ size 5304