ccasimiro committed
Commit 206d483
1 parent: 5d83116

upload model

README.md ADDED
File without changes
all_results.json ADDED
@@ -0,0 +1,23 @@
+ {
+     "epoch": 10.0,
+     "eval_combined_score": 0.7437504611247936,
+     "eval_loss": 0.33539897203445435,
+     "eval_pearson": 0.7484496954159015,
+     "eval_runtime": 10.2662,
+     "eval_samples": 500,
+     "eval_samples_per_second": 48.704,
+     "eval_spearmanr": 0.7390512268336858,
+     "eval_steps_per_second": 1.559,
+     "predict_combined_score": 0.8155956731804759,
+     "predict_loss": 0.2975914776325226,
+     "predict_pearson": 0.8120486139447483,
+     "predict_runtime": 10.2099,
+     "predict_samples_per_second": 48.972,
+     "predict_spearmanr": 0.8191427324162035,
+     "predict_steps_per_second": 1.567,
+     "train_loss": 0.17999618823711688,
+     "train_runtime": 1715.2897,
+     "train_samples": 2073,
+     "train_samples_per_second": 12.085,
+     "train_steps_per_second": 0.379
+ }
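Note: the eval_combined_score and predict_combined_score above are consistent with the arithmetic mean of the corresponding Pearson and Spearman correlations, e.g. (0.7484 + 0.7391) / 2 ≈ 0.7438. A minimal Python sketch that re-checks this from all_results.json (the check itself is illustrative and not part of the commit):

```python
import json

# Load the aggregated metrics added in this commit.
with open("all_results.json") as f:
    results = json.load(f)

# The combined score matches the mean of the Pearson and Spearman correlations.
combined = (results["eval_pearson"] + results["eval_spearmanr"]) / 2
assert abs(combined - results["eval_combined_score"]) < 1e-12
print(f"eval_combined_score = {combined:.4f}")  # ~0.7438
```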
config.json ADDED
@@ -0,0 +1,34 @@
+ {
+     "_name_or_path": "bsc/roberta-base-ca-cased",
+     "architectures": [
+         "RobertaForSequenceClassification"
+     ],
+     "attention_probs_dropout_prob": 0.1,
+     "bos_token_id": 0,
+     "eos_token_id": 2,
+     "gradient_checkpointing": false,
+     "hidden_act": "gelu",
+     "hidden_dropout_prob": 0.1,
+     "hidden_size": 768,
+     "id2label": {
+         "0": "LABEL_0"
+     },
+     "initializer_range": 0.02,
+     "intermediate_size": 3072,
+     "label2id": {
+         "LABEL_0": 0
+     },
+     "layer_norm_eps": 1e-05,
+     "max_position_embeddings": 514,
+     "model_type": "roberta",
+     "num_attention_heads": 12,
+     "num_hidden_layers": 12,
+     "pad_token_id": 1,
+     "position_embedding_type": "absolute",
+     "problem_type": "regression",
+     "torch_dtype": "float32",
+     "transformers_version": "4.9.1",
+     "type_vocab_size": 1,
+     "use_cache": true,
+     "vocab_size": 52000
+ }
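config.json describes a 12-layer RoBERTa-base encoder (hidden size 768, 12 attention heads, 52k vocabulary) with a single-label regression head, i.e. an STS scorer. A minimal usage sketch, assuming the files from this commit are available in a local directory (the directory path and the Catalan sentence pair are placeholders):

```python
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Placeholder path: point this at a local clone of the repository.
model_dir = "./roberta-base-ca-cased-sts"

tokenizer = AutoTokenizer.from_pretrained(model_dir)
model = AutoModelForSequenceClassification.from_pretrained(model_dir)  # regression head, 1 output

# Score a sentence pair; with problem_type "regression" the single logit is the similarity estimate.
inputs = tokenizer("El gat dorm al sofà.", "Un gat està dormint al sofà.",
                   return_tensors="pt", truncation=True)
with torch.no_grad():
    score = model(**inputs).logits.squeeze().item()
print(score)
```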
eval_results.json ADDED
@@ -0,0 +1,11 @@
+ {
+     "epoch": 10.0,
+     "eval_combined_score": 0.7437504611247936,
+     "eval_loss": 0.33539897203445435,
+     "eval_pearson": 0.7484496954159015,
+     "eval_runtime": 10.2662,
+     "eval_samples": 500,
+     "eval_samples_per_second": 48.704,
+     "eval_spearmanr": 0.7390512268336858,
+     "eval_steps_per_second": 1.559
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0cbf9473aa0ddba401a11d0da19747b16e4661908fc403b7073f4088743c5c2a
+ size 504004120
special_tokens_map.json ADDED
@@ -0,0 +1 @@
+ {"bos_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "eos_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "unk_token": {"content": "<unk>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "sep_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "pad_token": {"content": "<pad>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "cls_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}}
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1 @@
+ {"unk_token": {"content": "<unk>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "bos_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "eos_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "add_prefix_space": true, "errors": "replace", "sep_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "cls_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "pad_token": {"content": "<pad>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "max_len": 512, "special_tokens_map_file": null, "name_or_path": "bsc/roberta-base-ca-cased", "tokenizer_class": "RobertaTokenizer"}
train_results.json ADDED
@@ -0,0 +1,8 @@
+ {
+     "epoch": 10.0,
+     "train_loss": 0.17999618823711688,
+     "train_runtime": 1715.2897,
+     "train_samples": 2073,
+     "train_samples_per_second": 12.085,
+     "train_steps_per_second": 0.379
+ }
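Together with global_step and num_train_epochs in trainer_state.json below, these figures imply roughly 65 optimizer steps per epoch and an effective batch size of about 2073 / 65 ≈ 32. This is an inference from the logs, not a value stored in the commit:

```python
# Rough training schedule implied by the logged numbers (inferred, not stored in the commit).
train_samples = 2073   # from train_results.json
epochs = 10            # from train_results.json
total_steps = 650      # global_step / max_steps in trainer_state.json
steps_per_epoch = total_steps // epochs                        # 65
effective_batch_size = round(train_samples / steps_per_epoch)  # ~32
print(steps_per_epoch, effective_batch_size)
```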
trainer_state.json ADDED
@@ -0,0 +1,141 @@
+ {
+   "best_metric": 0.7437504611247936,
+   "best_model_checkpoint": "/home/ccasimiro/ccasimiro/berta/src/finetuning/sts/roberta-base-ca-cased-sts/checkpoint-455",
+   "epoch": 10.0,
+   "global_step": 650,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {
+       "epoch": 1.0,
+       "eval_combined_score": 0.6805577619623024,
+       "eval_loss": 0.4414205849170685,
+       "eval_pearson": 0.6667563227309266,
+       "eval_runtime": 10.4961,
+       "eval_samples_per_second": 47.637,
+       "eval_spearmanr": 0.6943592011936784,
+       "eval_steps_per_second": 1.524,
+       "step": 65
+     },
+     {
+       "epoch": 2.0,
+       "eval_combined_score": 0.7152235717947664,
+       "eval_loss": 0.3806273341178894,
+       "eval_pearson": 0.718255308294224,
+       "eval_runtime": 10.3114,
+       "eval_samples_per_second": 48.49,
+       "eval_spearmanr": 0.7121918352953086,
+       "eval_steps_per_second": 1.552,
+       "step": 130
+     },
+     {
+       "epoch": 3.0,
+       "eval_combined_score": 0.7127492106541832,
+       "eval_loss": 0.3876854479312897,
+       "eval_pearson": 0.7141017567917686,
+       "eval_runtime": 10.3161,
+       "eval_samples_per_second": 48.468,
+       "eval_spearmanr": 0.7113966645165978,
+       "eval_steps_per_second": 1.551,
+       "step": 195
+     },
+     {
+       "epoch": 4.0,
+       "eval_combined_score": 0.7300156062557066,
+       "eval_loss": 0.380470871925354,
+       "eval_pearson": 0.731967198769625,
+       "eval_runtime": 10.2905,
+       "eval_samples_per_second": 48.589,
+       "eval_spearmanr": 0.7280640137417881,
+       "eval_steps_per_second": 1.555,
+       "step": 260
+     },
+     {
+       "epoch": 5.0,
+       "eval_combined_score": 0.7302656118713525,
+       "eval_loss": 0.33833837509155273,
+       "eval_pearson": 0.7366071674449775,
+       "eval_runtime": 10.3827,
+       "eval_samples_per_second": 48.157,
+       "eval_spearmanr": 0.7239240562977276,
+       "eval_steps_per_second": 1.541,
+       "step": 325
+     },
+     {
+       "epoch": 6.0,
+       "eval_combined_score": 0.7325772109724351,
+       "eval_loss": 0.36641925573349,
+       "eval_pearson": 0.7349002365451928,
+       "eval_runtime": 10.2668,
+       "eval_samples_per_second": 48.701,
+       "eval_spearmanr": 0.7302541853996775,
+       "eval_steps_per_second": 1.558,
+       "step": 390
+     },
+     {
+       "epoch": 7.0,
+       "eval_combined_score": 0.7437504611247936,
+       "eval_loss": 0.33539897203445435,
+       "eval_pearson": 0.7484496954159015,
+       "eval_runtime": 10.2857,
+       "eval_samples_per_second": 48.611,
+       "eval_spearmanr": 0.7390512268336858,
+       "eval_steps_per_second": 1.556,
+       "step": 455
+     },
+     {
+       "epoch": 7.69,
+       "learning_rate": 1.153846153846154e-05,
+       "loss": 0.2244,
+       "step": 500
+     },
+     {
+       "epoch": 8.0,
+       "eval_combined_score": 0.7370831462161629,
+       "eval_loss": 0.3497055172920227,
+       "eval_pearson": 0.7429713239243328,
+       "eval_runtime": 10.3129,
+       "eval_samples_per_second": 48.483,
+       "eval_spearmanr": 0.731194968507993,
+       "eval_steps_per_second": 1.551,
+       "step": 520
+     },
+     {
+       "epoch": 9.0,
+       "eval_combined_score": 0.7429305671131216,
+       "eval_loss": 0.3452938497066498,
+       "eval_pearson": 0.7476867678468953,
+       "eval_runtime": 10.2732,
+       "eval_samples_per_second": 48.67,
+       "eval_spearmanr": 0.7381743663793479,
+       "eval_steps_per_second": 1.557,
+       "step": 585
+     },
+     {
+       "epoch": 10.0,
+       "eval_combined_score": 0.7410151955266198,
+       "eval_loss": 0.3426874279975891,
+       "eval_pearson": 0.7469684882191953,
+       "eval_runtime": 10.2706,
+       "eval_samples_per_second": 48.683,
+       "eval_spearmanr": 0.7350619028340443,
+       "eval_steps_per_second": 1.558,
+       "step": 650
+     },
+     {
+       "epoch": 10.0,
+       "step": 650,
+       "total_flos": 5454243205724160.0,
+       "train_loss": 0.17999618823711688,
+       "train_runtime": 1715.2897,
+       "train_samples_per_second": 12.085,
+       "train_steps_per_second": 0.379
+     }
+   ],
+   "max_steps": 650,
+   "num_train_epochs": 10,
+   "total_flos": 5454243205724160.0,
+   "trial_name": null,
+   "trial_params": null
+ }
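trainer_state.json records one evaluation per epoch plus a final training summary; the best combined score (0.7438) occurs at epoch 7, step 455, matching best_metric and best_model_checkpoint. A short sketch that extracts this from the file (illustrative only):

```python
import json

# Walk the per-epoch evaluation log recorded in trainer_state.json.
with open("trainer_state.json") as f:
    state = json.load(f)

evals = [e for e in state["log_history"] if "eval_combined_score" in e]
for e in evals:
    print(f"epoch {e['epoch']:>4}: combined={e['eval_combined_score']:.4f}")

best = max(evals, key=lambda e: e["eval_combined_score"])
print("best checkpoint:", state["best_model_checkpoint"], "at step", best["step"])
# Best combined score 0.7438 at epoch 7 (checkpoint-455), matching best_metric.
```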
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b082c17858e6b0b991611988c93aaae3cc138a67f2ed1507e280b40332e18502
+ size 2799
vocab.json ADDED
The diff for this file is too large to render. See raw diff