Commit
·
d42f148
1
Parent(s):
e502568
Training in progress, step 500
Browse files- all_results.json +9 -9
- config.json +85 -4
- post-training eval_results.json +5 -5
- prediction_output.jsonl +1 -1
- pytorch_model.bin +2 -2
- train_results.json +4 -4
- trainer_state.json +186 -186
- training_args.bin +1 -1
all_results.json
CHANGED
@@ -1,14 +1,14 @@
|
|
1 |
{
|
2 |
"epoch": 5.0,
|
3 |
"eval_samples": 98,
|
4 |
-
"test_f1": 0.
|
5 |
-
"test_loss": 1.
|
6 |
-
"test_runtime":
|
7 |
-
"test_samples_per_second":
|
8 |
-
"test_steps_per_second":
|
9 |
-
"train_loss": 0.
|
10 |
-
"train_runtime":
|
11 |
"train_samples": 702,
|
12 |
-
"train_samples_per_second": 3.
|
13 |
-
"train_steps_per_second": 3.
|
14 |
}
|
|
|
1 |
{
|
2 |
"epoch": 5.0,
|
3 |
"eval_samples": 98,
|
4 |
+
"test_f1": 0.5474589844403075,
|
5 |
+
"test_loss": 1.7049692869186401,
|
6 |
+
"test_runtime": 3.311,
|
7 |
+
"test_samples_per_second": 29.598,
|
8 |
+
"test_steps_per_second": 29.598,
|
9 |
+
"train_loss": 0.6843088025041455,
|
10 |
+
"train_runtime": 956.7933,
|
11 |
"train_samples": 702,
|
12 |
+
"train_samples_per_second": 3.669,
|
13 |
+
"train_steps_per_second": 3.669
|
14 |
}
|
config.json
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
{
|
2 |
-
"_name_or_path": "
|
3 |
"architectures": [
|
4 |
"SentenceClassificationModel"
|
5 |
],
|
@@ -10,6 +10,87 @@
|
|
10 |
"pooling_method": "attention"
|
11 |
},
|
12 |
"classifier_dropout": null,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
"context_layer": "transformer",
|
14 |
"eos_token_id": 2,
|
15 |
"frozen_layers": [
|
@@ -31,10 +112,10 @@
|
|
31 |
"initializer_range": 0.02,
|
32 |
"intermediate_size": 3072,
|
33 |
"layer_norm_eps": 1e-05,
|
34 |
-
"max_position_embeddings":
|
35 |
"model_type": "roberta",
|
36 |
-
"num_attention_heads":
|
37 |
-
"num_hidden_layers":
|
38 |
"pad_token_id": 1,
|
39 |
"position_embedding_type": "absolute",
|
40 |
"torch_dtype": "float32",
|
|
|
1 |
{
|
2 |
+
"_name_or_path": "roberta-base",
|
3 |
"architectures": [
|
4 |
"SentenceClassificationModel"
|
5 |
],
|
|
|
10 |
"pooling_method": "attention"
|
11 |
},
|
12 |
"classifier_dropout": null,
|
13 |
+
"context_config": {
|
14 |
+
"_name_or_path": "roberta-base",
|
15 |
+
"add_cross_attention": false,
|
16 |
+
"architectures": [
|
17 |
+
"RobertaForMaskedLM"
|
18 |
+
],
|
19 |
+
"attention_probs_dropout_prob": 0.1,
|
20 |
+
"bad_words_ids": null,
|
21 |
+
"begin_suppress_tokens": null,
|
22 |
+
"bos_token_id": 0,
|
23 |
+
"chunk_size_feed_forward": 0,
|
24 |
+
"classifier_dropout": null,
|
25 |
+
"cross_attention_hidden_size": null,
|
26 |
+
"decoder_start_token_id": null,
|
27 |
+
"diversity_penalty": 0.0,
|
28 |
+
"do_sample": false,
|
29 |
+
"early_stopping": false,
|
30 |
+
"encoder_no_repeat_ngram_size": 0,
|
31 |
+
"eos_token_id": 2,
|
32 |
+
"exponential_decay_length_penalty": null,
|
33 |
+
"finetuning_task": null,
|
34 |
+
"forced_bos_token_id": null,
|
35 |
+
"forced_eos_token_id": null,
|
36 |
+
"hidden_act": "gelu",
|
37 |
+
"hidden_dropout_prob": 0.1,
|
38 |
+
"hidden_size": 768,
|
39 |
+
"id2label": {
|
40 |
+
"0": "LABEL_0",
|
41 |
+
"1": "LABEL_1"
|
42 |
+
},
|
43 |
+
"initializer_range": 0.02,
|
44 |
+
"intermediate_size": 3072,
|
45 |
+
"is_decoder": false,
|
46 |
+
"is_encoder_decoder": false,
|
47 |
+
"label2id": {
|
48 |
+
"LABEL_0": 0,
|
49 |
+
"LABEL_1": 1
|
50 |
+
},
|
51 |
+
"layer_norm_eps": 1e-05,
|
52 |
+
"length_penalty": 1.0,
|
53 |
+
"max_length": 20,
|
54 |
+
"max_position_embeddings": 120,
|
55 |
+
"min_length": 0,
|
56 |
+
"model_type": "roberta",
|
57 |
+
"no_repeat_ngram_size": 0,
|
58 |
+
"num_attention_heads": 2,
|
59 |
+
"num_beam_groups": 1,
|
60 |
+
"num_beams": 1,
|
61 |
+
"num_hidden_layers": 2,
|
62 |
+
"num_return_sequences": 1,
|
63 |
+
"output_attentions": false,
|
64 |
+
"output_hidden_states": false,
|
65 |
+
"output_scores": false,
|
66 |
+
"pad_token_id": 1,
|
67 |
+
"position_embedding_type": "absolute",
|
68 |
+
"prefix": null,
|
69 |
+
"problem_type": null,
|
70 |
+
"pruned_heads": {},
|
71 |
+
"remove_invalid_values": false,
|
72 |
+
"repetition_penalty": 1.0,
|
73 |
+
"return_dict": true,
|
74 |
+
"return_dict_in_generate": false,
|
75 |
+
"sep_token_id": null,
|
76 |
+
"suppress_tokens": null,
|
77 |
+
"task_specific_params": null,
|
78 |
+
"temperature": 1.0,
|
79 |
+
"tf_legacy_loss": false,
|
80 |
+
"tie_encoder_decoder": false,
|
81 |
+
"tie_word_embeddings": true,
|
82 |
+
"tokenizer_class": null,
|
83 |
+
"top_k": 50,
|
84 |
+
"top_p": 1.0,
|
85 |
+
"torch_dtype": null,
|
86 |
+
"torchscript": false,
|
87 |
+
"transformers_version": "4.30.2",
|
88 |
+
"type_vocab_size": 1,
|
89 |
+
"typical_p": 1.0,
|
90 |
+
"use_bfloat16": false,
|
91 |
+
"use_cache": true,
|
92 |
+
"vocab_size": 50265
|
93 |
+
},
|
94 |
"context_layer": "transformer",
|
95 |
"eos_token_id": 2,
|
96 |
"frozen_layers": [
|
|
|
112 |
"initializer_range": 0.02,
|
113 |
"intermediate_size": 3072,
|
114 |
"layer_norm_eps": 1e-05,
|
115 |
+
"max_position_embeddings": 514,
|
116 |
"model_type": "roberta",
|
117 |
+
"num_attention_heads": 12,
|
118 |
+
"num_hidden_layers": 12,
|
119 |
"pad_token_id": 1,
|
120 |
"position_embedding_type": "absolute",
|
121 |
"torch_dtype": "float32",
|
post-training eval_results.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"eval_samples": 98,
|
3 |
-
"test_f1": 0.
|
4 |
-
"test_loss": 1.
|
5 |
-
"test_runtime":
|
6 |
-
"test_samples_per_second":
|
7 |
-
"test_steps_per_second":
|
8 |
}
|
|
|
1 |
{
|
2 |
"eval_samples": 98,
|
3 |
+
"test_f1": 0.5474589844403075,
|
4 |
+
"test_loss": 1.7049692869186401,
|
5 |
+
"test_runtime": 3.311,
|
6 |
+
"test_samples_per_second": 29.598,
|
7 |
+
"test_steps_per_second": 29.598
|
8 |
}
|
prediction_output.jsonl
CHANGED
@@ -1 +1 @@
|
|
1 |
-
[{"pred": 10.019140243530273, "label": 0.0}, {"pred": 0.6532726287841797, "label": 5.0}, {"pred": -1.5070127248764038, "label": 2.0}, {"pred": 0.24536748230457306, "label": 4.0}, {"pred": -2.25246524810791, "label": 2.0}, {"pred": -2.66007924079895, "label": 8.0}, {"pred": -1.6002302169799805, "label": 8.0}, {"pred": -2.7276155948638916, "label": 2.0}, {"pred": -1.574631690979004, "label": 8.0}, {"pred": -0.33556482195854187, "label": 2.0}, {"pred": -3.09733247756958, "label": 2.0}, {"pred": 0.4764690697193146, "label": 2.0}, {"pred": 6.7093048095703125, "label": 5.0}, {"pred": -2.442629337310791, "label": 5.0}, {"pred": 2.1584315299987793, "label": 5.0}, {"pred": -1.3713687658309937, "label": 5.0}, {"pred": 0.45947328209877014, "label": 5.0}, {"pred": -3.8779995441436768, "label": 6.0}, {"pred": 0.47429022192955017, "label": 6.0}, {"pred": 1.6603984832763672, "label": 5.0}, {"pred": 8.309249877929688, "label": 8.0}, {"pred": 3.2501542568206787, "label": 8.0}, {"pred": -2.717172384262085, "label": 2.0}, {"pred": -0.39201176166534424, "label": 5.0}, {"pred": -4.595953464508057, "label": 5.0}, {"pred": -2.235063076019287, "label": 5.0}, {"pred": -2.44830584526062, "label": 2.0}, {"pred": -0.0069770533591508865, "label": 2.0}, {"pred": 0.969771146774292, "label": 2.0}, {"pred": 7.553024768829346, "label": 3.0}, {"pred": 2.6176557540893555, "label": 3.0}, {"pred": 0.691173255443573, "label": 5.0}, {"pred": 0.06238013133406639, "label": 0.0}, {"pred": -4.164968013763428, "label": 2.0}, {"pred": -3.2204692363739014, "label": 3.0}, {"pred": -3.3176817893981934, "label": 3.0}, {"pred": -2.7758054733276367, "label": 3.0}, {"pred": -0.1663847714662552, "label": 7.0}, {"pred": 9.606531143188477, "label": 8.0}, {"pred": -1.0946831703186035, "label": 7.0}, {"pred": -0.14866292476654053, "label": 7.0}, {"pred": 0.30310508608818054, "label": 2.0}, {"pred": -3.1653261184692383, "label": 2.0}, {"pred": -1.565669298171997, "label": 3.0}, {"pred": -0.18176725506782532, "label": 8.0}, {"pred": -1.1957494020462036, "label": 0.0}, {"pred": -1.6685035228729248, "label": 2.0}, {"pred": -0.043660327792167664, "label": 2.0}, {"pred": -1.7757004499435425, "label": 2.0}, {"pred": -1.4604750871658325, "label": 2.0}, {"pred": 1.0374609231948853, "label": 2.0}, {"pred": -1.8439505100250244, "label": 2.0}, {"pred": -0.7517821788787842, "label": 0.0}, {"pred": 8.289470672607422, "label": 7.0}, {"pred": -1.8440061807632446, "label": 7.0}, {"pred": -1.2976378202438354, "label": 7.0}, {"pred": -0.34481102228164673, "label": 7.0}, {"pred": -2.668887138366699, "label": 8.0}, {"pred": -1.2527223825454712, "label": 8.0}, {"pred": -0.075716033577919, "label": 8.0}, {"pred": -1.3003135919570923, "label": 3.0}, {"pred": -1.1259794235229492, "label": 6.0}, {"pred": 10.510120391845703, "label": 0.0}, {"pred": 2.0237245559692383, "label": 2.0}, {"pred": 1.3917936086654663, "label": 3.0}, {"pred": 6.7284111976623535, "label": 7.0}, {"pred": 4.734694957733154, "label": 0.0}, {"pred": -3.3061137199401855, "label": 5.0}, {"pred": -1.8624444007873535, "label": 2.0}, {"pred": -4.001344203948975, "label": 2.0}, {"pred": -1.8468009233474731, "label": 7.0}, {"pred": -3.2456326484680176, "label": 2.0}, {"pred": 0.39871928095817566, "label": 2.0}, {"pred": -2.057805299758911, "label": 2.0}, {"pred": 0.13218283653259277, "label": 8.0}, {"pred": -0.892180323600769, "label": 8.0}, {"pred": -1.2932769060134888, "label": 3.0}, {"pred": 0.1954576075077057, "label": 0.0}, {"pred": -2.229828357696533, "label": 0.0}, {"pred": -2.0269217491149902, "label": 2.0}, {"pred": 8.243239402770996, "label": 2.0}, {"pred": -2.59175968170166, "label": 2.0}, {"pred": 0.8506417870521545, "label": 8.0}, {"pred": 9.682441711425781, "label": 6.0}, {"pred": -1.4374347925186157, "label": 2.0}, {"pred": 0.009745392948389053, "label": 2.0}, {"pred": -0.4231429994106293, "label": 8.0}, {"pred": -2.4760589599609375, "label": 0.0}, {"pred": -1.4600414037704468, "label": 2.0}, {"pred": -0.09000376611948013, "label": 3.0}, {"pred": -3.15801739692688, "label": 3.0}, {"pred": -1.641605257987976, "label": 2.0}, {"pred": 1.6113959550857544, "label": 6.0}, {"pred": 1.35031259059906, "label": 0.0}, {"pred": 1.4748491048812866, "label": 0.0}, {"pred": 8.973405838012695, "label": 5.0}, {"pred": -0.44036799669265747, "label": 5.0}, {"pred": -2.8449594974517822, "label": 5.0}, {"pred": -2.715689182281494, "label": 5.0}, {"pred": -0.5715784430503845, "label": 5.0}, {"pred": -1.1643404960632324, "label": 2.0}, {"pred": 2.7366137504577637, "label": 2.0}, {"pred": 4.114213466644287, "label": 2.0}, {"pred": -1.6327688694000244, "label": 5.0}, {"pred": 3.206129312515259, "label": 4.0}, {"pred": 1.428622841835022, "label": 8.0}, {"pred": -1.9255108833312988, "label": 4.0}, {"pred": -5.721708297729492, "label": 4.0}, {"pred": -1.8454394340515137, "label": 4.0}, {"pred": -0.906534731388092, "label": 8.0}, {"pred": 0.5136074423789978, "label": 2.0}, {"pred": 0.31771302223205566, "label": 3.0}, {"pred": 6.170832633972168, "label": 2.0}, {"pred": 5.061820983886719, "label": 5.0}, {"pred": -2.271714448928833, "label": 5.0}, {"pred": -1.850223422050476, "label": 5.0}, {"pred": -3.501612901687622, "label": 0.0}, {"pred": -2.0070955753326416, "label": 6.0}, {"pred": -1.9781445264816284, "label": 5.0}, {"pred": -1.6112139225006104, "label": 5.0}, {"pred": -1.830712914466858, "label": 5.0}, {"pred": -0.9544073939323425, "label": 2.0}, {"pred": 3.8555877208709717, "label": 2.0}, {"pred": 4.870663166046143, "label": 2.0}, {"pred": -1.867636799812317, "label": 2.0}, {"pred": 1.9673343896865845, "label": 2.0}, {"pred": -0.6791107058525085, "label": 2.0}, {"pred": -1.7167493104934692, "label": 5.0}, {"pred": -2.843881368637085, "label": 5.0}, {"pred": -1.1429674625396729, "label": 6.0}, {"pred": 2.594322443008423, "label": 6.0}, {"pred": 7.284641265869141, "label": 0.0}, {"pred": 2.655223846435547, "label": 2.0}, {"pred": -2.549745798110962, "label": 2.0}, {"pred": -3.192248821258545, "label": 6.0}, {"pred": 0.08405379951000214, "label": 2.0}, {"pred": -3.116849184036255, "label": 5.0}, {"pred": -0.5237486958503723, "label": 5.0}, {"pred": 2.366180419921875, "label": 6.0}, {"pred": -1.2737070322036743, "label": 2.0}, {"pred": 2.989075183868408, "label": 5.0}, {"pred": 5.9830098152160645, "label": 2.0}, {"pred": -2.1102664470672607, "label": 8.0}, {"pred": -3.5376365184783936, "label": 2.0}, {"pred": -0.9231218099594116, "label": 2.0}, {"pred": -2.3496975898742676, "label": 5.0}, {"pred": -0.7576992511749268, "label": 8.0}, {"pred": 0.5094679594039917, "label": 5.0}, {"pred": -0.6562840342521667, "label": 5.0}, {"pred": 2.5642170906066895, "label": 5.0}, {"pred": 7.409799575805664, "label": 5.0}, {"pred": -2.4087905883789062, "label": 5.0}, {"pred": -2.6217246055603027, "label": 6.0}, {"pred": -0.6317837834358215, "label": 8.0}, {"pred": 0.762174665927887, "label": 6.0}, {"pred": 0.018339872360229492, "label": 7.0}, {"pred": -0.6353716254234314, "label": 6.0}, {"pred": -2.467911958694458, "label": 8.0}, {"pred": -1.4698598384857178, "label": 0.0}, {"pred": 6.675502777099609, "label": 2.0}, {"pred": -1.6566411256790161, "label": 6.0}, {"pred": -1.6938785314559937, "label": 5.0}, {"pred": 2.345128297805786, "label": 5.0}, {"pred": -0.5916361808776855, "label": 2.0}, {"pred": -3.186824083328247, "label": 2.0}, {"pred": -0.9120825529098511, "label": 8.0}, {"pred": -0.8187981247901917, "label": 8.0}, {"pred": -0.8101270198822021, "label": 0.0}, {"pred": 7.5780930519104, "label": 2.0}, {"pred": -2.928427219390869, "label": 0.0}, {"pred": -1.3859901428222656, "label": 7.0}, {"pred": 3.0276236534118652, "label": 2.0}, {"pred": -1.8224083185195923, "label": 2.0}, {"pred": -4.104769706726074, "label": 8.0}, {"pred": 0.03777565062046051, "label": 6.0}, {"pred": -0.014997448772192001, "label": 6.0}, {"pred": 1.0839900970458984, "label": 5.0}, {"pred": 6.335562229156494, "label": 5.0}, {"pred": -3.4330081939697266, "label": 5.0}, {"pred": -2.156536817550659, "label": 5.0}, {"pred": -1.3603081703186035, "label": 5.0}, {"pred": -1.5407929420471191, "label": 5.0}, {"pred": -0.07066483795642853, "label": 5.0}, {"pred": -1.6604976654052734, "label": 6.0}, {"pred": -1.440683364868164, "label": 8.0}, {"pred": 0.9588245153427124, "label": 5.0}, {"pred": -1.7472809553146362, "label": 5.0}, {"pred": -1.087075114250183, "label": 3.0}, {"pred": 8.69987678527832, "label": 3.0}, {"pred": -1.4591203927993774, "label": 6.0}, {"pred": -1.2280433177947998, "label": 2.0}, {"pred": -0.5524106621742249, "label": 3.0}, {"pred": -2.392406463623047, "label": 2.0}, {"pred": -1.3709814548492432, "label": 2.0}, {"pred": -0.1875690072774887, "label": 2.0}, {"pred": -1.5981941223144531, "label": 0.0}, {"pred": -1.1282113790512085, "label": 2.0}, {"pred": 10.509876251220703, "label": 5.0}, {"pred": -0.5587414503097534, "label": 5.0}, {"pred": -0.11731800436973572, "label": 5.0}, {"pred": 8.627680778503418, "label": 2.0}, {"pred": 4.010296821594238, "label": 6.0}, {"pred": -3.5236639976501465, "label": 3.0}, {"pred": -2.157024383544922, "label": 3.0}, {"pred": -1.5002130270004272, "label": 6.0}, {"pred": -1.053414225578308, "label": 3.0}, {"pred": -3.027621030807495, "label": 2.0}, {"pred": 3.2739715576171875, "label": 8.0}, {"pred": -2.5486247539520264, "label": 8.0}, {"pred": -3.9105663299560547, "label": 0.0}, {"pred": 0.9814618825912476, "label": 1.0}, {"pred": -0.46406108140945435, "label": 2.0}, {"pred": 2.880763053894043, "label": 3.0}, {"pred": 5.093158721923828, "label": 6.0}, {"pred": -3.3170182704925537, "label": 8.0}, {"pred": -2.14131498336792, "label": 7.0}, {"pred": -1.5651869773864746, "label": 0.0}, {"pred": 0.6249361038208008, "label": 8.0}, {"pred": 3.9404008388519287, "label": 2.0}, {"pred": 2.718013048171997, "label": 2.0}, {"pred": -2.9450290203094482, "label": 5.0}, {"pred": 6.749814033508301, "label": 5.0}, {"pred": -1.64478600025177, "label": 7.0}, {"pred": -4.171202659606934, "label": 2.0}, {"pred": -3.1550910472869873, "label": 2.0}, {"pred": -2.1020801067352295, "label": 8.0}, {"pred": -1.6797502040863037, "label": 5.0}, {"pred": 6.253066062927246, "label": 5.0}, {"pred": 4.325278282165527, "label": 2.0}, {"pred": -3.1421334743499756, "label": 6.0}, {"pred": 4.086713790893555, "label": 6.0}, {"pred": -1.0735156536102295, "label": 2.0}, {"pred": -3.3857691287994385, "label": 5.0}, {"pred": -2.6780574321746826, "label": 2.0}, {"pred": -2.3147029876708984, "label": 5.0}, {"pred": 0.46940743923187256, "label": 5.0}, {"pred": 9.495539665222168, "label": 8.0}, {"pred": -1.889843463897705, "label": 5.0}, {"pred": 0.36386972665786743, "label": 2.0}, {"pred": -0.4371775984764099, "label": 6.0}, {"pred": -2.8748319149017334, "label": 6.0}, {"pred": -1.1083565950393677, "label": 5.0}, {"pred": 0.13744854927062988, "label": 6.0}, {"pred": -1.4635956287384033, "label": 2.0}, {"pred": -0.5616008043289185, "label": 2.0}, {"pred": 8.173700332641602, "label": 5.0}, {"pred": 4.250307083129883, "label": 5.0}, {"pred": -2.4926605224609375, "label": 0.0}, {"pred": -0.31131792068481445, "label": 5.0}, {"pred": -3.5435919761657715, "label": 2.0}, {"pred": -0.29817143082618713, "label": 2.0}, {"pred": -2.634324073791504, "label": 2.0}, {"pred": -1.6608585119247437, "label": 5.0}, {"pred": -1.39112389087677, "label": 2.0}, {"pred": 7.657371997833252, "label": 6.0}, {"pred": 0.5764560699462891, "label": 6.0}, {"pred": -0.8287283182144165, "label": 6.0}, {"pred": 4.195872783660889, "label": 6.0}, {"pred": -0.7789970636367798, "label": 5.0}, {"pred": -2.123466730117798, "label": 5.0}, {"pred": -3.2755870819091797, "label": 5.0}, {"pred": -2.2995500564575195, "label": 5.0}, {"pred": -4.09137487411499, "label": 5.0}, {"pred": 3.905306577682495, "label": 2.0}, {"pred": 5.917882442474365, "label": 5.0}, {"pred": -2.399942636489868, "label": 8.0}, {"pred": 0.7929872274398804, "label": 8.0}, {"pred": -0.8256539106369019, "label": 0.0}, {"pred": 1.5062816143035889, "label": 3.0}, {"pred": -2.261814594268799, "label": 2.0}, {"pred": -0.10297468304634094, "label": 2.0}, {"pred": -0.8380897641181946, "label": 2.0}, {"pred": 1.7574100494384766, "label": 8.0}, {"pred": -0.9829455614089966, "label": 8.0}, {"pred": -1.2617555856704712, "label": 0.0}, {"pred": 6.876880168914795, "label": 2.0}, {"pred": 1.2606760263442993, "label": 0.0}, {"pred": -3.835230588912964, "label": 7.0}, {"pred": -1.5416922569274902, "label": 0.0}, {"pred": -3.7833173274993896, "label": 0.0}, {"pred": -1.4478707313537598, "label": 0.0}, {"pred": 3.486963987350464, "label": 8.0}, {"pred": 1.1780611276626587, "label": 8.0}, {"pred": -1.6854732036590576, "label": 0.0}, {"pred": 5.843297004699707, "label": 0.0}, {"pred": 3.6902544498443604, "label": 2.0}, {"pred": -2.237409830093384, "label": 2.0}, {"pred": -3.392303943634033, "label": 0.0}]
|
|
|
1 |
+
[{"pred": 9.395666122436523, "label": 0.0}, {"pred": 0.4096878468990326, "label": 5.0}, {"pred": -1.3507310152053833, "label": 2.0}, {"pred": -0.09250641614198685, "label": 4.0}, {"pred": -2.3654043674468994, "label": 2.0}, {"pred": -3.81583571434021, "label": 8.0}, {"pred": -1.1759780645370483, "label": 8.0}, {"pred": -2.295236349105835, "label": 2.0}, {"pred": 0.5764787197113037, "label": 8.0}, {"pred": 1.1780004501342773, "label": 2.0}, {"pred": -1.306742787361145, "label": 2.0}, {"pred": 2.852921485900879, "label": 2.0}, {"pred": 5.584786891937256, "label": 5.0}, {"pred": -4.040923118591309, "label": 5.0}, {"pred": 0.4870939552783966, "label": 5.0}, {"pred": -1.0437679290771484, "label": 5.0}, {"pred": -1.8512382507324219, "label": 5.0}, {"pred": -2.754425048828125, "label": 6.0}, {"pred": 0.00021423818543553352, "label": 6.0}, {"pred": 1.0303763151168823, "label": 5.0}, {"pred": 7.444942474365234, "label": 8.0}, {"pred": 3.7297921180725098, "label": 8.0}, {"pred": -3.4230027198791504, "label": 2.0}, {"pred": -0.2933298647403717, "label": 5.0}, {"pred": -4.0809712409973145, "label": 5.0}, {"pred": -2.418494701385498, "label": 5.0}, {"pred": -3.129002332687378, "label": 2.0}, {"pred": -0.008926194161176682, "label": 2.0}, {"pred": 0.5936489701271057, "label": 2.0}, {"pred": 7.085031509399414, "label": 3.0}, {"pred": 2.1849164962768555, "label": 3.0}, {"pred": 0.3647543489933014, "label": 5.0}, {"pred": -0.2870313823223114, "label": 0.0}, {"pred": -3.9118170738220215, "label": 2.0}, {"pred": -2.2775864601135254, "label": 3.0}, {"pred": -5.056410789489746, "label": 3.0}, {"pred": -2.984327554702759, "label": 3.0}, {"pred": -0.5400028824806213, "label": 7.0}, {"pred": 9.985075950622559, "label": 8.0}, {"pred": -1.0413215160369873, "label": 7.0}, {"pred": -0.9643221497535706, "label": 7.0}, {"pred": 0.15067163109779358, "label": 2.0}, {"pred": -2.808295488357544, "label": 2.0}, {"pred": -1.4440401792526245, "label": 3.0}, {"pred": -1.1782810688018799, "label": 8.0}, {"pred": -1.2901784181594849, "label": 0.0}, {"pred": -1.437712550163269, "label": 2.0}, {"pred": -1.0705559253692627, "label": 2.0}, {"pred": -1.2407697439193726, "label": 2.0}, {"pred": -1.2407119274139404, "label": 2.0}, {"pred": -0.7768977880477905, "label": 2.0}, {"pred": -0.9326913952827454, "label": 2.0}, {"pred": -0.2104552686214447, "label": 0.0}, {"pred": 8.760655403137207, "label": 7.0}, {"pred": -1.9766864776611328, "label": 7.0}, {"pred": -0.9865150451660156, "label": 7.0}, {"pred": -1.477076530456543, "label": 7.0}, {"pred": -1.1692349910736084, "label": 8.0}, {"pred": -1.8697360754013062, "label": 8.0}, {"pred": -0.8081696033477783, "label": 8.0}, {"pred": -1.256617546081543, "label": 3.0}, {"pred": -0.06512688100337982, "label": 6.0}, {"pred": 10.312076568603516, "label": 0.0}, {"pred": 1.8776612281799316, "label": 2.0}, {"pred": 0.12151290476322174, "label": 3.0}, {"pred": 5.705040454864502, "label": 7.0}, {"pred": 5.735830307006836, "label": 0.0}, {"pred": -3.6927287578582764, "label": 5.0}, {"pred": -3.134650468826294, "label": 2.0}, {"pred": -4.3415656089782715, "label": 2.0}, {"pred": -1.1031872034072876, "label": 7.0}, {"pred": -2.867774724960327, "label": 2.0}, {"pred": -1.0985033512115479, "label": 2.0}, {"pred": 0.1397317796945572, "label": 2.0}, {"pred": -0.2997811436653137, "label": 8.0}, {"pred": -1.7282167673110962, "label": 8.0}, {"pred": -2.1505489349365234, "label": 3.0}, {"pred": 0.01920280233025551, "label": 0.0}, {"pred": -1.770388126373291, "label": 0.0}, {"pred": -0.7139682769775391, "label": 2.0}, {"pred": 9.983798027038574, "label": 2.0}, {"pred": -2.4545624256134033, "label": 2.0}, {"pred": 0.5524357557296753, "label": 8.0}, {"pred": 9.475410461425781, "label": 6.0}, {"pred": -2.2126882076263428, "label": 2.0}, {"pred": -0.74846351146698, "label": 2.0}, {"pred": -0.7173618078231812, "label": 8.0}, {"pred": -1.5908926725387573, "label": 0.0}, {"pred": -2.2288694381713867, "label": 2.0}, {"pred": -0.41721978783607483, "label": 3.0}, {"pred": -2.9400010108947754, "label": 3.0}, {"pred": -0.6818507313728333, "label": 2.0}, {"pred": 6.953657150268555, "label": 6.0}, {"pred": 3.4554193019866943, "label": 0.0}, {"pred": -0.9989187717437744, "label": 0.0}, {"pred": 2.9632811546325684, "label": 5.0}, {"pred": -3.579110860824585, "label": 5.0}, {"pred": -1.9934924840927124, "label": 5.0}, {"pred": -3.581209421157837, "label": 5.0}, {"pred": 1.6473805904388428, "label": 5.0}, {"pred": -0.1725456863641739, "label": 2.0}, {"pred": 2.595804452896118, "label": 2.0}, {"pred": 3.480557441711426, "label": 2.0}, {"pred": -3.1583964824676514, "label": 5.0}, {"pred": 0.5711352825164795, "label": 4.0}, {"pred": 0.9948037266731262, "label": 8.0}, {"pred": -1.7164541482925415, "label": 4.0}, {"pred": -4.09295654296875, "label": 4.0}, {"pred": -1.60957932472229, "label": 4.0}, {"pred": -1.4958873987197876, "label": 8.0}, {"pred": 0.606967031955719, "label": 2.0}, {"pred": 0.13369253277778625, "label": 3.0}, {"pred": 3.7107839584350586, "label": 2.0}, {"pred": 7.855518817901611, "label": 5.0}, {"pred": -1.616620421409607, "label": 5.0}, {"pred": -2.36954665184021, "label": 5.0}, {"pred": -3.969804286956787, "label": 0.0}, {"pred": -0.8281141519546509, "label": 6.0}, {"pred": -2.75907301902771, "label": 5.0}, {"pred": -1.5639314651489258, "label": 5.0}, {"pred": -1.4895418882369995, "label": 5.0}, {"pred": -0.5654066801071167, "label": 2.0}, {"pred": 5.1662092208862305, "label": 2.0}, {"pred": 4.961638450622559, "label": 2.0}, {"pred": -1.876211166381836, "label": 2.0}, {"pred": 1.0843249559402466, "label": 2.0}, {"pred": 0.7490819096565247, "label": 2.0}, {"pred": -2.537386655807495, "label": 5.0}, {"pred": -2.4651215076446533, "label": 5.0}, {"pred": -0.8757886290550232, "label": 6.0}, {"pred": 2.1154074668884277, "label": 6.0}, {"pred": 7.650022029876709, "label": 0.0}, {"pred": 2.0454721450805664, "label": 2.0}, {"pred": -2.042609214782715, "label": 2.0}, {"pred": -3.1547300815582275, "label": 6.0}, {"pred": 1.4215494394302368, "label": 2.0}, {"pred": -1.9600963592529297, "label": 5.0}, {"pred": -0.6864432692527771, "label": 5.0}, {"pred": 0.5956198573112488, "label": 6.0}, {"pred": -1.2251653671264648, "label": 2.0}, {"pred": 2.744778871536255, "label": 5.0}, {"pred": 6.340786933898926, "label": 2.0}, {"pred": -3.3739516735076904, "label": 8.0}, {"pred": -3.5059309005737305, "label": 2.0}, {"pred": -0.0787801668047905, "label": 2.0}, {"pred": -2.4171361923217773, "label": 5.0}, {"pred": 0.30282217264175415, "label": 8.0}, {"pred": 0.536819338798523, "label": 5.0}, {"pred": -1.4018205404281616, "label": 5.0}, {"pred": 3.8433799743652344, "label": 5.0}, {"pred": 5.5743727684021, "label": 5.0}, {"pred": -2.5080723762512207, "label": 5.0}, {"pred": -3.364506483078003, "label": 6.0}, {"pred": 0.15638209879398346, "label": 8.0}, {"pred": -0.2856593132019043, "label": 6.0}, {"pred": 0.37352120876312256, "label": 7.0}, {"pred": 0.10489370673894882, "label": 6.0}, {"pred": -2.4880270957946777, "label": 8.0}, {"pred": -1.3927054405212402, "label": 0.0}, {"pred": 6.64209508895874, "label": 2.0}, {"pred": -2.0052239894866943, "label": 6.0}, {"pred": -2.2603507041931152, "label": 5.0}, {"pred": 2.793245315551758, "label": 5.0}, {"pred": -0.34100469946861267, "label": 2.0}, {"pred": -3.1591796875, "label": 2.0}, {"pred": -0.7837404608726501, "label": 8.0}, {"pred": -0.8880395889282227, "label": 8.0}, {"pred": -1.5245490074157715, "label": 0.0}, {"pred": 7.281620025634766, "label": 2.0}, {"pred": -2.9855499267578125, "label": 0.0}, {"pred": -0.9715536236763, "label": 7.0}, {"pred": 2.981710433959961, "label": 2.0}, {"pred": -1.71941077709198, "label": 2.0}, {"pred": -4.1776885986328125, "label": 8.0}, {"pred": -0.2737337648868561, "label": 6.0}, {"pred": -0.2323164939880371, "label": 6.0}, {"pred": 0.7083033323287964, "label": 5.0}, {"pred": 7.313948631286621, "label": 5.0}, {"pred": -3.2342827320098877, "label": 5.0}, {"pred": -1.1054272651672363, "label": 5.0}, {"pred": -1.3207857608795166, "label": 5.0}, {"pred": -1.396915316581726, "label": 5.0}, {"pred": -0.9858799576759338, "label": 5.0}, {"pred": -1.2481117248535156, "label": 6.0}, {"pred": -1.185423731803894, "label": 8.0}, {"pred": -0.6355394124984741, "label": 5.0}, {"pred": -1.3282532691955566, "label": 5.0}, {"pred": -0.06940007954835892, "label": 3.0}, {"pred": 8.819711685180664, "label": 3.0}, {"pred": -1.7800638675689697, "label": 6.0}, {"pred": -0.9062626957893372, "label": 2.0}, {"pred": -1.4409409761428833, "label": 3.0}, {"pred": -0.9971213340759277, "label": 2.0}, {"pred": -1.8456844091415405, "label": 2.0}, {"pred": -0.7595651149749756, "label": 2.0}, {"pred": -1.2325983047485352, "label": 0.0}, {"pred": -0.2523927688598633, "label": 2.0}, {"pred": 10.457062721252441, "label": 5.0}, {"pred": 1.2496005296707153, "label": 5.0}, {"pred": -0.0661335289478302, "label": 5.0}, {"pred": 5.924855709075928, "label": 2.0}, {"pred": 5.310446262359619, "label": 6.0}, {"pred": -4.33503532409668, "label": 3.0}, {"pred": -3.246478319168091, "label": 3.0}, {"pred": -1.0310145616531372, "label": 6.0}, {"pred": -1.9235048294067383, "label": 3.0}, {"pred": -3.1231048107147217, "label": 2.0}, {"pred": 2.0666773319244385, "label": 8.0}, {"pred": -2.21870493888855, "label": 8.0}, {"pred": -4.353604316711426, "label": 0.0}, {"pred": 0.4301067292690277, "label": 1.0}, {"pred": 0.6327790021896362, "label": 2.0}, {"pred": 2.628701686859131, "label": 3.0}, {"pred": 6.137325286865234, "label": 6.0}, {"pred": -2.3515894412994385, "label": 8.0}, {"pred": -2.0138840675354004, "label": 7.0}, {"pred": -0.3610410690307617, "label": 0.0}, {"pred": 0.6118225455284119, "label": 8.0}, {"pred": 1.8093969821929932, "label": 2.0}, {"pred": 1.2613120079040527, "label": 2.0}, {"pred": -2.8037238121032715, "label": 5.0}, {"pred": 7.11236047744751, "label": 5.0}, {"pred": 1.2082927227020264, "label": 7.0}, {"pred": -3.570901870727539, "label": 2.0}, {"pred": -3.2932965755462646, "label": 2.0}, {"pred": -0.8259910345077515, "label": 8.0}, {"pred": -0.7468539476394653, "label": 5.0}, {"pred": 6.213565349578857, "label": 5.0}, {"pred": 5.455384254455566, "label": 2.0}, {"pred": -4.203694820404053, "label": 6.0}, {"pred": 1.6539826393127441, "label": 6.0}, {"pred": -1.7960268259048462, "label": 2.0}, {"pred": -3.1243834495544434, "label": 5.0}, {"pred": -2.0782253742218018, "label": 2.0}, {"pred": -2.1685826778411865, "label": 5.0}, {"pred": 0.5615622997283936, "label": 5.0}, {"pred": 9.341123580932617, "label": 8.0}, {"pred": -1.8784537315368652, "label": 5.0}, {"pred": -0.7927582859992981, "label": 2.0}, {"pred": 0.4580661654472351, "label": 6.0}, {"pred": -2.3094468116760254, "label": 6.0}, {"pred": -2.143928289413452, "label": 5.0}, {"pred": -0.9896171689033508, "label": 6.0}, {"pred": -2.01908016204834, "label": 2.0}, {"pred": -0.6412465572357178, "label": 2.0}, {"pred": 6.39767599105835, "label": 5.0}, {"pred": 4.225459098815918, "label": 5.0}, {"pred": -3.1502859592437744, "label": 0.0}, {"pred": 0.43117573857307434, "label": 5.0}, {"pred": -2.445258617401123, "label": 2.0}, {"pred": 0.1308278888463974, "label": 2.0}, {"pred": -2.7748281955718994, "label": 2.0}, {"pred": -2.6895854473114014, "label": 5.0}, {"pred": -1.6292206048965454, "label": 2.0}, {"pred": 7.543752193450928, "label": 6.0}, {"pred": -1.342669129371643, "label": 6.0}, {"pred": -1.7914018630981445, "label": 6.0}, {"pred": 4.5911688804626465, "label": 6.0}, {"pred": -0.3857574164867401, "label": 5.0}, {"pred": -1.955091118812561, "label": 5.0}, {"pred": -1.7245426177978516, "label": 5.0}, {"pred": -2.9511044025421143, "label": 5.0}, {"pred": -2.7611618041992188, "label": 5.0}, {"pred": 3.6846923828125, "label": 2.0}, {"pred": 4.134253025054932, "label": 5.0}, {"pred": -3.159534454345703, "label": 8.0}, {"pred": 2.984583854675293, "label": 8.0}, {"pred": -1.0099399089813232, "label": 0.0}, {"pred": 1.7101552486419678, "label": 3.0}, {"pred": -2.5953452587127686, "label": 2.0}, {"pred": 1.3067090511322021, "label": 2.0}, {"pred": 2.284773111343384, "label": 2.0}, {"pred": 4.78453254699707, "label": 8.0}, {"pred": -2.012260675430298, "label": 8.0}, {"pred": -2.6885504722595215, "label": 0.0}, {"pred": 3.0605850219726562, "label": 2.0}, {"pred": -2.044528007507324, "label": 0.0}, {"pred": -3.634997606277466, "label": 7.0}, {"pred": -2.3484065532684326, "label": 0.0}, {"pred": -2.205092191696167, "label": 0.0}, {"pred": -1.424561858177185, "label": 0.0}, {"pred": 1.915219783782959, "label": 8.0}, {"pred": 2.2997443675994873, "label": 8.0}, {"pred": -1.9867368936538696, "label": 0.0}, {"pred": 6.400612831115723, "label": 0.0}, {"pred": 1.8677934408187866, "label": 2.0}, {"pred": -2.124760150909424, "label": 2.0}, {"pred": -3.5659399032592773, "label": 0.0}]
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:563526884464f58b3c1b06eac98e478501952393b97712559a9892d6765fe912
|
3 |
+
size 714922721
|
train_results.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"epoch": 5.0,
|
3 |
-
"train_loss": 0.
|
4 |
-
"train_runtime":
|
5 |
"train_samples": 702,
|
6 |
-
"train_samples_per_second": 3.
|
7 |
-
"train_steps_per_second": 3.
|
8 |
}
|
|
|
1 |
{
|
2 |
"epoch": 5.0,
|
3 |
+
"train_loss": 0.6843088025041455,
|
4 |
+
"train_runtime": 956.7933,
|
5 |
"train_samples": 702,
|
6 |
+
"train_samples_per_second": 3.669,
|
7 |
+
"train_steps_per_second": 3.669
|
8 |
}
|
trainer_state.json
CHANGED
@@ -9,369 +9,369 @@
|
|
9 |
"log_history": [
|
10 |
{
|
11 |
"epoch": 0.14,
|
12 |
-
"eval_f1": 0.
|
13 |
-
"eval_loss": 1.
|
14 |
-
"eval_runtime": 3.
|
15 |
-
"eval_samples_per_second": 29.
|
16 |
-
"eval_steps_per_second": 29.
|
17 |
"step": 100
|
18 |
},
|
19 |
{
|
20 |
"epoch": 0.28,
|
21 |
-
"eval_f1": 0.
|
22 |
-
"eval_loss": 1.
|
23 |
-
"eval_runtime":
|
24 |
-
"eval_samples_per_second":
|
25 |
-
"eval_steps_per_second":
|
26 |
"step": 200
|
27 |
},
|
28 |
{
|
29 |
"epoch": 0.43,
|
30 |
-
"eval_f1": 0.
|
31 |
-
"eval_loss": 1.
|
32 |
-
"eval_runtime":
|
33 |
-
"eval_samples_per_second":
|
34 |
-
"eval_steps_per_second":
|
35 |
"step": 300
|
36 |
},
|
37 |
{
|
38 |
"epoch": 0.57,
|
39 |
-
"eval_f1": 0.
|
40 |
-
"eval_loss": 1.
|
41 |
-
"eval_runtime":
|
42 |
-
"eval_samples_per_second":
|
43 |
-
"eval_steps_per_second":
|
44 |
"step": 400
|
45 |
},
|
46 |
{
|
47 |
"epoch": 0.71,
|
48 |
"learning_rate": 2.572649572649573e-05,
|
49 |
-
"loss": 1.
|
50 |
"step": 500
|
51 |
},
|
52 |
{
|
53 |
"epoch": 0.71,
|
54 |
-
"eval_f1": 0.
|
55 |
-
"eval_loss": 1.
|
56 |
-
"eval_runtime":
|
57 |
-
"eval_samples_per_second":
|
58 |
-
"eval_steps_per_second":
|
59 |
"step": 500
|
60 |
},
|
61 |
{
|
62 |
"epoch": 0.85,
|
63 |
-
"eval_f1": 0.
|
64 |
-
"eval_loss": 1.
|
65 |
-
"eval_runtime": 3.
|
66 |
-
"eval_samples_per_second": 29.
|
67 |
-
"eval_steps_per_second": 29.
|
68 |
"step": 600
|
69 |
},
|
70 |
{
|
71 |
"epoch": 1.0,
|
72 |
-
"eval_f1": 0.
|
73 |
-
"eval_loss": 1.
|
74 |
-
"eval_runtime":
|
75 |
-
"eval_samples_per_second":
|
76 |
-
"eval_steps_per_second":
|
77 |
"step": 700
|
78 |
},
|
79 |
{
|
80 |
"epoch": 1.14,
|
81 |
-
"eval_f1": 0.
|
82 |
-
"eval_loss": 1.
|
83 |
-
"eval_runtime":
|
84 |
-
"eval_samples_per_second":
|
85 |
-
"eval_steps_per_second":
|
86 |
"step": 800
|
87 |
},
|
88 |
{
|
89 |
"epoch": 1.28,
|
90 |
-
"eval_f1": 0.
|
91 |
-
"eval_loss": 1.
|
92 |
-
"eval_runtime":
|
93 |
-
"eval_samples_per_second":
|
94 |
-
"eval_steps_per_second":
|
95 |
"step": 900
|
96 |
},
|
97 |
{
|
98 |
"epoch": 1.42,
|
99 |
"learning_rate": 2.1452991452991456e-05,
|
100 |
-
"loss": 0.
|
101 |
"step": 1000
|
102 |
},
|
103 |
{
|
104 |
"epoch": 1.42,
|
105 |
-
"eval_f1": 0.
|
106 |
-
"eval_loss": 1.
|
107 |
-
"eval_runtime":
|
108 |
-
"eval_samples_per_second":
|
109 |
-
"eval_steps_per_second":
|
110 |
"step": 1000
|
111 |
},
|
112 |
{
|
113 |
"epoch": 1.57,
|
114 |
-
"eval_f1": 0.
|
115 |
-
"eval_loss": 1.
|
116 |
-
"eval_runtime":
|
117 |
-
"eval_samples_per_second":
|
118 |
-
"eval_steps_per_second":
|
119 |
"step": 1100
|
120 |
},
|
121 |
{
|
122 |
"epoch": 1.71,
|
123 |
-
"eval_f1": 0.
|
124 |
-
"eval_loss": 1.
|
125 |
-
"eval_runtime":
|
126 |
-
"eval_samples_per_second":
|
127 |
-
"eval_steps_per_second":
|
128 |
"step": 1200
|
129 |
},
|
130 |
{
|
131 |
"epoch": 1.85,
|
132 |
-
"eval_f1": 0.
|
133 |
-
"eval_loss": 1.
|
134 |
-
"eval_runtime":
|
135 |
-
"eval_samples_per_second":
|
136 |
-
"eval_steps_per_second":
|
137 |
"step": 1300
|
138 |
},
|
139 |
{
|
140 |
"epoch": 1.99,
|
141 |
-
"eval_f1": 0.
|
142 |
-
"eval_loss": 1.
|
143 |
-
"eval_runtime":
|
144 |
-
"eval_samples_per_second":
|
145 |
-
"eval_steps_per_second":
|
146 |
"step": 1400
|
147 |
},
|
148 |
{
|
149 |
"epoch": 2.14,
|
150 |
"learning_rate": 1.7179487179487178e-05,
|
151 |
-
"loss": 0.
|
152 |
"step": 1500
|
153 |
},
|
154 |
{
|
155 |
"epoch": 2.14,
|
156 |
-
"eval_f1": 0.
|
157 |
-
"eval_loss": 1.
|
158 |
-
"eval_runtime":
|
159 |
-
"eval_samples_per_second": 26
|
160 |
-
"eval_steps_per_second": 26
|
161 |
"step": 1500
|
162 |
},
|
163 |
{
|
164 |
"epoch": 2.28,
|
165 |
-
"eval_f1": 0.
|
166 |
-
"eval_loss": 1.
|
167 |
-
"eval_runtime":
|
168 |
-
"eval_samples_per_second":
|
169 |
-
"eval_steps_per_second":
|
170 |
"step": 1600
|
171 |
},
|
172 |
{
|
173 |
"epoch": 2.42,
|
174 |
-
"eval_f1": 0.
|
175 |
-
"eval_loss": 1.
|
176 |
-
"eval_runtime": 5.
|
177 |
-
"eval_samples_per_second":
|
178 |
-
"eval_steps_per_second":
|
179 |
"step": 1700
|
180 |
},
|
181 |
{
|
182 |
"epoch": 2.56,
|
183 |
-
"eval_f1": 0.
|
184 |
-
"eval_loss": 1.
|
185 |
-
"eval_runtime": 5.
|
186 |
-
"eval_samples_per_second":
|
187 |
-
"eval_steps_per_second":
|
188 |
"step": 1800
|
189 |
},
|
190 |
{
|
191 |
"epoch": 2.71,
|
192 |
-
"eval_f1": 0.
|
193 |
-
"eval_loss": 1.
|
194 |
-
"eval_runtime":
|
195 |
-
"eval_samples_per_second":
|
196 |
-
"eval_steps_per_second":
|
197 |
"step": 1900
|
198 |
},
|
199 |
{
|
200 |
"epoch": 2.85,
|
201 |
"learning_rate": 1.2905982905982905e-05,
|
202 |
-
"loss": 0.
|
203 |
"step": 2000
|
204 |
},
|
205 |
{
|
206 |
"epoch": 2.85,
|
207 |
-
"eval_f1": 0.
|
208 |
-
"eval_loss": 1.
|
209 |
-
"eval_runtime":
|
210 |
-
"eval_samples_per_second":
|
211 |
-
"eval_steps_per_second":
|
212 |
"step": 2000
|
213 |
},
|
214 |
{
|
215 |
"epoch": 2.99,
|
216 |
-
"eval_f1": 0.
|
217 |
-
"eval_loss": 1.
|
218 |
-
"eval_runtime":
|
219 |
-
"eval_samples_per_second":
|
220 |
-
"eval_steps_per_second":
|
221 |
"step": 2100
|
222 |
},
|
223 |
{
|
224 |
"epoch": 3.13,
|
225 |
-
"eval_f1": 0.
|
226 |
-
"eval_loss": 1.
|
227 |
-
"eval_runtime":
|
228 |
-
"eval_samples_per_second":
|
229 |
-
"eval_steps_per_second":
|
230 |
"step": 2200
|
231 |
},
|
232 |
{
|
233 |
"epoch": 3.28,
|
234 |
-
"eval_f1": 0.
|
235 |
-
"eval_loss": 1.
|
236 |
-
"eval_runtime":
|
237 |
-
"eval_samples_per_second":
|
238 |
-
"eval_steps_per_second":
|
239 |
"step": 2300
|
240 |
},
|
241 |
{
|
242 |
"epoch": 3.42,
|
243 |
-
"eval_f1": 0.
|
244 |
-
"eval_loss": 1.
|
245 |
-
"eval_runtime": 3.
|
246 |
-
"eval_samples_per_second":
|
247 |
-
"eval_steps_per_second":
|
248 |
"step": 2400
|
249 |
},
|
250 |
{
|
251 |
"epoch": 3.56,
|
252 |
"learning_rate": 8.632478632478633e-06,
|
253 |
-
"loss": 0.
|
254 |
"step": 2500
|
255 |
},
|
256 |
{
|
257 |
"epoch": 3.56,
|
258 |
-
"eval_f1": 0.
|
259 |
-
"eval_loss": 1.
|
260 |
-
"eval_runtime": 3.
|
261 |
-
"eval_samples_per_second": 29.
|
262 |
-
"eval_steps_per_second": 29.
|
263 |
"step": 2500
|
264 |
},
|
265 |
{
|
266 |
"epoch": 3.7,
|
267 |
-
"eval_f1": 0.
|
268 |
-
"eval_loss": 1.
|
269 |
-
"eval_runtime":
|
270 |
-
"eval_samples_per_second":
|
271 |
-
"eval_steps_per_second":
|
272 |
"step": 2600
|
273 |
},
|
274 |
{
|
275 |
"epoch": 3.85,
|
276 |
-
"eval_f1": 0.
|
277 |
-
"eval_loss": 1.
|
278 |
-
"eval_runtime":
|
279 |
-
"eval_samples_per_second":
|
280 |
-
"eval_steps_per_second":
|
281 |
"step": 2700
|
282 |
},
|
283 |
{
|
284 |
"epoch": 3.99,
|
285 |
-
"eval_f1": 0.
|
286 |
-
"eval_loss": 1.
|
287 |
-
"eval_runtime":
|
288 |
-
"eval_samples_per_second":
|
289 |
-
"eval_steps_per_second":
|
290 |
"step": 2800
|
291 |
},
|
292 |
{
|
293 |
"epoch": 4.13,
|
294 |
-
"eval_f1": 0.
|
295 |
-
"eval_loss": 1.
|
296 |
-
"eval_runtime":
|
297 |
-
"eval_samples_per_second":
|
298 |
-
"eval_steps_per_second":
|
299 |
"step": 2900
|
300 |
},
|
301 |
{
|
302 |
"epoch": 4.27,
|
303 |
"learning_rate": 4.358974358974359e-06,
|
304 |
-
"loss": 0.
|
305 |
"step": 3000
|
306 |
},
|
307 |
{
|
308 |
"epoch": 4.27,
|
309 |
-
"eval_f1": 0.
|
310 |
-
"eval_loss": 1.
|
311 |
-
"eval_runtime": 3.
|
312 |
-
"eval_samples_per_second":
|
313 |
-
"eval_steps_per_second":
|
314 |
"step": 3000
|
315 |
},
|
316 |
{
|
317 |
"epoch": 4.42,
|
318 |
-
"eval_f1": 0.
|
319 |
-
"eval_loss": 1.
|
320 |
-
"eval_runtime":
|
321 |
-
"eval_samples_per_second":
|
322 |
-
"eval_steps_per_second":
|
323 |
"step": 3100
|
324 |
},
|
325 |
{
|
326 |
"epoch": 4.56,
|
327 |
-
"eval_f1": 0.
|
328 |
-
"eval_loss": 1.
|
329 |
-
"eval_runtime":
|
330 |
-
"eval_samples_per_second":
|
331 |
-
"eval_steps_per_second":
|
332 |
"step": 3200
|
333 |
},
|
334 |
{
|
335 |
"epoch": 4.7,
|
336 |
-
"eval_f1": 0.
|
337 |
-
"eval_loss": 1.
|
338 |
-
"eval_runtime":
|
339 |
-
"eval_samples_per_second":
|
340 |
-
"eval_steps_per_second":
|
341 |
"step": 3300
|
342 |
},
|
343 |
{
|
344 |
"epoch": 4.84,
|
345 |
-
"eval_f1": 0.
|
346 |
-
"eval_loss": 1.
|
347 |
-
"eval_runtime":
|
348 |
-
"eval_samples_per_second":
|
349 |
-
"eval_steps_per_second":
|
350 |
"step": 3400
|
351 |
},
|
352 |
{
|
353 |
"epoch": 4.99,
|
354 |
"learning_rate": 8.547008547008547e-08,
|
355 |
-
"loss": 0.
|
356 |
"step": 3500
|
357 |
},
|
358 |
{
|
359 |
"epoch": 4.99,
|
360 |
-
"eval_f1": 0.
|
361 |
-
"eval_loss": 1.
|
362 |
-
"eval_runtime":
|
363 |
-
"eval_samples_per_second":
|
364 |
-
"eval_steps_per_second":
|
365 |
"step": 3500
|
366 |
},
|
367 |
{
|
368 |
"epoch": 5.0,
|
369 |
"step": 3510,
|
370 |
"total_flos": 2890172619430200.0,
|
371 |
-
"train_loss": 0.
|
372 |
-
"train_runtime":
|
373 |
-
"train_samples_per_second": 3.
|
374 |
-
"train_steps_per_second": 3.
|
375 |
}
|
376 |
],
|
377 |
"max_steps": 3510,
|
|
|
9 |
"log_history": [
|
10 |
{
|
11 |
"epoch": 0.14,
|
12 |
+
"eval_f1": 0.3735957123448099,
|
13 |
+
"eval_loss": 1.3954416513442993,
|
14 |
+
"eval_runtime": 3.3287,
|
15 |
+
"eval_samples_per_second": 29.441,
|
16 |
+
"eval_steps_per_second": 29.441,
|
17 |
"step": 100
|
18 |
},
|
19 |
{
|
20 |
"epoch": 0.28,
|
21 |
+
"eval_f1": 0.4402748314205965,
|
22 |
+
"eval_loss": 1.2493854761123657,
|
23 |
+
"eval_runtime": 5.52,
|
24 |
+
"eval_samples_per_second": 17.754,
|
25 |
+
"eval_steps_per_second": 17.754,
|
26 |
"step": 200
|
27 |
},
|
28 |
{
|
29 |
"epoch": 0.43,
|
30 |
+
"eval_f1": 0.4049192887670501,
|
31 |
+
"eval_loss": 1.1859477758407593,
|
32 |
+
"eval_runtime": 4.6189,
|
33 |
+
"eval_samples_per_second": 21.217,
|
34 |
+
"eval_steps_per_second": 21.217,
|
35 |
"step": 300
|
36 |
},
|
37 |
{
|
38 |
"epoch": 0.57,
|
39 |
+
"eval_f1": 0.46917462323111053,
|
40 |
+
"eval_loss": 1.3165582418441772,
|
41 |
+
"eval_runtime": 5.117,
|
42 |
+
"eval_samples_per_second": 19.152,
|
43 |
+
"eval_steps_per_second": 19.152,
|
44 |
"step": 400
|
45 |
},
|
46 |
{
|
47 |
"epoch": 0.71,
|
48 |
"learning_rate": 2.572649572649573e-05,
|
49 |
+
"loss": 1.2082,
|
50 |
"step": 500
|
51 |
},
|
52 |
{
|
53 |
"epoch": 0.71,
|
54 |
+
"eval_f1": 0.4667018667844948,
|
55 |
+
"eval_loss": 1.3011000156402588,
|
56 |
+
"eval_runtime": 4.4894,
|
57 |
+
"eval_samples_per_second": 21.829,
|
58 |
+
"eval_steps_per_second": 21.829,
|
59 |
"step": 500
|
60 |
},
|
61 |
{
|
62 |
"epoch": 0.85,
|
63 |
+
"eval_f1": 0.4908908048448308,
|
64 |
+
"eval_loss": 1.2637114524841309,
|
65 |
+
"eval_runtime": 3.3668,
|
66 |
+
"eval_samples_per_second": 29.108,
|
67 |
+
"eval_steps_per_second": 29.108,
|
68 |
"step": 600
|
69 |
},
|
70 |
{
|
71 |
"epoch": 1.0,
|
72 |
+
"eval_f1": 0.4642800618313763,
|
73 |
+
"eval_loss": 1.1596269607543945,
|
74 |
+
"eval_runtime": 5.1253,
|
75 |
+
"eval_samples_per_second": 19.121,
|
76 |
+
"eval_steps_per_second": 19.121,
|
77 |
"step": 700
|
78 |
},
|
79 |
{
|
80 |
"epoch": 1.14,
|
81 |
+
"eval_f1": 0.495361570506607,
|
82 |
+
"eval_loss": 1.2012276649475098,
|
83 |
+
"eval_runtime": 5.0079,
|
84 |
+
"eval_samples_per_second": 19.569,
|
85 |
+
"eval_steps_per_second": 19.569,
|
86 |
"step": 800
|
87 |
},
|
88 |
{
|
89 |
"epoch": 1.28,
|
90 |
+
"eval_f1": 0.4856164962650226,
|
91 |
+
"eval_loss": 1.1207385063171387,
|
92 |
+
"eval_runtime": 5.1293,
|
93 |
+
"eval_samples_per_second": 19.106,
|
94 |
+
"eval_steps_per_second": 19.106,
|
95 |
"step": 900
|
96 |
},
|
97 |
{
|
98 |
"epoch": 1.42,
|
99 |
"learning_rate": 2.1452991452991456e-05,
|
100 |
+
"loss": 0.9276,
|
101 |
"step": 1000
|
102 |
},
|
103 |
{
|
104 |
"epoch": 1.42,
|
105 |
+
"eval_f1": 0.5074106973681244,
|
106 |
+
"eval_loss": 1.3098654747009277,
|
107 |
+
"eval_runtime": 5.0862,
|
108 |
+
"eval_samples_per_second": 19.268,
|
109 |
+
"eval_steps_per_second": 19.268,
|
110 |
"step": 1000
|
111 |
},
|
112 |
{
|
113 |
"epoch": 1.57,
|
114 |
+
"eval_f1": 0.4820613319741061,
|
115 |
+
"eval_loss": 1.2627363204956055,
|
116 |
+
"eval_runtime": 5.2038,
|
117 |
+
"eval_samples_per_second": 18.832,
|
118 |
+
"eval_steps_per_second": 18.832,
|
119 |
"step": 1100
|
120 |
},
|
121 |
{
|
122 |
"epoch": 1.71,
|
123 |
+
"eval_f1": 0.5034445960093111,
|
124 |
+
"eval_loss": 1.120241403579712,
|
125 |
+
"eval_runtime": 5.175,
|
126 |
+
"eval_samples_per_second": 18.937,
|
127 |
+
"eval_steps_per_second": 18.937,
|
128 |
"step": 1200
|
129 |
},
|
130 |
{
|
131 |
"epoch": 1.85,
|
132 |
+
"eval_f1": 0.502158612711185,
|
133 |
+
"eval_loss": 1.1611493825912476,
|
134 |
+
"eval_runtime": 5.037,
|
135 |
+
"eval_samples_per_second": 19.456,
|
136 |
+
"eval_steps_per_second": 19.456,
|
137 |
"step": 1300
|
138 |
},
|
139 |
{
|
140 |
"epoch": 1.99,
|
141 |
+
"eval_f1": 0.5191081295714916,
|
142 |
+
"eval_loss": 1.2113677263259888,
|
143 |
+
"eval_runtime": 5.0637,
|
144 |
+
"eval_samples_per_second": 19.353,
|
145 |
+
"eval_steps_per_second": 19.353,
|
146 |
"step": 1400
|
147 |
},
|
148 |
{
|
149 |
"epoch": 2.14,
|
150 |
"learning_rate": 1.7179487179487178e-05,
|
151 |
+
"loss": 0.8288,
|
152 |
"step": 1500
|
153 |
},
|
154 |
{
|
155 |
"epoch": 2.14,
|
156 |
+
"eval_f1": 0.5078021729955409,
|
157 |
+
"eval_loss": 1.2758545875549316,
|
158 |
+
"eval_runtime": 5.0884,
|
159 |
+
"eval_samples_per_second": 19.26,
|
160 |
+
"eval_steps_per_second": 19.26,
|
161 |
"step": 1500
|
162 |
},
|
163 |
{
|
164 |
"epoch": 2.28,
|
165 |
+
"eval_f1": 0.5286245201650122,
|
166 |
+
"eval_loss": 1.3322173357009888,
|
167 |
+
"eval_runtime": 5.2011,
|
168 |
+
"eval_samples_per_second": 18.842,
|
169 |
+
"eval_steps_per_second": 18.842,
|
170 |
"step": 1600
|
171 |
},
|
172 |
{
|
173 |
"epoch": 2.42,
|
174 |
+
"eval_f1": 0.5300667878954677,
|
175 |
+
"eval_loss": 1.2991284132003784,
|
176 |
+
"eval_runtime": 5.1327,
|
177 |
+
"eval_samples_per_second": 19.093,
|
178 |
+
"eval_steps_per_second": 19.093,
|
179 |
"step": 1700
|
180 |
},
|
181 |
{
|
182 |
"epoch": 2.56,
|
183 |
+
"eval_f1": 0.500402521720021,
|
184 |
+
"eval_loss": 1.2623299360275269,
|
185 |
+
"eval_runtime": 5.5478,
|
186 |
+
"eval_samples_per_second": 17.665,
|
187 |
+
"eval_steps_per_second": 17.665,
|
188 |
"step": 1800
|
189 |
},
|
190 |
{
|
191 |
"epoch": 2.71,
|
192 |
+
"eval_f1": 0.5244540639978136,
|
193 |
+
"eval_loss": 1.3173421621322632,
|
194 |
+
"eval_runtime": 5.5919,
|
195 |
+
"eval_samples_per_second": 17.525,
|
196 |
+
"eval_steps_per_second": 17.525,
|
197 |
"step": 1900
|
198 |
},
|
199 |
{
|
200 |
"epoch": 2.85,
|
201 |
"learning_rate": 1.2905982905982905e-05,
|
202 |
+
"loss": 0.6347,
|
203 |
"step": 2000
|
204 |
},
|
205 |
{
|
206 |
"epoch": 2.85,
|
207 |
+
"eval_f1": 0.5317743052897723,
|
208 |
+
"eval_loss": 1.3929016590118408,
|
209 |
+
"eval_runtime": 5.7399,
|
210 |
+
"eval_samples_per_second": 17.073,
|
211 |
+
"eval_steps_per_second": 17.073,
|
212 |
"step": 2000
|
213 |
},
|
214 |
{
|
215 |
"epoch": 2.99,
|
216 |
+
"eval_f1": 0.5383445546938374,
|
217 |
+
"eval_loss": 1.3333723545074463,
|
218 |
+
"eval_runtime": 3.3412,
|
219 |
+
"eval_samples_per_second": 29.331,
|
220 |
+
"eval_steps_per_second": 29.331,
|
221 |
"step": 2100
|
222 |
},
|
223 |
{
|
224 |
"epoch": 3.13,
|
225 |
+
"eval_f1": 0.5275077326569355,
|
226 |
+
"eval_loss": 1.5553914308547974,
|
227 |
+
"eval_runtime": 3.3081,
|
228 |
+
"eval_samples_per_second": 29.624,
|
229 |
+
"eval_steps_per_second": 29.624,
|
230 |
"step": 2200
|
231 |
},
|
232 |
{
|
233 |
"epoch": 3.28,
|
234 |
+
"eval_f1": 0.5592106629133059,
|
235 |
+
"eval_loss": 1.5034139156341553,
|
236 |
+
"eval_runtime": 3.3379,
|
237 |
+
"eval_samples_per_second": 29.36,
|
238 |
+
"eval_steps_per_second": 29.36,
|
239 |
"step": 2300
|
240 |
},
|
241 |
{
|
242 |
"epoch": 3.42,
|
243 |
+
"eval_f1": 0.5715959486823535,
|
244 |
+
"eval_loss": 1.5117721557617188,
|
245 |
+
"eval_runtime": 3.3428,
|
246 |
+
"eval_samples_per_second": 29.317,
|
247 |
+
"eval_steps_per_second": 29.317,
|
248 |
"step": 2400
|
249 |
},
|
250 |
{
|
251 |
"epoch": 3.56,
|
252 |
"learning_rate": 8.632478632478633e-06,
|
253 |
+
"loss": 0.4923,
|
254 |
"step": 2500
|
255 |
},
|
256 |
{
|
257 |
"epoch": 3.56,
|
258 |
+
"eval_f1": 0.5210855337430568,
|
259 |
+
"eval_loss": 1.4938777685165405,
|
260 |
+
"eval_runtime": 3.3285,
|
261 |
+
"eval_samples_per_second": 29.443,
|
262 |
+
"eval_steps_per_second": 29.443,
|
263 |
"step": 2500
|
264 |
},
|
265 |
{
|
266 |
"epoch": 3.7,
|
267 |
+
"eval_f1": 0.548457367895942,
|
268 |
+
"eval_loss": 1.5325443744659424,
|
269 |
+
"eval_runtime": 3.3057,
|
270 |
+
"eval_samples_per_second": 29.646,
|
271 |
+
"eval_steps_per_second": 29.646,
|
272 |
"step": 2600
|
273 |
},
|
274 |
{
|
275 |
"epoch": 3.85,
|
276 |
+
"eval_f1": 0.5553468907576662,
|
277 |
+
"eval_loss": 1.5296635627746582,
|
278 |
+
"eval_runtime": 3.3038,
|
279 |
+
"eval_samples_per_second": 29.663,
|
280 |
+
"eval_steps_per_second": 29.663,
|
281 |
"step": 2700
|
282 |
},
|
283 |
{
|
284 |
"epoch": 3.99,
|
285 |
+
"eval_f1": 0.5419549676909157,
|
286 |
+
"eval_loss": 1.5025768280029297,
|
287 |
+
"eval_runtime": 3.3058,
|
288 |
+
"eval_samples_per_second": 29.644,
|
289 |
+
"eval_steps_per_second": 29.644,
|
290 |
"step": 2800
|
291 |
},
|
292 |
{
|
293 |
"epoch": 4.13,
|
294 |
+
"eval_f1": 0.5461023124307824,
|
295 |
+
"eval_loss": 1.5561065673828125,
|
296 |
+
"eval_runtime": 3.3631,
|
297 |
+
"eval_samples_per_second": 29.14,
|
298 |
+
"eval_steps_per_second": 29.14,
|
299 |
"step": 2900
|
300 |
},
|
301 |
{
|
302 |
"epoch": 4.27,
|
303 |
"learning_rate": 4.358974358974359e-06,
|
304 |
+
"loss": 0.3913,
|
305 |
"step": 3000
|
306 |
},
|
307 |
{
|
308 |
"epoch": 4.27,
|
309 |
+
"eval_f1": 0.5419929767805605,
|
310 |
+
"eval_loss": 1.6237069368362427,
|
311 |
+
"eval_runtime": 3.316,
|
312 |
+
"eval_samples_per_second": 29.554,
|
313 |
+
"eval_steps_per_second": 29.554,
|
314 |
"step": 3000
|
315 |
},
|
316 |
{
|
317 |
"epoch": 4.42,
|
318 |
+
"eval_f1": 0.5506356176274623,
|
319 |
+
"eval_loss": 1.6494747400283813,
|
320 |
+
"eval_runtime": 3.3595,
|
321 |
+
"eval_samples_per_second": 29.171,
|
322 |
+
"eval_steps_per_second": 29.171,
|
323 |
"step": 3100
|
324 |
},
|
325 |
{
|
326 |
"epoch": 4.56,
|
327 |
+
"eval_f1": 0.5502274173692974,
|
328 |
+
"eval_loss": 1.7337164878845215,
|
329 |
+
"eval_runtime": 3.3475,
|
330 |
+
"eval_samples_per_second": 29.275,
|
331 |
+
"eval_steps_per_second": 29.275,
|
332 |
"step": 3200
|
333 |
},
|
334 |
{
|
335 |
"epoch": 4.7,
|
336 |
+
"eval_f1": 0.5446428110985144,
|
337 |
+
"eval_loss": 1.7263941764831543,
|
338 |
+
"eval_runtime": 3.3549,
|
339 |
+
"eval_samples_per_second": 29.211,
|
340 |
+
"eval_steps_per_second": 29.211,
|
341 |
"step": 3300
|
342 |
},
|
343 |
{
|
344 |
"epoch": 4.84,
|
345 |
+
"eval_f1": 0.5449539102387533,
|
346 |
+
"eval_loss": 1.719780445098877,
|
347 |
+
"eval_runtime": 3.3251,
|
348 |
+
"eval_samples_per_second": 29.473,
|
349 |
+
"eval_steps_per_second": 29.473,
|
350 |
"step": 3400
|
351 |
},
|
352 |
{
|
353 |
"epoch": 4.99,
|
354 |
"learning_rate": 8.547008547008547e-08,
|
355 |
+
"loss": 0.3154,
|
356 |
"step": 3500
|
357 |
},
|
358 |
{
|
359 |
"epoch": 4.99,
|
360 |
+
"eval_f1": 0.5471660816519186,
|
361 |
+
"eval_loss": 1.7049323320388794,
|
362 |
+
"eval_runtime": 3.3011,
|
363 |
+
"eval_samples_per_second": 29.687,
|
364 |
+
"eval_steps_per_second": 29.687,
|
365 |
"step": 3500
|
366 |
},
|
367 |
{
|
368 |
"epoch": 5.0,
|
369 |
"step": 3510,
|
370 |
"total_flos": 2890172619430200.0,
|
371 |
+
"train_loss": 0.6843088025041455,
|
372 |
+
"train_runtime": 956.7933,
|
373 |
+
"train_samples_per_second": 3.669,
|
374 |
+
"train_steps_per_second": 3.669
|
375 |
}
|
376 |
],
|
377 |
"max_steps": 3510,
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3899
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:642618f727148faa2317b7e8bcc0a316595ef1dbf4f9c15f72d6910bcb87a05c
|
3 |
size 3899
|