alex2awesome commited on
Commit
d42f148
·
1 Parent(s): e502568

Training in progress, step 500

Browse files
all_results.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
  "epoch": 5.0,
3
  "eval_samples": 98,
4
- "test_f1": 0.5503861750639598,
5
- "test_loss": 1.6987024545669556,
6
- "test_runtime": 4.9602,
7
- "test_samples_per_second": 19.757,
8
- "test_steps_per_second": 19.757,
9
- "train_loss": 0.6695007115008145,
10
- "train_runtime": 916.4977,
11
  "train_samples": 702,
12
- "train_samples_per_second": 3.83,
13
- "train_steps_per_second": 3.83
14
  }
 
1
  {
2
  "epoch": 5.0,
3
  "eval_samples": 98,
4
+ "test_f1": 0.5474589844403075,
5
+ "test_loss": 1.7049692869186401,
6
+ "test_runtime": 3.311,
7
+ "test_samples_per_second": 29.598,
8
+ "test_steps_per_second": 29.598,
9
+ "train_loss": 0.6843088025041455,
10
+ "train_runtime": 956.7933,
11
  "train_samples": 702,
12
+ "train_samples_per_second": 3.669,
13
+ "train_steps_per_second": 3.669
14
  }
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "alex2awesome/newsdiscourse-model",
3
  "architectures": [
4
  "SentenceClassificationModel"
5
  ],
@@ -10,6 +10,87 @@
10
  "pooling_method": "attention"
11
  },
12
  "classifier_dropout": null,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  "context_layer": "transformer",
14
  "eos_token_id": 2,
15
  "frozen_layers": [
@@ -31,10 +112,10 @@
31
  "initializer_range": 0.02,
32
  "intermediate_size": 3072,
33
  "layer_norm_eps": 1e-05,
34
- "max_position_embeddings": 120,
35
  "model_type": "roberta",
36
- "num_attention_heads": 2,
37
- "num_hidden_layers": 2,
38
  "pad_token_id": 1,
39
  "position_embedding_type": "absolute",
40
  "torch_dtype": "float32",
 
1
  {
2
+ "_name_or_path": "roberta-base",
3
  "architectures": [
4
  "SentenceClassificationModel"
5
  ],
 
10
  "pooling_method": "attention"
11
  },
12
  "classifier_dropout": null,
13
+ "context_config": {
14
+ "_name_or_path": "roberta-base",
15
+ "add_cross_attention": false,
16
+ "architectures": [
17
+ "RobertaForMaskedLM"
18
+ ],
19
+ "attention_probs_dropout_prob": 0.1,
20
+ "bad_words_ids": null,
21
+ "begin_suppress_tokens": null,
22
+ "bos_token_id": 0,
23
+ "chunk_size_feed_forward": 0,
24
+ "classifier_dropout": null,
25
+ "cross_attention_hidden_size": null,
26
+ "decoder_start_token_id": null,
27
+ "diversity_penalty": 0.0,
28
+ "do_sample": false,
29
+ "early_stopping": false,
30
+ "encoder_no_repeat_ngram_size": 0,
31
+ "eos_token_id": 2,
32
+ "exponential_decay_length_penalty": null,
33
+ "finetuning_task": null,
34
+ "forced_bos_token_id": null,
35
+ "forced_eos_token_id": null,
36
+ "hidden_act": "gelu",
37
+ "hidden_dropout_prob": 0.1,
38
+ "hidden_size": 768,
39
+ "id2label": {
40
+ "0": "LABEL_0",
41
+ "1": "LABEL_1"
42
+ },
43
+ "initializer_range": 0.02,
44
+ "intermediate_size": 3072,
45
+ "is_decoder": false,
46
+ "is_encoder_decoder": false,
47
+ "label2id": {
48
+ "LABEL_0": 0,
49
+ "LABEL_1": 1
50
+ },
51
+ "layer_norm_eps": 1e-05,
52
+ "length_penalty": 1.0,
53
+ "max_length": 20,
54
+ "max_position_embeddings": 120,
55
+ "min_length": 0,
56
+ "model_type": "roberta",
57
+ "no_repeat_ngram_size": 0,
58
+ "num_attention_heads": 2,
59
+ "num_beam_groups": 1,
60
+ "num_beams": 1,
61
+ "num_hidden_layers": 2,
62
+ "num_return_sequences": 1,
63
+ "output_attentions": false,
64
+ "output_hidden_states": false,
65
+ "output_scores": false,
66
+ "pad_token_id": 1,
67
+ "position_embedding_type": "absolute",
68
+ "prefix": null,
69
+ "problem_type": null,
70
+ "pruned_heads": {},
71
+ "remove_invalid_values": false,
72
+ "repetition_penalty": 1.0,
73
+ "return_dict": true,
74
+ "return_dict_in_generate": false,
75
+ "sep_token_id": null,
76
+ "suppress_tokens": null,
77
+ "task_specific_params": null,
78
+ "temperature": 1.0,
79
+ "tf_legacy_loss": false,
80
+ "tie_encoder_decoder": false,
81
+ "tie_word_embeddings": true,
82
+ "tokenizer_class": null,
83
+ "top_k": 50,
84
+ "top_p": 1.0,
85
+ "torch_dtype": null,
86
+ "torchscript": false,
87
+ "transformers_version": "4.30.2",
88
+ "type_vocab_size": 1,
89
+ "typical_p": 1.0,
90
+ "use_bfloat16": false,
91
+ "use_cache": true,
92
+ "vocab_size": 50265
93
+ },
94
  "context_layer": "transformer",
95
  "eos_token_id": 2,
96
  "frozen_layers": [
 
112
  "initializer_range": 0.02,
113
  "intermediate_size": 3072,
114
  "layer_norm_eps": 1e-05,
115
+ "max_position_embeddings": 514,
116
  "model_type": "roberta",
117
+ "num_attention_heads": 12,
118
+ "num_hidden_layers": 12,
119
  "pad_token_id": 1,
120
  "position_embedding_type": "absolute",
121
  "torch_dtype": "float32",
post-training eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "eval_samples": 98,
3
- "test_f1": 0.5503861750639598,
4
- "test_loss": 1.6987024545669556,
5
- "test_runtime": 4.9602,
6
- "test_samples_per_second": 19.757,
7
- "test_steps_per_second": 19.757
8
  }
 
1
  {
2
  "eval_samples": 98,
3
+ "test_f1": 0.5474589844403075,
4
+ "test_loss": 1.7049692869186401,
5
+ "test_runtime": 3.311,
6
+ "test_samples_per_second": 29.598,
7
+ "test_steps_per_second": 29.598
8
  }
prediction_output.jsonl CHANGED
@@ -1 +1 @@
1
- [{"pred": 10.019140243530273, "label": 0.0}, {"pred": 0.6532726287841797, "label": 5.0}, {"pred": -1.5070127248764038, "label": 2.0}, {"pred": 0.24536748230457306, "label": 4.0}, {"pred": -2.25246524810791, "label": 2.0}, {"pred": -2.66007924079895, "label": 8.0}, {"pred": -1.6002302169799805, "label": 8.0}, {"pred": -2.7276155948638916, "label": 2.0}, {"pred": -1.574631690979004, "label": 8.0}, {"pred": -0.33556482195854187, "label": 2.0}, {"pred": -3.09733247756958, "label": 2.0}, {"pred": 0.4764690697193146, "label": 2.0}, {"pred": 6.7093048095703125, "label": 5.0}, {"pred": -2.442629337310791, "label": 5.0}, {"pred": 2.1584315299987793, "label": 5.0}, {"pred": -1.3713687658309937, "label": 5.0}, {"pred": 0.45947328209877014, "label": 5.0}, {"pred": -3.8779995441436768, "label": 6.0}, {"pred": 0.47429022192955017, "label": 6.0}, {"pred": 1.6603984832763672, "label": 5.0}, {"pred": 8.309249877929688, "label": 8.0}, {"pred": 3.2501542568206787, "label": 8.0}, {"pred": -2.717172384262085, "label": 2.0}, {"pred": -0.39201176166534424, "label": 5.0}, {"pred": -4.595953464508057, "label": 5.0}, {"pred": -2.235063076019287, "label": 5.0}, {"pred": -2.44830584526062, "label": 2.0}, {"pred": -0.0069770533591508865, "label": 2.0}, {"pred": 0.969771146774292, "label": 2.0}, {"pred": 7.553024768829346, "label": 3.0}, {"pred": 2.6176557540893555, "label": 3.0}, {"pred": 0.691173255443573, "label": 5.0}, {"pred": 0.06238013133406639, "label": 0.0}, {"pred": -4.164968013763428, "label": 2.0}, {"pred": -3.2204692363739014, "label": 3.0}, {"pred": -3.3176817893981934, "label": 3.0}, {"pred": -2.7758054733276367, "label": 3.0}, {"pred": -0.1663847714662552, "label": 7.0}, {"pred": 9.606531143188477, "label": 8.0}, {"pred": -1.0946831703186035, "label": 7.0}, {"pred": -0.14866292476654053, "label": 7.0}, {"pred": 0.30310508608818054, "label": 2.0}, {"pred": -3.1653261184692383, "label": 2.0}, {"pred": -1.565669298171997, "label": 3.0}, {"pred": -0.18176725506782532, "label": 8.0}, {"pred": -1.1957494020462036, "label": 0.0}, {"pred": -1.6685035228729248, "label": 2.0}, {"pred": -0.043660327792167664, "label": 2.0}, {"pred": -1.7757004499435425, "label": 2.0}, {"pred": -1.4604750871658325, "label": 2.0}, {"pred": 1.0374609231948853, "label": 2.0}, {"pred": -1.8439505100250244, "label": 2.0}, {"pred": -0.7517821788787842, "label": 0.0}, {"pred": 8.289470672607422, "label": 7.0}, {"pred": -1.8440061807632446, "label": 7.0}, {"pred": -1.2976378202438354, "label": 7.0}, {"pred": -0.34481102228164673, "label": 7.0}, {"pred": -2.668887138366699, "label": 8.0}, {"pred": -1.2527223825454712, "label": 8.0}, {"pred": -0.075716033577919, "label": 8.0}, {"pred": -1.3003135919570923, "label": 3.0}, {"pred": -1.1259794235229492, "label": 6.0}, {"pred": 10.510120391845703, "label": 0.0}, {"pred": 2.0237245559692383, "label": 2.0}, {"pred": 1.3917936086654663, "label": 3.0}, {"pred": 6.7284111976623535, "label": 7.0}, {"pred": 4.734694957733154, "label": 0.0}, {"pred": -3.3061137199401855, "label": 5.0}, {"pred": -1.8624444007873535, "label": 2.0}, {"pred": -4.001344203948975, "label": 2.0}, {"pred": -1.8468009233474731, "label": 7.0}, {"pred": -3.2456326484680176, "label": 2.0}, {"pred": 0.39871928095817566, "label": 2.0}, {"pred": -2.057805299758911, "label": 2.0}, {"pred": 0.13218283653259277, "label": 8.0}, {"pred": -0.892180323600769, "label": 8.0}, {"pred": -1.2932769060134888, "label": 3.0}, {"pred": 0.1954576075077057, "label": 0.0}, {"pred": -2.229828357696533, "label": 0.0}, {"pred": -2.0269217491149902, "label": 2.0}, {"pred": 8.243239402770996, "label": 2.0}, {"pred": -2.59175968170166, "label": 2.0}, {"pred": 0.8506417870521545, "label": 8.0}, {"pred": 9.682441711425781, "label": 6.0}, {"pred": -1.4374347925186157, "label": 2.0}, {"pred": 0.009745392948389053, "label": 2.0}, {"pred": -0.4231429994106293, "label": 8.0}, {"pred": -2.4760589599609375, "label": 0.0}, {"pred": -1.4600414037704468, "label": 2.0}, {"pred": -0.09000376611948013, "label": 3.0}, {"pred": -3.15801739692688, "label": 3.0}, {"pred": -1.641605257987976, "label": 2.0}, {"pred": 1.6113959550857544, "label": 6.0}, {"pred": 1.35031259059906, "label": 0.0}, {"pred": 1.4748491048812866, "label": 0.0}, {"pred": 8.973405838012695, "label": 5.0}, {"pred": -0.44036799669265747, "label": 5.0}, {"pred": -2.8449594974517822, "label": 5.0}, {"pred": -2.715689182281494, "label": 5.0}, {"pred": -0.5715784430503845, "label": 5.0}, {"pred": -1.1643404960632324, "label": 2.0}, {"pred": 2.7366137504577637, "label": 2.0}, {"pred": 4.114213466644287, "label": 2.0}, {"pred": -1.6327688694000244, "label": 5.0}, {"pred": 3.206129312515259, "label": 4.0}, {"pred": 1.428622841835022, "label": 8.0}, {"pred": -1.9255108833312988, "label": 4.0}, {"pred": -5.721708297729492, "label": 4.0}, {"pred": -1.8454394340515137, "label": 4.0}, {"pred": -0.906534731388092, "label": 8.0}, {"pred": 0.5136074423789978, "label": 2.0}, {"pred": 0.31771302223205566, "label": 3.0}, {"pred": 6.170832633972168, "label": 2.0}, {"pred": 5.061820983886719, "label": 5.0}, {"pred": -2.271714448928833, "label": 5.0}, {"pred": -1.850223422050476, "label": 5.0}, {"pred": -3.501612901687622, "label": 0.0}, {"pred": -2.0070955753326416, "label": 6.0}, {"pred": -1.9781445264816284, "label": 5.0}, {"pred": -1.6112139225006104, "label": 5.0}, {"pred": -1.830712914466858, "label": 5.0}, {"pred": -0.9544073939323425, "label": 2.0}, {"pred": 3.8555877208709717, "label": 2.0}, {"pred": 4.870663166046143, "label": 2.0}, {"pred": -1.867636799812317, "label": 2.0}, {"pred": 1.9673343896865845, "label": 2.0}, {"pred": -0.6791107058525085, "label": 2.0}, {"pred": -1.7167493104934692, "label": 5.0}, {"pred": -2.843881368637085, "label": 5.0}, {"pred": -1.1429674625396729, "label": 6.0}, {"pred": 2.594322443008423, "label": 6.0}, {"pred": 7.284641265869141, "label": 0.0}, {"pred": 2.655223846435547, "label": 2.0}, {"pred": -2.549745798110962, "label": 2.0}, {"pred": -3.192248821258545, "label": 6.0}, {"pred": 0.08405379951000214, "label": 2.0}, {"pred": -3.116849184036255, "label": 5.0}, {"pred": -0.5237486958503723, "label": 5.0}, {"pred": 2.366180419921875, "label": 6.0}, {"pred": -1.2737070322036743, "label": 2.0}, {"pred": 2.989075183868408, "label": 5.0}, {"pred": 5.9830098152160645, "label": 2.0}, {"pred": -2.1102664470672607, "label": 8.0}, {"pred": -3.5376365184783936, "label": 2.0}, {"pred": -0.9231218099594116, "label": 2.0}, {"pred": -2.3496975898742676, "label": 5.0}, {"pred": -0.7576992511749268, "label": 8.0}, {"pred": 0.5094679594039917, "label": 5.0}, {"pred": -0.6562840342521667, "label": 5.0}, {"pred": 2.5642170906066895, "label": 5.0}, {"pred": 7.409799575805664, "label": 5.0}, {"pred": -2.4087905883789062, "label": 5.0}, {"pred": -2.6217246055603027, "label": 6.0}, {"pred": -0.6317837834358215, "label": 8.0}, {"pred": 0.762174665927887, "label": 6.0}, {"pred": 0.018339872360229492, "label": 7.0}, {"pred": -0.6353716254234314, "label": 6.0}, {"pred": -2.467911958694458, "label": 8.0}, {"pred": -1.4698598384857178, "label": 0.0}, {"pred": 6.675502777099609, "label": 2.0}, {"pred": -1.6566411256790161, "label": 6.0}, {"pred": -1.6938785314559937, "label": 5.0}, {"pred": 2.345128297805786, "label": 5.0}, {"pred": -0.5916361808776855, "label": 2.0}, {"pred": -3.186824083328247, "label": 2.0}, {"pred": -0.9120825529098511, "label": 8.0}, {"pred": -0.8187981247901917, "label": 8.0}, {"pred": -0.8101270198822021, "label": 0.0}, {"pred": 7.5780930519104, "label": 2.0}, {"pred": -2.928427219390869, "label": 0.0}, {"pred": -1.3859901428222656, "label": 7.0}, {"pred": 3.0276236534118652, "label": 2.0}, {"pred": -1.8224083185195923, "label": 2.0}, {"pred": -4.104769706726074, "label": 8.0}, {"pred": 0.03777565062046051, "label": 6.0}, {"pred": -0.014997448772192001, "label": 6.0}, {"pred": 1.0839900970458984, "label": 5.0}, {"pred": 6.335562229156494, "label": 5.0}, {"pred": -3.4330081939697266, "label": 5.0}, {"pred": -2.156536817550659, "label": 5.0}, {"pred": -1.3603081703186035, "label": 5.0}, {"pred": -1.5407929420471191, "label": 5.0}, {"pred": -0.07066483795642853, "label": 5.0}, {"pred": -1.6604976654052734, "label": 6.0}, {"pred": -1.440683364868164, "label": 8.0}, {"pred": 0.9588245153427124, "label": 5.0}, {"pred": -1.7472809553146362, "label": 5.0}, {"pred": -1.087075114250183, "label": 3.0}, {"pred": 8.69987678527832, "label": 3.0}, {"pred": -1.4591203927993774, "label": 6.0}, {"pred": -1.2280433177947998, "label": 2.0}, {"pred": -0.5524106621742249, "label": 3.0}, {"pred": -2.392406463623047, "label": 2.0}, {"pred": -1.3709814548492432, "label": 2.0}, {"pred": -0.1875690072774887, "label": 2.0}, {"pred": -1.5981941223144531, "label": 0.0}, {"pred": -1.1282113790512085, "label": 2.0}, {"pred": 10.509876251220703, "label": 5.0}, {"pred": -0.5587414503097534, "label": 5.0}, {"pred": -0.11731800436973572, "label": 5.0}, {"pred": 8.627680778503418, "label": 2.0}, {"pred": 4.010296821594238, "label": 6.0}, {"pred": -3.5236639976501465, "label": 3.0}, {"pred": -2.157024383544922, "label": 3.0}, {"pred": -1.5002130270004272, "label": 6.0}, {"pred": -1.053414225578308, "label": 3.0}, {"pred": -3.027621030807495, "label": 2.0}, {"pred": 3.2739715576171875, "label": 8.0}, {"pred": -2.5486247539520264, "label": 8.0}, {"pred": -3.9105663299560547, "label": 0.0}, {"pred": 0.9814618825912476, "label": 1.0}, {"pred": -0.46406108140945435, "label": 2.0}, {"pred": 2.880763053894043, "label": 3.0}, {"pred": 5.093158721923828, "label": 6.0}, {"pred": -3.3170182704925537, "label": 8.0}, {"pred": -2.14131498336792, "label": 7.0}, {"pred": -1.5651869773864746, "label": 0.0}, {"pred": 0.6249361038208008, "label": 8.0}, {"pred": 3.9404008388519287, "label": 2.0}, {"pred": 2.718013048171997, "label": 2.0}, {"pred": -2.9450290203094482, "label": 5.0}, {"pred": 6.749814033508301, "label": 5.0}, {"pred": -1.64478600025177, "label": 7.0}, {"pred": -4.171202659606934, "label": 2.0}, {"pred": -3.1550910472869873, "label": 2.0}, {"pred": -2.1020801067352295, "label": 8.0}, {"pred": -1.6797502040863037, "label": 5.0}, {"pred": 6.253066062927246, "label": 5.0}, {"pred": 4.325278282165527, "label": 2.0}, {"pred": -3.1421334743499756, "label": 6.0}, {"pred": 4.086713790893555, "label": 6.0}, {"pred": -1.0735156536102295, "label": 2.0}, {"pred": -3.3857691287994385, "label": 5.0}, {"pred": -2.6780574321746826, "label": 2.0}, {"pred": -2.3147029876708984, "label": 5.0}, {"pred": 0.46940743923187256, "label": 5.0}, {"pred": 9.495539665222168, "label": 8.0}, {"pred": -1.889843463897705, "label": 5.0}, {"pred": 0.36386972665786743, "label": 2.0}, {"pred": -0.4371775984764099, "label": 6.0}, {"pred": -2.8748319149017334, "label": 6.0}, {"pred": -1.1083565950393677, "label": 5.0}, {"pred": 0.13744854927062988, "label": 6.0}, {"pred": -1.4635956287384033, "label": 2.0}, {"pred": -0.5616008043289185, "label": 2.0}, {"pred": 8.173700332641602, "label": 5.0}, {"pred": 4.250307083129883, "label": 5.0}, {"pred": -2.4926605224609375, "label": 0.0}, {"pred": -0.31131792068481445, "label": 5.0}, {"pred": -3.5435919761657715, "label": 2.0}, {"pred": -0.29817143082618713, "label": 2.0}, {"pred": -2.634324073791504, "label": 2.0}, {"pred": -1.6608585119247437, "label": 5.0}, {"pred": -1.39112389087677, "label": 2.0}, {"pred": 7.657371997833252, "label": 6.0}, {"pred": 0.5764560699462891, "label": 6.0}, {"pred": -0.8287283182144165, "label": 6.0}, {"pred": 4.195872783660889, "label": 6.0}, {"pred": -0.7789970636367798, "label": 5.0}, {"pred": -2.123466730117798, "label": 5.0}, {"pred": -3.2755870819091797, "label": 5.0}, {"pred": -2.2995500564575195, "label": 5.0}, {"pred": -4.09137487411499, "label": 5.0}, {"pred": 3.905306577682495, "label": 2.0}, {"pred": 5.917882442474365, "label": 5.0}, {"pred": -2.399942636489868, "label": 8.0}, {"pred": 0.7929872274398804, "label": 8.0}, {"pred": -0.8256539106369019, "label": 0.0}, {"pred": 1.5062816143035889, "label": 3.0}, {"pred": -2.261814594268799, "label": 2.0}, {"pred": -0.10297468304634094, "label": 2.0}, {"pred": -0.8380897641181946, "label": 2.0}, {"pred": 1.7574100494384766, "label": 8.0}, {"pred": -0.9829455614089966, "label": 8.0}, {"pred": -1.2617555856704712, "label": 0.0}, {"pred": 6.876880168914795, "label": 2.0}, {"pred": 1.2606760263442993, "label": 0.0}, {"pred": -3.835230588912964, "label": 7.0}, {"pred": -1.5416922569274902, "label": 0.0}, {"pred": -3.7833173274993896, "label": 0.0}, {"pred": -1.4478707313537598, "label": 0.0}, {"pred": 3.486963987350464, "label": 8.0}, {"pred": 1.1780611276626587, "label": 8.0}, {"pred": -1.6854732036590576, "label": 0.0}, {"pred": 5.843297004699707, "label": 0.0}, {"pred": 3.6902544498443604, "label": 2.0}, {"pred": -2.237409830093384, "label": 2.0}, {"pred": -3.392303943634033, "label": 0.0}]
 
1
+ [{"pred": 9.395666122436523, "label": 0.0}, {"pred": 0.4096878468990326, "label": 5.0}, {"pred": -1.3507310152053833, "label": 2.0}, {"pred": -0.09250641614198685, "label": 4.0}, {"pred": -2.3654043674468994, "label": 2.0}, {"pred": -3.81583571434021, "label": 8.0}, {"pred": -1.1759780645370483, "label": 8.0}, {"pred": -2.295236349105835, "label": 2.0}, {"pred": 0.5764787197113037, "label": 8.0}, {"pred": 1.1780004501342773, "label": 2.0}, {"pred": -1.306742787361145, "label": 2.0}, {"pred": 2.852921485900879, "label": 2.0}, {"pred": 5.584786891937256, "label": 5.0}, {"pred": -4.040923118591309, "label": 5.0}, {"pred": 0.4870939552783966, "label": 5.0}, {"pred": -1.0437679290771484, "label": 5.0}, {"pred": -1.8512382507324219, "label": 5.0}, {"pred": -2.754425048828125, "label": 6.0}, {"pred": 0.00021423818543553352, "label": 6.0}, {"pred": 1.0303763151168823, "label": 5.0}, {"pred": 7.444942474365234, "label": 8.0}, {"pred": 3.7297921180725098, "label": 8.0}, {"pred": -3.4230027198791504, "label": 2.0}, {"pred": -0.2933298647403717, "label": 5.0}, {"pred": -4.0809712409973145, "label": 5.0}, {"pred": -2.418494701385498, "label": 5.0}, {"pred": -3.129002332687378, "label": 2.0}, {"pred": -0.008926194161176682, "label": 2.0}, {"pred": 0.5936489701271057, "label": 2.0}, {"pred": 7.085031509399414, "label": 3.0}, {"pred": 2.1849164962768555, "label": 3.0}, {"pred": 0.3647543489933014, "label": 5.0}, {"pred": -0.2870313823223114, "label": 0.0}, {"pred": -3.9118170738220215, "label": 2.0}, {"pred": -2.2775864601135254, "label": 3.0}, {"pred": -5.056410789489746, "label": 3.0}, {"pred": -2.984327554702759, "label": 3.0}, {"pred": -0.5400028824806213, "label": 7.0}, {"pred": 9.985075950622559, "label": 8.0}, {"pred": -1.0413215160369873, "label": 7.0}, {"pred": -0.9643221497535706, "label": 7.0}, {"pred": 0.15067163109779358, "label": 2.0}, {"pred": -2.808295488357544, "label": 2.0}, {"pred": -1.4440401792526245, "label": 3.0}, {"pred": -1.1782810688018799, "label": 8.0}, {"pred": -1.2901784181594849, "label": 0.0}, {"pred": -1.437712550163269, "label": 2.0}, {"pred": -1.0705559253692627, "label": 2.0}, {"pred": -1.2407697439193726, "label": 2.0}, {"pred": -1.2407119274139404, "label": 2.0}, {"pred": -0.7768977880477905, "label": 2.0}, {"pred": -0.9326913952827454, "label": 2.0}, {"pred": -0.2104552686214447, "label": 0.0}, {"pred": 8.760655403137207, "label": 7.0}, {"pred": -1.9766864776611328, "label": 7.0}, {"pred": -0.9865150451660156, "label": 7.0}, {"pred": -1.477076530456543, "label": 7.0}, {"pred": -1.1692349910736084, "label": 8.0}, {"pred": -1.8697360754013062, "label": 8.0}, {"pred": -0.8081696033477783, "label": 8.0}, {"pred": -1.256617546081543, "label": 3.0}, {"pred": -0.06512688100337982, "label": 6.0}, {"pred": 10.312076568603516, "label": 0.0}, {"pred": 1.8776612281799316, "label": 2.0}, {"pred": 0.12151290476322174, "label": 3.0}, {"pred": 5.705040454864502, "label": 7.0}, {"pred": 5.735830307006836, "label": 0.0}, {"pred": -3.6927287578582764, "label": 5.0}, {"pred": -3.134650468826294, "label": 2.0}, {"pred": -4.3415656089782715, "label": 2.0}, {"pred": -1.1031872034072876, "label": 7.0}, {"pred": -2.867774724960327, "label": 2.0}, {"pred": -1.0985033512115479, "label": 2.0}, {"pred": 0.1397317796945572, "label": 2.0}, {"pred": -0.2997811436653137, "label": 8.0}, {"pred": -1.7282167673110962, "label": 8.0}, {"pred": -2.1505489349365234, "label": 3.0}, {"pred": 0.01920280233025551, "label": 0.0}, {"pred": -1.770388126373291, "label": 0.0}, {"pred": -0.7139682769775391, "label": 2.0}, {"pred": 9.983798027038574, "label": 2.0}, {"pred": -2.4545624256134033, "label": 2.0}, {"pred": 0.5524357557296753, "label": 8.0}, {"pred": 9.475410461425781, "label": 6.0}, {"pred": -2.2126882076263428, "label": 2.0}, {"pred": -0.74846351146698, "label": 2.0}, {"pred": -0.7173618078231812, "label": 8.0}, {"pred": -1.5908926725387573, "label": 0.0}, {"pred": -2.2288694381713867, "label": 2.0}, {"pred": -0.41721978783607483, "label": 3.0}, {"pred": -2.9400010108947754, "label": 3.0}, {"pred": -0.6818507313728333, "label": 2.0}, {"pred": 6.953657150268555, "label": 6.0}, {"pred": 3.4554193019866943, "label": 0.0}, {"pred": -0.9989187717437744, "label": 0.0}, {"pred": 2.9632811546325684, "label": 5.0}, {"pred": -3.579110860824585, "label": 5.0}, {"pred": -1.9934924840927124, "label": 5.0}, {"pred": -3.581209421157837, "label": 5.0}, {"pred": 1.6473805904388428, "label": 5.0}, {"pred": -0.1725456863641739, "label": 2.0}, {"pred": 2.595804452896118, "label": 2.0}, {"pred": 3.480557441711426, "label": 2.0}, {"pred": -3.1583964824676514, "label": 5.0}, {"pred": 0.5711352825164795, "label": 4.0}, {"pred": 0.9948037266731262, "label": 8.0}, {"pred": -1.7164541482925415, "label": 4.0}, {"pred": -4.09295654296875, "label": 4.0}, {"pred": -1.60957932472229, "label": 4.0}, {"pred": -1.4958873987197876, "label": 8.0}, {"pred": 0.606967031955719, "label": 2.0}, {"pred": 0.13369253277778625, "label": 3.0}, {"pred": 3.7107839584350586, "label": 2.0}, {"pred": 7.855518817901611, "label": 5.0}, {"pred": -1.616620421409607, "label": 5.0}, {"pred": -2.36954665184021, "label": 5.0}, {"pred": -3.969804286956787, "label": 0.0}, {"pred": -0.8281141519546509, "label": 6.0}, {"pred": -2.75907301902771, "label": 5.0}, {"pred": -1.5639314651489258, "label": 5.0}, {"pred": -1.4895418882369995, "label": 5.0}, {"pred": -0.5654066801071167, "label": 2.0}, {"pred": 5.1662092208862305, "label": 2.0}, {"pred": 4.961638450622559, "label": 2.0}, {"pred": -1.876211166381836, "label": 2.0}, {"pred": 1.0843249559402466, "label": 2.0}, {"pred": 0.7490819096565247, "label": 2.0}, {"pred": -2.537386655807495, "label": 5.0}, {"pred": -2.4651215076446533, "label": 5.0}, {"pred": -0.8757886290550232, "label": 6.0}, {"pred": 2.1154074668884277, "label": 6.0}, {"pred": 7.650022029876709, "label": 0.0}, {"pred": 2.0454721450805664, "label": 2.0}, {"pred": -2.042609214782715, "label": 2.0}, {"pred": -3.1547300815582275, "label": 6.0}, {"pred": 1.4215494394302368, "label": 2.0}, {"pred": -1.9600963592529297, "label": 5.0}, {"pred": -0.6864432692527771, "label": 5.0}, {"pred": 0.5956198573112488, "label": 6.0}, {"pred": -1.2251653671264648, "label": 2.0}, {"pred": 2.744778871536255, "label": 5.0}, {"pred": 6.340786933898926, "label": 2.0}, {"pred": -3.3739516735076904, "label": 8.0}, {"pred": -3.5059309005737305, "label": 2.0}, {"pred": -0.0787801668047905, "label": 2.0}, {"pred": -2.4171361923217773, "label": 5.0}, {"pred": 0.30282217264175415, "label": 8.0}, {"pred": 0.536819338798523, "label": 5.0}, {"pred": -1.4018205404281616, "label": 5.0}, {"pred": 3.8433799743652344, "label": 5.0}, {"pred": 5.5743727684021, "label": 5.0}, {"pred": -2.5080723762512207, "label": 5.0}, {"pred": -3.364506483078003, "label": 6.0}, {"pred": 0.15638209879398346, "label": 8.0}, {"pred": -0.2856593132019043, "label": 6.0}, {"pred": 0.37352120876312256, "label": 7.0}, {"pred": 0.10489370673894882, "label": 6.0}, {"pred": -2.4880270957946777, "label": 8.0}, {"pred": -1.3927054405212402, "label": 0.0}, {"pred": 6.64209508895874, "label": 2.0}, {"pred": -2.0052239894866943, "label": 6.0}, {"pred": -2.2603507041931152, "label": 5.0}, {"pred": 2.793245315551758, "label": 5.0}, {"pred": -0.34100469946861267, "label": 2.0}, {"pred": -3.1591796875, "label": 2.0}, {"pred": -0.7837404608726501, "label": 8.0}, {"pred": -0.8880395889282227, "label": 8.0}, {"pred": -1.5245490074157715, "label": 0.0}, {"pred": 7.281620025634766, "label": 2.0}, {"pred": -2.9855499267578125, "label": 0.0}, {"pred": -0.9715536236763, "label": 7.0}, {"pred": 2.981710433959961, "label": 2.0}, {"pred": -1.71941077709198, "label": 2.0}, {"pred": -4.1776885986328125, "label": 8.0}, {"pred": -0.2737337648868561, "label": 6.0}, {"pred": -0.2323164939880371, "label": 6.0}, {"pred": 0.7083033323287964, "label": 5.0}, {"pred": 7.313948631286621, "label": 5.0}, {"pred": -3.2342827320098877, "label": 5.0}, {"pred": -1.1054272651672363, "label": 5.0}, {"pred": -1.3207857608795166, "label": 5.0}, {"pred": -1.396915316581726, "label": 5.0}, {"pred": -0.9858799576759338, "label": 5.0}, {"pred": -1.2481117248535156, "label": 6.0}, {"pred": -1.185423731803894, "label": 8.0}, {"pred": -0.6355394124984741, "label": 5.0}, {"pred": -1.3282532691955566, "label": 5.0}, {"pred": -0.06940007954835892, "label": 3.0}, {"pred": 8.819711685180664, "label": 3.0}, {"pred": -1.7800638675689697, "label": 6.0}, {"pred": -0.9062626957893372, "label": 2.0}, {"pred": -1.4409409761428833, "label": 3.0}, {"pred": -0.9971213340759277, "label": 2.0}, {"pred": -1.8456844091415405, "label": 2.0}, {"pred": -0.7595651149749756, "label": 2.0}, {"pred": -1.2325983047485352, "label": 0.0}, {"pred": -0.2523927688598633, "label": 2.0}, {"pred": 10.457062721252441, "label": 5.0}, {"pred": 1.2496005296707153, "label": 5.0}, {"pred": -0.0661335289478302, "label": 5.0}, {"pred": 5.924855709075928, "label": 2.0}, {"pred": 5.310446262359619, "label": 6.0}, {"pred": -4.33503532409668, "label": 3.0}, {"pred": -3.246478319168091, "label": 3.0}, {"pred": -1.0310145616531372, "label": 6.0}, {"pred": -1.9235048294067383, "label": 3.0}, {"pred": -3.1231048107147217, "label": 2.0}, {"pred": 2.0666773319244385, "label": 8.0}, {"pred": -2.21870493888855, "label": 8.0}, {"pred": -4.353604316711426, "label": 0.0}, {"pred": 0.4301067292690277, "label": 1.0}, {"pred": 0.6327790021896362, "label": 2.0}, {"pred": 2.628701686859131, "label": 3.0}, {"pred": 6.137325286865234, "label": 6.0}, {"pred": -2.3515894412994385, "label": 8.0}, {"pred": -2.0138840675354004, "label": 7.0}, {"pred": -0.3610410690307617, "label": 0.0}, {"pred": 0.6118225455284119, "label": 8.0}, {"pred": 1.8093969821929932, "label": 2.0}, {"pred": 1.2613120079040527, "label": 2.0}, {"pred": -2.8037238121032715, "label": 5.0}, {"pred": 7.11236047744751, "label": 5.0}, {"pred": 1.2082927227020264, "label": 7.0}, {"pred": -3.570901870727539, "label": 2.0}, {"pred": -3.2932965755462646, "label": 2.0}, {"pred": -0.8259910345077515, "label": 8.0}, {"pred": -0.7468539476394653, "label": 5.0}, {"pred": 6.213565349578857, "label": 5.0}, {"pred": 5.455384254455566, "label": 2.0}, {"pred": -4.203694820404053, "label": 6.0}, {"pred": 1.6539826393127441, "label": 6.0}, {"pred": -1.7960268259048462, "label": 2.0}, {"pred": -3.1243834495544434, "label": 5.0}, {"pred": -2.0782253742218018, "label": 2.0}, {"pred": -2.1685826778411865, "label": 5.0}, {"pred": 0.5615622997283936, "label": 5.0}, {"pred": 9.341123580932617, "label": 8.0}, {"pred": -1.8784537315368652, "label": 5.0}, {"pred": -0.7927582859992981, "label": 2.0}, {"pred": 0.4580661654472351, "label": 6.0}, {"pred": -2.3094468116760254, "label": 6.0}, {"pred": -2.143928289413452, "label": 5.0}, {"pred": -0.9896171689033508, "label": 6.0}, {"pred": -2.01908016204834, "label": 2.0}, {"pred": -0.6412465572357178, "label": 2.0}, {"pred": 6.39767599105835, "label": 5.0}, {"pred": 4.225459098815918, "label": 5.0}, {"pred": -3.1502859592437744, "label": 0.0}, {"pred": 0.43117573857307434, "label": 5.0}, {"pred": -2.445258617401123, "label": 2.0}, {"pred": 0.1308278888463974, "label": 2.0}, {"pred": -2.7748281955718994, "label": 2.0}, {"pred": -2.6895854473114014, "label": 5.0}, {"pred": -1.6292206048965454, "label": 2.0}, {"pred": 7.543752193450928, "label": 6.0}, {"pred": -1.342669129371643, "label": 6.0}, {"pred": -1.7914018630981445, "label": 6.0}, {"pred": 4.5911688804626465, "label": 6.0}, {"pred": -0.3857574164867401, "label": 5.0}, {"pred": -1.955091118812561, "label": 5.0}, {"pred": -1.7245426177978516, "label": 5.0}, {"pred": -2.9511044025421143, "label": 5.0}, {"pred": -2.7611618041992188, "label": 5.0}, {"pred": 3.6846923828125, "label": 2.0}, {"pred": 4.134253025054932, "label": 5.0}, {"pred": -3.159534454345703, "label": 8.0}, {"pred": 2.984583854675293, "label": 8.0}, {"pred": -1.0099399089813232, "label": 0.0}, {"pred": 1.7101552486419678, "label": 3.0}, {"pred": -2.5953452587127686, "label": 2.0}, {"pred": 1.3067090511322021, "label": 2.0}, {"pred": 2.284773111343384, "label": 2.0}, {"pred": 4.78453254699707, "label": 8.0}, {"pred": -2.012260675430298, "label": 8.0}, {"pred": -2.6885504722595215, "label": 0.0}, {"pred": 3.0605850219726562, "label": 2.0}, {"pred": -2.044528007507324, "label": 0.0}, {"pred": -3.634997606277466, "label": 7.0}, {"pred": -2.3484065532684326, "label": 0.0}, {"pred": -2.205092191696167, "label": 0.0}, {"pred": -1.424561858177185, "label": 0.0}, {"pred": 1.915219783782959, "label": 8.0}, {"pred": 2.2997443675994873, "label": 8.0}, {"pred": -1.9867368936538696, "label": 0.0}, {"pred": 6.400612831115723, "label": 0.0}, {"pred": 1.8677934408187866, "label": 2.0}, {"pred": -2.124760150909424, "label": 2.0}, {"pred": -3.5659399032592773, "label": 0.0}]
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a22e667d8e797f8d40a9af97522ff448e425031c738a2ab7ebd58adf146e3c51
3
- size 714919265
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:563526884464f58b3c1b06eac98e478501952393b97712559a9892d6765fe912
3
+ size 714922721
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 5.0,
3
- "train_loss": 0.6695007115008145,
4
- "train_runtime": 916.4977,
5
  "train_samples": 702,
6
- "train_samples_per_second": 3.83,
7
- "train_steps_per_second": 3.83
8
  }
 
1
  {
2
  "epoch": 5.0,
3
+ "train_loss": 0.6843088025041455,
4
+ "train_runtime": 956.7933,
5
  "train_samples": 702,
6
+ "train_samples_per_second": 3.669,
7
+ "train_steps_per_second": 3.669
8
  }
trainer_state.json CHANGED
@@ -9,369 +9,369 @@
9
  "log_history": [
10
  {
11
  "epoch": 0.14,
12
- "eval_f1": 0.3722687284743791,
13
- "eval_loss": 1.3361328840255737,
14
- "eval_runtime": 3.2985,
15
- "eval_samples_per_second": 29.71,
16
- "eval_steps_per_second": 29.71,
17
  "step": 100
18
  },
19
  {
20
  "epoch": 0.28,
21
- "eval_f1": 0.4344699929794222,
22
- "eval_loss": 1.1836130619049072,
23
- "eval_runtime": 3.274,
24
- "eval_samples_per_second": 29.933,
25
- "eval_steps_per_second": 29.933,
26
  "step": 200
27
  },
28
  {
29
  "epoch": 0.43,
30
- "eval_f1": 0.3996935122704078,
31
- "eval_loss": 1.1635534763336182,
32
- "eval_runtime": 3.2772,
33
- "eval_samples_per_second": 29.904,
34
- "eval_steps_per_second": 29.904,
35
  "step": 300
36
  },
37
  {
38
  "epoch": 0.57,
39
- "eval_f1": 0.5028054395723479,
40
- "eval_loss": 1.353499174118042,
41
- "eval_runtime": 3.2926,
42
- "eval_samples_per_second": 29.764,
43
- "eval_steps_per_second": 29.764,
44
  "step": 400
45
  },
46
  {
47
  "epoch": 0.71,
48
  "learning_rate": 2.572649572649573e-05,
49
- "loss": 1.2064,
50
  "step": 500
51
  },
52
  {
53
  "epoch": 0.71,
54
- "eval_f1": 0.47071964928887344,
55
- "eval_loss": 1.2940737009048462,
56
- "eval_runtime": 3.3744,
57
- "eval_samples_per_second": 29.042,
58
- "eval_steps_per_second": 29.042,
59
  "step": 500
60
  },
61
  {
62
  "epoch": 0.85,
63
- "eval_f1": 0.49370416278560797,
64
- "eval_loss": 1.2891041040420532,
65
- "eval_runtime": 3.3094,
66
- "eval_samples_per_second": 29.613,
67
- "eval_steps_per_second": 29.613,
68
  "step": 600
69
  },
70
  {
71
  "epoch": 1.0,
72
- "eval_f1": 0.47736617527595926,
73
- "eval_loss": 1.2047343254089355,
74
- "eval_runtime": 3.3145,
75
- "eval_samples_per_second": 29.567,
76
- "eval_steps_per_second": 29.567,
77
  "step": 700
78
  },
79
  {
80
  "epoch": 1.14,
81
- "eval_f1": 0.4943830125990876,
82
- "eval_loss": 1.2190661430358887,
83
- "eval_runtime": 3.3377,
84
- "eval_samples_per_second": 29.362,
85
- "eval_steps_per_second": 29.362,
86
  "step": 800
87
  },
88
  {
89
  "epoch": 1.28,
90
- "eval_f1": 0.4777863203104454,
91
- "eval_loss": 1.174961805343628,
92
- "eval_runtime": 3.3391,
93
- "eval_samples_per_second": 29.349,
94
- "eval_steps_per_second": 29.349,
95
  "step": 900
96
  },
97
  {
98
  "epoch": 1.42,
99
  "learning_rate": 2.1452991452991456e-05,
100
- "loss": 0.9207,
101
  "step": 1000
102
  },
103
  {
104
  "epoch": 1.42,
105
- "eval_f1": 0.49086129207075757,
106
- "eval_loss": 1.3087153434753418,
107
- "eval_runtime": 3.4003,
108
- "eval_samples_per_second": 28.821,
109
- "eval_steps_per_second": 28.821,
110
  "step": 1000
111
  },
112
  {
113
  "epoch": 1.57,
114
- "eval_f1": 0.49757882395260544,
115
- "eval_loss": 1.2435556650161743,
116
- "eval_runtime": 3.4366,
117
- "eval_samples_per_second": 28.517,
118
- "eval_steps_per_second": 28.517,
119
  "step": 1100
120
  },
121
  {
122
  "epoch": 1.71,
123
- "eval_f1": 0.503327058221218,
124
- "eval_loss": 1.1465363502502441,
125
- "eval_runtime": 3.2944,
126
- "eval_samples_per_second": 29.748,
127
- "eval_steps_per_second": 29.748,
128
  "step": 1200
129
  },
130
  {
131
  "epoch": 1.85,
132
- "eval_f1": 0.5141589868888157,
133
- "eval_loss": 1.113364577293396,
134
- "eval_runtime": 3.3642,
135
- "eval_samples_per_second": 29.131,
136
- "eval_steps_per_second": 29.131,
137
  "step": 1300
138
  },
139
  {
140
  "epoch": 1.99,
141
- "eval_f1": 0.5383469405673188,
142
- "eval_loss": 1.1939647197723389,
143
- "eval_runtime": 3.3033,
144
- "eval_samples_per_second": 29.668,
145
- "eval_steps_per_second": 29.668,
146
  "step": 1400
147
  },
148
  {
149
  "epoch": 2.14,
150
  "learning_rate": 1.7179487179487178e-05,
151
- "loss": 0.8149,
152
  "step": 1500
153
  },
154
  {
155
  "epoch": 2.14,
156
- "eval_f1": 0.5291030100787034,
157
- "eval_loss": 1.2552497386932373,
158
- "eval_runtime": 3.7541,
159
- "eval_samples_per_second": 26.105,
160
- "eval_steps_per_second": 26.105,
161
  "step": 1500
162
  },
163
  {
164
  "epoch": 2.28,
165
- "eval_f1": 0.5259736412492381,
166
- "eval_loss": 1.3746747970581055,
167
- "eval_runtime": 4.9995,
168
- "eval_samples_per_second": 19.602,
169
- "eval_steps_per_second": 19.602,
170
  "step": 1600
171
  },
172
  {
173
  "epoch": 2.42,
174
- "eval_f1": 0.5329388682083431,
175
- "eval_loss": 1.3680145740509033,
176
- "eval_runtime": 5.1597,
177
- "eval_samples_per_second": 18.993,
178
- "eval_steps_per_second": 18.993,
179
  "step": 1700
180
  },
181
  {
182
  "epoch": 2.56,
183
- "eval_f1": 0.5189920532535025,
184
- "eval_loss": 1.27865469455719,
185
- "eval_runtime": 5.0223,
186
- "eval_samples_per_second": 19.513,
187
- "eval_steps_per_second": 19.513,
188
  "step": 1800
189
  },
190
  {
191
  "epoch": 2.71,
192
- "eval_f1": 0.5409205239275264,
193
- "eval_loss": 1.3888845443725586,
194
- "eval_runtime": 3.3132,
195
- "eval_samples_per_second": 29.579,
196
- "eval_steps_per_second": 29.579,
197
  "step": 1900
198
  },
199
  {
200
  "epoch": 2.85,
201
  "learning_rate": 1.2905982905982905e-05,
202
- "loss": 0.6152,
203
  "step": 2000
204
  },
205
  {
206
  "epoch": 2.85,
207
- "eval_f1": 0.543504294934508,
208
- "eval_loss": 1.3602004051208496,
209
- "eval_runtime": 3.3336,
210
- "eval_samples_per_second": 29.398,
211
- "eval_steps_per_second": 29.398,
212
  "step": 2000
213
  },
214
  {
215
  "epoch": 2.99,
216
- "eval_f1": 0.5467811408362643,
217
- "eval_loss": 1.3174574375152588,
218
- "eval_runtime": 5.0569,
219
- "eval_samples_per_second": 19.379,
220
- "eval_steps_per_second": 19.379,
221
  "step": 2100
222
  },
223
  {
224
  "epoch": 3.13,
225
- "eval_f1": 0.5365057187973831,
226
- "eval_loss": 1.5886870622634888,
227
- "eval_runtime": 4.5058,
228
- "eval_samples_per_second": 21.75,
229
- "eval_steps_per_second": 21.75,
230
  "step": 2200
231
  },
232
  {
233
  "epoch": 3.28,
234
- "eval_f1": 0.5563382534701277,
235
- "eval_loss": 1.517231822013855,
236
- "eval_runtime": 4.5269,
237
- "eval_samples_per_second": 21.648,
238
- "eval_steps_per_second": 21.648,
239
  "step": 2300
240
  },
241
  {
242
  "epoch": 3.42,
243
- "eval_f1": 0.5661390498930103,
244
- "eval_loss": 1.5470443964004517,
245
- "eval_runtime": 3.6905,
246
- "eval_samples_per_second": 26.555,
247
- "eval_steps_per_second": 26.555,
248
  "step": 2400
249
  },
250
  {
251
  "epoch": 3.56,
252
  "learning_rate": 8.632478632478633e-06,
253
- "loss": 0.4719,
254
  "step": 2500
255
  },
256
  {
257
  "epoch": 3.56,
258
- "eval_f1": 0.521216772952552,
259
- "eval_loss": 1.4928430318832397,
260
- "eval_runtime": 3.3155,
261
- "eval_samples_per_second": 29.558,
262
- "eval_steps_per_second": 29.558,
263
  "step": 2500
264
  },
265
  {
266
  "epoch": 3.7,
267
- "eval_f1": 0.5356457612585566,
268
- "eval_loss": 1.6497721672058105,
269
- "eval_runtime": 4.8518,
270
- "eval_samples_per_second": 20.199,
271
- "eval_steps_per_second": 20.199,
272
  "step": 2600
273
  },
274
  {
275
  "epoch": 3.85,
276
- "eval_f1": 0.5596834952223371,
277
- "eval_loss": 1.4976708889007568,
278
- "eval_runtime": 4.6972,
279
- "eval_samples_per_second": 20.863,
280
- "eval_steps_per_second": 20.863,
281
  "step": 2700
282
  },
283
  {
284
  "epoch": 3.99,
285
- "eval_f1": 0.5470066167039311,
286
- "eval_loss": 1.471981167793274,
287
- "eval_runtime": 4.5019,
288
- "eval_samples_per_second": 21.769,
289
- "eval_steps_per_second": 21.769,
290
  "step": 2800
291
  },
292
  {
293
  "epoch": 4.13,
294
- "eval_f1": 0.5492964393504802,
295
- "eval_loss": 1.5796676874160767,
296
- "eval_runtime": 4.7041,
297
- "eval_samples_per_second": 20.833,
298
- "eval_steps_per_second": 20.833,
299
  "step": 2900
300
  },
301
  {
302
  "epoch": 4.27,
303
  "learning_rate": 4.358974358974359e-06,
304
- "loss": 0.372,
305
  "step": 3000
306
  },
307
  {
308
  "epoch": 4.27,
309
- "eval_f1": 0.5445354826532323,
310
- "eval_loss": 1.6874395608901978,
311
- "eval_runtime": 3.9793,
312
- "eval_samples_per_second": 24.628,
313
- "eval_steps_per_second": 24.628,
314
  "step": 3000
315
  },
316
  {
317
  "epoch": 4.42,
318
- "eval_f1": 0.5544723066439012,
319
- "eval_loss": 1.6702477931976318,
320
- "eval_runtime": 4.7443,
321
- "eval_samples_per_second": 20.656,
322
- "eval_steps_per_second": 20.656,
323
  "step": 3100
324
  },
325
  {
326
  "epoch": 4.56,
327
- "eval_f1": 0.5469058666319371,
328
- "eval_loss": 1.7671833038330078,
329
- "eval_runtime": 4.6665,
330
- "eval_samples_per_second": 21.001,
331
- "eval_steps_per_second": 21.001,
332
  "step": 3200
333
  },
334
  {
335
  "epoch": 4.7,
336
- "eval_f1": 0.5485370297299399,
337
- "eval_loss": 1.7351080179214478,
338
- "eval_runtime": 4.8851,
339
- "eval_samples_per_second": 20.061,
340
- "eval_steps_per_second": 20.061,
341
  "step": 3300
342
  },
343
  {
344
  "epoch": 4.84,
345
- "eval_f1": 0.5497797755164764,
346
- "eval_loss": 1.7282612323760986,
347
- "eval_runtime": 5.1791,
348
- "eval_samples_per_second": 18.922,
349
- "eval_steps_per_second": 18.922,
350
  "step": 3400
351
  },
352
  {
353
  "epoch": 4.99,
354
  "learning_rate": 8.547008547008547e-08,
355
- "loss": 0.2944,
356
  "step": 3500
357
  },
358
  {
359
  "epoch": 4.99,
360
- "eval_f1": 0.5503861750639598,
361
- "eval_loss": 1.698703408241272,
362
- "eval_runtime": 5.6701,
363
- "eval_samples_per_second": 17.284,
364
- "eval_steps_per_second": 17.284,
365
  "step": 3500
366
  },
367
  {
368
  "epoch": 5.0,
369
  "step": 3510,
370
  "total_flos": 2890172619430200.0,
371
- "train_loss": 0.6695007115008145,
372
- "train_runtime": 916.4977,
373
- "train_samples_per_second": 3.83,
374
- "train_steps_per_second": 3.83
375
  }
376
  ],
377
  "max_steps": 3510,
 
9
  "log_history": [
10
  {
11
  "epoch": 0.14,
12
+ "eval_f1": 0.3735957123448099,
13
+ "eval_loss": 1.3954416513442993,
14
+ "eval_runtime": 3.3287,
15
+ "eval_samples_per_second": 29.441,
16
+ "eval_steps_per_second": 29.441,
17
  "step": 100
18
  },
19
  {
20
  "epoch": 0.28,
21
+ "eval_f1": 0.4402748314205965,
22
+ "eval_loss": 1.2493854761123657,
23
+ "eval_runtime": 5.52,
24
+ "eval_samples_per_second": 17.754,
25
+ "eval_steps_per_second": 17.754,
26
  "step": 200
27
  },
28
  {
29
  "epoch": 0.43,
30
+ "eval_f1": 0.4049192887670501,
31
+ "eval_loss": 1.1859477758407593,
32
+ "eval_runtime": 4.6189,
33
+ "eval_samples_per_second": 21.217,
34
+ "eval_steps_per_second": 21.217,
35
  "step": 300
36
  },
37
  {
38
  "epoch": 0.57,
39
+ "eval_f1": 0.46917462323111053,
40
+ "eval_loss": 1.3165582418441772,
41
+ "eval_runtime": 5.117,
42
+ "eval_samples_per_second": 19.152,
43
+ "eval_steps_per_second": 19.152,
44
  "step": 400
45
  },
46
  {
47
  "epoch": 0.71,
48
  "learning_rate": 2.572649572649573e-05,
49
+ "loss": 1.2082,
50
  "step": 500
51
  },
52
  {
53
  "epoch": 0.71,
54
+ "eval_f1": 0.4667018667844948,
55
+ "eval_loss": 1.3011000156402588,
56
+ "eval_runtime": 4.4894,
57
+ "eval_samples_per_second": 21.829,
58
+ "eval_steps_per_second": 21.829,
59
  "step": 500
60
  },
61
  {
62
  "epoch": 0.85,
63
+ "eval_f1": 0.4908908048448308,
64
+ "eval_loss": 1.2637114524841309,
65
+ "eval_runtime": 3.3668,
66
+ "eval_samples_per_second": 29.108,
67
+ "eval_steps_per_second": 29.108,
68
  "step": 600
69
  },
70
  {
71
  "epoch": 1.0,
72
+ "eval_f1": 0.4642800618313763,
73
+ "eval_loss": 1.1596269607543945,
74
+ "eval_runtime": 5.1253,
75
+ "eval_samples_per_second": 19.121,
76
+ "eval_steps_per_second": 19.121,
77
  "step": 700
78
  },
79
  {
80
  "epoch": 1.14,
81
+ "eval_f1": 0.495361570506607,
82
+ "eval_loss": 1.2012276649475098,
83
+ "eval_runtime": 5.0079,
84
+ "eval_samples_per_second": 19.569,
85
+ "eval_steps_per_second": 19.569,
86
  "step": 800
87
  },
88
  {
89
  "epoch": 1.28,
90
+ "eval_f1": 0.4856164962650226,
91
+ "eval_loss": 1.1207385063171387,
92
+ "eval_runtime": 5.1293,
93
+ "eval_samples_per_second": 19.106,
94
+ "eval_steps_per_second": 19.106,
95
  "step": 900
96
  },
97
  {
98
  "epoch": 1.42,
99
  "learning_rate": 2.1452991452991456e-05,
100
+ "loss": 0.9276,
101
  "step": 1000
102
  },
103
  {
104
  "epoch": 1.42,
105
+ "eval_f1": 0.5074106973681244,
106
+ "eval_loss": 1.3098654747009277,
107
+ "eval_runtime": 5.0862,
108
+ "eval_samples_per_second": 19.268,
109
+ "eval_steps_per_second": 19.268,
110
  "step": 1000
111
  },
112
  {
113
  "epoch": 1.57,
114
+ "eval_f1": 0.4820613319741061,
115
+ "eval_loss": 1.2627363204956055,
116
+ "eval_runtime": 5.2038,
117
+ "eval_samples_per_second": 18.832,
118
+ "eval_steps_per_second": 18.832,
119
  "step": 1100
120
  },
121
  {
122
  "epoch": 1.71,
123
+ "eval_f1": 0.5034445960093111,
124
+ "eval_loss": 1.120241403579712,
125
+ "eval_runtime": 5.175,
126
+ "eval_samples_per_second": 18.937,
127
+ "eval_steps_per_second": 18.937,
128
  "step": 1200
129
  },
130
  {
131
  "epoch": 1.85,
132
+ "eval_f1": 0.502158612711185,
133
+ "eval_loss": 1.1611493825912476,
134
+ "eval_runtime": 5.037,
135
+ "eval_samples_per_second": 19.456,
136
+ "eval_steps_per_second": 19.456,
137
  "step": 1300
138
  },
139
  {
140
  "epoch": 1.99,
141
+ "eval_f1": 0.5191081295714916,
142
+ "eval_loss": 1.2113677263259888,
143
+ "eval_runtime": 5.0637,
144
+ "eval_samples_per_second": 19.353,
145
+ "eval_steps_per_second": 19.353,
146
  "step": 1400
147
  },
148
  {
149
  "epoch": 2.14,
150
  "learning_rate": 1.7179487179487178e-05,
151
+ "loss": 0.8288,
152
  "step": 1500
153
  },
154
  {
155
  "epoch": 2.14,
156
+ "eval_f1": 0.5078021729955409,
157
+ "eval_loss": 1.2758545875549316,
158
+ "eval_runtime": 5.0884,
159
+ "eval_samples_per_second": 19.26,
160
+ "eval_steps_per_second": 19.26,
161
  "step": 1500
162
  },
163
  {
164
  "epoch": 2.28,
165
+ "eval_f1": 0.5286245201650122,
166
+ "eval_loss": 1.3322173357009888,
167
+ "eval_runtime": 5.2011,
168
+ "eval_samples_per_second": 18.842,
169
+ "eval_steps_per_second": 18.842,
170
  "step": 1600
171
  },
172
  {
173
  "epoch": 2.42,
174
+ "eval_f1": 0.5300667878954677,
175
+ "eval_loss": 1.2991284132003784,
176
+ "eval_runtime": 5.1327,
177
+ "eval_samples_per_second": 19.093,
178
+ "eval_steps_per_second": 19.093,
179
  "step": 1700
180
  },
181
  {
182
  "epoch": 2.56,
183
+ "eval_f1": 0.500402521720021,
184
+ "eval_loss": 1.2623299360275269,
185
+ "eval_runtime": 5.5478,
186
+ "eval_samples_per_second": 17.665,
187
+ "eval_steps_per_second": 17.665,
188
  "step": 1800
189
  },
190
  {
191
  "epoch": 2.71,
192
+ "eval_f1": 0.5244540639978136,
193
+ "eval_loss": 1.3173421621322632,
194
+ "eval_runtime": 5.5919,
195
+ "eval_samples_per_second": 17.525,
196
+ "eval_steps_per_second": 17.525,
197
  "step": 1900
198
  },
199
  {
200
  "epoch": 2.85,
201
  "learning_rate": 1.2905982905982905e-05,
202
+ "loss": 0.6347,
203
  "step": 2000
204
  },
205
  {
206
  "epoch": 2.85,
207
+ "eval_f1": 0.5317743052897723,
208
+ "eval_loss": 1.3929016590118408,
209
+ "eval_runtime": 5.7399,
210
+ "eval_samples_per_second": 17.073,
211
+ "eval_steps_per_second": 17.073,
212
  "step": 2000
213
  },
214
  {
215
  "epoch": 2.99,
216
+ "eval_f1": 0.5383445546938374,
217
+ "eval_loss": 1.3333723545074463,
218
+ "eval_runtime": 3.3412,
219
+ "eval_samples_per_second": 29.331,
220
+ "eval_steps_per_second": 29.331,
221
  "step": 2100
222
  },
223
  {
224
  "epoch": 3.13,
225
+ "eval_f1": 0.5275077326569355,
226
+ "eval_loss": 1.5553914308547974,
227
+ "eval_runtime": 3.3081,
228
+ "eval_samples_per_second": 29.624,
229
+ "eval_steps_per_second": 29.624,
230
  "step": 2200
231
  },
232
  {
233
  "epoch": 3.28,
234
+ "eval_f1": 0.5592106629133059,
235
+ "eval_loss": 1.5034139156341553,
236
+ "eval_runtime": 3.3379,
237
+ "eval_samples_per_second": 29.36,
238
+ "eval_steps_per_second": 29.36,
239
  "step": 2300
240
  },
241
  {
242
  "epoch": 3.42,
243
+ "eval_f1": 0.5715959486823535,
244
+ "eval_loss": 1.5117721557617188,
245
+ "eval_runtime": 3.3428,
246
+ "eval_samples_per_second": 29.317,
247
+ "eval_steps_per_second": 29.317,
248
  "step": 2400
249
  },
250
  {
251
  "epoch": 3.56,
252
  "learning_rate": 8.632478632478633e-06,
253
+ "loss": 0.4923,
254
  "step": 2500
255
  },
256
  {
257
  "epoch": 3.56,
258
+ "eval_f1": 0.5210855337430568,
259
+ "eval_loss": 1.4938777685165405,
260
+ "eval_runtime": 3.3285,
261
+ "eval_samples_per_second": 29.443,
262
+ "eval_steps_per_second": 29.443,
263
  "step": 2500
264
  },
265
  {
266
  "epoch": 3.7,
267
+ "eval_f1": 0.548457367895942,
268
+ "eval_loss": 1.5325443744659424,
269
+ "eval_runtime": 3.3057,
270
+ "eval_samples_per_second": 29.646,
271
+ "eval_steps_per_second": 29.646,
272
  "step": 2600
273
  },
274
  {
275
  "epoch": 3.85,
276
+ "eval_f1": 0.5553468907576662,
277
+ "eval_loss": 1.5296635627746582,
278
+ "eval_runtime": 3.3038,
279
+ "eval_samples_per_second": 29.663,
280
+ "eval_steps_per_second": 29.663,
281
  "step": 2700
282
  },
283
  {
284
  "epoch": 3.99,
285
+ "eval_f1": 0.5419549676909157,
286
+ "eval_loss": 1.5025768280029297,
287
+ "eval_runtime": 3.3058,
288
+ "eval_samples_per_second": 29.644,
289
+ "eval_steps_per_second": 29.644,
290
  "step": 2800
291
  },
292
  {
293
  "epoch": 4.13,
294
+ "eval_f1": 0.5461023124307824,
295
+ "eval_loss": 1.5561065673828125,
296
+ "eval_runtime": 3.3631,
297
+ "eval_samples_per_second": 29.14,
298
+ "eval_steps_per_second": 29.14,
299
  "step": 2900
300
  },
301
  {
302
  "epoch": 4.27,
303
  "learning_rate": 4.358974358974359e-06,
304
+ "loss": 0.3913,
305
  "step": 3000
306
  },
307
  {
308
  "epoch": 4.27,
309
+ "eval_f1": 0.5419929767805605,
310
+ "eval_loss": 1.6237069368362427,
311
+ "eval_runtime": 3.316,
312
+ "eval_samples_per_second": 29.554,
313
+ "eval_steps_per_second": 29.554,
314
  "step": 3000
315
  },
316
  {
317
  "epoch": 4.42,
318
+ "eval_f1": 0.5506356176274623,
319
+ "eval_loss": 1.6494747400283813,
320
+ "eval_runtime": 3.3595,
321
+ "eval_samples_per_second": 29.171,
322
+ "eval_steps_per_second": 29.171,
323
  "step": 3100
324
  },
325
  {
326
  "epoch": 4.56,
327
+ "eval_f1": 0.5502274173692974,
328
+ "eval_loss": 1.7337164878845215,
329
+ "eval_runtime": 3.3475,
330
+ "eval_samples_per_second": 29.275,
331
+ "eval_steps_per_second": 29.275,
332
  "step": 3200
333
  },
334
  {
335
  "epoch": 4.7,
336
+ "eval_f1": 0.5446428110985144,
337
+ "eval_loss": 1.7263941764831543,
338
+ "eval_runtime": 3.3549,
339
+ "eval_samples_per_second": 29.211,
340
+ "eval_steps_per_second": 29.211,
341
  "step": 3300
342
  },
343
  {
344
  "epoch": 4.84,
345
+ "eval_f1": 0.5449539102387533,
346
+ "eval_loss": 1.719780445098877,
347
+ "eval_runtime": 3.3251,
348
+ "eval_samples_per_second": 29.473,
349
+ "eval_steps_per_second": 29.473,
350
  "step": 3400
351
  },
352
  {
353
  "epoch": 4.99,
354
  "learning_rate": 8.547008547008547e-08,
355
+ "loss": 0.3154,
356
  "step": 3500
357
  },
358
  {
359
  "epoch": 4.99,
360
+ "eval_f1": 0.5471660816519186,
361
+ "eval_loss": 1.7049323320388794,
362
+ "eval_runtime": 3.3011,
363
+ "eval_samples_per_second": 29.687,
364
+ "eval_steps_per_second": 29.687,
365
  "step": 3500
366
  },
367
  {
368
  "epoch": 5.0,
369
  "step": 3510,
370
  "total_flos": 2890172619430200.0,
371
+ "train_loss": 0.6843088025041455,
372
+ "train_runtime": 956.7933,
373
+ "train_samples_per_second": 3.669,
374
+ "train_steps_per_second": 3.669
375
  }
376
  ],
377
  "max_steps": 3510,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cbdd39e5dadc71c4520b7ee1b5c990a33eee4d7fdad887960a5ecdb44855941e
3
  size 3899
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:642618f727148faa2317b7e8bcc0a316595ef1dbf4f9c15f72d6910bcb87a05c
3
  size 3899