jed351 committed on
Commit a08367c
1 Parent(s): 93c3185

Upload folder using huggingface_hub

This view is limited to 50 files because it contains too many changes. See raw diff for the full changeset.
Files changed (50):
  1. deberta-v3-finetuned/fold_0/0/checkpoint-500/config.json +42 -0
  2. deberta-v3-finetuned/fold_0/0/checkpoint-500/optimizer.pt +3 -0
  3. deberta-v3-finetuned/fold_0/0/checkpoint-500/pytorch_model.bin +3 -0
  4. deberta-v3-finetuned/fold_0/0/checkpoint-500/rng_state.pth +3 -0
  5. deberta-v3-finetuned/fold_0/0/checkpoint-500/scheduler.pt +3 -0
  6. deberta-v3-finetuned/fold_0/0/checkpoint-500/special_tokens_map.json +9 -0
  7. deberta-v3-finetuned/fold_0/0/checkpoint-500/tokenizer.json +0 -0
  8. deberta-v3-finetuned/fold_0/0/checkpoint-500/tokenizer_config.json +16 -0
  9. deberta-v3-finetuned/fold_0/0/checkpoint-500/trainer_state.json +70 -0
  10. deberta-v3-finetuned/fold_0/0/checkpoint-500/training_args.bin +3 -0
  11. deberta-v3-finetuned/fold_0/config.json +42 -0
  12. deberta-v3-finetuned/fold_0/pytorch_model.bin +3 -0
  13. deberta-v3-finetuned/fold_0/special_tokens_map.json +9 -0
  14. deberta-v3-finetuned/fold_0/tokenizer.json +0 -0
  15. deberta-v3-finetuned/fold_0/tokenizer_config.json +16 -0
  16. deberta-v3-finetuned/fold_1/1/checkpoint-1000/config.json +42 -0
  17. deberta-v3-finetuned/fold_1/1/checkpoint-1000/optimizer.pt +3 -0
  18. deberta-v3-finetuned/fold_1/1/checkpoint-1000/pytorch_model.bin +3 -0
  19. deberta-v3-finetuned/fold_1/1/checkpoint-1000/rng_state.pth +3 -0
  20. deberta-v3-finetuned/fold_1/1/checkpoint-1000/scheduler.pt +3 -0
  21. deberta-v3-finetuned/fold_1/1/checkpoint-1000/special_tokens_map.json +9 -0
  22. deberta-v3-finetuned/fold_1/1/checkpoint-1000/tokenizer.json +0 -0
  23. deberta-v3-finetuned/fold_1/1/checkpoint-1000/tokenizer_config.json +16 -0
  24. deberta-v3-finetuned/fold_1/1/checkpoint-1000/trainer_state.json +121 -0
  25. deberta-v3-finetuned/fold_1/1/checkpoint-1000/training_args.bin +3 -0
  26. deberta-v3-finetuned/fold_1/config.json +42 -0
  27. deberta-v3-finetuned/fold_1/pytorch_model.bin +3 -0
  28. deberta-v3-finetuned/fold_1/special_tokens_map.json +9 -0
  29. deberta-v3-finetuned/fold_1/tokenizer.json +0 -0
  30. deberta-v3-finetuned/fold_1/tokenizer_config.json +16 -0
  31. deberta-v3-finetuned/fold_2/2/checkpoint-400/config.json +42 -0
  32. deberta-v3-finetuned/fold_2/2/checkpoint-400/optimizer.pt +3 -0
  33. deberta-v3-finetuned/fold_2/2/checkpoint-400/pytorch_model.bin +3 -0
  34. deberta-v3-finetuned/fold_2/2/checkpoint-400/rng_state.pth +3 -0
  35. deberta-v3-finetuned/fold_2/2/checkpoint-400/scheduler.pt +3 -0
  36. deberta-v3-finetuned/fold_2/2/checkpoint-400/special_tokens_map.json +9 -0
  37. deberta-v3-finetuned/fold_2/2/checkpoint-400/tokenizer.json +0 -0
  38. deberta-v3-finetuned/fold_2/2/checkpoint-400/tokenizer_config.json +16 -0
  39. deberta-v3-finetuned/fold_2/2/checkpoint-400/trainer_state.json +55 -0
  40. deberta-v3-finetuned/fold_2/2/checkpoint-400/training_args.bin +3 -0
  41. deberta-v3-finetuned/fold_2/config.json +42 -0
  42. deberta-v3-finetuned/fold_2/pytorch_model.bin +3 -0
  43. deberta-v3-finetuned/fold_2/special_tokens_map.json +9 -0
  44. deberta-v3-finetuned/fold_2/tokenizer.json +0 -0
  45. deberta-v3-finetuned/fold_2/tokenizer_config.json +16 -0
  46. deberta-v3-finetuned/fold_3/3/checkpoint-100/config.json +42 -0
  47. deberta-v3-finetuned/fold_3/3/checkpoint-100/optimizer.pt +3 -0
  48. deberta-v3-finetuned/fold_3/3/checkpoint-100/pytorch_model.bin +3 -0
  49. deberta-v3-finetuned/fold_3/3/checkpoint-100/rng_state.pth +3 -0
  50. deberta-v3-finetuned/fold_3/3/checkpoint-100/scheduler.pt +3 -0
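Note: the upload preserves the Hugging Face Trainer output layout: each fold_N directory holds the final model and tokenizer, and a nested fold_N/N/checkpoint-M directory holds the mid-training state (optimizer, scheduler, RNG). A rough sketch of pulling a single fold down with huggingface_hub follows; the repo id is a placeholder, since this view does not show it.

# Rough sketch, not part of this commit: download only fold_0 from the Hub.
from huggingface_hub import snapshot_download

local_dir = snapshot_download(
    repo_id="USER/REPO",  # placeholder: the repo id is not shown in this diff view
    allow_patterns=["deberta-v3-finetuned/fold_0/*"],  # skip the other folds
)
print(local_dir)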
deberta-v3-finetuned/fold_0/0/checkpoint-500/config.json ADDED
@@ -0,0 +1,42 @@
+{
+  "_name_or_path": "/gpfs/home/jc3821/kaggle/microsoft-deberta-v3-large",
+  "architectures": [
+    "DebertaV2ForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.007,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.007,
+  "hidden_size": 1024,
+  "id2label": {
+    "0": "LABEL_0"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 4096,
+  "label2id": {
+    "LABEL_0": 0
+  },
+  "layer_norm_eps": 1e-07,
+  "max_position_embeddings": 512,
+  "max_relative_positions": -1,
+  "model_type": "deberta-v2",
+  "norm_rel_ebd": "layer_norm",
+  "num_attention_heads": 16,
+  "num_hidden_layers": 24,
+  "pad_token_id": 0,
+  "pooler_dropout": 0,
+  "pooler_hidden_act": "gelu",
+  "pooler_hidden_size": 1024,
+  "pos_att_type": [
+    "p2c",
+    "c2p"
+  ],
+  "position_biased_input": false,
+  "position_buckets": 256,
+  "problem_type": "regression",
+  "relative_attention": true,
+  "share_att_key": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.32.1",
+  "type_vocab_size": 0,
+  "vocab_size": 128100
+}
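Note: this config is microsoft-deberta-v3-large set up as a single-output regressor ("problem_type": "regression", one LABEL_0 label), fine-tuned from a local copy of the base model. A minimal loading sketch, assuming a local checkout of this repo with transformers and torch installed; the example input text is made up.

# Minimal sketch: load one fold and score a text.
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

ckpt = "deberta-v3-finetuned/fold_0/0/checkpoint-500"  # any fold/checkpoint dir in this commit
tokenizer = AutoTokenizer.from_pretrained(ckpt)
model = AutoModelForSequenceClassification.from_pretrained(ckpt).eval()

batch = tokenizer("Example text to score.", return_tensors="pt",
                  truncation=True, max_length=512)
with torch.no_grad():
    # With problem_type "regression" and a single label, the raw logit is the prediction.
    prediction = model(**batch).logits.squeeze(-1).item()
print(prediction)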
deberta-v3-finetuned/fold_0/0/checkpoint-500/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3ce4b3555aaabd6ebf364eef18675e77db495a9c88a5af48454ebfefcddf1b52
+size 3480831547
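Note: the three lines above are a Git LFS pointer, not the optimizer state itself; running "git lfs pull" (or downloading via the Hub) fetches the real binary. A small illustrative parser, not part of this commit:

def parse_lfs_pointer(text):
    # Pointer files are "key value" lines: version, oid sha256:<hex>, size <bytes>.
    return dict(line.split(" ", 1) for line in text.strip().splitlines())

pointer = (
    "version https://git-lfs.github.com/spec/v1\n"
    "oid sha256:3ce4b3555aaabd6ebf364eef18675e77db495a9c88a5af48454ebfefcddf1b52\n"
    "size 3480831547"
)
info = parse_lfs_pointer(pointer)
print(int(info["size"]) / 2**30)  # ~3.24 GiB of optimizer state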
deberta-v3-finetuned/fold_0/0/checkpoint-500/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bdc5261521c4981b187281dea24a24d8290fd2c4e222a5efff1cf0e9234c7cf5
+size 1740387701
deberta-v3-finetuned/fold_0/0/checkpoint-500/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:51783737039aaae77df9f6cc876318bdb54431cf6e9bffdfbb995a59239ef270
+size 14575
deberta-v3-finetuned/fold_0/0/checkpoint-500/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:af2fa603561d1610ba73b457cac52ea6a0ab7ffa9c9c41a75a141811fc0185a3
+size 627
deberta-v3-finetuned/fold_0/0/checkpoint-500/special_tokens_map.json ADDED
@@ -0,0 +1,9 @@
+{
+  "bos_token": "[CLS]",
+  "cls_token": "[CLS]",
+  "eos_token": "[SEP]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}
deberta-v3-finetuned/fold_0/0/checkpoint-500/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
deberta-v3-finetuned/fold_0/0/checkpoint-500/tokenizer_config.json ADDED
@@ -0,0 +1,16 @@
+{
+  "bos_token": "[CLS]",
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_lower_case": false,
+  "eos_token": "[SEP]",
+  "mask_token": "[MASK]",
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "sp_model_kwargs": {},
+  "split_by_punct": false,
+  "tokenizer_class": "DebertaV2Tokenizer",
+  "unk_token": "[UNK]",
+  "vocab_type": "spm"
+}
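Note: the oversized model_max_length above is the transformers sentinel for "unset" (roughly 1e30), not a real context window. A hedged sketch of pinning it to the model's 512 max_position_embeddings when loading; the path assumes a local checkout:

from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained(
    "deberta-v3-finetuned/fold_0/0/checkpoint-500",  # assumes a local checkout
    model_max_length=512,  # matches max_position_embeddings in config.json
)
print(tok.model_max_length)  # 512 instead of the 1e30 sentinel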
deberta-v3-finetuned/fold_0/0/checkpoint-500/trainer_state.json ADDED
@@ -0,0 +1,70 @@
+{
+  "best_metric": 0.4025963544845581,
+  "best_model_checkpoint": "/gpfs/home/jc3821/kaggle/content/deberta-v3-finetuned/fold_0/0/checkpoint-500",
+  "epoch": 1.953125,
+  "eval_steps": 100,
+  "global_step": 500,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.39,
+      "eval_loss": 0.20960840582847595,
+      "eval_rmse": 0.4578301012516022,
+      "eval_runtime": 22.7398,
+      "eval_samples_per_second": 90.458,
+      "eval_steps_per_second": 11.346,
+      "step": 100
+    },
+    {
+      "epoch": 0.78,
+      "eval_loss": 0.23187659680843353,
+      "eval_rmse": 0.4815356731414795,
+      "eval_runtime": 22.6916,
+      "eval_samples_per_second": 90.65,
+      "eval_steps_per_second": 11.37,
+      "step": 200
+    },
+    {
+      "epoch": 1.17,
+      "eval_loss": 0.16903835535049438,
+      "eval_rmse": 0.4111427366733551,
+      "eval_runtime": 22.6924,
+      "eval_samples_per_second": 90.647,
+      "eval_steps_per_second": 11.369,
+      "step": 300
+    },
+    {
+      "epoch": 1.56,
+      "eval_loss": 0.3062863349914551,
+      "eval_rmse": 0.5534313917160034,
+      "eval_runtime": 22.6866,
+      "eval_samples_per_second": 90.67,
+      "eval_steps_per_second": 11.372,
+      "step": 400
+    },
+    {
+      "epoch": 1.95,
+      "learning_rate": 9.140625e-06,
+      "loss": 0.2205,
+      "step": 500
+    },
+    {
+      "epoch": 1.95,
+      "eval_loss": 0.16208384931087494,
+      "eval_rmse": 0.4025963544845581,
+      "eval_runtime": 22.6795,
+      "eval_samples_per_second": 90.699,
+      "eval_steps_per_second": 11.376,
+      "step": 500
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 1280,
+  "num_train_epochs": 5,
+  "save_steps": 100,
+  "total_flos": 5457160921939152.0,
+  "trial_name": null,
+  "trial_params": null
+}
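Note: trainer_state.json is plain JSON, so the eval curve is easy to read back; for fold 0, eval_rmse bottoms out at about 0.4026 at step 500, which is why this checkpoint was kept as best. A small sketch; the path assumes a local checkout:

import json

path = "deberta-v3-finetuned/fold_0/0/checkpoint-500/trainer_state.json"
with open(path) as f:
    state = json.load(f)

# Keep only the evaluation entries; training-loss entries lack eval_rmse.
for entry in state["log_history"]:
    if "eval_rmse" in entry:
        print(f"step {entry['step']:>4}: eval_rmse {entry['eval_rmse']:.4f}")
print("best:", state["best_metric"], "at", state["best_model_checkpoint"])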
deberta-v3-finetuned/fold_0/0/checkpoint-500/training_args.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d0439c9395e86496a1acd5f3dc9d68a57fea982eccfb5766d700c8191ec8b133
+size 4091
deberta-v3-finetuned/fold_0/config.json ADDED
@@ -0,0 +1,42 @@
+{
+  "_name_or_path": "/gpfs/home/jc3821/kaggle/microsoft-deberta-v3-large",
+  "architectures": [
+    "DebertaV2ForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.007,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.007,
+  "hidden_size": 1024,
+  "id2label": {
+    "0": "LABEL_0"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 4096,
+  "label2id": {
+    "LABEL_0": 0
+  },
+  "layer_norm_eps": 1e-07,
+  "max_position_embeddings": 512,
+  "max_relative_positions": -1,
+  "model_type": "deberta-v2",
+  "norm_rel_ebd": "layer_norm",
+  "num_attention_heads": 16,
+  "num_hidden_layers": 24,
+  "pad_token_id": 0,
+  "pooler_dropout": 0,
+  "pooler_hidden_act": "gelu",
+  "pooler_hidden_size": 1024,
+  "pos_att_type": [
+    "p2c",
+    "c2p"
+  ],
+  "position_biased_input": false,
+  "position_buckets": 256,
+  "problem_type": "regression",
+  "relative_attention": true,
+  "share_att_key": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.32.1",
+  "type_vocab_size": 0,
+  "vocab_size": 128100
+}
deberta-v3-finetuned/fold_0/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bdc5261521c4981b187281dea24a24d8290fd2c4e222a5efff1cf0e9234c7cf5
+size 1740387701
deberta-v3-finetuned/fold_0/special_tokens_map.json ADDED
@@ -0,0 +1,9 @@
+{
+  "bos_token": "[CLS]",
+  "cls_token": "[CLS]",
+  "eos_token": "[SEP]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}
deberta-v3-finetuned/fold_0/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
deberta-v3-finetuned/fold_0/tokenizer_config.json ADDED
@@ -0,0 +1,16 @@
+{
+  "bos_token": "[CLS]",
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_lower_case": false,
+  "eos_token": "[SEP]",
+  "mask_token": "[MASK]",
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "sp_model_kwargs": {},
+  "split_by_punct": false,
+  "tokenizer_class": "DebertaV2Tokenizer",
+  "unk_token": "[UNK]",
+  "vocab_type": "spm"
+}
deberta-v3-finetuned/fold_1/1/checkpoint-1000/config.json ADDED
@@ -0,0 +1,42 @@
+{
+  "_name_or_path": "/gpfs/home/jc3821/kaggle/microsoft-deberta-v3-large",
+  "architectures": [
+    "DebertaV2ForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.007,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.007,
+  "hidden_size": 1024,
+  "id2label": {
+    "0": "LABEL_0"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 4096,
+  "label2id": {
+    "LABEL_0": 0
+  },
+  "layer_norm_eps": 1e-07,
+  "max_position_embeddings": 512,
+  "max_relative_positions": -1,
+  "model_type": "deberta-v2",
+  "norm_rel_ebd": "layer_norm",
+  "num_attention_heads": 16,
+  "num_hidden_layers": 24,
+  "pad_token_id": 0,
+  "pooler_dropout": 0,
+  "pooler_hidden_act": "gelu",
+  "pooler_hidden_size": 1024,
+  "pos_att_type": [
+    "p2c",
+    "c2p"
+  ],
+  "position_biased_input": false,
+  "position_buckets": 256,
+  "problem_type": "regression",
+  "relative_attention": true,
+  "share_att_key": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.32.1",
+  "type_vocab_size": 0,
+  "vocab_size": 128100
+}
deberta-v3-finetuned/fold_1/1/checkpoint-1000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:635821f4fe588432a6200b884f1b1d97ceabcde528ef510f99a3074b07be7eb9
+size 3480831547
deberta-v3-finetuned/fold_1/1/checkpoint-1000/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9f2cbd3bab0adc0d8c2db3cdd23f8fc8a30712e8f7908c9a31e7d2da1698518f
+size 1740387701
deberta-v3-finetuned/fold_1/1/checkpoint-1000/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5458d3bcbcf4f0bc302eba1f212281704d725141b083eb254d6baf69117be06c
+size 14575
deberta-v3-finetuned/fold_1/1/checkpoint-1000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:41a3073593c1c4cceb7a03282a47eee18756b89792798e8d3b798cb70f5a3bbe
+size 627
deberta-v3-finetuned/fold_1/1/checkpoint-1000/special_tokens_map.json ADDED
@@ -0,0 +1,9 @@
+{
+  "bos_token": "[CLS]",
+  "cls_token": "[CLS]",
+  "eos_token": "[SEP]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}
deberta-v3-finetuned/fold_1/1/checkpoint-1000/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
deberta-v3-finetuned/fold_1/1/checkpoint-1000/tokenizer_config.json ADDED
@@ -0,0 +1,16 @@
+{
+  "bos_token": "[CLS]",
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_lower_case": false,
+  "eos_token": "[SEP]",
+  "mask_token": "[MASK]",
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "sp_model_kwargs": {},
+  "split_by_punct": false,
+  "tokenizer_class": "DebertaV2Tokenizer",
+  "unk_token": "[UNK]",
+  "vocab_type": "spm"
+}
deberta-v3-finetuned/fold_1/1/checkpoint-1000/trainer_state.json ADDED
@@ -0,0 +1,121 @@
+{
+  "best_metric": 0.4920215308666229,
+  "best_model_checkpoint": "/gpfs/home/jc3821/kaggle/content/deberta-v3-finetuned/fold_1/1/checkpoint-1000",
+  "epoch": 3.875968992248062,
+  "eval_steps": 100,
+  "global_step": 1000,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.39,
+      "eval_loss": 0.2649173140525818,
+      "eval_rmse": 0.5147011876106262,
+      "eval_runtime": 31.3445,
+      "eval_samples_per_second": 64.094,
+      "eval_steps_per_second": 8.04,
+      "step": 100
+    },
+    {
+      "epoch": 0.78,
+      "eval_loss": 0.4311714172363281,
+      "eval_rmse": 0.6566364169120789,
+      "eval_runtime": 31.3047,
+      "eval_samples_per_second": 64.176,
+      "eval_steps_per_second": 8.05,
+      "step": 200
+    },
+    {
+      "epoch": 1.16,
+      "eval_loss": 0.3980819880962372,
+      "eval_rmse": 0.630937397480011,
+      "eval_runtime": 31.303,
+      "eval_samples_per_second": 64.179,
+      "eval_steps_per_second": 8.05,
+      "step": 300
+    },
+    {
+      "epoch": 1.55,
+      "eval_loss": 0.2832517623901367,
+      "eval_rmse": 0.5322140455245972,
+      "eval_runtime": 31.2992,
+      "eval_samples_per_second": 64.187,
+      "eval_steps_per_second": 8.051,
+      "step": 400
+    },
+    {
+      "epoch": 1.94,
+      "learning_rate": 9.186046511627908e-06,
+      "loss": 0.2244,
+      "step": 500
+    },
+    {
+      "epoch": 1.94,
+      "eval_loss": 0.26878467202186584,
+      "eval_rmse": 0.5184444785118103,
+      "eval_runtime": 31.2847,
+      "eval_samples_per_second": 64.217,
+      "eval_steps_per_second": 8.055,
+      "step": 500
+    },
+    {
+      "epoch": 2.33,
+      "eval_loss": 0.2900581955909729,
+      "eval_rmse": 0.5385705232620239,
+      "eval_runtime": 31.3051,
+      "eval_samples_per_second": 64.175,
+      "eval_steps_per_second": 8.05,
+      "step": 600
+    },
+    {
+      "epoch": 2.71,
+      "eval_loss": 0.3477973937988281,
+      "eval_rmse": 0.5897434949874878,
+      "eval_runtime": 31.2983,
+      "eval_samples_per_second": 64.189,
+      "eval_steps_per_second": 8.052,
+      "step": 700
+    },
+    {
+      "epoch": 3.1,
+      "eval_loss": 0.27153390645980835,
+      "eval_rmse": 0.5210891366004944,
+      "eval_runtime": 31.2968,
+      "eval_samples_per_second": 64.192,
+      "eval_steps_per_second": 8.052,
+      "step": 800
+    },
+    {
+      "epoch": 3.49,
+      "eval_loss": 0.24941422045230865,
+      "eval_rmse": 0.4994138777256012,
+      "eval_runtime": 31.3146,
+      "eval_samples_per_second": 64.155,
+      "eval_steps_per_second": 8.047,
+      "step": 900
+    },
+    {
+      "epoch": 3.88,
+      "learning_rate": 3.372093023255814e-06,
+      "loss": 0.0945,
+      "step": 1000
+    },
+    {
+      "epoch": 3.88,
+      "eval_loss": 0.24208517372608185,
+      "eval_rmse": 0.4920215308666229,
+      "eval_runtime": 31.2894,
+      "eval_samples_per_second": 64.207,
+      "eval_steps_per_second": 8.054,
+      "step": 1000
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 1290,
+  "num_train_epochs": 5,
+  "save_steps": 100,
+  "total_flos": 9605995008551688.0,
+  "trial_name": null,
+  "trial_params": null
+}
deberta-v3-finetuned/fold_1/1/checkpoint-1000/training_args.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4eadc822c2797edd54906ce4e0b7f9ef7987f46ee2743298e1f123e0e41dc785
+size 4091
deberta-v3-finetuned/fold_1/config.json ADDED
@@ -0,0 +1,42 @@
+{
+  "_name_or_path": "/gpfs/home/jc3821/kaggle/microsoft-deberta-v3-large",
+  "architectures": [
+    "DebertaV2ForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.007,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.007,
+  "hidden_size": 1024,
+  "id2label": {
+    "0": "LABEL_0"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 4096,
+  "label2id": {
+    "LABEL_0": 0
+  },
+  "layer_norm_eps": 1e-07,
+  "max_position_embeddings": 512,
+  "max_relative_positions": -1,
+  "model_type": "deberta-v2",
+  "norm_rel_ebd": "layer_norm",
+  "num_attention_heads": 16,
+  "num_hidden_layers": 24,
+  "pad_token_id": 0,
+  "pooler_dropout": 0,
+  "pooler_hidden_act": "gelu",
+  "pooler_hidden_size": 1024,
+  "pos_att_type": [
+    "p2c",
+    "c2p"
+  ],
+  "position_biased_input": false,
+  "position_buckets": 256,
+  "problem_type": "regression",
+  "relative_attention": true,
+  "share_att_key": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.32.1",
+  "type_vocab_size": 0,
+  "vocab_size": 128100
+}
deberta-v3-finetuned/fold_1/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9f2cbd3bab0adc0d8c2db3cdd23f8fc8a30712e8f7908c9a31e7d2da1698518f
+size 1740387701
deberta-v3-finetuned/fold_1/special_tokens_map.json ADDED
@@ -0,0 +1,9 @@
+{
+  "bos_token": "[CLS]",
+  "cls_token": "[CLS]",
+  "eos_token": "[SEP]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}
deberta-v3-finetuned/fold_1/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
deberta-v3-finetuned/fold_1/tokenizer_config.json ADDED
@@ -0,0 +1,16 @@
+{
+  "bos_token": "[CLS]",
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_lower_case": false,
+  "eos_token": "[SEP]",
+  "mask_token": "[MASK]",
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "sp_model_kwargs": {},
+  "split_by_punct": false,
+  "tokenizer_class": "DebertaV2Tokenizer",
+  "unk_token": "[UNK]",
+  "vocab_type": "spm"
+}
deberta-v3-finetuned/fold_2/2/checkpoint-400/config.json ADDED
@@ -0,0 +1,42 @@
+{
+  "_name_or_path": "/gpfs/home/jc3821/kaggle/microsoft-deberta-v3-large",
+  "architectures": [
+    "DebertaV2ForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.007,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.007,
+  "hidden_size": 1024,
+  "id2label": {
+    "0": "LABEL_0"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 4096,
+  "label2id": {
+    "LABEL_0": 0
+  },
+  "layer_norm_eps": 1e-07,
+  "max_position_embeddings": 512,
+  "max_relative_positions": -1,
+  "model_type": "deberta-v2",
+  "norm_rel_ebd": "layer_norm",
+  "num_attention_heads": 16,
+  "num_hidden_layers": 24,
+  "pad_token_id": 0,
+  "pooler_dropout": 0,
+  "pooler_hidden_act": "gelu",
+  "pooler_hidden_size": 1024,
+  "pos_att_type": [
+    "p2c",
+    "c2p"
+  ],
+  "position_biased_input": false,
+  "position_buckets": 256,
+  "problem_type": "regression",
+  "relative_attention": true,
+  "share_att_key": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.32.1",
+  "type_vocab_size": 0,
+  "vocab_size": 128100
+}
deberta-v3-finetuned/fold_2/2/checkpoint-400/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ce44670793ed58c21f8c2cbce6afc4efc891a30dfde7518e85135bd442780680
+size 3480831547
deberta-v3-finetuned/fold_2/2/checkpoint-400/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5d0bb0e3c058a48281a2b64af0f6ecdf014de51e36ea51b0251bfb5cc4c00691
+size 1740387701
deberta-v3-finetuned/fold_2/2/checkpoint-400/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e4e100a81cf298499260f07579dde148991722b91ee300d8212533f095c23a93
+size 14575
deberta-v3-finetuned/fold_2/2/checkpoint-400/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d7cd6229b27eb04441fb524f85b3a83bef58d5a81203fc33e818a099412769c8
+size 627
deberta-v3-finetuned/fold_2/2/checkpoint-400/special_tokens_map.json ADDED
@@ -0,0 +1,9 @@
+{
+  "bos_token": "[CLS]",
+  "cls_token": "[CLS]",
+  "eos_token": "[SEP]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}
deberta-v3-finetuned/fold_2/2/checkpoint-400/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
deberta-v3-finetuned/fold_2/2/checkpoint-400/tokenizer_config.json ADDED
@@ -0,0 +1,16 @@
+{
+  "bos_token": "[CLS]",
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_lower_case": false,
+  "eos_token": "[SEP]",
+  "mask_token": "[MASK]",
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "sp_model_kwargs": {},
+  "split_by_punct": false,
+  "tokenizer_class": "DebertaV2Tokenizer",
+  "unk_token": "[UNK]",
+  "vocab_type": "spm"
+}
deberta-v3-finetuned/fold_2/2/checkpoint-400/trainer_state.json ADDED
@@ -0,0 +1,55 @@
+{
+  "best_metric": 0.43666166067123413,
+  "best_model_checkpoint": "/gpfs/home/jc3821/kaggle/content/deberta-v3-finetuned/fold_2/2/checkpoint-400",
+  "epoch": 1.5444015444015444,
+  "eval_steps": 100,
+  "global_step": 400,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.39,
+      "eval_loss": 0.31523793935775757,
+      "eval_rmse": 0.561460554599762,
+      "eval_runtime": 28.1344,
+      "eval_samples_per_second": 70.945,
+      "eval_steps_per_second": 8.886,
+      "step": 100
+    },
+    {
+      "epoch": 0.77,
+      "eval_loss": 0.30585768818855286,
+      "eval_rmse": 0.5530440211296082,
+      "eval_runtime": 28.135,
+      "eval_samples_per_second": 70.944,
+      "eval_steps_per_second": 8.886,
+      "step": 200
+    },
+    {
+      "epoch": 1.16,
+      "eval_loss": 0.33985063433647156,
+      "eval_rmse": 0.5829670429229736,
+      "eval_runtime": 28.1345,
+      "eval_samples_per_second": 70.945,
+      "eval_steps_per_second": 8.886,
+      "step": 300
+    },
+    {
+      "epoch": 1.54,
+      "eval_loss": 0.19067342579364777,
+      "eval_rmse": 0.43666166067123413,
+      "eval_runtime": 28.134,
+      "eval_samples_per_second": 70.946,
+      "eval_steps_per_second": 8.886,
+      "step": 400
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 1295,
+  "num_train_epochs": 5,
+  "save_steps": 100,
+  "total_flos": 4057351609521216.0,
+  "trial_name": null,
+  "trial_params": null
+}
deberta-v3-finetuned/fold_2/2/checkpoint-400/training_args.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:41d0d43d565b7b3f73ca58f024d9c5d913daab26a6cc112d5207845da19c1431
+size 4091
deberta-v3-finetuned/fold_2/config.json ADDED
@@ -0,0 +1,42 @@
+{
+  "_name_or_path": "/gpfs/home/jc3821/kaggle/microsoft-deberta-v3-large",
+  "architectures": [
+    "DebertaV2ForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.007,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.007,
+  "hidden_size": 1024,
+  "id2label": {
+    "0": "LABEL_0"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 4096,
+  "label2id": {
+    "LABEL_0": 0
+  },
+  "layer_norm_eps": 1e-07,
+  "max_position_embeddings": 512,
+  "max_relative_positions": -1,
+  "model_type": "deberta-v2",
+  "norm_rel_ebd": "layer_norm",
+  "num_attention_heads": 16,
+  "num_hidden_layers": 24,
+  "pad_token_id": 0,
+  "pooler_dropout": 0,
+  "pooler_hidden_act": "gelu",
+  "pooler_hidden_size": 1024,
+  "pos_att_type": [
+    "p2c",
+    "c2p"
+  ],
+  "position_biased_input": false,
+  "position_buckets": 256,
+  "problem_type": "regression",
+  "relative_attention": true,
+  "share_att_key": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.32.1",
+  "type_vocab_size": 0,
+  "vocab_size": 128100
+}
deberta-v3-finetuned/fold_2/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5d0bb0e3c058a48281a2b64af0f6ecdf014de51e36ea51b0251bfb5cc4c00691
+size 1740387701
deberta-v3-finetuned/fold_2/special_tokens_map.json ADDED
@@ -0,0 +1,9 @@
+{
+  "bos_token": "[CLS]",
+  "cls_token": "[CLS]",
+  "eos_token": "[SEP]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}
deberta-v3-finetuned/fold_2/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
deberta-v3-finetuned/fold_2/tokenizer_config.json ADDED
@@ -0,0 +1,16 @@
+{
+  "bos_token": "[CLS]",
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_lower_case": false,
+  "eos_token": "[SEP]",
+  "mask_token": "[MASK]",
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "sp_model_kwargs": {},
+  "split_by_punct": false,
+  "tokenizer_class": "DebertaV2Tokenizer",
+  "unk_token": "[UNK]",
+  "vocab_type": "spm"
+}
deberta-v3-finetuned/fold_3/3/checkpoint-100/config.json ADDED
@@ -0,0 +1,42 @@
+{
+  "_name_or_path": "/gpfs/home/jc3821/kaggle/microsoft-deberta-v3-large",
+  "architectures": [
+    "DebertaV2ForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.007,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.007,
+  "hidden_size": 1024,
+  "id2label": {
+    "0": "LABEL_0"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 4096,
+  "label2id": {
+    "LABEL_0": 0
+  },
+  "layer_norm_eps": 1e-07,
+  "max_position_embeddings": 512,
+  "max_relative_positions": -1,
+  "model_type": "deberta-v2",
+  "norm_rel_ebd": "layer_norm",
+  "num_attention_heads": 16,
+  "num_hidden_layers": 24,
+  "pad_token_id": 0,
+  "pooler_dropout": 0,
+  "pooler_hidden_act": "gelu",
+  "pooler_hidden_size": 1024,
+  "pos_att_type": [
+    "p2c",
+    "c2p"
+  ],
+  "position_biased_input": false,
+  "position_buckets": 256,
+  "problem_type": "regression",
+  "relative_attention": true,
+  "share_att_key": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.32.1",
+  "type_vocab_size": 0,
+  "vocab_size": 128100
+}
deberta-v3-finetuned/fold_3/3/checkpoint-100/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b20b449044c88ca077f2e4d53ed3e7965841a392bfcc938260acb2ff57021f6c
+size 3480831547
deberta-v3-finetuned/fold_3/3/checkpoint-100/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1c9b7956e94bf38e46350de37247628745d4153d0f030e66a40e9099c62a7e70
+size 1740387701
deberta-v3-finetuned/fold_3/3/checkpoint-100/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:72b166cc4874ed59847a59cc4b7fa887c9be4c1c2d459fe1b29872f6ec46e8ea
+size 14575
deberta-v3-finetuned/fold_3/3/checkpoint-100/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:64a05a810a830e4742e4818737ab479a0943e4c2e2dab122df5475f155021251
+size 627