Commit 32a84c0 (1 parent: 832ae42), committed by ccore

Upload folder using huggingface_hub
README.md CHANGED
@@ -1,36 +1,27 @@
 ---
-license: other
-datasets:
-- Open-Orca/OpenOrca
-- ehartford/wizard_vicuna_70k_unfiltered
+base_model: ss5
 tags:
-- code
-- prompt
-- reverse prompt
-widget:
-- text: "Photosynthesis is the process by which plants, algae and some bacteria convert carbon dioxide and water into glucose and oxygen, using the energy of sunlight. This process is fundamental to life on Earth, as it provides the basis for almost all food chains and also contributes to the carbon cycle by helping to regulate the concentration of carbon dioxide in the atmosphere.\n[REVERSED-PROMPT] "
-  example_title: "reverse prompt"
-
+- generated_from_trainer
+metrics:
+- accuracy
+model-index:
+- name: ss6
+  results: []
 ---
 
-# core-prompt-reverser-opt-1.3b
+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+
+# ss6
 
-This model is a fine-tuned version of [facebook/opt-1.3b](https://huggingface.co/facebook/opt-1.3b) on an unknown dataset.
+This model is a fine-tuned version of [ss5](https://huggingface.co/ss5) on an unknown dataset.
 It achieves the following results on the evaluation set:
-- Loss: 1.4784
-- Accuracy: 0.6753
+- Loss: 1.2950
+- Accuracy: 0.7084
 
 ## Model description
 
-[INSTRUCTION] {your question}
-[RESPONSE] {model response}
-
-or
-
-[RESPONSE] {response}
-[REVERSED-PROMPT] {model prompt reversed}
-
-
+More information needed
 
 ## Intended uses & limitations
 
@@ -38,9 +29,9 @@ More information needed
 
 ## Training and evaluation data
 
-Wizard, openOrca, custom data
-
+More information needed
 
+## Training procedure
 
 ### Training hyperparameters
 
@@ -54,7 +45,7 @@ The following hyperparameters were used during training:
 - num_epochs: 1.0
 
 ### Training results
-this model is still training, it ran only 5% of the total training data, it will finish in 4/set
+
 
 
 ### Framework versions
@@ -62,4 +53,4 @@ The following hyperparameters were used during training:
 - Transformers 4.33.0.dev0
 - Pytorch 2.1.0.dev20230605+cu121
 - Datasets 2.14.4
-- Tokenizers 0.13.3
+- Tokenizers 0.13.3
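The removed card documented the model's prompt format: an `[INSTRUCTION] ... [RESPONSE]` pair for normal generation, or a `[RESPONSE] ... [REVERSED-PROMPT]` pair to recover a plausible prompt from a given answer (the deleted YAML widget example exercised the latter). Below is a minimal sketch of that reverse-prompt pattern with the `transformers` text-generation pipeline; the local checkpoint path and generation settings are assumptions, not part of this commit.

```python
# Hypothetical usage of the [RESPONSE] ... [REVERSED-PROMPT] format from the old card.
# "." stands for a local copy of this checkpoint; adjust the path as needed.
from transformers import pipeline

generator = pipeline("text-generation", model=".")

response = (
    "Photosynthesis is the process by which plants, algae and some bacteria "
    "convert carbon dioxide and water into glucose and oxygen, using the "
    "energy of sunlight."
)
prompt = f"[RESPONSE] {response}\n[REVERSED-PROMPT] "

out = generator(prompt, max_new_tokens=64, do_sample=False)
print(out[0]["generated_text"])
```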
all_results.json CHANGED
@@ -1,15 +1,15 @@
 {
     "epoch": 1.0,
-    "eval_accuracy": 0.6752705025786228,
-    "eval_loss": 1.4784468412399292,
-    "eval_runtime": 55.0697,
+    "eval_accuracy": 0.7083881079987865,
+    "eval_loss": 1.294954776763916,
+    "eval_runtime": 53.4048,
     "eval_samples": 232,
-    "eval_samples_per_second": 4.213,
-    "eval_steps_per_second": 0.527,
-    "perplexity": 4.3861280346659415,
-    "train_loss": 1.700456760354238,
-    "train_runtime": 3303.7788,
-    "train_samples": 11678,
-    "train_samples_per_second": 3.535,
-    "train_steps_per_second": 0.442
+    "eval_samples_per_second": 4.344,
+    "eval_steps_per_second": 0.543,
+    "perplexity": 3.6508308680217363,
+    "train_loss": 1.2382940134009472,
+    "train_runtime": 11022.1698,
+    "train_samples": 38349,
+    "train_samples_per_second": 3.479,
+    "train_steps_per_second": 0.435
 }
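The derived fields in the updated metrics hang together: `perplexity` is `exp(eval_loss)`, and the `*_per_second` values are the corresponding sample counts divided by the runtimes. A quick check with the numbers from the new version of the file, standard library only:

```python
# Cross-checking the reported metrics in all_results.json (new version).
import math

eval_loss = 1.294954776763916
train_samples, train_runtime = 38349, 11022.1698
eval_samples, eval_runtime = 232, 53.4048

print(math.exp(eval_loss))            # ~3.65083, matches "perplexity"
print(train_samples / train_runtime)  # ~3.479,   matches "train_samples_per_second"
print(eval_samples / eval_runtime)    # ~4.344,   matches "eval_samples_per_second"
```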
config.json CHANGED
@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "facebook/opt-1.3b",
+  "_name_or_path": "ss5",
   "_remove_final_layer_norm": false,
   "activation_dropout": 0.0,
   "activation_function": "relu",
eval_results.json CHANGED
@@ -1,10 +1,10 @@
 {
     "epoch": 1.0,
-    "eval_accuracy": 0.6752705025786228,
-    "eval_loss": 1.4784468412399292,
-    "eval_runtime": 55.0697,
+    "eval_accuracy": 0.7083881079987865,
+    "eval_loss": 1.294954776763916,
+    "eval_runtime": 53.4048,
     "eval_samples": 232,
-    "eval_samples_per_second": 4.213,
-    "eval_steps_per_second": 0.527,
-    "perplexity": 4.3861280346659415
+    "eval_samples_per_second": 4.344,
+    "eval_steps_per_second": 0.543,
+    "perplexity": 3.6508308680217363
 }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c82ac35a6d50d4f6668cf6fc2037aece6e394df4a65ef6ff81ad131822fe6e70
+oid sha256:f8c1c4b4f568da599148a79e669bbd5590428dfaf7064385627c5948af153ced
 size 2631648218
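The `.bin` entries in this commit are Git LFS pointer files rather than the binaries themselves: `oid` is the SHA-256 of the actual blob and `size` its byte count (unchanged at 2631648218 bytes, since the tensors kept their shapes and only their values changed). A sketch of verifying a downloaded `pytorch_model.bin` against the new pointer; the local filename is an assumption:

```python
# Check that a downloaded pytorch_model.bin matches the LFS pointer's oid above.
import hashlib

EXPECTED = "f8c1c4b4f568da599148a79e669bbd5590428dfaf7064385627c5948af153ced"

h = hashlib.sha256()
with open("pytorch_model.bin", "rb") as f:            # assumed local path
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
        h.update(chunk)

print(h.hexdigest() == EXPECTED)  # True if the blob matches the pointer
```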
train_results.json CHANGED
@@ -1,8 +1,8 @@
 {
     "epoch": 1.0,
-    "train_loss": 1.700456760354238,
-    "train_runtime": 3303.7788,
-    "train_samples": 11678,
-    "train_samples_per_second": 3.535,
-    "train_steps_per_second": 0.442
+    "train_loss": 1.2382940134009472,
+    "train_runtime": 11022.1698,
+    "train_samples": 38349,
+    "train_samples_per_second": 3.479,
+    "train_steps_per_second": 0.435
 }
trainer_state.json CHANGED
@@ -3,38 +3,80 @@
   "best_model_checkpoint": null,
   "epoch": 1.0,
   "eval_steps": 500,
-  "global_step": 1460,
+  "global_step": 4794,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
-      "epoch": 0.34,
-      "learning_rate": 3.287671232876712e-05,
-      "loss": 1.776,
+      "epoch": 0.1,
+      "learning_rate": 4.478514810179391e-05,
+      "loss": 1.3792,
       "step": 500
     },
     {
-      "epoch": 0.68,
-      "learning_rate": 1.5753424657534248e-05,
-      "loss": 1.6672,
+      "epoch": 0.21,
+      "learning_rate": 3.9570296203587817e-05,
+      "loss": 1.3043,
       "step": 1000
     },
+    {
+      "epoch": 0.31,
+      "learning_rate": 3.435544430538173e-05,
+      "loss": 1.2641,
+      "step": 1500
+    },
+    {
+      "epoch": 0.42,
+      "learning_rate": 2.9140592407175638e-05,
+      "loss": 1.2319,
+      "step": 2000
+    },
+    {
+      "epoch": 0.52,
+      "learning_rate": 2.3925740508969545e-05,
+      "loss": 1.2035,
+      "step": 2500
+    },
+    {
+      "epoch": 0.63,
+      "learning_rate": 1.8710888610763455e-05,
+      "loss": 1.2072,
+      "step": 3000
+    },
+    {
+      "epoch": 0.73,
+      "learning_rate": 1.3496036712557364e-05,
+      "loss": 1.1955,
+      "step": 3500
+    },
+    {
+      "epoch": 0.83,
+      "learning_rate": 8.281184814351273e-06,
+      "loss": 1.1949,
+      "step": 4000
+    },
+    {
+      "epoch": 0.94,
+      "learning_rate": 3.0663329161451816e-06,
+      "loss": 1.194,
+      "step": 4500
+    },
     {
       "epoch": 1.0,
-      "step": 1460,
-      "total_flos": 8.671679350623437e+16,
-      "train_loss": 1.700456760354238,
-      "train_runtime": 3303.7788,
-      "train_samples_per_second": 3.535,
-      "train_steps_per_second": 0.442
+      "step": 4794,
+      "total_flos": 2.8476642525865574e+17,
+      "train_loss": 1.2382940134009472,
+      "train_runtime": 11022.1698,
+      "train_samples_per_second": 3.479,
+      "train_steps_per_second": 0.435
     }
   ],
   "logging_steps": 500,
-  "max_steps": 1460,
+  "max_steps": 4794,
   "num_train_epochs": 1,
-  "save_steps": -1460,
-  "total_flos": 8.671679350623437e+16,
+  "save_steps": -4794,
+  "total_flos": 2.8476642525865574e+17,
   "trial_name": null,
   "trial_params": null
 }
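The new `log_history` is consistent with a linear learning-rate decay, with no warmup, from a base rate of 5e-5 down to 0 over the 4794 training steps; 5e-5 is the `transformers` Trainer default and is inferred here, since the base rate is not stored in `trainer_state.json` itself. A short check against the logged values:

```python
# Reproduce the logged learning rates assuming linear decay from 5e-5 (assumed
# base rate, the Trainer default) to 0 over max_steps, with no warmup.
base_lr, max_steps = 5e-5, 4794

for step in (500, 1000, 1500, 4500):
    lr = base_lr * (max_steps - step) / max_steps
    print(step, lr)  # matches the corresponding "learning_rate" entries
                     # in log_history up to float rounding
```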
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:68e5b42e7290d1422423b958c1cf982b94ae5db9e379c560d977a8e2c38edab8
+oid sha256:13435d31ff4c3a7b19d246d3aedb0e34f659b86388d243bd7658841e425d0944
 size 4472