sedrickkeh committed on
Commit
d13d9ec
1 Parent(s): d9acaa1

Model save

Browse files
README.md CHANGED
@@ -16,6 +16,8 @@ should probably proofread and complete it, then remove this comment. -->
16
  # checkpoints
17
 
18
  This model is a fine-tuned version of [mistralai/Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1) on an unknown dataset.
 
 
19
 
20
  ## Model description
21
 
 
16
  # checkpoints
17
 
18
  This model is a fine-tuned version of [mistralai/Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1) on an unknown dataset.
19
+ It achieves the following results on the evaluation set:
20
+ - Loss: 1.6955
21
 
22
  ## Model description
23
 
all_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
  "epoch": 0.13043478260869565,
3
- "eval_loss": 1.6952797174453735,
4
- "eval_runtime": 2.2512,
5
- "eval_samples_per_second": 274.076,
6
- "eval_steps_per_second": 4.442,
7
  "total_flos": 3.355230384370483e+16,
8
- "train_loss": 1.4136702219645183,
9
- "train_runtime": 204.3723,
10
- "train_samples_per_second": 7.516,
11
  "train_steps_per_second": 0.015
12
  }
 
1
  {
2
  "epoch": 0.13043478260869565,
3
+ "eval_loss": 1.6954997777938843,
4
+ "eval_runtime": 2.0177,
5
+ "eval_samples_per_second": 305.789,
6
+ "eval_steps_per_second": 4.956,
7
  "total_flos": 3.355230384370483e+16,
8
+ "train_loss": 1.413570721944173,
9
+ "train_runtime": 204.0944,
10
+ "train_samples_per_second": 7.526,
11
  "train_steps_per_second": 0.015
12
  }
eval_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "epoch": 0.13043478260869565,
3
- "eval_loss": 1.6952797174453735,
4
- "eval_runtime": 2.2512,
5
- "eval_samples_per_second": 274.076,
6
- "eval_steps_per_second": 4.442
7
  }
 
1
  {
2
  "epoch": 0.13043478260869565,
3
+ "eval_loss": 1.6954997777938843,
4
+ "eval_runtime": 2.0177,
5
+ "eval_samples_per_second": 305.789,
6
+ "eval_steps_per_second": 4.956
7
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 0.13043478260869565,
3
  "total_flos": 3.355230384370483e+16,
4
- "train_loss": 1.4136702219645183,
5
- "train_runtime": 204.3723,
6
- "train_samples_per_second": 7.516,
7
  "train_steps_per_second": 0.015
8
  }
 
1
  {
2
  "epoch": 0.13043478260869565,
3
  "total_flos": 3.355230384370483e+16,
4
+ "train_loss": 1.413570721944173,
5
+ "train_runtime": 204.0944,
6
+ "train_samples_per_second": 7.526,
7
  "train_steps_per_second": 0.015
8
  }
trainer_state.json CHANGED
@@ -10,19 +10,19 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.13043478260869565,
13
- "eval_loss": 1.6952797174453735,
14
- "eval_runtime": 1.8822,
15
- "eval_samples_per_second": 327.804,
16
- "eval_steps_per_second": 5.313,
17
  "step": 3
18
  },
19
  {
20
  "epoch": 0.13043478260869565,
21
  "step": 3,
22
  "total_flos": 3.355230384370483e+16,
23
- "train_loss": 1.4136702219645183,
24
- "train_runtime": 204.3723,
25
- "train_samples_per_second": 7.516,
26
  "train_steps_per_second": 0.015
27
  }
28
  ],
 
10
  "log_history": [
11
  {
12
  "epoch": 0.13043478260869565,
13
+ "eval_loss": 1.6954997777938843,
14
+ "eval_runtime": 1.8869,
15
+ "eval_samples_per_second": 326.995,
16
+ "eval_steps_per_second": 5.3,
17
  "step": 3
18
  },
19
  {
20
  "epoch": 0.13043478260869565,
21
  "step": 3,
22
  "total_flos": 3.355230384370483e+16,
23
+ "train_loss": 1.413570721944173,
24
+ "train_runtime": 204.0944,
25
+ "train_samples_per_second": 7.526,
26
  "train_steps_per_second": 0.015
27
  }
28
  ],
training_eval_loss.png CHANGED