sedrickkeh committed on
Commit
c0f98fc
1 Parent(s): 5657d72

End of training

Browse files
README.md CHANGED
@@ -16,7 +16,9 @@ should probably proofread and complete it, then remove this comment. -->
16
 
17
  # test_run_mini
18
 
19
- This model is a fine-tuned version of [meta-llama/Llama-3.2-1B](https://huggingface.co/meta-llama/Llama-3.2-1B) on an unknown dataset.
 
 
20
 
21
  ## Model description
22
 
 
16
 
17
  # test_run_mini
18
 
19
+ This model is a fine-tuned version of [meta-llama/Llama-3.2-1B](https://huggingface.co/meta-llama/Llama-3.2-1B) on the llamafactory/alpaca_en dataset.
20
+ It achieves the following results on the evaluation set:
21
+ - Loss: 1.4381
22
 
23
  ## Model description
24
 
all_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
  "epoch": 0.15384615384615385,
3
- "eval_loss": 1.438093900680542,
4
- "eval_runtime": 1.614,
5
- "eval_samples_per_second": 325.904,
6
- "eval_steps_per_second": 5.576,
7
  "total_flos": 305613766656.0,
8
- "train_loss": 1.5371843973795574,
9
- "train_runtime": 192.8691,
10
- "train_samples_per_second": 7.964,
11
- "train_steps_per_second": 0.016
12
  }
 
1
  {
2
  "epoch": 0.15384615384615385,
3
+ "eval_loss": 1.438112735748291,
4
+ "eval_runtime": 1.7111,
5
+ "eval_samples_per_second": 307.407,
6
+ "eval_steps_per_second": 5.26,
7
  "total_flos": 305613766656.0,
8
+ "train_loss": 1.5370841026306152,
9
+ "train_runtime": 320.2266,
10
+ "train_samples_per_second": 4.797,
11
+ "train_steps_per_second": 0.009
12
  }
eval_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "epoch": 0.15384615384615385,
3
- "eval_loss": 1.438093900680542,
4
- "eval_runtime": 1.614,
5
- "eval_samples_per_second": 325.904,
6
- "eval_steps_per_second": 5.576
7
  }
 
1
  {
2
  "epoch": 0.15384615384615385,
3
+ "eval_loss": 1.438112735748291,
4
+ "eval_runtime": 1.7111,
5
+ "eval_samples_per_second": 307.407,
6
+ "eval_steps_per_second": 5.26
7
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 0.15384615384615385,
3
  "total_flos": 305613766656.0,
4
- "train_loss": 1.5371843973795574,
5
- "train_runtime": 192.8691,
6
- "train_samples_per_second": 7.964,
7
- "train_steps_per_second": 0.016
8
  }
 
1
  {
2
  "epoch": 0.15384615384615385,
3
  "total_flos": 305613766656.0,
4
+ "train_loss": 1.5370841026306152,
5
+ "train_runtime": 320.2266,
6
+ "train_samples_per_second": 4.797,
7
+ "train_steps_per_second": 0.009
8
  }
trainer_state.json CHANGED
@@ -10,20 +10,20 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.15384615384615385,
13
- "eval_loss": 1.438093900680542,
14
- "eval_runtime": 1.81,
15
- "eval_samples_per_second": 290.611,
16
- "eval_steps_per_second": 4.972,
17
  "step": 3
18
  },
19
  {
20
  "epoch": 0.15384615384615385,
21
  "step": 3,
22
  "total_flos": 305613766656.0,
23
- "train_loss": 1.5371843973795574,
24
- "train_runtime": 192.8691,
25
- "train_samples_per_second": 7.964,
26
- "train_steps_per_second": 0.016
27
  }
28
  ],
29
  "logging_steps": 10,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.15384615384615385,
13
+ "eval_loss": 1.438112735748291,
14
+ "eval_runtime": 1.9307,
15
+ "eval_samples_per_second": 272.436,
16
+ "eval_steps_per_second": 4.661,
17
  "step": 3
18
  },
19
  {
20
  "epoch": 0.15384615384615385,
21
  "step": 3,
22
  "total_flos": 305613766656.0,
23
+ "train_loss": 1.5370841026306152,
24
+ "train_runtime": 320.2266,
25
+ "train_samples_per_second": 4.797,
26
+ "train_steps_per_second": 0.009
27
  }
28
  ],
29
  "logging_steps": 10,
training_eval_loss.png CHANGED