merty committed on
Commit d0a0a74
1 Parent(s): cd09bd1

End of training

Files changed (4)
  1. README.md +6 -1
  2. all_results.json +5 -0
  3. config.json +2 -3
  4. eval_results.json +6 -6
README.md CHANGED
@@ -1,8 +1,13 @@
 ---
 base_model: barc0/Llama-3.1-ARC-Potpourri-Transduction-8B
+datasets:
+- tttx/problem0_data
+- barc0/transduction_formatted_rearc_dataset_100k
+- barc0/transduction_heavy_100k_jsonl
 library_name: peft
 license: llama3.1
 tags:
+- alignment-handbook
 - trl
 - sft
 - generated_from_trainer
@@ -16,7 +21,7 @@ should probably proofread and complete it, then remove this comment. -->
 
 # engineer1-heavy-barc-llama3.1-8b-instruct-lora64-testtime-finetuning
 
-This model is a fine-tuned version of [barc0/Llama-3.1-ARC-Potpourri-Transduction-8B](https://huggingface.co/barc0/Llama-3.1-ARC-Potpourri-Transduction-8B) on an unknown dataset.
+This model is a fine-tuned version of [barc0/Llama-3.1-ARC-Potpourri-Transduction-8B](https://huggingface.co/barc0/Llama-3.1-ARC-Potpourri-Transduction-8B) on the tttx/problem0_data, the barc0/transduction_formatted_rearc_dataset_100k and the barc0/transduction_heavy_100k_jsonl datasets.
 It achieves the following results on the evaluation set:
 - Loss: 0.0000
 
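The README front matter tags this repo as a peft LoRA adapter (rank 64, per the model name) on top of barc0/Llama-3.1-ARC-Potpourri-Transduction-8B. A minimal loading sketch follows; the adapter repo id below is a placeholder, since the commit page shows the model name but not its owning namespace.

```python
# Minimal sketch, not the authors' code: load the LoRA adapter on its base model.
# ADAPTER_ID is hypothetical -- replace <namespace> with the actual repo owner.
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

BASE_ID = "barc0/Llama-3.1-ARC-Potpourri-Transduction-8B"
ADAPTER_ID = "<namespace>/engineer1-heavy-barc-llama3.1-8b-instruct-lora64-testtime-finetuning"

tokenizer = AutoTokenizer.from_pretrained(BASE_ID)
base = AutoModelForCausalLM.from_pretrained(BASE_ID, torch_dtype="bfloat16")
model = PeftModel.from_pretrained(base, ADAPTER_ID)  # applies the adapter weights
```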
all_results.json CHANGED
@@ -1,5 +1,10 @@
 {
     "epoch": 10.0,
+    "eval_loss": 2.355557808186859e-05,
+    "eval_runtime": 0.6765,
+    "eval_samples": 1,
+    "eval_samples_per_second": 1.478,
+    "eval_steps_per_second": 1.478,
     "total_flos": 4718728839168.0,
     "train_loss": 0.0003110202618358926,
     "train_runtime": 115.7621,
config.json CHANGED
@@ -1,6 +1,5 @@
 {
-    "_attn_implementation_autoset": true,
-    "_name_or_path": "barc0/Llama-3.1-ARC-Heavy-Transduction-8B",
+    "_name_or_path": "barc0/Llama-3.1-ARC-Potpourri-Transduction-8B",
     "architectures": [
         "LlamaForCausalLM"
     ],
@@ -35,7 +34,7 @@
     "rope_theta": 500000.0,
     "tie_word_embeddings": false,
     "torch_dtype": "bfloat16",
-    "transformers_version": "4.46.2",
+    "transformers_version": "4.45.2",
     "use_cache": true,
     "vocab_size": 128256
 }
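The `_name_or_path` fix points the config at the same base model the README declares (it previously named the Heavy variant); the surrounding context lines are standard Llama 3.1 8B values. A quick check against the base model's published config, as a sketch:

```python
from transformers import AutoConfig

# Sketch: fetch the base model's config and compare the fields visible in the
# hunk above; they should agree, since this repo is an adapter on that base.
cfg = AutoConfig.from_pretrained("barc0/Llama-3.1-ARC-Potpourri-Transduction-8B")
assert cfg.rope_theta == 500000.0
assert cfg.vocab_size == 128256
assert cfg.tie_word_embeddings is False
```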
eval_results.json CHANGED
@@ -1,8 +1,8 @@
 {
-    "epoch": 3.0,
-    "eval_loss": 0.09748542308807373,
-    "eval_runtime": 890.5512,
-    "eval_samples": 10376,
-    "eval_samples_per_second": 11.651,
-    "eval_steps_per_second": 1.456
+    "epoch": 10.0,
+    "eval_loss": 2.355557808186859e-05,
+    "eval_runtime": 0.6765,
+    "eval_samples": 1,
+    "eval_samples_per_second": 1.478,
+    "eval_steps_per_second": 1.478
 }
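The new eval block is internally consistent and mirrors the values this commit adds to all_results.json: with one eval sample and a 0.6765 s runtime, throughput is 1 / 0.6765 ≈ 1.478 samples/s, and the identical steps-per-second figure implies an eval batch size of 1. A sketch of the check, assuming both files sit in the working directory:

```python
import json

# Cross-check the eval metrics this commit writes to both result files.
with open("all_results.json") as f:
    all_results = json.load(f)
with open("eval_results.json") as f:
    eval_results = json.load(f)

eval_keys = ("eval_loss", "eval_runtime", "eval_samples",
             "eval_samples_per_second", "eval_steps_per_second")
for key in eval_keys:
    assert all_results[key] == eval_results[key], key

# Throughput is samples / runtime: 1 / 0.6765 ~= 1.478.
rate = eval_results["eval_samples"] / eval_results["eval_runtime"]
assert round(rate, 3) == eval_results["eval_samples_per_second"]
```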