emdemor committed on
Commit
45defd3
1 Parent(s): 2a39785

Training in progress, step 50

Browse files
adapter_config.json CHANGED
@@ -20,12 +20,12 @@
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
23
- "o_proj",
24
  "q_proj",
25
- "down_proj",
26
  "gate_proj",
27
- "v_proj",
28
  "k_proj",
 
29
  "up_proj"
30
  ],
31
  "task_type": "CAUSAL_LM",
 
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
 
23
  "q_proj",
24
+ "o_proj",
25
  "gate_proj",
26
+ "down_proj",
27
  "k_proj",
28
+ "v_proj",
29
  "up_proj"
30
  ],
31
  "task_type": "CAUSAL_LM",
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c0064e74028932ad7f7ba5f4b0b4726cba07b854acdb38ca06c6ead7e6b9125d
3
  size 35668592
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2bf964742964d2ec503f25da4ead7a6c8a5691fc8d1cb60aa46b3a6fe8d444f8
3
  size 35668592
metrics.json CHANGED
@@ -1,6 +1 @@
1
- {"Step":50,"eval_loss":1.7156720161,"eval_runtime":149.1859,"eval_samples_per_second":3.352,"eval_steps_per_second":0.422,"epoch":0.08}
2
- {"Step":100,"eval_loss":1.5024453402,"eval_runtime":149.3514,"eval_samples_per_second":3.348,"eval_steps_per_second":0.422,"epoch":0.16}
3
- {"Step":150,"eval_loss":1.4826966524,"eval_runtime":149.1669,"eval_samples_per_second":3.352,"eval_steps_per_second":0.422,"epoch":0.24}
4
- {"Step":200,"eval_loss":1.4759048223,"eval_runtime":149.1268,"eval_samples_per_second":3.353,"eval_steps_per_second":0.422,"epoch":0.32}
5
- {"Step":250,"eval_loss":1.4729492664,"eval_runtime":149.0332,"eval_samples_per_second":3.355,"eval_steps_per_second":0.423,"epoch":0.4}
6
- {"Step":300,"eval_loss":1.4703065157,"eval_runtime":149.278,"eval_samples_per_second":3.349,"eval_steps_per_second":0.422,"epoch":0.48}
 
1
+ {"Step":50,"eval_loss":1.7071695328,"eval_runtime":27.6118,"eval_samples_per_second":3.622,"eval_steps_per_second":0.471,"epoch":0.08}
 
 
 
 
 
state.json CHANGED
@@ -1,89 +1,13 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.48,
5
  "eval_steps": 50,
6
- "global_step": 300,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
- "log_history": [
11
- {
12
- "epoch": 0.08,
13
- "grad_norm": 0.05632242560386658,
14
- "learning_rate": 0.00013297872340425532,
15
- "loss": 1.8852,
16
- "step": 50
17
- },
18
- {
19
- "epoch": 0.08,
20
- "eval_loss": 1.7156720161437988,
21
- "eval_runtime": 149.1859,
22
- "eval_samples_per_second": 3.352,
23
- "eval_steps_per_second": 0.422,
24
- "step": 50
25
- },
26
- {
27
- "epoch": 0.16,
28
- "grad_norm": 0.02606302499771118,
29
- "learning_rate": 0.0002632978723404255,
30
- "loss": 1.6036,
31
- "step": 100
32
- },
33
- {
34
- "epoch": 0.16,
35
- "eval_loss": 1.5024453401565552,
36
- "eval_runtime": 149.3514,
37
- "eval_samples_per_second": 3.348,
38
- "eval_steps_per_second": 0.422,
39
- "step": 100
40
- },
41
- {
42
- "epoch": 0.24,
43
- "grad_norm": 0.034023039042949677,
44
- "learning_rate": 0.00039627659574468084,
45
- "loss": 1.5186,
46
- "step": 150
47
- },
48
- {
49
- "epoch": 0.24,
50
- "eval_loss": 1.4826966524124146,
51
- "eval_runtime": 149.1669,
52
- "eval_samples_per_second": 3.352,
53
- "eval_steps_per_second": 0.422,
54
- "step": 150
55
- },
56
- {
57
- "epoch": 0.32,
58
- "grad_norm": 0.04070857912302017,
59
- "learning_rate": 0.0004967397747480735,
60
- "loss": 1.4822,
61
- "step": 200
62
- },
63
- {
64
- "epoch": 0.32,
65
- "eval_loss": 1.4759048223495483,
66
- "eval_runtime": 149.1268,
67
- "eval_samples_per_second": 3.353,
68
- "eval_steps_per_second": 0.422,
69
- "step": 200
70
- },
71
- {
72
- "epoch": 0.4,
73
- "grad_norm": 0.03537657856941223,
74
- "learning_rate": 0.00048192056905749855,
75
- "loss": 1.5035,
76
- "step": 250
77
- },
78
- {
79
- "epoch": 0.4,
80
- "eval_loss": 1.4729492664337158,
81
- "eval_runtime": 149.0332,
82
- "eval_samples_per_second": 3.355,
83
- "eval_steps_per_second": 0.423,
84
- "step": 250
85
- }
86
- ],
87
  "logging_steps": 50,
88
  "max_steps": 1875,
89
  "num_input_tokens_seen": 0,
@@ -95,13 +19,13 @@
95
  "should_epoch_stop": false,
96
  "should_evaluate": false,
97
  "should_log": false,
98
- "should_save": true,
99
  "should_training_stop": false
100
  },
101
  "attributes": {}
102
  }
103
  },
104
- "total_flos": 1.999548261801984e+16,
105
  "train_batch_size": 8,
106
  "trial_name": null,
107
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.08,
5
  "eval_steps": 50,
6
+ "global_step": 50,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
+ "log_history": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  "logging_steps": 50,
12
  "max_steps": 1875,
13
  "num_input_tokens_seen": 0,
 
19
  "should_epoch_stop": false,
20
  "should_evaluate": false,
21
  "should_log": false,
22
+ "should_save": false,
23
  "should_training_stop": false
24
  },
25
  "attributes": {}
26
  }
27
  },
28
+ "total_flos": 0,
29
  "train_batch_size": 8,
30
  "trial_name": null,
31
  "trial_params": null
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f6aadb86e7c11eaf53076fa580bc16d46538da6411436f54844013f1933386b8
3
  size 5432
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f5858438e1844bba514e3e6e10e8359b859b0963a7a05503baf18f6704d9be96
3
  size 5432