Fredh99 committed on
Commit
ca43bfa
1 Parent(s): 7c7023a

Model save

Browse files
adapter_config.json CHANGED
@@ -16,9 +16,9 @@
16
  "rank_pattern": {},
17
  "revision": null,
18
  "target_modules": [
19
- "o_proj",
20
- "k_proj",
21
  "q_proj",
 
 
22
  "v_proj"
23
  ],
24
  "task_type": "CAUSAL_LM"
 
16
  "rank_pattern": {},
17
  "revision": null,
18
  "target_modules": [
 
 
19
  "q_proj",
20
+ "k_proj",
21
+ "o_proj",
22
  "v_proj"
23
  ],
24
  "task_type": "CAUSAL_LM"
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6960fd089e19d164d7894dc5fb001aa03ce42dd75ec66e7dc79904d6abf7b02f
3
  size 134252592
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:59776a288423adf8e1413385dbd08b91feca51bb66e8eff856a4ab646069f9cf
3
  size 134252592
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
  "epoch": 0.69,
3
- "eval_loss": 1.444887399673462,
4
- "eval_runtime": 364.7595,
5
  "eval_samples": 23110,
6
- "eval_samples_per_second": 63.357,
7
  "eval_steps_per_second": 0.992,
8
- "train_loss": 1.5280099289757865,
9
- "train_runtime": 11248.8271,
10
  "train_samples": 207865,
11
- "train_samples_per_second": 18.479,
12
  "train_steps_per_second": 0.004
13
  }
 
1
  {
2
  "epoch": 0.69,
3
+ "eval_loss": 1.444868564605713,
4
+ "eval_runtime": 364.8083,
5
  "eval_samples": 23110,
6
+ "eval_samples_per_second": 63.348,
7
  "eval_steps_per_second": 0.992,
8
+ "train_loss": 1.5280096292495728,
9
+ "train_runtime": 11257.3852,
10
  "train_samples": 207865,
11
+ "train_samples_per_second": 18.465,
12
  "train_steps_per_second": 0.004
13
  }
config.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "meta-llama/Llama-2-7b-hf",
3
+ "architectures": [
4
+ "LlamaForCausalLM"
5
+ ],
6
+ "attention_bias": false,
7
+ "bos_token_id": 1,
8
+ "eos_token_id": 2,
9
+ "hidden_act": "silu",
10
+ "hidden_size": 4096,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 11008,
13
+ "max_position_embeddings": 4096,
14
+ "model_type": "llama",
15
+ "num_attention_heads": 32,
16
+ "num_hidden_layers": 32,
17
+ "num_key_value_heads": 32,
18
+ "pretraining_tp": 1,
19
+ "rms_norm_eps": 1e-05,
20
+ "rope_scaling": null,
21
+ "rope_theta": 10000.0,
22
+ "tie_word_embeddings": false,
23
+ "torch_dtype": "float16",
24
+ "transformers_version": "4.35.0",
25
+ "use_cache": true,
26
+ "vocab_size": 32000
27
+ }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 0.69,
3
- "eval_loss": 1.444887399673462,
4
- "eval_runtime": 364.7595,
5
  "eval_samples": 23110,
6
- "eval_samples_per_second": 63.357,
7
  "eval_steps_per_second": 0.992
8
  }
 
1
  {
2
  "epoch": 0.69,
3
+ "eval_loss": 1.444868564605713,
4
+ "eval_runtime": 364.8083,
5
  "eval_samples": 23110,
6
+ "eval_samples_per_second": 63.348,
7
  "eval_steps_per_second": 0.992
8
  }
runs/Dec12_06-25-04_beta-a100-3-do-not-terminate/events.out.tfevents.1702362326.beta-a100-3-do-not-terminate.140581.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:50e7b1fdd69612d0ca08a49df49f7bec25e9ddf59d4b7c912006fbfcdd3dab18
3
+ size 6174
runs/Dec12_06-25-04_beta-a100-3-do-not-terminate/events.out.tfevents.1702373948.beta-a100-3-do-not-terminate.140581.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:441824f2dfe971e3b99522dcce10fd78d6d966ba82df1fefbe03050f622370c8
3
+ size 354
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 0.69,
3
- "train_loss": 1.5280099289757865,
4
- "train_runtime": 11248.8271,
5
  "train_samples": 207865,
6
- "train_samples_per_second": 18.479,
7
  "train_steps_per_second": 0.004
8
  }
 
1
  {
2
  "epoch": 0.69,
3
+ "train_loss": 1.5280096292495728,
4
+ "train_runtime": 11257.3852,
5
  "train_samples": 207865,
6
+ "train_samples_per_second": 18.465,
7
  "train_steps_per_second": 0.004
8
  }
trainer_state.json CHANGED
@@ -58,19 +58,19 @@
58
  },
59
  {
60
  "epoch": 0.69,
61
- "eval_loss": 1.4448440074920654,
62
- "eval_runtime": 364.9267,
63
- "eval_samples_per_second": 63.328,
64
- "eval_steps_per_second": 0.992,
65
  "step": 35
66
  },
67
  {
68
  "epoch": 0.69,
69
  "step": 35,
70
  "total_flos": 9.151767518380032e+16,
71
- "train_loss": 1.5280099289757865,
72
- "train_runtime": 11248.8271,
73
- "train_samples_per_second": 18.479,
74
  "train_steps_per_second": 0.004
75
  }
76
  ],
 
58
  },
59
  {
60
  "epoch": 0.69,
61
+ "eval_loss": 1.4448249340057373,
62
+ "eval_runtime": 365.3304,
63
+ "eval_samples_per_second": 63.258,
64
+ "eval_steps_per_second": 0.991,
65
  "step": 35
66
  },
67
  {
68
  "epoch": 0.69,
69
  "step": 35,
70
  "total_flos": 9.151767518380032e+16,
71
+ "train_loss": 1.5280096292495728,
72
+ "train_runtime": 11257.3852,
73
+ "train_samples_per_second": 18.465,
74
  "train_steps_per_second": 0.004
75
  }
76
  ],
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f95da2441cd921f3f95e05efb2b79fa496411b703a2e11dd1e9675cca4b3a0e4
3
  size 5688
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:31fbe82d73129fb8341d01ddf859411b4efa5022cffc1bfee4f83b9709d8f0fd
3
  size 5688