VikrantRamesh committed on
Commit
abe5483
1 Parent(s): 64e0910

Model save

Browse files
README.md ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ base_model: tiiuae/falcon-7b
4
+ tags:
5
+ - generated_from_trainer
6
+ model-index:
7
+ - name: Falcon-CN
8
+ results: []
9
+ ---
10
+
11
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
12
+ should probably proofread and complete it, then remove this comment. -->
13
+
14
+ # Falcon-CN
15
+
16
+ This model is a LoRA adapter (rank 8, applied to the `query_key_value` modules) fine-tuned from [tiiuae/falcon-7b](https://huggingface.co/tiiuae/falcon-7b); the training dataset was not recorded by the Trainer.
17
+ It achieves the following results on the evaluation set:
18
+ - Loss: 2.2484
19
+
20
+ ## Model description
21
+
22
+ More information needed
23
+
24
+ ## Intended uses & limitations
25
+
26
+ More information needed
27
+
28
+ ## Training and evaluation data
29
+
30
+ More information needed
31
+
32
+ ## Training procedure
33
+
34
+ ### Training hyperparameters
35
+
36
+ The following hyperparameters were used during training:
37
+ - learning_rate: 0.0002
38
+ - train_batch_size: 8
39
+ - eval_batch_size: 8
40
+ - seed: 42
41
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
42
+ - lr_scheduler_type: linear
43
+ - training_steps: 200
44
+
45
+ ### Training results
46
+
47
+ | Training Loss | Epoch | Step | Validation Loss |
48
+ |:-------------:|:-----:|:----:|:---------------:|
49
+ | 2.5969 | 0.24 | 10 | 2.5105 |
50
+ | 2.332 | 0.49 | 20 | 2.4691 |
51
+ | 2.418 | 0.73 | 30 | 2.4289 |
52
+ | 2.4031 | 0.98 | 40 | 2.4040 |
53
+ | 2.3109 | 1.22 | 50 | 2.3807 |
54
+ | 2.3516 | 1.46 | 60 | 2.3600 |
55
+ | 2.2906 | 1.71 | 70 | 2.3406 |
56
+ | 2.3594 | 1.95 | 80 | 2.3265 |
57
+ | 2.2031 | 2.2 | 90 | 2.3151 |
58
+ | 2.25 | 2.44 | 100 | 2.3039 |
59
+ | 2.2148 | 2.68 | 110 | 2.2911 |
60
+ | 2.2594 | 2.93 | 120 | 2.2803 |
61
+ | 2.1844 | 3.17 | 130 | 2.2752 |
62
+ | 2.0914 | 3.41 | 140 | 2.2714 |
63
+ | 2.2008 | 3.66 | 150 | 2.2624 |
64
+ | 2.2109 | 3.9 | 160 | 2.2586 |
65
+ | 2.1648 | 4.15 | 170 | 2.2548 |
66
+ | 2.1484 | 4.39 | 180 | 2.2535 |
67
+ | 2.193 | 4.63 | 190 | 2.2484 |
68
+ | 2.1219 | 4.88 | 200 | 2.2484 |
69
+
70
+
71
+ ### Framework versions
72
+
73
+ - Transformers 4.39.0.dev0
74
+ - PyTorch 2.1.0+cu121
75
+ - Datasets 2.18.0
76
+ - Tokenizers 0.15.2
adapter_config.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "tiiuae/falcon-7b",
5
+ "bias": "none",
6
+ "fan_in_fan_out": false,
7
+ "inference_mode": false,
8
+ "init_lora_weights": true,
9
+ "layers_pattern": null,
10
+ "layers_to_transform": null,
11
+ "loftq_config": {},
12
+ "lora_alpha": 16,
13
+ "lora_dropout": 0.1,
14
+ "megatron_config": null,
15
+ "megatron_core": "megatron.core",
16
+ "modules_to_save": null,
17
+ "peft_type": "LORA",
18
+ "r": 8,
19
+ "rank_pattern": {},
20
+ "revision": null,
21
+ "target_modules": [
22
+ "query_key_value"
23
+ ],
24
+ "task_type": "CAUSAL_LM",
25
+ "use_dora": false,
26
+ "use_rslora": false
27
+ }
adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:756bcd0399f30bc18f939cfe343417d47dd2288e6cd8df84d27dc270c4f2f387
3
+ size 9446600
generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 11,
4
+ "eos_token_id": 11,
5
+ "transformers_version": "4.39.0.dev0"
6
+ }
runs/Mar08_05-59-00_aad05b26ba00/events.out.tfevents.1709877575.aad05b26ba00.436.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d39617061cd467216e4c025e5a9c7cc4578638729559141bdc6e2a58ce4b4506
3
+ size 7606
runs/Mar08_06-25-13_aad05b26ba00/events.out.tfevents.1709879113.aad05b26ba00.8054.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:440e00b6d697fec02bbd9e01466325176e7b9fd3fd6af311cd15686a2aa44d40
3
+ size 15554
runs/Mar08_06-25-13_aad05b26ba00/events.out.tfevents.1709883808.aad05b26ba00.8054.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d91649f19ff94045a1e3a8749624a617f2fa0c96e564ed976289441330cd1397
3
+ size 359
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:572bc5f328aa44f0d1d72bdd4fe12298b7f278748b004b15f1d699335865b6a2
3
+ size 4856