stinkypoop committed
Commit 0be9f8b
1 Parent(s): 64d26d6

Delete QwenTT

QwenTT/README.md DELETED
@@ -1,59 +0,0 @@
- ---
- license: other
- library_name: peft
- tags:
- - llama-factory
- - lora
- - generated_from_trainer
- base_model: Qwen/Qwen1.5-0.5B
- model-index:
- - name: QwenTT
-   results: []
- ---
-
- <!-- This model card has been generated automatically according to the information the Trainer had access to. You
- should probably proofread and complete it, then remove this comment. -->
-
- # QwenTT
-
- This model is a fine-tuned version of [Qwen/Qwen1.5-0.5B](https://huggingface.co/Qwen/Qwen1.5-0.5B) on the identity and the wikipedia_en datasets.
-
- ## Model description
-
- More information needed
-
- ## Intended uses & limitations
-
- More information needed
-
- ## Training and evaluation data
-
- More information needed
-
- ## Training procedure
-
- ### Training hyperparameters
-
- The following hyperparameters were used during training:
- - learning_rate: 5e-05
- - train_batch_size: 2
- - eval_batch_size: 8
- - seed: 42
- - gradient_accumulation_steps: 8
- - total_train_batch_size: 16
- - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
- - lr_scheduler_type: cosine
- - num_epochs: 3.0
- - mixed_precision_training: Native AMP
-
- ### Training results
-
-
-
- ### Framework versions
-
- - PEFT 0.11.1
- - Transformers 4.40.2
- - Pytorch 2.2.1+cu121
- - Datasets 2.19.1
- - Tokenizers 0.19.1
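For reference, a minimal sketch of how a LoRA adapter trained this way would typically be loaded with transformers and PEFT. The base model ID comes from the card above; the adapter path is a placeholder, since this commit removes the QwenTT files.

```python
# Minimal loading sketch (assumes a local copy of the deleted QwenTT adapter).
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base_id = "Qwen/Qwen1.5-0.5B"   # base model named in the card
adapter_path = "./QwenTT"       # placeholder path to the adapter files

tokenizer = AutoTokenizer.from_pretrained(base_id)
model = AutoModelForCausalLM.from_pretrained(base_id)
model = PeftModel.from_pretrained(model, adapter_path)

inputs = tokenizer("Who are you?", return_tensors="pt")
print(tokenizer.decode(model.generate(**inputs, max_new_tokens=32)[0]))
```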
 
QwenTT/added_tokens.json DELETED
@@ -1,5 +0,0 @@
- {
-   "<|endoftext|>": 151643,
-   "<|im_end|>": 151645,
-   "<|im_start|>": 151644
- }
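These are the ChatML-style control tokens used by Qwen1.5. A quick sketch of checking the IDs against the base tokenizer (names and IDs taken from the file above):

```python
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("Qwen/Qwen1.5-0.5B")
for t in ("<|endoftext|>", "<|im_start|>", "<|im_end|>"):
    print(t, tok.convert_tokens_to_ids(t))
# Expected per added_tokens.json: 151643, 151644, 151645
```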
 
QwenTT/all_results.json DELETED
@@ -1,8 +0,0 @@
- {
-     "epoch": 2.608695652173913,
-     "total_flos": 27871774801920.0,
-     "train_loss": 3.2534534454345705,
-     "train_runtime": 85.4079,
-     "train_samples_per_second": 3.196,
-     "train_steps_per_second": 0.176
- }
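The throughput figures are internally consistent: the Trainer reports samples per second against the 91 training examples over the 3 scheduled epochs, and steps per second against the 15 optimizer steps (both counts appear in the running log below). A small check:

```python
runtime = 85.4079        # train_runtime from the file above
print(91 * 3 / runtime)  # ~3.196 -> train_samples_per_second
print(15 / runtime)      # ~0.176 -> train_steps_per_second
```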
 
QwenTT/config.json DELETED
@@ -1,29 +0,0 @@
- {
-   "alpha_pattern": {},
-   "auto_mapping": null,
-   "base_model_name_or_path": "Qwen/Qwen1.5-0.5B",
-   "bias": "none",
-   "fan_in_fan_out": false,
-   "inference_mode": true,
-   "init_lora_weights": true,
-   "layer_replication": null,
-   "layers_pattern": null,
-   "layers_to_transform": null,
-   "loftq_config": {},
-   "lora_alpha": 16,
-   "lora_dropout": 0,
-   "megatron_config": null,
-   "megatron_core": "megatron.core",
-   "modules_to_save": null,
-   "peft_type": "LORA",
-   "r": 8,
-   "rank_pattern": {},
-   "revision": null,
-   "target_modules": [
-     "v_proj",
-     "q_proj"
-   ],
-   "task_type": "CAUSAL_LM",
-   "use_dora": false,
-   "use_rslora": false
- }
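This adapter config corresponds to a plain PEFT LoRA setup: rank 8, alpha 16, dropout 0, on the q/v projections only. A sketch of the equivalent `LoraConfig`, using just the non-default fields above:

```python
from peft import LoraConfig

lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    lora_dropout=0.0,
    target_modules=["q_proj", "v_proj"],
    bias="none",
    task_type="CAUSAL_LM",
)
```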
 
QwenTT/merges.txt DELETED
The diff for this file is too large to render. See raw diff
 
QwenTT/model.safetensors DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:31f5d9f1b65b5a0006ffc61f8ef6bd7ccffd1a64c896a8454f8101ec8c9c78e7
- size 3158328
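As a rough consistency check (not an exact accounting), the pointer size lines up with the adapter reported in the running log below: 786,432 trainable LoRA parameters stored as float32 come to about 3.1 MB, with the small remainder left for the safetensors header.

```python
trainable_params = 786_432                # from the running log
print(trainable_params * 4)               # 3,145,728 bytes of fp32 tensor data
print(3_158_328 - trainable_params * 4)   # ~12.6 kB of header/metadata
```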
 
QwenTT/running_log.txt DELETED
@@ -1,161 +0,0 @@
- 05/17/2024 21:21:12 - INFO - transformers.tokenization_utils_base - loading file vocab.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen1.5-0.5B/snapshots/8f445e3628f3500ee69f24e1303c9f10f5342a39/vocab.json
- 05/17/2024 21:21:12 - INFO - transformers.tokenization_utils_base - loading file merges.txt from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen1.5-0.5B/snapshots/8f445e3628f3500ee69f24e1303c9f10f5342a39/merges.txt
- 05/17/2024 21:21:12 - INFO - transformers.tokenization_utils_base - loading file tokenizer.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen1.5-0.5B/snapshots/8f445e3628f3500ee69f24e1303c9f10f5342a39/tokenizer.json
- 05/17/2024 21:21:12 - INFO - transformers.tokenization_utils_base - loading file added_tokens.json from cache at None
- 05/17/2024 21:21:12 - INFO - transformers.tokenization_utils_base - loading file special_tokens_map.json from cache at None
- 05/17/2024 21:21:12 - INFO - transformers.tokenization_utils_base - loading file tokenizer_config.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen1.5-0.5B/snapshots/8f445e3628f3500ee69f24e1303c9f10f5342a39/tokenizer_config.json
- 05/17/2024 21:21:12 - WARNING - transformers.tokenization_utils_base - Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
- 05/17/2024 21:21:12 - INFO - llamafactory.data.template - Replace eos token: <|im_end|>
- 05/17/2024 21:21:12 - INFO - llamafactory.data.loader - Loading dataset identity.json...
- 05/17/2024 21:21:13 - INFO - llamafactory.data.loader - Loading dataset olm/olm-wikipedia-20221220...
- 05/17/2024 21:22:39 - INFO - transformers.configuration_utils - loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen1.5-0.5B/snapshots/8f445e3628f3500ee69f24e1303c9f10f5342a39/config.json
- 05/17/2024 21:22:39 - INFO - transformers.configuration_utils - Model config Qwen2Config {
-   "_name_or_path": "Qwen/Qwen1.5-0.5B",
-   "architectures": [
-     "Qwen2ForCausalLM"
-   ],
-   "attention_dropout": 0.0,
-   "bos_token_id": 151643,
-   "eos_token_id": 151643,
-   "hidden_act": "silu",
-   "hidden_size": 1024,
-   "initializer_range": 0.02,
-   "intermediate_size": 2816,
-   "max_position_embeddings": 32768,
-   "max_window_layers": 21,
-   "model_type": "qwen2",
-   "num_attention_heads": 16,
-   "num_hidden_layers": 24,
-   "num_key_value_heads": 16,
-   "rms_norm_eps": 1e-06,
-   "rope_theta": 1000000.0,
-   "sliding_window": 32768,
-   "tie_word_embeddings": true,
-   "torch_dtype": "bfloat16",
-   "transformers_version": "4.40.2",
-   "use_cache": true,
-   "use_sliding_window": false,
-   "vocab_size": 151936
- }
- 05/17/2024 21:22:39 - INFO - llamafactory.model.utils.quantization - Quantizing model to 8 bit.
- 05/17/2024 21:22:45 - INFO - transformers.modeling_utils - loading weights file model.safetensors from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen1.5-0.5B/snapshots/8f445e3628f3500ee69f24e1303c9f10f5342a39/model.safetensors
- 05/17/2024 21:22:45 - INFO - transformers.modeling_utils - Instantiating Qwen2ForCausalLM model under default dtype torch.float16.
- 05/17/2024 21:22:45 - INFO - transformers.generation.configuration_utils - Generate config GenerationConfig {
-   "bos_token_id": 151643,
-   "eos_token_id": 151643
- }
- 05/17/2024 21:22:49 - INFO - transformers.modeling_utils - All model checkpoint weights were used when initializing Qwen2ForCausalLM.
- 05/17/2024 21:22:49 - INFO - transformers.modeling_utils - All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at Qwen/Qwen1.5-0.5B.
- If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training.
- 05/17/2024 21:22:49 - INFO - transformers.generation.configuration_utils - loading configuration file generation_config.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen1.5-0.5B/snapshots/8f445e3628f3500ee69f24e1303c9f10f5342a39/generation_config.json
- 05/17/2024 21:22:49 - INFO - transformers.generation.configuration_utils - Generate config GenerationConfig {
-   "bos_token_id": 151643,
-   "eos_token_id": 151643,
-   "max_new_tokens": 2048
- }
- 05/17/2024 21:22:49 - INFO - llamafactory.model.utils.checkpointing - Gradient checkpointing enabled.
- 05/17/2024 21:22:49 - INFO - llamafactory.model.utils.attention - Using torch SDPA for faster training and inference.
- 05/17/2024 21:22:49 - INFO - llamafactory.model.adapter - Upcasting trainable params to float32.
- 05/17/2024 21:22:49 - INFO - llamafactory.model.adapter - Fine-tuning method: LoRA
- 05/17/2024 21:22:50 - INFO - llamafactory.model.loader - trainable params: 786432 || all params: 464774144 || trainable%: 0.1692
- 05/17/2024 21:22:50 - INFO - transformers.trainer - Using auto half precision backend
- 05/17/2024 21:22:50 - INFO - transformers.trainer - ***** Running training *****
- 05/17/2024 21:22:50 - INFO - transformers.trainer - Num examples = 91
- 05/17/2024 21:22:50 - INFO - transformers.trainer - Num Epochs = 3
- 05/17/2024 21:22:50 - INFO - transformers.trainer - Instantaneous batch size per device = 2
- 05/17/2024 21:22:50 - INFO - transformers.trainer - Total train batch size (w. parallel, distributed & accumulation) = 16
- 05/17/2024 21:22:50 - INFO - transformers.trainer - Gradient Accumulation steps = 8
- 05/17/2024 21:22:50 - INFO - transformers.trainer - Total optimization steps = 15
- 05/17/2024 21:22:50 - INFO - transformers.trainer - Number of trainable parameters = 786,432
- 05/17/2024 21:23:19 - INFO - llamafactory.extras.callbacks - {'loss': 3.2997, 'learning_rate': 3.7500e-05, 'epoch': 0.87}
- 05/17/2024 21:23:47 - INFO - llamafactory.extras.callbacks - {'loss': 3.2782, 'learning_rate': 1.2500e-05, 'epoch': 1.74}
- 05/17/2024 21:24:15 - INFO - llamafactory.extras.callbacks - {'loss': 3.1825, 'learning_rate': 0.0000e+00, 'epoch': 2.61}
- 05/17/2024 21:24:15 - INFO - transformers.trainer -
- Training completed. Do not forget to share your model on huggingface.co/models =)
- 05/17/2024 21:24:15 - INFO - transformers.trainer - Saving model checkpoint to saves/Qwen1.5-0.5B/lora/QwenTT
- 05/17/2024 21:24:16 - INFO - transformers.configuration_utils - loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen1.5-0.5B/snapshots/8f445e3628f3500ee69f24e1303c9f10f5342a39/config.json
- 05/17/2024 21:24:16 - INFO - transformers.configuration_utils - Model config Qwen2Config {
-   "architectures": [
-     "Qwen2ForCausalLM"
-   ],
-   "attention_dropout": 0.0,
-   "bos_token_id": 151643,
-   "eos_token_id": 151643,
-   "hidden_act": "silu",
-   "hidden_size": 1024,
-   "initializer_range": 0.02,
-   "intermediate_size": 2816,
-   "max_position_embeddings": 32768,
-   "max_window_layers": 21,
-   "model_type": "qwen2",
-   "num_attention_heads": 16,
-   "num_hidden_layers": 24,
-   "num_key_value_heads": 16,
-   "rms_norm_eps": 1e-06,
-   "rope_theta": 1000000.0,
-   "sliding_window": 32768,
-   "tie_word_embeddings": true,
-   "torch_dtype": "bfloat16",
-   "transformers_version": "4.40.2",
-   "use_cache": true,
-   "use_sliding_window": false,
-   "vocab_size": 151936
- }
- 05/17/2024 21:24:16 - INFO - transformers.tokenization_utils_base - tokenizer config file saved in saves/Qwen1.5-0.5B/lora/QwenTT/tokenizer_config.json
- 05/17/2024 21:24:16 - INFO - transformers.tokenization_utils_base - Special tokens file saved in saves/Qwen1.5-0.5B/lora/QwenTT/special_tokens_map.json
- 05/17/2024 21:24:16 - WARNING - llamafactory.extras.ploting - No metric eval_loss to plot.
- 05/17/2024 21:24:16 - INFO - transformers.modelcard - Dropping the following result as it does not have all the necessary fields:
- {'task': {'name': 'Causal Language Modeling', 'type': 'text-generation'}}
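One detail worth noting from this log: training stops at epoch ≈ 2.61 rather than 3.0. With 91 examples, a per-device batch of 2 and 8 accumulation steps, the Trainer's floor-division step count yields only 5 optimizer steps per epoch, so 3 epochs are scheduled as 15 steps, which cover 120 of the 46 × 3 = 138 micro-batches. A sketch of that arithmetic (assuming the Trainer's usual accounting):

```python
examples, per_device, grad_accum, epochs = 91, 2, 8, 3

micro_per_epoch = -(-examples // per_device)      # 46 micro-batches per epoch
steps_per_epoch = micro_per_epoch // grad_accum   # 5 optimizer steps (floor)
total_steps = steps_per_epoch * epochs            # 15, as logged
final_epoch = total_steps * grad_accum / micro_per_epoch
print(total_steps, round(final_epoch, 3))         # 15 2.609
```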
 
QwenTT/special_tokens_map.json DELETED
@@ -1,20 +0,0 @@
- {
-   "additional_special_tokens": [
-     "<|im_start|>",
-     "<|im_end|>"
-   ],
-   "eos_token": {
-     "content": "<|im_end|>",
-     "lstrip": false,
-     "normalized": false,
-     "rstrip": false,
-     "single_word": false
-   },
-   "pad_token": {
-     "content": "<|endoftext|>",
-     "lstrip": false,
-     "normalized": false,
-     "rstrip": false,
-     "single_word": false
-   }
- }
 
QwenTT/tokenizer.json DELETED
The diff for this file is too large to render. See raw diff
 
QwenTT/tokenizer_config.json DELETED
@@ -1,44 +0,0 @@
- {
-   "add_prefix_space": false,
-   "added_tokens_decoder": {
-     "151643": {
-       "content": "<|endoftext|>",
-       "lstrip": false,
-       "normalized": false,
-       "rstrip": false,
-       "single_word": false,
-       "special": true
-     },
-     "151644": {
-       "content": "<|im_start|>",
-       "lstrip": false,
-       "normalized": false,
-       "rstrip": false,
-       "single_word": false,
-       "special": true
-     },
-     "151645": {
-       "content": "<|im_end|>",
-       "lstrip": false,
-       "normalized": false,
-       "rstrip": false,
-       "single_word": false,
-       "special": true
-     }
-   },
-   "additional_special_tokens": [
-     "<|im_start|>",
-     "<|im_end|>"
-   ],
-   "bos_token": null,
-   "chat_template": "{% set system_message = 'You are a helpful assistant.' %}{% if messages[0]['role'] == 'system' %}{% set system_message = messages[0]['content'] %}{% endif %}{% if system_message is defined %}{{ '<|im_start|>system\\n' + system_message + '<|im_end|>\\n' }}{% endif %}{% for message in messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '<|im_start|>user\\n' + content + '<|im_end|>\\n<|im_start|>assistant\\n' }}{% elif message['role'] == 'assistant' %}{{ content + '<|im_end|>' + '\\n' }}{% endif %}{% endfor %}",
-   "clean_up_tokenization_spaces": false,
-   "eos_token": "<|im_end|>",
-   "errors": "replace",
-   "model_max_length": 32768,
-   "pad_token": "<|endoftext|>",
-   "padding_side": "right",
-   "split_special_tokens": false,
-   "tokenizer_class": "Qwen2Tokenizer",
-   "unk_token": null
- }
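The `chat_template` above is the ChatML-style format LLaMA-Factory uses for Qwen: an optional system turn (defaulting to "You are a helpful assistant."), then `<|im_start|>user` / `<|im_start|>assistant` blocks terminated by `<|im_end|>`. A sketch of exercising it through the standard tokenizer interface; if the tokenizer in use ships a different template, assign the string above to `tok.chat_template` first.

```python
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("Qwen/Qwen1.5-0.5B")
# tok.chat_template = "..."  # paste the template string from the file above if needed

messages = [{"role": "user", "content": "Who are you?"}]
print(tok.apply_chat_template(messages, tokenize=False, add_generation_prompt=True))
```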
 
QwenTT/train_results.json DELETED
@@ -1,8 +0,0 @@
- {
-     "epoch": 2.608695652173913,
-     "total_flos": 27871774801920.0,
-     "train_loss": 3.2534534454345705,
-     "train_runtime": 85.4079,
-     "train_samples_per_second": 3.196,
-     "train_steps_per_second": 0.176
- }
 
QwenTT/trainer_config.yaml DELETED
@@ -1,31 +0,0 @@
- cutoff_len: 1024
- dataset: identity,wikipedia_en
- dataset_dir: data
- do_train: true
- finetuning_type: lora
- flash_attn: auto
- fp16: true
- gradient_accumulation_steps: 8
- learning_rate: 5.0e-05
- logging_steps: 5
- lora_alpha: 16
- lora_dropout: 0
- lora_rank: 8
- lora_target: q_proj,v_proj
- lr_scheduler_type: cosine
- max_grad_norm: 1.0
- max_samples: 100000
- model_name_or_path: Qwen/Qwen1.5-0.5B
- num_train_epochs: 3.0
- optim: adamw_torch
- output_dir: saves/Qwen1.5-0.5B/lora/QwenTT
- packing: false
- per_device_train_batch_size: 2
- plot_loss: true
- preprocessing_num_workers: 16
- quantization_bit: 8
- report_to: none
- save_steps: 100
- stage: sft
- template: qwen
- warmup_steps: 0
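This YAML is a LLaMA-Factory SFT recipe: 8-bit quantized base model, LoRA on q_proj/v_proj, the `qwen` template, and the same hyperparameters reported in the model card above. A sketch of reproducing the run, assuming the `llamafactory-cli` entry point available in LLaMA-Factory releases from around the time of the log:

```python
import subprocess

# trainer_config.yaml is the file shown above, saved locally.
subprocess.run(["llamafactory-cli", "train", "trainer_config.yaml"], check=True)
```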
 
QwenTT/trainer_log.jsonl DELETED
@@ -1,4 +0,0 @@
- {"current_steps": 5, "total_steps": 15, "loss": 3.2997, "learning_rate": 3.7500000000000003e-05, "epoch": 0.8695652173913043, "percentage": 33.33, "elapsed_time": "0:00:29", "remaining_time": "0:00:58"}
- {"current_steps": 10, "total_steps": 15, "loss": 3.2782, "learning_rate": 1.2500000000000006e-05, "epoch": 1.7391304347826086, "percentage": 66.67, "elapsed_time": "0:00:56", "remaining_time": "0:00:28"}
- {"current_steps": 15, "total_steps": 15, "loss": 3.1825, "learning_rate": 0.0, "epoch": 2.608695652173913, "percentage": 100.0, "elapsed_time": "0:01:25", "remaining_time": "0:00:00"}
- {"current_steps": 15, "total_steps": 15, "epoch": 2.608695652173913, "percentage": 100.0, "elapsed_time": "0:01:25", "remaining_time": "0:00:00"}
 
QwenTT/trainer_state.json DELETED
@@ -1,51 +0,0 @@
- {
-   "best_metric": null,
-   "best_model_checkpoint": null,
-   "epoch": 2.608695652173913,
-   "eval_steps": 500,
-   "global_step": 15,
-   "is_hyper_param_search": false,
-   "is_local_process_zero": true,
-   "is_world_process_zero": true,
-   "log_history": [
-     {
-       "epoch": 0.8695652173913043,
-       "grad_norm": 1.7076869010925293,
-       "learning_rate": 3.7500000000000003e-05,
-       "loss": 3.2997,
-       "step": 5
-     },
-     {
-       "epoch": 1.7391304347826086,
-       "grad_norm": 1.5296180248260498,
-       "learning_rate": 1.2500000000000006e-05,
-       "loss": 3.2782,
-       "step": 10
-     },
-     {
-       "epoch": 2.608695652173913,
-       "grad_norm": 1.4705257415771484,
-       "learning_rate": 0.0,
-       "loss": 3.1825,
-       "step": 15
-     },
-     {
-       "epoch": 2.608695652173913,
-       "step": 15,
-       "total_flos": 27871774801920.0,
-       "train_loss": 3.2534534454345705,
-       "train_runtime": 85.4079,
-       "train_samples_per_second": 3.196,
-       "train_steps_per_second": 0.176
-     }
-   ],
-   "logging_steps": 5,
-   "max_steps": 15,
-   "num_input_tokens_seen": 0,
-   "num_train_epochs": 3,
-   "save_steps": 100,
-   "total_flos": 27871774801920.0,
-   "train_batch_size": 2,
-   "trial_name": null,
-   "trial_params": null
- }
 
QwenTT/training_args.bin DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:60718a321f9cf36940833e02a0917fbb713ef04b94919bba3b1ad6020bc6b6ac
- size 5176
 
QwenTT/training_loss.png DELETED
Binary file (35.7 kB)
 
QwenTT/vocab.json DELETED
The diff for this file is too large to render. See raw diff