pepoo20 committed on
Commit
0be5f00
1 Parent(s): 5f58aeb

Training in progress, step 3000

Browse files
added_tokens.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "<|endoftext|>": 151643,
3
+ "<|im_end|>": 151645,
4
+ "<|im_start|>": 151644
5
+ }
config.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "MathSymbol/BasicSFT_1.8_Pretrain_Lightning",
3
+ "architectures": [
4
+ "Qwen2ForCausalLM"
5
+ ],
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 151643,
8
+ "eos_token_id": 151643,
9
+ "hidden_act": "silu",
10
+ "hidden_size": 2048,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 5504,
13
+ "max_position_embeddings": 32768,
14
+ "max_window_layers": 21,
15
+ "model_type": "qwen2",
16
+ "num_attention_heads": 16,
17
+ "num_hidden_layers": 24,
18
+ "num_key_value_heads": 16,
19
+ "rms_norm_eps": 1e-06,
20
+ "rope_theta": 1000000.0,
21
+ "sliding_window": 32768,
22
+ "tie_word_embeddings": false,
23
+ "torch_dtype": "bfloat16",
24
+ "transformers_version": "4.40.0",
25
+ "use_cache": false,
26
+ "use_sliding_window": false,
27
+ "vocab_size": 151936
28
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bcb9e249858be8cd1761d787a5eab4c07eeae570ff35e838e331a6da03f1806b
3
+ size 3673690696
special_tokens_map.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|im_start|>",
4
+ "<|im_end|>"
5
+ ],
6
+ "eos_token": {
7
+ "content": "<|endoftext|>",
8
+ "lstrip": false,
9
+ "normalized": false,
10
+ "rstrip": false,
11
+ "single_word": false
12
+ },
13
+ "pad_token": {
14
+ "content": "<|endoftext|>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false
19
+ }
20
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "151643": {
5
+ "content": "<|endoftext|>",
6
+ "lstrip": false,
7
+ "normalized": false,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "151644": {
13
+ "content": "<|im_start|>",
14
+ "lstrip": false,
15
+ "normalized": false,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "151645": {
21
+ "content": "<|im_end|>",
22
+ "lstrip": false,
23
+ "normalized": false,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ }
28
+ },
29
+ "additional_special_tokens": [
30
+ "<|im_start|>",
31
+ "<|im_end|>"
32
+ ],
33
+ "bos_token": null,
34
+ "chat_template": "{% set system_message = 'You are a Math Teacher.Your goal is to understand a math word problem. Then recognize and distinguish which problem it is and then define the variables (if needed) and formulate the problem as it kind then transform it to Symbolic Form.' %}{% if messages[0]['role'] == 'system' %}{% set system_message = messages[0]['content'] %}{% endif %}{% if system_message is defined %}{{ system_message + '\\n' }}{% endif %}{% for message in messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ 'Question: ' + content + ' \\n Answer: ' }}{% elif message['role'] == 'assistant' %}{{ content + '<|endoftext|>' + '\\n' }}{% endif %}{% endfor %}",
35
+ "clean_up_tokenization_spaces": false,
36
+ "eos_token": "<|endoftext|>",
37
+ "errors": "replace",
38
+ "model_max_length": 32768,
39
+ "pad_token": "<|endoftext|>",
40
+ "padding_side": "right",
41
+ "split_special_tokens": false,
42
+ "tokenizer_class": "Qwen2Tokenizer",
43
+ "unk_token": null
44
+ }
trainer_log.jsonl ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"current_steps": 300, "total_steps": 9120, "loss": 0.7143, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3e-05, "epoch": 0.03289383514706285, "percentage": 3.29, "elapsed_time": "0:11:46", "remaining_time": "5:46:15"}
2
+ {"current_steps": 600, "total_steps": 9120, "loss": 0.2219, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.998339850669331e-05, "epoch": 0.0657876702941257, "percentage": 6.58, "elapsed_time": "0:21:56", "remaining_time": "5:11:39"}
3
+ {"current_steps": 900, "total_steps": 9120, "loss": 0.2074, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9734816848192624e-05, "epoch": 0.09868150544118856, "percentage": 9.87, "elapsed_time": "0:33:43", "remaining_time": "5:08:02"}
4
+ {"current_steps": 1200, "total_steps": 9120, "loss": 0.1906, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9190839785031474e-05, "epoch": 0.1315753405882514, "percentage": 13.16, "elapsed_time": "0:43:50", "remaining_time": "4:49:22"}
5
+ {"current_steps": 1500, "total_steps": 9120, "loss": 0.1923, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.835796376008569e-05, "epoch": 0.16446917573531428, "percentage": 16.45, "elapsed_time": "0:55:43", "remaining_time": "4:43:07"}
6
+ {"current_steps": 1500, "total_steps": 9120, "loss": null, "eval_loss": 0.1849033087491989, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 0.16446917573531428, "percentage": 16.45, "elapsed_time": "0:55:43", "remaining_time": "4:43:07"}
7
+ {"current_steps": 1800, "total_steps": 9120, "loss": 0.1839, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.7246135390382216e-05, "epoch": 0.19736301088237712, "percentage": 19.74, "elapsed_time": "1:07:10", "remaining_time": "4:33:08"}
8
+ {"current_steps": 2100, "total_steps": 9120, "loss": 0.1938, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.586863267968384e-05, "epoch": 0.23025684602944, "percentage": 23.03, "elapsed_time": "1:19:01", "remaining_time": "4:24:08"}
9
+ {"current_steps": 2400, "total_steps": 9120, "loss": 0.1863, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.4241906446007296e-05, "epoch": 0.2631506811765028, "percentage": 26.32, "elapsed_time": "1:29:09", "remaining_time": "4:09:38"}
10
+ {"current_steps": 2700, "total_steps": 9120, "loss": 0.1797, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.238538385782601e-05, "epoch": 0.2960445163235657, "percentage": 29.61, "elapsed_time": "1:40:55", "remaining_time": "3:59:59"}
11
+ {"current_steps": 3000, "total_steps": 9120, "loss": 0.176, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.032123642522486e-05, "epoch": 0.32893835147062855, "percentage": 32.89, "elapsed_time": "1:51:07", "remaining_time": "3:46:41"}
12
+ {"current_steps": 3000, "total_steps": 9120, "loss": null, "eval_loss": 0.1760552078485489, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 0.32893835147062855, "percentage": 32.89, "elapsed_time": "1:51:07", "remaining_time": "3:46:41"}
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e87d1dec697e26f8674bebb378e7add472d9862f152df6bbd1c35b83737657ec
3
+ size 5176
vocab.json ADDED
The diff for this file is too large to render. See raw diff