init

Browse files

Files changed (18) hide show

README.md +94 -0
config.json +33 -0
generation_config.json +7 -0
pytorch_model.bin +3 -0
resources/img.png +0 -0
resources/test1.png +0 -0
resources/test2.png +0 -0
resources/test3.png +0 -0
resources/test4.png +0 -0
resources/test5.png +0 -0
resources/test6.png +0 -0
rng_state.pth +3 -0
scheduler.pt +3 -0
special_tokens_map.json +6 -0
tokenizer.json +0 -0
tokenizer_config.json +11 -0
trainer_state.json +0 -0
training_args.bin +3 -0

README.md CHANGED Viewed

@@ -1,3 +1,97 @@
 ---
 license: apache-2.0
 ---

 ---
 license: apache-2.0
+datasets:
+- BelleGroup/train_2M_CN
+- BelleGroup/train_3.5M_CN
+- BelleGroup/train_1M_CN
+- BelleGroup/train_0.5M_CN
+- BelleGroup/school_math_0.25M
+language:
+- zh
 ---
+## GoGPT
+基于中文指令数据微调BLOOM
+![img.png](resources/img.png)
+> 训练第一轮足够了，后续第二轮和第三轮提升不大
+- 🚀多样性指令数据
+- 🚀筛选高质量中文数据
+| 模型名字       | 参数量    | 模型地址 |
+|------------|--------|------|
+| gogpt-560m | 5.6亿参数 | 🤗[golaxy/gogpt-560m](https://huggingface.co/golaxy/gogpt-560m) |
+| gogpt-3b   | 30亿参数  | 🤗[golaxy/gogpt-3b](https://huggingface.co/golaxy/gogpt-3b) |
+| gogpt-7b   | 70亿参数  | 🤗[golaxy/gogpt-7b](https://huggingface.co/golaxy/gogpt-7b) |
+## 测试效果
+![img.png](resources/test1.png)
+![img.png](resources/test2.png)
+![img.png](resources/test3.png)
+![img.png](resources/test4.png)
+![img.png](resources/test5.png)
+![img.png](resources/test6.png)
+## TODO
+- 进行RLHF训练
+- 后续加入中英平行语料
+## 感谢
+- [@hz大佬-zero_nlp](https://github.com/yuanzhoulvpi2017/zero_nlp)
+- [stanford_alpaca](https://github.com/tatsu-lab/stanford_alpaca)
+- [Belle数据](https://huggingface.co/BelleGroup)
+
+---
+license: apache-2.0
+datasets:
+- BelleGroup/train_2M_CN
+- BelleGroup/train_3.5M_CN
+- BelleGroup/train_1M_CN
+- BelleGroup/train_0.5M_CN
+- BelleGroup/school_math_0.25M
+language:
+- zh
+---
+## GoGPT
+基于中文指令数据微调BLOOM
+![img.png](resources/img.png)
+> 训练第一轮足够了，后续第二轮和第三轮提升不大
+- 🚀多样性指令数据
+- 🚀筛选高质量中文数据
+| 模型名字       | 参数量    | 模型地址 |
+|------------|--------|------|
+| gogpt-560m | 5.6亿参数 | 🤗[golaxy/gogpt-560m](https://huggingface.co/golaxy/gogpt-560m) |
+| gogpt-3b   | 30亿参数  | 🤗[golaxy/gogpt-3b](https://huggingface.co/golaxy/gogpt-3b) |
+| gogpt-7b   | 70亿参数  | 🤗[golaxy/gogpt-7b](https://huggingface.co/golaxy/gogpt-7b) |
+| gogpt-math-560m   | 5.6亿参数  | 🤗[golaxy/gogpt-math-560m](https://huggingface.co/golaxy/gogpt-math-560m) |
+## 测试效果
+![img.png](resources/test1.png)
+![img.png](resources/test2.png)
+![img.png](resources/test3.png)
+![img.png](resources/test4.png)
+![img.png](resources/test5.png)
+![img.png](resources/test6.png)
+## TODO
+- 进行RLHF训练
+- 后续加入中英平行语料
+## 感谢
+- [@hz大佬-zero_nlp](https://github.com/yuanzhoulvpi2017/zero_nlp)
+- [stanford_alpaca](https://github.com/tatsu-lab/stanford_alpaca)
+- [Belle数据](https://huggingface.co/BelleGroup)

config.json ADDED Viewed

	@@ -0,0 +1,33 @@

+{
+  "_name_or_path": "/data/searchgpt/pretrained_models/bloomz-560m",
+  "apply_residual_connection_post_layernorm": false,
+  "architectures": [
+    "BloomForCausalLM"
+  ],
+  "attention_dropout": 0.0,
+  "attention_softmax_in_fp32": true,
+  "bias_dropout_fusion": true,
+  "bos_token_id": 1,
+  "eos_token_id": 2,
+  "hidden_dropout": 0.0,
+  "hidden_size": 1024,
+  "initializer_range": 0.02,
+  "layer_norm_epsilon": 1e-05,
+  "masked_softmax_fusion": true,
+  "model_type": "bloom",
+  "n_head": 16,
+  "n_inner": null,
+  "n_layer": 24,
+  "offset_alibi": 100,
+  "pad_token_id": 3,
+  "pretraining_tp": 1,
+  "seq_length": 2048,
+  "skip_bias_add": true,
+  "skip_bias_add_qkv": false,
+  "slow_but_exact": false,
+  "torch_dtype": "float32",
+  "transformers_version": "4.29.1",
+  "unk_token_id": 0,
+  "use_cache": true,
+  "vocab_size": 250880
+}

generation_config.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "_from_model_config": true,
+  "bos_token_id": 1,
+  "eos_token_id": 2,
+  "pad_token_id": 3,
+  "transformers_version": "4.29.1"
+}

pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f1f646882932acb3eb72ae258e03a3c941e8eb6c3679c0b9f95fc339387b45fb
+size 2236957537

resources/img.png ADDED Viewed

resources/test1.png ADDED Viewed

resources/test2.png ADDED Viewed

resources/test3.png ADDED Viewed

resources/test4.png ADDED Viewed

resources/test5.png ADDED Viewed

resources/test6.png ADDED Viewed

rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1784c9e20ffdc46b706882695c2108245d7626a328b6d70a37d079ad1fbbc989
+size 14575

scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8d468864fa846a5ee2b901205ec327d6ad6eba8105d29814689b30d76d72a62f
+size 627

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,6 @@

+{
+  "bos_token": "<s>",
+  "eos_token": "</s>",
+  "pad_token": "<pad>",
+  "unk_token": "<unk>"
+}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,11 @@

+{
+  "add_prefix_space": false,
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "</s>",
+  "model_max_length": 512,
+  "pad_token": "<pad>",
+  "padding_side": "right",
+  "tokenizer_class": "BloomTokenizer",
+  "unk_token": "<unk>"
+}

trainer_state.json ADDED Viewed

The diff for this file is too large to render. See raw diff

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2788de76ce95a51b55485628e01fd4e98f9304a340bb5729fa9a70ee2821acce
+size 3963