Paul Rock committed
Commit 358b891
Parent: 98291d4

Initial commit
.gitignore ADDED
@@ -0,0 +1 @@
+/.idea/
README.md CHANGED
@@ -1,23 +1,56 @@
 ---
 license: mit
 datasets:
 - IlyaGusev/ru_turbo_alpaca
 - IlyaGusev/ru_turbo_alpaca_evol_instruct
 - IlyaGusev/ru_turbo_saiga
 - IlyaGusev/ru_sharegpt_cleaned
 - IlyaGusev/oasst1_ru_main_branch
 - IlyaGusev/gpt_roleplay_realm
 - lksy/ru_instruct_gpt4
 language:
 - ru
 - en
 library_name: peft
 pipeline_tag: text-generation
 tags:
 - Saiga
 - ruGPT-3.5
 - 13B
 - chat
 - lora
 - Peft
+- adapter
 ---
+
+# ruGPT-3.5 13B LoRA
+
+This is an adapter-only version; the base model weights are not included.
+
+Based on [ruGPT-3.5-13B](https://huggingface.co/ai-forever/ruGPT-3.5-13B).
+
+Training code is [here](https://github.com/EvilFreelancer/ruGPT-3.5-13B-lora).
+
+> You may use the ruGPT-3.5 13B fp16 base model instead.
+
+## Training procedure
+
+The following `bitsandbytes` quantization config was used during training:
+
+- quant_method: bitsandbytes
+- load_in_8bit: True
+- load_in_4bit: False
+- llm_int8_threshold: 6.0
+- llm_int8_skip_modules: None
+- llm_int8_enable_fp32_cpu_offload: False
+- llm_int8_has_fp16_weight: False
+- bnb_4bit_quant_type: fp4
+- bnb_4bit_use_double_quant: False
+- bnb_4bit_compute_dtype: float32
+
+### Framework versions
+
+- PyTorch 2.1.0
+- PEFT 0.5.0
+- bitsandbytes 0.41.1
+- transformers 4.34.0
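
The card stops at the config dump, so here is a minimal inference sketch that mirrors it: the base model is loaded in 8-bit with the same `bitsandbytes` settings, then the adapter is applied with PEFT. The Hub id `evilfreelancer/ruGPT-3.5-13B-lora` is an assumption inferred from the training-code URL; substitute this repository's actual path.

```python
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import PeftModel

# Same 8-bit settings as the training config above.
bnb_config = BitsAndBytesConfig(load_in_8bit=True, llm_int8_threshold=6.0)

base = AutoModelForCausalLM.from_pretrained(
    "ai-forever/ruGPT-3.5-13B",
    quantization_config=bnb_config,
    device_map="auto",
)

# ASSUMPTION: the adapter's Hub id, inferred from the training-code URL.
adapter_id = "evilfreelancer/ruGPT-3.5-13B-lora"
model = PeftModel.from_pretrained(base, adapter_id)
tokenizer = AutoTokenizer.from_pretrained(adapter_id)

inputs = tokenizer("Вопрос: Что такое LoRA?\nОтвет:", return_tensors="pt").to(model.device)
output = model.generate(**inputs, max_new_tokens=128)
print(tokenizer.decode(output[0], skip_special_tokens=True))
```

Per the note in the card, loading the base model in plain fp16 instead of 8-bit should work as well.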
adapter_config.json ADDED
@@ -0,0 +1,20 @@
+{
+  "auto_mapping": null,
+  "base_model_name_or_path": "/home/pasha/Documents/Repository/gpt/ruGPT-3.5/ruGPT-3.5-13B",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 16,
+  "revision": null,
+  "target_modules": [
+    "c_attn"
+  ],
+  "task_type": "CAUSAL_LM"
+}
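
For reference, the same configuration can be reconstructed in code with `peft.LoraConfig`; this is a sketch of how an equivalent adapter would be set up for training, not the author's exact script. `c_attn` is the fused query/key/value projection in GPT-2-style blocks, which is the architecture ruGPT-3.5 uses. Note that `base_model_name_or_path` above is the author's local checkout; the published base model is `ai-forever/ruGPT-3.5-13B`.

```python
from peft import LoraConfig, get_peft_model
from transformers import AutoModelForCausalLM

lora_config = LoraConfig(
    r=16,                       # rank of the low-rank update matrices
    lora_alpha=16,              # scaling factor (alpha / r = 1.0 here)
    lora_dropout=0.05,
    bias="none",
    target_modules=["c_attn"],  # fused QKV projection in GPT-2-style blocks
    task_type="CAUSAL_LM",
)

base = AutoModelForCausalLM.from_pretrained("ai-forever/ruGPT-3.5-13B")
model = get_peft_model(base, lora_config)
model.print_trainable_parameters()  # only the LoRA matrices are trainable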
adapter_model.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a1563ed0c38f5e596b21307c2dfbe72227f9745ab85543ec412a33f20d59e45a
+size 52457121
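
The weights file is stored through Git LFS, so the repository itself holds only this three-line pointer with the blob's sha256 and byte size. After downloading the real file you can check it against the pointer; a small sketch, assuming `adapter_model.bin` sits in the current directory:

```python
import hashlib
from pathlib import Path

# ASSUMPTION: adapter_model.bin has already been downloaded to the CWD.
blob = Path("adapter_model.bin").read_bytes()

assert len(blob) == 52457121, "size does not match the LFS pointer"
digest = hashlib.sha256(blob).hexdigest()
assert digest == "a1563ed0c38f5e596b21307c2dfbe72227f9745ab85543ec412a33f20d59e45a", \
    "sha256 does not match the LFS pointer"
print("adapter_model.bin matches its LFS pointer")
```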
added_tokens.json ADDED
@@ -0,0 +1,7 @@
+{
+  "</s>": 3,
+  "<mask>": 5,
+  "<pad>": 0,
+  "<s>": 2,
+  "<|endoftext|>": 1
+}
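
These entries pin the special tokens to fixed ids in the vocabulary. A quick check that a loaded tokenizer resolves them the same way (using the same assumed repo id as above):

```python
from transformers import AutoTokenizer

# ASSUMPTION: same inferred repo id as in the earlier sketch.
tok = AutoTokenizer.from_pretrained("evilfreelancer/ruGPT-3.5-13B-lora")

for token in ("<pad>", "<|endoftext|>", "<s>", "</s>", "<mask>"):
    print(token, "->", tok.convert_tokens_to_ids(token))
# expected ids: 0, 1, 2, 3, 5 (matching added_tokens.json)
```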
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
special_tokens_map.json ADDED
@@ -0,0 +1,8 @@
+{
+  "bos_token": "<s>",
+  "eos_token": "</s>",
+  "mask_token": "<mask>",
+  "pad_token": "<pad>",
+  "sep_token": "<s>",
+  "unk_token": "<|endoftext|>"
+}
tokenizer_config.json ADDED
@@ -0,0 +1,59 @@
+{
+  "add_bos_token": false,
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<pad>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "5": {
+      "content": "<mask>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "additional_special_tokens": [],
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "</s>",
+  "errors": "replace",
+  "mask_token": "<mask>",
+  "model_max_length": 2048,
+  "pad_token": "<pad>",
+  "padding_side": "left",
+  "sep_token": "<s>",
+  "tokenizer_class": "GPT2Tokenizer",
+  "tokenizer_file": null,
+  "unk_token": "<|endoftext|>"
+}
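
Two settings here matter for generation: `model_max_length` caps inputs at 2048 tokens, and `padding_side: left` puts `<pad>` tokens before the prompt, so every sequence in a batch ends on its last real token, which is where a causal LM continues generating from. A short illustration (same assumed repo id as above):

```python
from transformers import AutoTokenizer

# ASSUMPTION: same inferred repo id as in the earlier sketches.
tok = AutoTokenizer.from_pretrained("evilfreelancer/ruGPT-3.5-13B-lora")

batch = tok(["Привет!", "Как у тебя дела сегодня?"], return_tensors="pt", padding=True)
print(batch["input_ids"])       # shorter prompt is left-padded with id 0 (<pad>)
print(batch["attention_mask"])  # zeros mark the padded positions
```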
vocab.json ADDED
The diff for this file is too large to render. See raw diff