IlyaGusev
/

saiga_7b_lora

Text Generation

Model card Files and versions Community

IlyaGusev committed on Apr 9, 2023

Commit

ff656e6

•

1 Parent(s): 7bb8205

New params

Files changed (2) hide show

generation_config.json +4 -4
training_config.json +35 -0

generation_config.json CHANGED Viewed

@@ -3,11 +3,11 @@
     "bos_token_id": 1,
     "eos_token_id": 2,
     "temperature": 1.0,
-    "top_p": 0.9,
     "top_k": 40,
     "do_sample": true,
-    "max_length": 1024,
     "num_beams": 2,
-    "repetition_penalty": 1.1,
-    "no_repeat_ngram_size": 5
 }

     "bos_token_id": 1,
     "eos_token_id": 2,
     "temperature": 1.0,
+    "top_p": 0.95,
     "top_k": 40,
     "do_sample": true,
+    "max_new_tokens": 1024,
     "num_beams": 2,
+    "repetition_penalty": 1.05,
+    "no_repeat_ngram_size": 6
 }

training_config.json ADDED Viewed

@@ -0,0 +1,35 @@

+{
+    "trainer": {
+        "evaluation_strategy": "steps",
+        "per_device_train_batch_size": 12,
+        "per_device_eval_batch_size": 12,
+        "gradient_accumulation_steps": 10,
+        "eval_steps": 50,
+        "save_steps": 50,
+        "logging_steps": 5,
+        "learning_rate": 0.0003,
+        "num_train_epochs": 3,
+        "lr_scheduler_type": "cosine",
+        "warmup_steps": 30,
+        "fp16": true,
+        "bf16": false,
+        "torch_compile": false,
+        "optim": "adamw_torch"
+    },
+    "lora": {
+        "r": 8,
+        "lora_alpha": 16,
+        "lora_dropout": 0.05,
+        "bias": "none",
+        "target_modules": ["q_proj", "v_proj"],
+        "task_type": "CAUSAL_LM"
+    },
+    "load_in_8bit": true,
+    "only_target_loss": true,
+    "model": "chat",
+    "templates_path": "ru_saiga_template.json",
+    "model_name": "models/llama-7b-hf",
+    "model_type": "causal",
+    "max_tokens_count": 1024
+}