Update README.md
README.md
@@ -16,8 +16,8 @@ I used the same dataset and reformatted it to apply the ChatML template.
 The code to train this model is available on Google Colab and GitHub.
 Fine-tuning took about an hour on a Google Colab A100 GPU with 40GB VRAM.
 
-
-
+# TRAINING SPECIFICATIONS
+> LoRA configuration
 peft_config = LoraConfig(
     r=16,
     lora_alpha=16,
@@ -27,7 +27,7 @@ peft_config = LoraConfig(
     target_modules=['k_proj', 'gate_proj', 'v_proj', 'up_proj', 'q_proj', 'o_proj', 'down_proj']
 )
 
-
+> Model to fine-tune
 model = AutoModelForCausalLM.from_pretrained(
     model_name,
     torch_dtype=torch.float16,
@@ -35,14 +35,14 @@ model = AutoModelForCausalLM.from_pretrained(
 )
 model.config.use_cache = False
 
-
+> Reference model
 ref_model = AutoModelForCausalLM.from_pretrained(
     model_name,
     torch_dtype=torch.float16,
     load_in_4bit=True
 )
 
-
+> Training arguments
 training_args = TrainingArguments(
     per_device_train_batch_size=4,
     gradient_accumulation_steps=4,
@@ -59,7 +59,7 @@ training_args = TrainingArguments(
     report_to="wandb",
 )
 
-
+> Create DPO trainer
 dpo_trainer = DPOTrainer(
     model,
     ref_model,
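The hunks above only show the lines surrounding the new section labels, so several pieces of the setup (the base model name, the preference dataset, the remaining LoRA, training, and DPO arguments) are cut off. The following is a rough, self-contained sketch of what the full script might look like, assuming the TRL 0.7-era `DPOTrainer` signature that matches the positional `model, ref_model` call in the diff; every name or value marked as an assumption is not taken from this commit.

```python
# Hypothetical end-to-end sketch; values marked "assumption" are NOT in the diff.
import torch
from datasets import load_dataset
from peft import LoraConfig
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments
from trl import DPOTrainer

model_name = "base-model-to-fine-tune"  # assumption: the base checkpoint is not named in these hunks

# Assumption: a preference dataset already reformatted to the
# "prompt" / "chosen" / "rejected" columns that DPOTrainer expects.
dataset = load_dataset("your-username/chatml-dpo-pairs", split="train")

tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token

# LoRA configuration (r, lora_alpha, and target_modules are from the diff; the rest are assumptions)
peft_config = LoraConfig(
    r=16,
    lora_alpha=16,
    lora_dropout=0.05,   # assumption: hidden by the hunk
    bias="none",         # assumption
    task_type="CAUSAL_LM",
    target_modules=['k_proj', 'gate_proj', 'v_proj', 'up_proj', 'q_proj', 'o_proj', 'down_proj'],
)

# Model to fine-tune
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    load_in_4bit=True,   # assumption: the matching line is hidden by the hunk
)
model.config.use_cache = False

# Frozen reference model for the DPO loss
ref_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    load_in_4bit=True,
)

# Training arguments (batch size and gradient accumulation are from the diff; the rest are assumptions)
training_args = TrainingArguments(
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    learning_rate=5e-5,
    max_steps=200,
    logging_steps=1,
    warmup_steps=100,
    optim="paged_adamw_32bit",
    output_dir="./results",
    report_to="wandb",
)

# Create the DPO trainer and run fine-tuning
dpo_trainer = DPOTrainer(
    model,
    ref_model,
    args=training_args,
    train_dataset=dataset,
    tokenizer=tokenizer,
    peft_config=peft_config,
    beta=0.1,                # assumption: DPO regularization strength
    max_prompt_length=1024,  # assumption
    max_length=1536,         # assumption
)
dpo_trainer.train()
```

In this older TRL API, `beta` controls how strongly the fine-tuned policy is kept close to the frozen reference model; more recent TRL releases move these settings into a `DPOConfig` and rename `tokenizer` to `processing_class`, so the exact keywords depend on the installed version.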