print('this is the main file called main.py')
'''
# This repo contains the code for fine-tuning a Mixtral model to find ICD-10 codes. The script
# runs well on a single GPU; the current goal is to make sure it also runs in a multi-GPU environment.
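# A hedged note on launching this in a multi-GPU environment (the exact command is an
# assumption, not part of the original script): after running `accelerate config` once,
# something like
#   accelerate launch --multi_gpu --num_processes <num_gpus> main.py
# starts one process per GPU for the Accelerate/FSDP setup configured below.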
import warnings
warnings.filterwarnings("ignore")
from accelerate import FullyShardedDataParallelPlugin, Accelerator
from torch.distributed.fsdp.fully_sharded_data_parallel import FullOptimStateDictConfig, FullStateDictConfig
from datasets import load_dataset
import torch
import transformers
from datetime import datetime
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import prepare_model_for_kbit_training, LoraConfig, get_peft_model
fsdp_plugin = FullyShardedDataParallelPlugin(
    state_dict_config=FullStateDictConfig(offload_to_cpu=True, rank0_only=False),
    optim_state_dict_config=FullOptimStateDictConfig(offload_to_cpu=True, rank0_only=False),
)  # FSDP plugin so Accelerate can shard the model across a multi-GPU environment (full state dicts are offloaded to CPU when saved)
accelerator = Accelerator(fsdp_plugin=fsdp_plugin)
## Loading the dataset
def Profiler_load_dataset(data_files, field='train'):
    # split='train' makes this return a Dataset rather than a DatasetDict, which is what Trainer expects
    return load_dataset('json', data_files=data_files, field=field, split='train')
## loading uses a lot of RAM here
train_dataset = Profiler_load_dataset(data_files='/content/prov_data2.jsonl', field='train')
eval_dataset = Profiler_load_dataset(data_files='/content/prov_data2.jsonl', field='test')
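# The real /content/prov_data2.jsonl is not included here, so the record below is purely
# illustrative: the loader above assumes a JSON document with top-level "train" and "test"
# fields whose entries carry "Input" (diagnosis text) and "Output" (ICD-10 code) keys,
# matching the field= argument above and format_fun below.
_example_record = {"Input": "acute appendicitis", "Output": "K35.80"}  # illustrative values only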
### What is the use of the formatting function?
## It formats each record into a single prompt string for the Mixtral model, which makes it easy to use in an instruction fine-tuning setup.
def format_fun(example):
    text = f"The ICD10 code for {example['Input']} is {example['Output']}"
    return text
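# With the illustrative record above, format_fun would produce the prompt string:
#   "The ICD10 code for acute appendicitis is K35.80"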
# base_model_id = "mistralai/Mixtral-8x7B-v0.1"
# Try out different models from the Hugging Face Hub (the best would have been the ones released by the authors, but those are not quantised, so they probably would not fit here!)
# Note: a GGUF repo is packaged for llama.cpp-style runtimes; loading it by repo id with AutoModelForCausalLM and a BitsAndBytesConfig is unlikely to work, so a standard Transformers checkpoint (such as the Mixtral id above) is the safer choice for 4-bit loading.
base_model_id = 'TheBloke/dolphin-2.5-mixtral-8x7b-GGUF'  # this is passed in as an arg -> args.model_id
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16
)
model = AutoModelForCausalLM.from_pretrained(base_model_id, quantization_config=bnb_config, device_map="auto")  # "auto" places layers across all visible GPUs; a single device string would pin everything to one GPU
## The model loaded and works!
tokenizer = AutoTokenizer.from_pretrained(
    base_model_id,
    padding_side="left",
    add_eos_token=True,
    add_bos_token=True,
)
tokenizer.pad_token = tokenizer.eos_token
max_length = 50  # max number of tokens per tokenized prompt (sequences are truncated/padded to this length)
def generate_and_tokenize_prompt(prompt):
    result = tokenizer(
        format_fun(prompt),
        truncation=True,
        max_length=max_length,
        padding="max_length",
    )
    # For causal LM fine-tuning the labels are a copy of the input ids; the model shifts them internally, so the loss is computed over the whole prompt.
    result["labels"] = result["input_ids"].copy()
    return result
tokenized_train_dataset = train_dataset.map(generate_and_tokenize_prompt)
tokenized_val_dataset = eval_dataset.map(generate_and_tokenize_prompt)
# Fine-tuning the model
model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)
config = LoraConfig(
    r=32,
    lora_alpha=64,
    target_modules=[
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        "w1",
        "w2",
        "w3",
        "lm_head",
    ],
    bias="none",
    lora_dropout=0.05,  # Conventional
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, config)
if torch.cuda.device_count() > 1:  # If more than 1 GPU
    model.is_parallelizable = True
    model.model_parallel = True
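# One possible way to also hand the PEFT model to the Accelerator built at the top (this
# call is an assumption, not part of the original script); whether FSDP sharding combines
# cleanly with the 4-bit quantized weights and the device_map used above should be
# verified on the target multi-GPU setup.
model = accelerator.prepare_model(model)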
project = "icd-finetune"
base_model_name = "mixtral"
run_name = base_model_name + "-" + project
output_dir = "./" + run_name
trainer = transformers.Trainer(
    model=model,
    train_dataset=tokenized_train_dataset,
    eval_dataset=tokenized_val_dataset,
    args=transformers.TrainingArguments(
        output_dir=output_dir,
        warmup_steps=1,
        per_device_train_batch_size=2,
        gradient_accumulation_steps=1,
        gradient_checkpointing=True,
        max_steps=300,
        learning_rate=2.5e-5,  # want a small lr for fine-tuning
        fp16=True,  # note: the 4-bit compute dtype above is bfloat16; bf16=True may be the more consistent choice on GPUs that support it
        optim="paged_adamw_8bit",
        logging_steps=25,  # report the loss every 25 steps
        logging_dir="./logs",  # directory for storing logs
        save_strategy="steps",  # save a checkpoint every save_steps steps
        save_steps=25,  # save checkpoints every 25 steps
        evaluation_strategy="steps",  # evaluate every eval_steps steps
        eval_steps=25,  # evaluate every 25 steps
        do_eval=True,  # run evaluation during training
    ),
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
)
model.config.use_cache = False  # silence the warnings. Please re-enable for inference!
trainer.train()
# Implement RAG on top of the fine-tuned model
# final model prepared; a reload sketch follows
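# A minimal sketch of reloading the fine-tuned adapter for inference (the checkpoint path
# is an assumption derived from output_dir and max_steps above; any saved
# checkpoint-<step> directory under output_dir works the same way).
from peft import PeftModel
base = AutoModelForCausalLM.from_pretrained(base_model_id, quantization_config=bnb_config, device_map="auto")
ft_model = PeftModel.from_pretrained(base, "./mixtral-icd-finetune/checkpoint-300")  # hypothetical checkpoint dir
ft_model.eval()
ft_model.config.use_cache = True  # re-enable the cache for generation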
'''
# 1) Make sure the script runs in a multi-GPU environment!
# 2) The dataset is loaded
# 3) Add the LangChain implementation to oversee and guide the prompt generation
# 4) Also try BERT-style models rather than directly using the Mixtral model
# 5) Once the model is trained, copy the checkpoint folder into a local environment (see the reload sketch above)