print('this is the main file called main.py')

'''
# This repo contains the code for fine-tuning a Mixtral model to predict ICD-10 codes.
# The script already runs on a single GPU; the goal now is to make it run in a
# multi-GPU environment.

import warnings
warnings.filterwarnings("ignore")

from accelerate import FullyShardedDataParallelPlugin, Accelerator
from torch.distributed.fsdp.fully_sharded_data_parallel import FullOptimStateDictConfig, FullStateDictConfig
from datasets import load_dataset
import torch
import transformers
from datetime import datetime
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import prepare_model_for_kbit_training, LoraConfig, get_peft_model

# FSDP plugin so Accelerate can shard the weights and optimizer state across multiple GPUs
fsdp_plugin = FullyShardedDataParallelPlugin(
    state_dict_config=FullStateDictConfig(offload_to_cpu=True, rank0_only=False),
    optim_state_dict_config=FullOptimStateDictConfig(offload_to_cpu=True, rank0_only=False),
)
accelerator = Accelerator(fsdp_plugin=fsdp_plugin)

# Loading the dataset (this step uses a lot of RAM)
def Profiler_load_dataset(data_files, field='train'):
    return load_dataset('json', data_files=data_files, field=field)

train_dataset = Profiler_load_dataset(data_files='/content/prov_data2.jsonl', field='train')
eval_dataset = Profiler_load_dataset(data_files='/content/prov_data2.jsonl', field='test')

# What is the formatting function for?
# It turns each record into a single instruction-style prompt, which is the form
# the Mixtral model expects in an instruction fine-tuning scenario.
def format_fun(example):
    text = f" The ICD10 code for {example['Input']} is , {example['Output']} "
    return text

# base_model_id = "mistralai/Mixtral-8x7B-v0.1"
# Try out different models from the Hugging Face Hub. The official Mixtral release would be
# ideal, but it is not quantised, so it is unlikely to fit here.
# Note: GGUF checkpoints target llama.cpp-style runtimes; loading this repo with
# AutoModelForCausalLM + BitsAndBytes may not work, so a standard safetensors repo is the safer choice.
base_model_id = 'TheBloke/dolphin-2.5-mixtral-8x7b-GGUF'  # passed in as an arg -> args.model_id

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

model = AutoModelForCausalLM.from_pretrained(base_model_id, quantization_config=bnb_config, device_map="cuda")
# The model loads and works.

tokenizer = AutoTokenizer.from_pretrained(
    base_model_id,
    padding_side="left",
    add_eos_token=True,
    add_bos_token=True,
)
tokenizer.pad_token = tokenizer.eos_token

max_length = 50  # maximum number of tokens per tokenized example

def generate_and_tokenize_prompt(prompt):
    result = tokenizer(
        format_fun(prompt),
        truncation=True,
        max_length=max_length,
        padding="max_length",
    )
    # For causal LM fine-tuning the labels are the input ids themselves:
    # the model is trained to predict each next token of the prompt.
    result["labels"] = result["input_ids"].copy()
    return result
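
# A quick sanity-check sketch (not part of the training flow): it shows what format_fun and
# generate_and_tokenize_prompt produce. The sample record below is made up and only assumes
# that the jsonl rows carry the 'Input' and 'Output' fields used above.
_example = {"Input": "acute bronchitis", "Output": "J20.9"}
print(format_fun(_example))
# -> " The ICD10 code for acute bronchitis is , J20.9 "
_tokens = generate_and_tokenize_prompt(_example)
print(len(_tokens["input_ids"]), _tokens["labels"] == _tokens["input_ids"])
# -> 50 True  (padded to max_length; labels mirror input_ids for causal LM training)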
tokenized_train_dataset = train_dataset.map(generate_and_tokenize_prompt)
tokenized_val_dataset = eval_dataset.map(generate_and_tokenize_prompt)

# Fine-tuning the model
model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)

config = LoraConfig(
    r=32,
    lora_alpha=64,
    target_modules=[
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        "w1",
        "w2",
        "w3",
        "lm_head",
    ],
    bias="none",
    lora_dropout=0.05,  # conventional value
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, config)

if torch.cuda.device_count() > 1:  # if more than one GPU is available
    model.is_parallelizable = True
    model.model_parallel = True

project = "icd-finetune"
base_model_name = "mixtral"
run_name = base_model_name + "-" + project
output_dir = "./" + run_name

trainer = transformers.Trainer(
    model=model,
    train_dataset=tokenized_train_dataset,
    eval_dataset=tokenized_val_dataset,
    args=transformers.TrainingArguments(
        output_dir=output_dir,
        warmup_steps=1,
        per_device_train_batch_size=2,
        gradient_accumulation_steps=1,
        gradient_checkpointing=True,
        max_steps=300,
        learning_rate=2.5e-5,        # small learning rate for fine-tuning
        fp16=True,
        optim="paged_adamw_8bit",
        logging_steps=25,            # report the loss every 25 steps
        logging_dir="./logs",        # directory for storing logs
        save_strategy="steps",       # save a checkpoint every save_steps
        save_steps=25,               # save checkpoints every 25 steps
        evaluation_strategy="steps", # evaluate every eval_steps
        eval_steps=25,               # evaluate every 25 steps
        do_eval=True,                # also run evaluation at the end of training
    ),
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
)

model.config.use_cache = False  # silence the warnings; re-enable for inference!
trainer.train()

# Implement RAG on the fine-tuned model
# final model prepared
'''

'''
# 1) Make sure the script runs in a multi-GPU environment.
# 2) The dataset is loaded.
# 3) Add the LangChain implementation to oversee the prompt generation guide.
# 4) Also try BERT models rather than using the Mixtral model directly.
# 5) Once the model is trained, copy the checkpoint folder into a local environment.
'''
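
# Rough, untested sketch of the next steps implied above; the names below are assumptions, not
# things this repo defines. The checkpoint path follows from output_dir and max_steps, but is
# only a guess at what Trainer will actually write; a GPU count of 2 is also just an example.
#
#   # launch the quoted-out training script across the visible GPUs
#   # (run `accelerate config` once, or pass the flags explicitly)
#   accelerate launch --multi_gpu --num_processes 2 main.py
#
#   # reload the base model and attach the trained LoRA adapter for inference
#   from peft import PeftModel
#   base = AutoModelForCausalLM.from_pretrained(base_model_id, quantization_config=bnb_config, device_map="auto")
#   model = PeftModel.from_pretrained(base, "./mixtral-icd-finetune/checkpoint-300")
#   model.config.use_cache = True  # re-enable the cache that was disabled during training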