# randomSpace/main.py
print('this is the main file called main.py')
'''
# This repo contains the code for fine-tuning a Mixtral model to find ICD-10 codes. The script runs well on a single GPU; the goal now is to make sure it also runs in a multi-GPU environment.
import warnings
warnings.filterwarnings("ignore")
from accelerate import FullyShardedDataParallelPlugin, Accelerator
from torch.distributed.fsdp.fully_sharded_data_parallel import FullOptimStateDictConfig, FullStateDictConfig
from datasets import load_dataset
import torch
import transformers
from datetime import datetime
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import prepare_model_for_kbit_training , LoraConfig, get_peft_model
fsdp_plugin = FullyShardedDataParallelPlugin(
    state_dict_config=FullStateDictConfig(offload_to_cpu=True, rank0_only=False),
    optim_state_dict_config=FullOptimStateDictConfig(offload_to_cpu=True, rank0_only=False),
)  # FSDP plugin to shard the model weights/optimizer state across a multi-GPU environment
accelerator = Accelerator(fsdp_plugin=fsdp_plugin)
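# NOTE: transformers.Trainer (used below) builds its own Accelerator internally, so this
# standalone `accelerator` is not picked up automatically; to actually shard with FSDP, the
# usual routes are the fsdp/fsdp_config fields of TrainingArguments or launching the script
# via `accelerate launch` with an FSDP-enabled accelerate config.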
## Loading the dataset
def Profiler_load_dataset(data_files, field='train'):
    return load_dataset('json', data_files=data_files, field=field)
## High RAM usage here
train_dataset = Profiler_load_dataset(data_files='/content/prov_data2.jsonl', field='train')
eval_dataset = Profiler_load_dataset(data_files='/content/prov_data2.jsonl', field='test')
### What is the formatting function for?
## It renders each record as a prompt string for the Mixtral model, i.e. the format used in an instruction fine-tuning scenario.
def format_fun(example):
    text = f" The ICD10 code for {example['Input']} is , {example['Output']} "
    return text
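# Example (hypothetical record, matching the Input/Output fields used above):
#   format_fun({"Input": "acute bronchitis", "Output": "J20.9"})
#   -> " The ICD10 code for acute bronchitis is , J20.9 "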
# base_model_id = "mistralai/Mixtral-8x7B-v0.1"
# Try out different models from the Hugging Face Hub (the best option would have been the one released by the authors, but that isn't quantised, so it likely won't fit in memory!)
base_model_id = 'TheBloke/dolphin-2.5-mixtral-8x7b-GGUF' # this is passed in as an arg -> args.model_id
# NOTE: GGUF repos hold llama.cpp-format weights; AutoModelForCausalLM + BitsAndBytesConfig below
# expects a standard (safetensors/PyTorch) checkpoint, so a non-GGUF repo may be needed here.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)
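# Note: bnb_4bit_quant_type defaults to "fp4"; QLoRA-style setups usually also pass
# bnb_4bit_quant_type="nf4" here if NF4 quantization is intended.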
model = AutoModelForCausalLM.from_pretrained(base_model_id, quantization_config=bnb_config, device_map="auto")  # "auto" spreads layers across the visible GPUs; the single-GPU run used device_map="cuda"
## The model got loaded and works !!
tokenizer = AutoTokenizer.from_pretrained(
    base_model_id,
    padding_side="left",
    add_eos_token=True,
    add_bos_token=True,
)
tokenizer.pad_token = tokenizer.eos_token
max_length = 50  # max token length of the tokenized prompts (not the number of generated words)
def generate_and_tokenize_prompt(prompt):
    result = tokenizer(
        format_fun(prompt),
        truncation=True,
        max_length=max_length,
        padding="max_length",
    )
    # For causal-LM fine-tuning, labels are simply a copy of the input ids;
    # the model shifts them internally when computing the next-token loss.
    result["labels"] = result["input_ids"].copy()
    return result
tokenized_train_dataset = train_dataset.map(generate_and_tokenize_prompt)
tokenized_val_dataset = eval_dataset.map(generate_and_tokenize_prompt)
# Fine-tuning the model
model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)
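# prepare_model_for_kbit_training freezes the quantized base weights, upcasts the layer norms
# to fp32 for numerical stability, and enables input gradients so gradient checkpointing works
# with k-bit quantized models.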
config = LoraConfig(
    r=32,
    lora_alpha=64,
    target_modules=[
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        "w1",
        "w2",
        "w3",
        "lm_head",
    ],
    bias="none",
    lora_dropout=0.05,  # conventional value
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, config)
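# Optional sanity check: report how many parameters the LoRA wrapping actually trains.
model.print_trainable_parameters()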
if torch.cuda.device_count() > 1:  # if more than one GPU is visible
    model.is_parallelizable = True
    model.model_parallel = True
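# Typical multi-GPU launch commands for this script (assuming 2 GPUs):
#   accelerate launch --num_processes 2 main.py
# or, without accelerate:
#   torchrun --nproc_per_node 2 main.py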
project = "icd-finetune"
base_model_name = "mixtral"
run_name = base_model_name + "-" + project
output_dir = "./" + run_name
trainer = transformers.Trainer(
    model=model,
    train_dataset=tokenized_train_dataset,
    eval_dataset=tokenized_val_dataset,
    args=transformers.TrainingArguments(
        output_dir=output_dir,
        warmup_steps=1,
        per_device_train_batch_size=2,
        gradient_accumulation_steps=1,
        gradient_checkpointing=True,
        max_steps=300,
        learning_rate=2.5e-5,  # want a small lr for fine-tuning
        fp16=True,  # bf16=True would match bnb_4bit_compute_dtype=torch.bfloat16 on Ampere+ GPUs
        optim="paged_adamw_8bit",
        logging_steps=25,  # report the loss every 25 steps
        logging_dir="./logs",  # directory for storing logs
        save_strategy="steps",  # save a checkpoint every save_steps steps
        save_steps=25,  # save checkpoints every 25 steps
        evaluation_strategy="steps",  # evaluate every eval_steps steps
        eval_steps=25,  # evaluate every 25 steps
        do_eval=True,  # run evaluation during training
    ),
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
)
model.config.use_cache = False # silence the warnings. Please re-enable for inference!
trainer.train()
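# Minimal inference sketch (the checkpoint path is hypothetical; base_model_id, bnb_config and
# tokenizer from above are reused). It loads the saved LoRA adapter on top of the quantized base
# model with peft and generates a completion for a prompt in the same format as format_fun.
from peft import PeftModel
base = AutoModelForCausalLM.from_pretrained(base_model_id, quantization_config=bnb_config, device_map="auto")
ft_model = PeftModel.from_pretrained(base, output_dir + "/checkpoint-300")
ft_model.config.use_cache = True  # re-enable the KV cache for generation
prompt = " The ICD10 code for acute bronchitis is ,"
inputs = tokenizer(prompt, return_tensors="pt").to(base.device)
out = ft_model.generate(**inputs, max_new_tokens=20)
print(tokenizer.decode(out[0], skip_special_tokens=True))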
# Implement RAG on the fine tuned model
# final model prepared
'''
# 1) Make sure the model runs with the multi-GPU script!
# 2) The dataset is loaded
# 3) The LangChain implementation to oversee the prompt-generation guide
# 4) Also try BERT-style models rather than using the Mixtral model directly
# 5) Once the model is trained, copy the checkpoint folder and paste it into a local env