Yoshiii
/

opt-6.7b-lora

Model card Files Files and versions Community

opt-6.7b-lora / README.md

Yoshiii

Update README.md

70c3583 over 1 year ago

preview code

raw

history blame contribute delete

No virus

5.59 kB

	---
	license: unlicense
	---
	Running OPT-6.7B with added LoRAs locally on Windows!

	# bitsandbytes

	I needed to get bitsandbytes working in my venv on Windows:
	I replaced the main.py at C:\Users\user\Desktop\test\peft\venv\Lib\site-packages\bitsandbytes\cuda_setup\main.py with the one here!
	I also added a .dll file at: C:\Users\user\Desktop\test\peft\venv\Lib\site-packages\bitsandbytes\libbitsandbytes_cuda116.dll



	# Training Script

	The script below is from the PEFT notebook at this commit: https://github.com/huggingface/peft/commit/df0e1fb59266c9903ddd6dbfe7339bcd2068d150

	```
	#load


	import os
	os.environ["CUDA_VISIBLE_DEVICES"]="0"
	import torch
	import torch.nn as nn
	import bitsandbytes as bnb
	from transformers import AutoTokenizer, AutoConfig, AutoModelForCausalLM

	# Load the base checkpoint with 8-bit loading and automatic device mapping.
	model = AutoModelForCausalLM.from_pretrained(
	    "facebook/opt-6.7b",
	    load_in_8bit=True,
	    device_map='auto',
	)

	# The tokenizer is loaded from the same checkpoint name as the model.
	tokenizer = AutoTokenizer.from_pretrained("facebook/opt-6.7b")


	#post-processing

	# Freeze every base-model parameter; only the adapters added later train.
	for param in model.parameters():
	    param.requires_grad = False  # freeze the model - train adapters later
	    if param.ndim == 1:
	        # cast the small parameters (e.g. layernorm) to fp32 for stability
	        param.data = param.data.to(torch.float32)

	model.gradient_checkpointing_enable()  # reduce number of stored activations
	model.enable_input_require_grads()

	class CastOutputToFloat(nn.Sequential):
	    """Sequential container whose forward output is cast to float32."""

	    def forward(self, x):
	        """Run the wrapped modules on ``x`` and return the result as float32."""
	        return super().forward(x).to(torch.float32)
	# Wrap the existing lm_head so that its output is cast to float32.
	model.lm_head = CastOutputToFloat(model.lm_head)

	# apply lora

	def print_trainable_parameters(model):
	    """
	    Prints the number of trainable parameters in the model.

	    Sums ``numel()`` over every parameter yielded by
	    ``model.named_parameters()`` and, separately, over those whose
	    ``requires_grad`` is true, then prints both totals and the trainable
	    percentage on a single line.
	    """
	    trainable_params = 0
	    all_param = 0
	    for _, param in model.named_parameters():
	        all_param += param.numel()
	        if param.requires_grad:
	            trainable_params += param.numel()
	    # A model without parameters would otherwise raise ZeroDivisionError.
	    trainable_pct = 100 * trainable_params / all_param if all_param else 0.0
	    # Plain "||" separators: the backslash-escaped pipes were markdown
	    # escaping, which Python treats as invalid escape sequences.
	    print(
	        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {trainable_pct}"
	    )

	# apply lora 2

	from peft import LoraConfig, get_peft_model

	# LoRA settings, targeting the q_proj and v_proj modules.
	config = LoraConfig(
	    task_type="CAUSAL_LM",
	    target_modules=["q_proj", "v_proj"],
	    r=16,
	    lora_alpha=32,
	    lora_dropout=0.05,
	    bias="none",
	)

	# Wrap the model with the LoRA config, then report the trainable share.
	model = get_peft_model(model, config)
	print_trainable_parameters(model)

	# training

	import transformers
	from datasets import load_dataset
	# Load the quotes dataset and tokenize its 'quote' field in batched mode.
	data = load_dataset("Abirate/english_quotes").map(
	    lambda rows: tokenizer(rows['quote']),
	    batched=True,
	)

	# Training hyperparameters, kept separate from the Trainer for readability.
	training_args = transformers.TrainingArguments(
	    per_device_train_batch_size=4,
	    gradient_accumulation_steps=4,
	    warmup_steps=100,
	    max_steps=200,
	    learning_rate=2e-4,
	    fp16=True,
	    logging_steps=1,
	    output_dir='outputs',
	)
	collator = transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False)

	trainer = transformers.Trainer(
	    model=model,
	    train_dataset=data['train'],
	    args=training_args,
	    data_collator=collator,
	)
	model.config.use_cache = False  # silence the warnings. Please re-enable for inference!
	trainer.train()

	# push to huggingface txtloras
	# Target repo id is the same "Yoshiii/opt-6.7b-lora" that the
	# load-from-hf inference script further down uses as peft_model_id.
	model.push_to_hub("Yoshiii/opt-6.7b-lora", use_auth_token=True)


	# inference

	# Tokenize the prompt into PyTorch tensors.
	batch = tokenizer("Two things are infinite: ", return_tensors='pt')

	# Generate up to 50 new tokens inside the CUDA autocast context.
	with torch.cuda.amp.autocast():
	    output_tokens = model.generate(**batch, max_new_tokens=50)

	print('\n\n', tokenizer.decode(output_tokens[0], skip_special_tokens=True))
	```







	# Inference (loading this repo lora from hf)

	```
	import torch
	from peft import PeftModel, PeftConfig
	from transformers import AutoModelForCausalLM, AutoTokenizer

	peft_model_id = "Yoshiii/opt-6.7b-lora"

	# The adapter config names the base model; load that base model and its
	# tokenizer with 8-bit loading and automatic device mapping.
	config = PeftConfig.from_pretrained(peft_model_id)
	model = AutoModelForCausalLM.from_pretrained(
	    config.base_model_name_or_path,
	    return_dict=True,
	    load_in_8bit=True,
	    device_map='auto',
	)
	tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)

	# Load the Lora model
	model = PeftModel.from_pretrained(model, peft_model_id)


	batch = tokenizer("Two things are infinite: ", return_tensors='pt')

	# Generate up to 50 new tokens inside the CUDA autocast context.
	with torch.cuda.amp.autocast():
	    output_tokens = model.generate(**batch, max_new_tokens=50)

	print('\n\n', tokenizer.decode(output_tokens[0], skip_special_tokens=True))
	```

	Two things are infinite: the universe and human stupidity; and I'm not sure about the universe. -Albert Einstein I'm not sure about the universe either.


	This output resembles the training data. If you run without applying the LoRA, the output will usually look worse. If you retrain the LoRA, note that your new LoRA will not produce the same results, even if you use the same settings.
	Inference should usually be deterministic when using the same LoRA, or when running without a LoRA.




	Also, if you want to download the LoRA files and load them from a local folder, here's the inference script:

	```
	import torch
	from peft import PeftModel, PeftConfig
	from transformers import AutoModelForCausalLM, AutoTokenizer

	peft_model_id = "./loramodel"

	# The adapter config names the base model; load that base model and its
	# tokenizer with 8-bit loading and automatic device mapping.
	config = PeftConfig.from_pretrained(peft_model_id)
	model = AutoModelForCausalLM.from_pretrained(
	    config.base_model_name_or_path,
	    return_dict=True,
	    load_in_8bit=True,
	    device_map='auto',
	)
	tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)

	# Load the Lora model
	model = PeftModel.from_pretrained(model, peft_model_id)


	batch = tokenizer("Two things are infinite: ", return_tensors='pt')

	# Generate up to 50 new tokens inside the CUDA autocast context.
	with torch.cuda.amp.autocast():
	    output_tokens = model.generate(**batch, max_new_tokens=50)

	print('\n\n', tokenizer.decode(output_tokens[0], skip_special_tokens=True))
	```

	Put your `adapter_config.json` and `adapter_model.bin` in a folder named `loramodel` in your current directory (or any name you choose, as long as `peft_model_id` points to it).