import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

# Path to the base Llama 2 model (Hugging Face format) and to the LoRA
# adapter checkpoint produced during fine-tuning.
model_name = "./llama2-hf"
adapters_name = "./checkpoint-760/adapter_model"

print(f"Starting to load the model {model_name} into memory")

# Load the base model in bfloat16 to roughly halve its memory footprint.
m = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,
)

# Attach the LoRA adapter, then fold its weights into the base model so the
# result can be used at inference time without peft installed.
m = PeftModel.from_pretrained(m, adapters_name)
m = m.merge_and_unload()

print(f"Successfully loaded the model {model_name} into memory")

# Write the merged model to disk as a standalone checkpoint.
m.save_pretrained("nyc-savvy")
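# The directory saved above contains only model weights. As an optional,
# hedged addition (not in the original script): saving the tokenizer
# alongside the weights makes "nyc-savvy" self-contained, and a short
# generation serves as a quick sanity check of the merged model. The prompt
# below is an illustrative assumption, not from the original.
tok = AutoTokenizer.from_pretrained(model_name)
tok.save_pretrained("nyc-savvy")

prompt = "What is the best way to spend a Saturday in New York City?"
inputs = tok(prompt, return_tensors="pt")
outputs = m.generate(**inputs, max_new_tokens=64)
print(tok.decode(outputs[0], skip_special_tokens=True))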