import os
import warnings

import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

# Silence TensorFlow's startup logging (it can be pulled in transitively)
# and the benign UserWarnings transformers emits during generation.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
warnings.filterwarnings("ignore", category=UserWarning, module='transformers.generation.utils')


def load_model_and_tokenizer():
    # GPTQ-quantized phi-2 base model plus a LoRA adapter fine-tuned for
    # electrical-engineering Q&A. Loading a GPTQ checkpoint through
    # transformers requires the optimum and auto-gptq packages.
    base_model = "TheBloke/phi-2-GPTQ"
    peft_model_id = "STEM-AI-mtl/phi-2-electrical-engineering"

    # device_map="cuda:0" already places every weight on the GPU, so no
    # extra .to('cuda') call is needed (moving a GPTQ model with .to() can
    # fail outright in recent transformers versions).
    model = AutoModelForCausalLM.from_pretrained(
        base_model,
        device_map="cuda:0",
        return_dict=True,
        trust_remote_code=True,
    )
    tokenizer = AutoTokenizer.from_pretrained(base_model)

    # Attach the fine-tuned LoRA adapter on top of the quantized base model.
    model = PeftModel.from_pretrained(model, peft_model_id, trust_remote_code=True)
    return model, tokenizer
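
# Optional sanity check (not needed at runtime): the adapter's metadata can
# be inspected on its own via PeftConfig. For a LoRA adapter this returns a
# LoraConfig, which exposes the base model name, rank, and target modules:
#
#     from peft import PeftConfig
#     config = PeftConfig.from_pretrained("STEM-AI-mtl/phi-2-electrical-engineering")
#     print(config.base_model_name_or_path, config.r, config.target_modules)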


def generate(instruction, model, tokenizer):
    inputs = tokenizer(instruction, return_tensors="pt", return_attention_mask=False)
    inputs = inputs.to('cuda')
    outputs = model.generate(
        **inputs,
        max_length=350,          # cap on prompt plus completion, in tokens
        do_sample=True,
        temperature=0.7,
        top_k=50,
        top_p=0.9,
        repetition_penalty=1.0,  # 1.0 disables the penalty; raise it to discourage loops
        pad_token_id=tokenizer.eos_token_id,  # phi-2 has no pad token; reuse EOS to avoid a warning
    )
    text = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
    return text
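

# Variant sketch (assumes the same CUDA setup as above): stream tokens to
# the console as they are produced instead of waiting for the full
# completion. TextStreamer ships with transformers; the sampling settings
# simply mirror generate() above.
def generate_streaming(instruction, model, tokenizer):
    from transformers import TextStreamer

    inputs = tokenizer(instruction, return_tensors="pt", return_attention_mask=False).to('cuda')
    streamer = TextStreamer(tokenizer, skip_prompt=True)  # do not re-echo the prompt
    model.generate(
        **inputs,
        streamer=streamer,
        max_length=350,
        do_sample=True,
        temperature=0.7,
        top_k=50,
        top_p=0.9,
        pad_token_id=tokenizer.eos_token_id,
    )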


if __name__ == '__main__':
    model, tokenizer = load_model_and_tokenizer()
    while True:
        instruction = input("Enter your instruction: ")
        if not instruction:
            continue
        if instruction.lower() in ["exit", "quit", "exit()", "quit()"]:
            print("Exiting...")
            break

        answer = generate(instruction, model, tokenizer)
        print(f'Answer: {answer}')
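
# A possible nicety, not part of the original loop: let Ctrl-C or Ctrl-D
# exit cleanly as well, by wrapping the input() call:
#
#     try:
#         instruction = input("Enter your instruction: ")
#     except (KeyboardInterrupt, EOFError):
#         print("Exiting...")
#         break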