---
library_name: transformers
license: apache-2.0
---

## INFERENCE

```python
import time

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load the fine-tuned model and its tokenizer from the Hub
finetuned_model = AutoModelForCausalLM.from_pretrained("AquilaX-AI/QnA")
tokenizer = AutoTokenizer.from_pretrained("AquilaX-AI/QnA")

# Alpaca-style prompt template with the question in the Instruction block
alpaca_prompt = """Below is an instruction that describes a task. Write a response that appropriately completes the request.

### Instruction:
what is machine learning?

### Response:
"""

s = time.time()

prompt = alpaca_prompt
encodeds = tokenizer(prompt, return_tensors="pt", truncation=True).input_ids

# Move the model and inputs to the GPU if one is available
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
finetuned_model.to(device)
inputs = encodeds.to(device)

# Increase max_new_tokens if longer answers are needed
generated_ids = finetuned_model.generate(
    inputs,
    max_new_tokens=256,
    temperature=0.5,
    top_p=0.90,
    do_sample=True,
    pad_token_id=50259,
    eos_token_id=50259,
    num_return_sequences=1,
)

# Keep only the text between "### Response:" and the <eos> marker
print(tokenizer.decode(generated_ids[0]).split('### Response:')[1].split('<eos>')[0].strip())

e = time.time()
print(f'time taken: {e - s}')
```
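
For repeated questions it can be convenient to wrap the template and the generation call in a small helper. The sketch below is not part of the original snippet: it reuses the `tokenizer`, `finetuned_model`, and `device` objects created above, the `ask` name and the sample question are hypothetical, and the generation settings simply mirror the values shown earlier.

```python
def ask(question: str, max_new_tokens: int = 256) -> str:
    # Fill the Alpaca template from the snippet above with an arbitrary question
    prompt = (
        "Below is an instruction that describes a task. "
        "Write a response that appropriately completes the request.\n\n"
        f"### Instruction:\n{question}\n\n### Response:\n"
    )
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True).input_ids.to(device)
    generated_ids = finetuned_model.generate(
        inputs,
        max_new_tokens=max_new_tokens,
        temperature=0.5,
        top_p=0.90,
        do_sample=True,
        pad_token_id=50259,
        eos_token_id=50259,
    )
    # Same post-processing as above: keep the text after "### Response:" up to <eos>
    return tokenizer.decode(generated_ids[0]).split('### Response:')[1].split('<eos>')[0].strip()

print(ask("what is deep learning?"))  # hypothetical example question
```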