## Inference
```python
import time

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load the fine-tuned model and its tokenizer from the Hugging Face Hub
finetuned_model = AutoModelForCausalLM.from_pretrained("AquilaX-AI/QnA")
tokenizer = AutoTokenizer.from_pretrained("AquilaX-AI/QnA")

# Alpaca-style prompt template with the instruction filled in
alpaca_prompt = """Below is an instruction that describes a task. Write a response that appropriately completes the request.

### Instruction:
what is machine learning?

### Response:
"""

s = time.time()

# Tokenize the prompt and move both model and inputs to GPU if one is available
encodeds = tokenizer(alpaca_prompt, return_tensors="pt", truncation=True).input_ids
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
finetuned_model.to(device)
inputs = encodeds.to(device)

# Increase max_new_tokens if longer answers are needed
generated_ids = finetuned_model.generate(
    inputs,
    max_new_tokens=256,
    temperature=0.5,
    top_p=0.90,
    do_sample=True,
    pad_token_id=50259,
    eos_token_id=50259,
    num_return_sequences=1,
)

# Keep only the text generated after the "### Response:" marker
response = tokenizer.decode(generated_ids[0]).split("### Response:")[1].split("<eos>")[0].strip()
print(response)

e = time.time()
print(f"time taken: {e - s:.2f}s")
```
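For interactive use, the same call can stream tokens to stdout as they are generated instead of waiting for the full completion. This is a minimal sketch using transformers' `TextStreamer`, assuming `finetuned_model`, `tokenizer`, and `inputs` are already set up as in the snippet above; the sampling parameters simply mirror that example.

```python
from transformers import TextStreamer

# Print tokens as they are produced; skip_prompt avoids echoing the input prompt
streamer = TextStreamer(tokenizer, skip_prompt=True)

finetuned_model.generate(
    inputs,
    max_new_tokens=256,
    temperature=0.5,
    top_p=0.90,
    do_sample=True,
    pad_token_id=50259,
    eos_token_id=50259,
    streamer=streamer,
)
```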