```python
# Assumes `transformers` and `torch` are installed and a CUDA GPU is available.
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "..."  # replace with this model's repo id
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id).to("cuda")

sample_prompt = "..."  # your prompt text

messages = [
    {"role": "user", "content": sample_prompt}
]
# Format the conversation with the model's chat template; add_generation_prompt
# appends the assistant turn marker so the model responds as the assistant.
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
# The template already inserts special tokens, so skip adding them again here.
inputs = tokenizer([prompt], return_tensors="pt", add_special_tokens=False).to("cuda")

# Merge the tokenized inputs with the sampling settings into one kwargs dict.
generate_kwargs = dict(
    inputs,
    max_new_tokens=8,
    top_p=0.95,
    top_k=50,
    temperature=0.2,
    do_sample=True,
    num_beams=1,
)
r = model.generate(**generate_kwargs)
print(tokenizer.decode(r[0], skip_special_tokens=True))
```
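The decoded string above includes the formatted prompt as well as the reply. If you only want the newly generated text, a minimal sketch (reusing the `inputs` and `r` variables from the example above) is to slice off the prompt tokens before decoding:

```python
# Keep only the tokens generated after the prompt, then decode the reply.
prompt_len = inputs["input_ids"].shape[1]
reply = tokenizer.decode(r[0][prompt_len:], skip_special_tokens=True)
print(reply)
```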