# NOTE(review): the original paste began with scrape artifacts from a Hugging Face
# Spaces page ("Spaces:" / "Runtime error" x2); kept here as a comment so the file parses.
"""Load MaziyarPanahi/Llama-3-70B-Instruct-DPO-v0.2 and stream a chat completion.

Builds a chat prompt via the tokenizer's chat template, runs a
text-generation pipeline with token streaming, and prints only the
newly generated text (the echoed prompt is stripped).
"""
from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer
from transformers import pipeline
import torch

model_id = "MaziyarPanahi/Llama-3-70B-Instruct-DPO-v0.2"

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    device_map="auto",  # shard automatically across available devices
    trust_remote_code=True,
    # attn_implementation="flash_attention_2"
)

tokenizer = AutoTokenizer.from_pretrained(
    model_id,
    trust_remote_code=True,
)

# Streams decoded tokens to stdout as they are generated.
streamer = TextStreamer(tokenizer)

# Renamed from `pipeline` to `pipe`: the original assignment shadowed the
# imported `pipeline` factory function.
# Dropped the redundant model_kwargs={"torch_dtype": torch.bfloat16}: the
# model object is already instantiated in bfloat16 above, so the kwarg is
# ignored for an already-loaded model.
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    streamer=streamer,
)

messages = [
    {"role": "system", "content": "You are a pirate chatbot who always responds in pirate speak!"},
    {"role": "user", "content": "Who are you?"},
]

prompt = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
)

# Stop generation on any of these token ids. `convert_tokens_to_ids` returns
# None for tokens absent from the vocabulary (e.g. "<|im_end|>" is ChatML, not
# Llama-3), and a None inside eos_token_id breaks generation — filter them out.
terminators = [
    tid
    for tid in (
        tokenizer.eos_token_id,
        tokenizer.convert_tokens_to_ids("<|im_end|>"),
        tokenizer.convert_tokens_to_ids("<|eot_id|>"),  # Llama-3 end-of-turn; safer to have this too
    )
    if tid is not None
]

outputs = pipe(
    prompt,
    max_new_tokens=2048,
    eos_token_id=terminators,
    do_sample=True,
    temperature=0.6,
    top_p=0.95,
)

# The pipeline echoes the prompt at the start of generated_text; print only
# the new completion.
print(outputs[0]["generated_text"][len(prompt):])