---
license: apache-2.0
---

```python
import ctranslate2
import transformers
from huggingface_hub import snapshot_download

# Download the int8 CTranslate2 model files from the Hub; the returned path is
# handed straight to the CTranslate2 generator below.
model_dir = snapshot_download(repo_id="Praise2112/Mistral-7B-Instruct-v0.1-int8-ct2")

generator = ctranslate2.Generator(model_dir, device="cuda", compute_type="int8")  # GPU
# generator = ctranslate2.Generator(model_dir, device="cpu", compute_type="int8")  # CPU

# The tokenizer (and its chat template) is loaded from the original model repo.
tokenizer = transformers.AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.1")

# Conversation history; the final user turn is the one the model answers.
messages = [
    {"role": "user", "content": "What is your favourite condiment?"},
    {"role": "assistant", "content": "Well, I'm quite partial to a good squeeze of fresh lemon juice. It adds just the right amount of zesty flavour to whatever I'm cooking up in the kitchen!"},
    {"role": "user", "content": "Do you have mayonnaise recipes?"}
]

# Render the conversation with the chat template and tokenize it to ids.
model_inputs = tokenizer.apply_chat_template(messages, return_tensors="pt")

# generate_batch is fed token strings here, so map each row of ids back to tokens.
model_inputs = [tokenizer.convert_ids_to_tokens(model_input) for model_input in model_inputs]

# Top-k sampling (k=10), capped at 1000 generated tokens per sequence.
generated_ids = generator.generate_batch(model_inputs, max_length=1000, sampling_topk=10)

# Keep the first hypothesis of every batch entry and turn its ids back into text.
decoded = [res.sequences_ids[0] for res in generated_ids]
decoded = tokenizer.batch_decode(decoded)

print(decoded[0])

```