Run an ONNX model
Can you tell me how to run this? This is my first time using a model in ONNX format.
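For reference, the snippets below assume the optimum library with its onnxruntime extra is installed. A notebook-style setup sketch (exact versions may differ in your environment):

%pip install optimum[onnxruntime] transformers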
from transformers import AutoTokenizer, pipeline
from optimum.onnxruntime import ORTModelForCausalLM
model_id = "microsoft/mistral-7b-instruct-v0.2-ONNX"
# Change file_name to load the .onnx file instead of .onnx.data
model = ORTModelForCausalLM.from_pretrained(
    model_id,
    subfolder="onnx/cpu_and_mobile/mistral-7b-instruct-v0.2-cpu-int4-rtn-block-32-acc-level-4",
    file_name="mistral-7b-instruct-v0.2-cpu-int4-rtn-block-32-acc-level-4.onnx",
    use_io_binding=True
)
# Explicitly define the path to the tokenizer.model file
tokenizer = AutoTokenizer.from_pretrained(
    "microsoft/mistral-7b-instruct-v0.2-ONNX",
    subfolder="onnx/cpu_and_mobile/mistral-7b-instruct-v0.2-cpu-int4-rtn-block-32-acc-level-4"
)
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
#result = pipe("Who is Napoleon Bonaparte?")
result = pipe(
    "Who is Napoleon Bonaparte?",
    max_new_tokens=10,       # increase the number of generated tokens
    do_sample=True,          # enable sampling for more varied answers
    temperature=0.7,         # temperature setting to improve diversity
    top_k=50,                # pick the next token from the top 50 candidates
    top_p=0.9,               # nucleus sampling to drop unlikely tokens
    num_return_sequences=1   # generate a single answer only
)
print(result)
/usr/local/lib/python3.11/dist-packages/huggingface_hub/utils/_auth.py:94: UserWarning:
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
warnings.warn(
ORTModelForCausalLM loaded a legacy ONNX model with no position_ids input, although this input is required for batched generation for the architecture mistral. We strongly encourage to re-export the model with optimum>=1.14 for position_ids and batched inference support.
Device set to use cpu
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
[{'generated_text': 'Who is Napoleon Bonaparte?\n\nNapoleon Bonaparte ('}]
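The `position_ids` warning means this particular ONNX export was produced with an older optimum and lacks the `position_ids` input, so batched generation can misbehave. One option is to re-export the checkpoint yourself with a recent optimum, as the warning suggests. A minimal sketch, assuming the original mistralai/Mistral-7B-Instruct-v0.2 weights are accessible to you and you have enough RAM and disk for the full-precision model:

from optimum.onnxruntime import ORTModelForCausalLM

# Assumption: a recent optimum (>=1.14) is installed, so the exported graph
# includes position_ids. export=True converts the PyTorch checkpoint to ONNX.
model = ORTModelForCausalLM.from_pretrained(
    "mistralai/Mistral-7B-Instruct-v0.2",  # original (non-ONNX) checkpoint
    export=True,
)
model.save_pretrained("mistral-7b-instruct-v0.2-onnx")  # reusable local export

Alternatively, switch to a published ONNX export that already works out of the box, as in the Phi-3.5 example below.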
from transformers import AutoTokenizer, pipeline
from optimum.onnxruntime import ORTModelForCausalLM
model_id = "microsoft/Phi-3.5-mini-instruct-onnx"
# Change file_name to load the .onnx file instead of .onnx.data
model = ORTModelForCausalLM.from_pretrained(
    model_id,
    subfolder="cpu_and_mobile/cpu-int4-awq-block-128-acc-level-4",
    file_name="phi-3.5-mini-instruct-cpu-int4-awq-block-128-acc-level-4.onnx"
)
# Use the original model id (without -onnx):
tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3.5-mini-instruct")
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
result = pipe("Who is Napoleon Bonaparte?")
print(result)
[{'generated_text': 'Who is Napoleon Bonaparte?\n\nNapoleon Bonaparte was a French military and political leader who rose to prom'}]
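With the pipeline's default generation length the answer is cut off mid-word, so the next cell raises max_new_tokens and enables sampling for a fuller, more varied response.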
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
#result = pipe("Who is Napoleon Bonaparte?")
result = pipe(
    "Who is Napoleon Bonaparte?",
    max_new_tokens=100,      # increase the number of generated tokens
    do_sample=True,          # enable sampling for more varied answers
    temperature=0.7,         # temperature setting to improve diversity
    top_k=50,                # pick the next token from the top 50 candidates
    top_p=0.9,               # nucleus sampling to drop unlikely tokens
    num_return_sequences=1   # generate a single answer only
)
print(result)
Device set to use cpu
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
[{'generated_text': 'Who is Napoleon Bonaparte?\n\nNapoleon Bonaparte (1769-1821) was a French military and political leader who rose to power during the French Revolution and became Emperor of the French. He is best known for his military campaigns, which made him a legendary military figure and expanded the French Empire.\n\nNapoleon was born on the island of Corsica, which was then a possession of the Republic of Genoa. He was educated in France and became a'}]
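One possible refinement, not shown in the thread: Phi-3.5-mini-instruct is a chat-tuned model, so formatting the question with the tokenizer's chat template usually produces cleaner answers than a raw prompt string. A minimal sketch, assuming the tokenizer ships a chat template:

# Hypothetical refinement: wrap the question in the model's chat template.
messages = [{"role": "user", "content": "Who is Napoleon Bonaparte?"}]
prompt = tokenizer.apply_chat_template(
    messages,
    tokenize=False,             # return the formatted string, not token ids
    add_generation_prompt=True  # append the assistant-turn marker
)
result = pipe(prompt, max_new_tokens=100)
print(result[0]["generated_text"])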