add onnx files and readme for usage with onnx

#14
by michaelfeil - opened
Files changed (2) hide show
  1. README.md +46 -0
  2. onnx/model.onnx +3 -0
README.md CHANGED
@@ -365,6 +365,52 @@ with torch.no_grad():
365
  print(scores)
366
  ```
367
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
368
  ## Evaluation
369
 
370
  `baai-general-embedding` models achieve **state-of-the-art performance on both MTEB and C-MTEB leaderboard!**
 
365
  print(scores)
366
  ```
367
 
368
+ #### Using the reranker with the ONNX files
369
+
370
+ ```python
371
+ from optimum.onnxruntime import ORTModelForSequenceClassification # type: ignore
372
+
373
+ import torch
374
+ from transformers import AutoModelForSequenceClassification, AutoTokenizer
375
+
376
+ tokenizer = AutoTokenizer.from_pretrained('BAAI/bge-reranker-large')
377
+ model = AutoModelForSequenceClassification.from_pretrained('BAAI/bge-reranker-base')
378
+ model_ort = ORTModelForSequenceClassification.from_pretrained('BAAI/bge-reranker-base', file_name="onnx/model.onnx")
379
+
380
+ # Query-passage pairs we want relevance scores for
381
+ pairs = [['what is panda?', 'hi'], ['what is panda?', 'The giant panda (Ailuropoda melanoleuca), sometimes called a panda bear or simply panda, is a bear species endemic to China.']]
382
+
383
+ # Tokenize sentences
384
+ encoded_input = tokenizer(pairs, padding=True, truncation=True, return_tensors='pt')
385
+
386
+ scores_ort = model_ort(**encoded_input, return_dict=True).logits.view(-1, ).float()
387
+ # Compute relevance scores from the logits
388
+ with torch.inference_mode():
389
+ scores = model_ort(**encoded_input, return_dict=True).logits.view(-1, ).float()
390
+
391
+ # scores and scores_ort are identical
392
+ ```
393
+ #### Using the reranker with infinity
394
+
395
+ It's also possible to deploy the ONNX/torch files with the [infinity_emb](https://github.com/michaelfeil/infinity) pip package.
396
+ ```python
397
+ import asyncio
398
+ from infinity_emb import AsyncEmbeddingEngine, EngineArgs
399
+
400
+ query='what is a panda?'
401
+ docs = ['The giant panda (Ailuropoda melanoleuca), sometimes called a panda bear', "Paris is in France."]
402
+
403
+ engine = AsyncEmbeddingEngine.from_args(
404
+ EngineArgs(model_name_or_path = "BAAI/bge-reranker-base", device="cpu", engine="torch" # or engine="optimum" for onnx
405
+ ))
406
+
407
+ async def main():
408
+ async with engine:
409
+ ranking, usage = await engine.rerank(query=query, docs=docs)
410
+ print(list(zip(ranking, docs)))
411
+ asyncio.run(main())
412
+ ```
413
+
414
  ## Evaluation
415
 
416
  `baai-general-embedding` models achieve **state-of-the-art performance on both MTEB and C-MTEB leaderboard!**
onnx/model.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:15b9a8c3da82eddf263df571281166e00e9308fe19d077084b642ebfcaf06d2b
3
+ size 1112459588