Initial commit

Browse files

Files changed (5) hide show

.gitattributes +1 -0
README.md +99 -9
mxbai-embed-large-v1.mlpackage/Data/com.apple.CoreML/model.mlmodel +3 -0
mxbai-embed-large-v1.mlpackage/Data/com.apple.CoreML/weights/weight.bin +3 -0
mxbai-embed-large-v1.mlpackage/Manifest.json +18 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+mxbai-embed-large-v1.mlpackage filter=lfs diff=lfs merge=lfs -text

README.md CHANGED Viewed

@@ -1,9 +1,99 @@
----
-license: apache-2.0
-base_model:
-- mixedbread-ai/mxbai-embed-large-v1
-tags:
-- coreml
-- embedding model
-- bert
----

+# CoreML Conversion of the mxbai-embed-large-v1 sentence embedding model
+After extensive testing (and a lot of debugging with ChatGPT), I was able to convert the mxbai-embed-large-v1 model to CoreML and run it mostly on the GPU.
+```python
+import torch
+from transformers import AutoModel, AutoTokenizer
+import coremltools as ct
+# Define a wrapper class for the AutoModel to return only the last_hidden_state
+class ModelWrapper(torch.nn.Module):
+    def __init__(self, model):
+        super(ModelWrapper, self).__init__()
+        self.model = model
+    def forward(self, input_ids, attention_mask):
+        # Extract the 'last_hidden_state' from the model output
+        output = self.model(input_ids=input_ids, attention_mask=attention_mask)
+        return output.last_hidden_state  # or use 'pooler_output' if needed
+# Load your SentenceTransformer model and tokenizer
+model_name = "mixedbread-ai/mxbai-embed-large-v1"  # Replace with your model
+model = AutoModel.from_pretrained(model_name)
+model.eval()
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+# Wrap the model to return only the tensor output
+wrapped_model = ModelWrapper(model)
+wrapped_model.eval()
+# Sample input to export the model
+dummy_input = tokenizer("This is a sample input", return_tensors="pt")
+# Trace the model using tensor inputs (input_ids, attention_mask)
+traced_model = torch.jit.trace(wrapped_model, (dummy_input['input_ids'], dummy_input['attention_mask']))
+# Convert the traced PyTorch model to CoreML using the ML Program format
+model_from_torch = ct.convert(
+    traced_model,
+    inputs=[
+        ct.TensorType(name="input_ids", shape=(1, ct.RangeDim(1, 512))),
+        ct.TensorType(name="attention_mask", shape=(1, ct.RangeDim(1, 512)))
+    ],
+    minimum_deployment_target=ct.target.iOS17,
+    convert_to="mlprogram",
+    compute_precision=ct.precision.FLOAT32
+)
+# Save the CoreML model as an mlpackage
+model_from_torch.save("mxbai-embed-large-v1.mlpackage")
+```
+It can be run like this:
+```Python
+import coremltools as ct
+from transformers import AutoTokenizer
+import numpy as np
+# Load the CoreML model
+model = ct.models.MLModel("mxbai-embed-large-v1.mlpackage")
+# Load the tokenizer
+tokenizer = AutoTokenizer.from_pretrained("mixedbread-ai/mxbai-embed-large-v1")
+# Prepare some input text
+input_text = "This is a test sentence for the CoreML model"
+inputs = tokenizer(input_text, return_tensors="np", padding=True, truncation=True, max_length=512)
+# Extract input tensors
+input_ids = inputs['input_ids'].astype(np.float32)  # CoreML expects float32
+attention_mask = inputs['attention_mask'].astype(np.float32)
+# Prepare inputs for the CoreML model
+coreml_input = {"input_ids": input_ids, "attention_mask": attention_mask}
+predictions = model.predict(coreml_input)
+hidden_states = predictions['hidden_states']
+cls_embedding = hidden_states[0, 0, :]
+np.set_printoptions(threshold=np.inf)
+# Print the CLS token embedding, which is a 1024-dimensional vector
+print("CLS Token Embedding:", cls_embedding, len(cls_embedding))
+```
+I verified the output with ollama:
+```
+curl http://localhost:11434/api/embeddings -d '{
+    "model": "mxbai-embed-large",
+        "prompt": "This is a test sentence for the CoreML model"
+    }'
+```
+Environment:
+- Python 3.11
+- coremltools 8.0
+- sentence-transformers 3.1.0
+- transformers 4.44.2

mxbai-embed-large-v1.mlpackage/Data/com.apple.CoreML/model.mlmodel ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5d1bae3b1c903b800466ccaffe181f50a888b920730c3b6a1ec1d1f2020c3dc4
+size 409884

mxbai-embed-large-v1.mlpackage/Data/com.apple.CoreML/weights/weight.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8d5a25f7de6794e1d89755e79d9b6eb37921baf723780713d62851f60058db48
+size 1336394112

mxbai-embed-large-v1.mlpackage/Manifest.json ADDED Viewed

	@@ -0,0 +1,18 @@

+{
+    "fileFormatVersion": "1.0.0",
+    "itemInfoEntries": {
+        "D33F0FF9-224F-430E-AB16-D6E0AF12FDA8": {
+            "author": "com.apple.CoreML",
+            "description": "CoreML Model Weights",
+            "name": "weights",
+            "path": "com.apple.CoreML/weights"
+        },
+        "FF0B3830-4E7E-4515-8982-24A9E6B4F2FE": {
+            "author": "com.apple.CoreML",
+            "description": "CoreML Model Specification",
+            "name": "model.mlmodel",
+            "path": "com.apple.CoreML/model.mlmodel"
+        }
+    },
+    "rootModelIdentifier": "FF0B3830-4E7E-4515-8982-24A9E6B4F2FE"
+}