amaye15 committed
Commit aea7238
1 Parent(s): a4af2d9

Automatic Batching

Refactors handler.py to generate image embeddings in fixed-size chunks: __init__ gains a default_batch_size argument, a new _process_batch helper embeds one chunk at a time, and __call__ accepts an optional per-request "batch_size" field.

Files changed (1)
  1. handler.py  +86 -16
handler.py CHANGED
@@ -1,12 +1,72 @@
+ # import torch
+ # from typing import Dict, Any
+ # from PIL import Image
+ # import base64
+ # from io import BytesIO
+
+
+ # class EndpointHandler:
+ #     def __init__(self, path: str = ""):
+ #         # Import your model and processor inside the class
+ #         from colpali_engine.models import ColQwen2, ColQwen2Processor
+
+ #         # Load the model and processor
+ #         self.model = ColQwen2.from_pretrained(
+ #             path,
+ #             torch_dtype=torch.bfloat16,
+ #         ).eval()
+ #         self.processor = ColQwen2Processor.from_pretrained(path)
+
+ #         # Determine the device
+ #         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+ #         self.model.to(self.device)
+
+ #     def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
+ #         # Extract images from the input data
+ #         images_data = data.get("inputs", [])
+
+ #         if not images_data:
+ #             return {"error": "No images provided in 'inputs'."}
+
+ #         # Process images
+ #         images = []
+ #         for img_data in images_data:
+ #             if isinstance(img_data, str):
+ #                 try:
+ #                     # Assume base64-encoded image
+ #                     image_bytes = base64.b64decode(img_data)
+ #                     image = Image.open(BytesIO(image_bytes)).convert("RGB")
+ #                     images.append(image)
+ #                 except Exception as e:
+ #                     return {"error": f"Invalid image data: {e}"}
+ #             else:
+ #                 return {"error": "Images should be base64-encoded strings."}
+
+ #         # Prepare inputs
+ #         batch_images = self.processor.process_images(images)
+
+ #         # Move tensors to the device
+ #         batch_images = {k: v.to(self.device) for k, v in batch_images.items()}
+
+ #         # Generate embeddings
+ #         with torch.no_grad():
+ #             image_embeddings = self.model(**batch_images)
+
+ #         # Convert embeddings to a list
+ #         embeddings_list = image_embeddings.cpu().tolist()
+
+ #         return {"embeddings": embeddings_list}
+
+
  import torch
- from typing import Dict, Any
+ from typing import Dict, Any, List
  from PIL import Image
  import base64
  from io import BytesIO


  class EndpointHandler:
-     def __init__(self, path: str = ""):
+     def __init__(self, path: str = "", default_batch_size: int = 4):
          # Import your model and processor inside the class
          from colpali_engine.models import ColQwen2, ColQwen2Processor

@@ -21,14 +81,30 @@ class EndpointHandler:
          self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
          self.model.to(self.device)

+         # Set default batch size
+         self.default_batch_size = default_batch_size
+
+     def _process_batch(self, images: List[Image.Image]) -> List[List[float]]:
+         # Prepare inputs for a batch
+         batch_images = self.processor.process_images(images)
+         batch_images = {k: v.to(self.device) for k, v in batch_images.items()}
+
+         # Generate embeddings
+         with torch.no_grad():
+             image_embeddings = self.model(**batch_images)
+
+         # Convert embeddings to list format
+         return image_embeddings.cpu().tolist()
+
      def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
          # Extract images from the input data
          images_data = data.get("inputs", [])
+         batch_size = data.get("batch_size", self.default_batch_size)

          if not images_data:
              return {"error": "No images provided in 'inputs'."}

-         # Process images
+         # Decode and validate images
          images = []
          for img_data in images_data:
              if isinstance(img_data, str):
@@ -42,17 +118,11 @@
              else:
                  return {"error": "Images should be base64-encoded strings."}

-         # Prepare inputs
-         batch_images = self.processor.process_images(images)
-
-         # Move tensors to the device
-         batch_images = {k: v.to(self.device) for k, v in batch_images.items()}
-
-         # Generate embeddings
-         with torch.no_grad():
-             image_embeddings = self.model(**batch_images)
-
-         # Convert embeddings to a list
-         embeddings_list = image_embeddings.cpu().tolist()
+         # Process in batches with the specified or default batch size
+         embeddings = []
+         for i in range(0, len(images), batch_size):
+             batch_images = images[i : i + batch_size]
+             batch_embeddings = self._process_batch(batch_images)
+             embeddings.extend(batch_embeddings)

-         return {"embeddings": embeddings_list}
+         return {"embeddings": embeddings}
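For reference, a minimal local smoke test of the batched handler might look like the sketch below. It is not part of the commit: the checkpoint id, the image filenames, and the encode_image helper are placeholder assumptions, and it presumes colpali_engine is installed and this handler.py is on the import path.

import base64

from handler import EndpointHandler


def encode_image(path: str) -> str:
    # The handler expects each image as a base64-encoded string.
    with open(path, "rb") as f:
        return base64.b64encode(f.read()).decode("utf-8")


# Placeholder path; in a deployed endpoint this is the model repository directory.
handler = EndpointHandler(path="vidore/colqwen2-v0.1", default_batch_size=4)

payload = {
    "inputs": [encode_image(p) for p in ["page1.png", "page2.png", "page3.png"]],
    "batch_size": 2,  # optional; falls back to default_batch_size when omitted
}

result = handler(payload)
if "error" in result:
    print(result["error"])
else:
    # One multi-vector embedding (token count x dim) per input image.
    print(len(result["embeddings"]))

Chunking the request this way bounds peak GPU memory to a single batch_size-sized forward pass, and any padding applied by processor.process_images is computed per chunk rather than against the longest image in the whole request.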