stupidog04
/

krenzcolor_chkpt_classifier

@@ -1,6 +1,8 @@
 from torchvision import transforms
-from pair_classification import PairClassificationPipeline
-from typing import  Dict
 class PreTrainedPipeline():
@@ -8,17 +10,13 @@ class PreTrainedPipeline():
         """
         Initialize model
         """
-        model_flag = 'google/vit-base-patch16-224-in21k'
         # self.processor = feature_extractor = ViTFeatureExtractor.from_pretrained(model_flag)
-        self.pipe = pipeline("pair-classification", model=model_flag , feature_extractor=model_flag ,
-                model_kwargs={'num_labels':len(label2id),
-                            'label2id':label2id,
-                            'id2label':id2label,
-                            'num_channels':6,
-                            'ignore_mismatched_sizes': True })
-        self.model = self.pipe.model.from_pretrained(path)
     def __call__(self, inputs):
         """
         Args:
@@ -30,4 +28,30 @@ class PreTrainedPipeline():
         """
         # input_values = self.processor(inputs, return_tensors="pt", sampling_rate=self.sampling_rate).input_values  # Batch size 1
         # logits = self.model(input_values).logits.cpu().detach().numpy()[0]
-        return self.pipe(inputs)

 from torchvision import transforms
+from transformers import ViTFeatureExtractor, ViTForImageClassification, ViTConfig
+from transformers import ImageClassificationPipeline
+import torch
 class PreTrainedPipeline():
         """
         Initialize model
         """
         # self.processor = feature_extractor = ViTFeatureExtractor.from_pretrained(model_flag)
+        model_flag = 'google/vit-base-patch16-224-in21k'
+        # model_flag = 'google/vit-base-patch16-384'
+        self.feature_extractor = ViTFeatureExtractor.from_pretrained(model_flag)
+        self.model = ViTForImageClassification.from_pretrained(path)
+        self.pipe = PairClassificationPipeline(self.model, feature_extractor=self.feature_extractor)
     def __call__(self, inputs):
         """
         Args:
         """
         # input_values = self.processor(inputs, return_tensors="pt", sampling_rate=self.sampling_rate).input_values  # Batch size 1
         # logits = self.model(input_values).logits.cpu().detach().numpy()[0]
+        return self.pipe(inputs)
+class PairClassificationPipeline(ImageClassificationPipeline):
+    pipe_to_tensor = transforms.ToTensor()
+    pipe_to_pil = transforms.ToPILImage()
+    def preprocess(self, image):
+        left_image, right_image = self.horizontal_split_image(image)
+        model_inputs = self.extract_split_feature(left_image, right_image)
+        # model_inputs = super().preprocess(image)
+        # print(model_inputs['pixel_values'].shape)
+        return model_inputs
+    def horizontal_split_image(self, image):
+        # image = image.resize((448,224))
+        w, h = image.size
+        half_w = w//2
+        left_image = image.crop([0,0,half_w,h])
+        right_image = image.crop([half_w,0,2*half_w,h])
+        return left_image, right_image
+    def extract_split_feature(self, left_image, right_image):
+        model_inputs = self.feature_extractor(images=left_image, return_tensors=self.framework)
+        right_inputs = self.feature_extractor(images=right_image, return_tensors=self.framework)
+        model_inputs['pixel_values'] = torch.cat([model_inputs['pixel_values'],right_inputs['pixel_values']], dim=1)
+        return model_inputs