Slep committed on
Commit
6982d5b
1 Parent(s): fa2f5fd

Upload processor

Browse files
Files changed (2) hide show
  1. preprocessor_config.json +19 -0
  2. processor.py +68 -0
preprocessor_config.json ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "auto_map": {
3
+ "AutoImageProcessor": "processor.CondViTProcessor",
4
+ "AutoProcessor": "processor.CondViTProcessor"
5
+ },
6
+ "bkg_color": 255,
7
+ "image_mean": [
8
+ 0.48145466,
9
+ 0.4578275,
10
+ 0.40821073
11
+ ],
12
+ "image_processor_type": "CondViTProcessor",
13
+ "image_std": [
14
+ 0.26862954,
15
+ 0.26130258,
16
+ 0.27577711
17
+ ],
18
+ "input_resolution": 224
19
+ }
processor.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers.image_processing_utils import ImageProcessingMixin, BatchFeature
2
+
3
+ from torchvision.transforms import transforms as tf
4
+ import torchvision.transforms.functional as F
5
+ from PIL import Image
6
+ import torch
7
+
8
+
9
class CondViTProcessor(ImageProcessingMixin):
    """Image processor that square-pads, resizes, tensorizes and normalizes images.

    Texts given to ``__call__`` are passed through unchanged next to the
    processed images.

    Parameters
    ----------
    bkg_color : int
        Fill value used when padding an image to a square.
    input_resolution : int
        Size handed to the resize step that follows the padding.
    image_mean : sequence of float
        Per-channel mean used by the normalization step.
    image_std : sequence of float
        Per-channel standard deviation used by the normalization step.
    **kwargs
        Forwarded to ``ImageProcessingMixin.__init__``.
    """

    def __init__(
        self,
        bkg_color=255,
        input_resolution=224,
        image_mean=(0.48145466, 0.4578275, 0.40821073),
        image_std=(0.26862954, 0.26130258, 0.27577711),
        **kwargs,
    ):
        super().__init__(**kwargs)

        self.bkg_color = bkg_color
        self.input_resolution = input_resolution
        self.image_mean = image_mean
        self.image_std = image_std

    def square_pad(self, image):
        """Pad ``image`` with ``bkg_color`` so that both sides equal the longer one.

        The original content is centered; when the missing amount is odd, the
        extra pixel goes to the right/bottom side.

        Parameters
        ----------
        image : Image.Image
            Image to pad. Its ``size`` attribute is read as one value per axis.

        Returns
        -------
        The padded image, as returned by ``torchvision``'s ``pad``.
        """
        max_wh = max(image.size)
        # Floor half of the missing size goes before the content on each axis...
        p_left, p_top = [(max_wh - s) // 2 for s in image.size]
        # ...and whatever is still missing goes after it.
        p_right, p_bottom = [
            max_wh - (s + pad) for s, pad in zip(image.size, [p_left, p_top])
        ]
        padding = (p_left, p_top, p_right, p_bottom)
        return F.pad(image, padding, self.bkg_color, "constant")

    def process_img(self, image):
        """Run the full preprocessing pipeline on a single image.

        Steps, in order: square padding, resize to ``input_resolution``,
        conversion to a tensor, normalization with ``image_mean`` and
        ``image_std``.

        Parameters
        ----------
        image : Image.Image
            Image to process.

        Returns
        -------
        torch.Tensor
            Normalized image tensor (C H W).
        """
        img = self.square_pad(image)
        img = F.resize(img, self.input_resolution)
        img = F.to_tensor(img)
        img = F.normalize(img, self.image_mean, self.image_std)
        return img

    def __call__(self, images, texts):
        """
        Parameters
        ----------
        images : Union[Image.Image, List[Image.Image]]
            Image or list of images to process
        texts : Union[str, List[str]]
            Text or list of texts to process. Pass through, no operation is performed.

        Returns
        -------
        BatchFeature
            pixel_values : torch.Tensor
                Processed image tensor: (B C H W) for a list of images,
                (C H W) without a batch dimension for a single image.
            texts : Union[str, List[str]]
        """
        # Single image: return right away. Falling through to the batch path
        # would iterate over something that is not a sequence of images.
        if isinstance(images, Image.Image):
            return BatchFeature(
                data={"pixel_values": self.process_img(images), "texts": texts}
            )

        # Sequence of images: process each one and stack along a new batch axis.
        return BatchFeature(
            data={
                "pixel_values": torch.stack([self.process_img(img) for img in images]),
                "texts": texts,
            }
        )