alps / utils /cropping_boxes_to_images.py
yumikimi381's picture
Upload folder using huggingface_hub
daf0288 verified
raw
history blame
8.74 kB
import numpy as np
from typing import List
from PIL import Image
import cv2
import numpy.typing as npt
from numpy import uint8
ImageType = npt.NDArray[uint8]
from numpy.typing import NDArray
# not used actually
def get_rotate_crop_image(img: ImageType, points:NDArray[np.float32])-> ImageType:
    """
    Cut a quadrilateral out of ``img`` and warp it into an upright rectangle.

    ``points`` must be a float32 array of shape (4, 2), one (x, y) pair per
    corner (the file's convention names them left_lower, right_lower,
    right_upper, left_upper). The four corners are mapped, in that order,
    onto (0, 0), (width, 0), (width, height) and (0, height) of the output,
    where width and height are the longer of each pair of opposite sides,
    truncated to int.

    When the warped crop is at least 1.5 times as tall as it is wide, it is
    rotated by 90 degrees counter-clockwise before being returned.
    """
    assert len(points) == 4
    # Validate layout and dtype before handing the points to OpenCV.
    assert points.shape == (4, 2), f"Points array must be of shape (4, 2), but got {points.shape}"
    assert points.dtype == np.float32, f"Points array must be of dtype float32, but got {points.dtype}"

    corner_a, corner_b, corner_c, corner_d = points

    # Output size: the longer of the two opposite edges in each direction.
    out_width = int(max(np.linalg.norm(corner_a - corner_b),
                        np.linalg.norm(corner_c - corner_d)))
    out_height = int(max(np.linalg.norm(corner_a - corner_d),
                         np.linalg.norm(corner_b - corner_c)))

    # Corners of the straightened rectangle, in the same order as ``points``.
    target_corners = np.array(
        [
            [0, 0],
            [out_width, 0],
            [out_width, out_height],
            [0, out_height],
        ],
        dtype=np.float32,
    )

    # Homography taking the quadrilateral onto the rectangle, then resample.
    homography = cv2.getPerspectiveTransform(points, target_corners)
    warped = cv2.warpPerspective(
        img,
        homography,
        (out_width, out_height),
        borderMode=cv2.BORDER_REPLICATE,
        flags=cv2.INTER_CUBIC,
    )

    warped_height, warped_width = warped.shape[0:2]
    if warped_height / warped_width >= 1.5:
        # NOTE: rotates counter-clockwise. The original author remarked that
        # k=3 (clockwise) would be the correct rotation; the existing
        # behaviour is kept as-is.
        warped = np.rot90(warped)
    return warped
def get_crop_image(img: ImageType, points:NDArray[np.float32],straight=False)-> ImageType:
    """
    Crop an axis-aligned rectangle out of ``img``; no perspective transform.

    Args:
        img: Image array indexed as ``img[row, column, ...]``.
        points: float32 array of shape (4, 2) holding one (x, y) pair per
            corner (the file's convention names them left_lower,
            right_lower, right_upper, left_upper). ``points[0]`` is taken as
            the (xmin, ymin) corner of the crop and ``points[2]`` as the
            opposite corner.
        straight: When true, the crop spans ``points[0]`` to ``points[2]``.
            Otherwise the crop starts at ``points[0]`` and extends by the
            longer of each pair of opposite side lengths (truncated to int).

    Returns:
        The view ``img[ymin:ymax, xmin:xmax]``. All four bounds are clamped
        to the image, so a box that partly overhangs an edge yields the
        overlapping region instead of wrapping around through negative
        indices.
    """
    assert len(points) == 4
    # Check the shape and dtype of points
    assert points.shape == (4, 2), f"Points array must be of shape (4, 2), but got {points.shape}"
    assert points.dtype == np.float32, f"Points array must be of dtype float32, but got {points.dtype}"

    img_height, img_width = img.shape[:2]

    xmin = int(points[0][0])
    ymin = int(points[0][1])
    if straight:
        xmax = int(points[2][0])
        ymax = int(points[2][1])
    else:
        crop_width = int(
            max(
                np.linalg.norm(points[0] - points[1]),
                np.linalg.norm(points[2] - points[3])))
        crop_height = int(
            max(
                np.linalg.norm(points[0] - points[3]),
                np.linalg.norm(points[1] - points[2])))
        # Extent is measured from the unclamped origin so that clamping the
        # origin below does not shift the far edge of the box.
        xmax = xmin + crop_width
        ymax = ymin + crop_height

    # Negative indices would make NumPy count from the end of the axis and
    # return an empty or unrelated region, so clamp everything to the image.
    xmin = min(max(xmin, 0), img_width)
    xmax = min(max(xmax, 0), img_width)
    ymin = min(max(ymin, 0), img_height)
    ymax = min(max(ymax, 0), img_height)

    return img[ymin:ymax, xmin:xmax]
def cropImages(bxs:List[NDArray[np.float32]], img:Image.Image,straight=False) -> List[ImageType] :
    """
    Crop every box in ``bxs`` out of ``img``.

    Args:
        bxs: Sequence of boxes, each unpackable into exactly four corner
            points (left_lower, right_lower, right_upper, left_upper in the
            file's naming). Each box is rebuilt as an array and passed to
            ``get_crop_image``.
        img: Source image; converted with ``np.array`` before cropping.
        straight: Forwarded to ``get_crop_image``.

    Returns:
        A list with one cropped array per box, in input order. The image is
        converted to an array once, so the crops returned by
        ``get_crop_image`` share that single array rather than each holding
        its own full copy of the image.
    """
    # Loop-invariant: convert the image once instead of once per box.
    img_array = np.array(img)

    images_to_recognizer = []
    for corners in bxs:
        # Unpacking enforces that each box has exactly four points.
        left_lower, right_lower, right_upper, left_upper = corners
        box = np.array([left_lower, right_lower, right_upper, left_upper])
        images_to_recognizer.append(get_crop_image(img_array, box, straight))
    return images_to_recognizer
def crop_an_Image(box:NDArray[np.float32], img:Image.Image) -> ImageType :
    """
    Crop a single box out of ``img``.

    ``box`` must unpack into exactly four corner points (a 4x2 array). The
    corners are re-stacked into an array, the image is converted with
    ``np.array``, and both are handed to ``get_crop_image`` with its default
    ``straight`` setting.
    """
    first, second, third, fourth = box
    corners = np.array([first, second, third, fourth])
    pixels = np.array(img)
    return get_crop_image(pixels, corners)
def get_new_coord(maxx:int,maxy:int,points:NDArray[np.float32]) -> list[int]:
    """
    Return the axis-aligned crop rectangle for a 4x2 box, clamped to the image.

    Args:
        maxx: Upper bound for x coordinates (image width).
        maxy: Upper bound for y coordinates (image height).
        points: 4x2 array of (x, y) corners. ``points[0]`` is taken as the
            origin of the rectangle; its extent is the longer of each pair
            of opposite side lengths, truncated to int.

    Returns:
        ``[xmin, ymin, xmax, ymax]`` with every value clamped to
        ``[0, maxx]`` / ``[0, maxy]``, so a box that overhangs any image edge
        is reduced to the part that lies inside the image.
    """
    crop_width = int(
        max(
            np.linalg.norm(points[0] - points[1]),
            np.linalg.norm(points[2] - points[3])))
    crop_height = int(
        max(
            np.linalg.norm(points[0] - points[3]),
            np.linalg.norm(points[1] - points[2])))

    origin_x = int(points[0][0])
    origin_y = int(points[0][1])

    # The far corner is measured from the unclamped origin so that clamping
    # the origin does not move the far edge of the box.
    far_x = origin_x + crop_width
    far_y = origin_y + crop_height

    xmin = min(max(origin_x, 0), maxx)
    ymin = min(max(origin_y, 0), maxy)
    xmax = min(max(far_x, 0), maxx)
    ymax = min(max(far_y, 0), maxy)
    return [xmin, ymin, xmax, ymax]
MARGIN_FACTOR = 1.4


def get_crop_image_with_extra_margin(img: ImageType, points:NDArray[np.float32],straight=False, marginfactor = MARGIN_FACTOR)-> ImageType:
    """
    Crop a box out of ``img`` and centre it on a larger white square canvas.

    No perspective transform or rotation is applied; the crop is an
    axis-aligned rectangle.

    Args:
        img: Image array indexed as ``img[row, column, ...]``.
        points: Array of four (x, y) corners (the file's convention names
            them left_lower, right_lower, right_upper, left_upper).
            ``points[0]`` is taken as the (xmin, ymin) corner of the crop and
            ``points[2]`` as the opposite corner.
        straight: When true, the crop spans ``points[0]`` to ``points[2]``.
            Otherwise it starts at ``points[0]`` and extends by the longer of
            each pair of opposite side lengths (truncated to int).
        marginfactor: Canvas side as a multiple of the crop's longer side,
            used when the crop's width/height ratio is at least 1.6.
            Narrower crops always get a canvas of 3x the longer side.
            Values below 1 would make the canvas smaller than the crop.

    Returns:
        A new uint8 array filled with 255 whose side length is
        ``int(longer_side * factor)``, with the crop pasted in the centre.
        The canvas has the same trailing (channel) dimensions as the crop.

    Raises:
        ZeroDivisionError: If the clamped crop has zero height.
    """
    assert len(points) == 4

    img_height, img_width = img.shape[:2]

    xmin = int(points[0][0])
    ymin = int(points[0][1])
    if straight:
        xmax = int(points[2][0])
        ymax = int(points[2][1])
    else:
        crop_width = int(
            max(
                np.linalg.norm(points[0] - points[1]),
                np.linalg.norm(points[2] - points[3])))
        crop_height = int(
            max(
                np.linalg.norm(points[0] - points[3]),
                np.linalg.norm(points[1] - points[2])))
        # Measured from the unclamped origin so clamping does not shift the
        # far edge of the box.
        xmax = xmin + crop_width
        ymax = ymin + crop_height

    # Negative indices would make NumPy count from the end of the axis and
    # return an empty or unrelated region, so clamp everything to the image.
    xmin = min(max(xmin, 0), img_width)
    xmax = min(max(xmax, 0), img_width)
    ymin = min(max(ymin, 0), img_height)
    ymax = min(max(ymax, 0), img_height)

    dst_img = img[ymin:ymax, xmin:xmax]
    height, width = dst_img.shape[:2]

    # Narrow / near-square crops get a fixed 3x canvas; wide crops use the
    # caller-supplied margin factor.
    scale = 3 if width / height < 1.6 else marginfactor
    side = int(max(height, width) * scale)

    # White canvas with the same channel layout as the crop.
    new_img = np.full((side, side) + dst_img.shape[2:], fill_value=255, dtype=np.uint8)

    # Centre the crop on the canvas.
    y_offset = (side - height) // 2
    x_offset = (side - width) // 2
    new_img[y_offset:y_offset + height, x_offset:x_offset + width] = dst_img
    return new_img
def cropImageExtraMargin(bxs:List[NDArray[np.float32]], img:Image.Image,straight=False, margin = MARGIN_FACTOR ) -> List[ImageType] :
    """
    Crop every box in ``bxs`` out of ``img``, each padded onto a white canvas.

    Args:
        bxs: Sequence of boxes, each unpackable into exactly four corner
            points (left_lower, right_lower, right_upper, left_upper in the
            file's naming).
        img: Source image; converted with ``np.array`` before cropping.
        straight: Forwarded to ``get_crop_image_with_extra_margin``.
        margin: Forwarded as that function's ``marginfactor`` argument.

    Returns:
        A list with one padded crop per box, in input order.
    """
    # Loop-invariant: convert the image once instead of once per box.
    img_array = np.array(img)

    images_to_recognizer = []
    for corners in bxs:
        # Unpacking enforces that each box has exactly four points.
        left_lower, right_lower, right_upper, left_upper = corners
        box = np.array([left_lower, right_lower, right_upper, left_upper])
        images_to_recognizer.append(
            get_crop_image_with_extra_margin(img_array, box, straight, margin)
        )
    return images_to_recognizer