File size: 5,719 Bytes
89c278d
 
 
 
 
 
 
d06defe
e9d702e
89c278d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d06defe
89c278d
 
d06defe
 
 
89c278d
d06defe
 
89c278d
 
 
 
 
d06defe
89c278d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e9d702e
d06defe
 
89c278d
 
 
d06defe
89c278d
e9d702e
89c278d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e9d702e
89c278d
e9d702e
 
 
 
89c278d
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
import os
import io
from ultralytics import YOLO
import cv2
import numpy as np
from PIL import Image
from iopaint.single_processing import batch_inpaint_cv2
import gradio as gr
from bgremover import process

# set current working directory cache instead of default
os.environ["TORCH_HOME"] = "./pretrained-model"
os.environ["HUGGINGFACE_HUB_CACHE"] = "./pretrained-model"

def resize_image(input_image_path, width=640, height=640):
    """Letterbox-resize an image file to exactly (width, height).

    The image is scaled to fit inside the target size while preserving its
    aspect ratio, then padded with gray (114, 114, 114) borders — the YOLO
    "letterbox" convention, so detection geometry is not distorted.

    Args:
        input_image_path: Path to the image file on disk.
        width: Target output width in pixels.
        height: Target output height in pixels.

    Returns:
        The letterboxed image as a BGR numpy array of shape (height, width, 3).

    Raises:
        gr.Error: If the file cannot be read or resizing fails.
    """
    try:
        img = cv2.imread(input_image_path, cv2.IMREAD_COLOR)
        if img is None:
            # cv2.imread signals failure by returning None, not by raising
            raise gr.Error("Could not read the input image file!")

        h, w = img.shape[:2]  # current shape (height, width)

        # Scale ratio (new / old), limited by the tighter dimension.
        # Note: height pairs with h and width with w — the previous version
        # mixed the axes, which was only correct for square targets.
        r = min(height / h, width / w)
        new_unpad = int(round(w * r)), int(round(h * r))  # (width, height)

        # Resize, preserving aspect ratio
        im = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)

        # Split the leftover space evenly between the two sides
        color = (114, 114, 114)  # padding color
        dw = (width - new_unpad[0]) / 2
        dh = (height - new_unpad[1]) / 2
        # The +/-0.1 nudge makes the two rounded halves sum exactly to the
        # required padding when dw/dh end in .5
        top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
        left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
        return cv2.copyMakeBorder(im, top, bottom, left, right,
                                  cv2.BORDER_CONSTANT, value=color)

    except gr.Error:
        raise  # already a user-facing error; don't re-wrap it
    except Exception as e:
        # Chain the original cause so the real traceback is preserved
        raise gr.Error("Error in resizing image!") from e


def process_images(input_image, append_image, default_class="chair"):
    """Detect a `default_class` object in the main image, inpaint it away,
    and paste the append image at the detected object's bounding box.

    Args:
        input_image: Path to the main image file.
        append_image: Path to the replacement-object image file.
        default_class: YOLO class name to detect and replace.

    Returns:
        The composited result as a numpy array.

    Raises:
        gr.Error: If either input is missing, decoding fails, or no object
            of `default_class` is found in the image.
    """
    if not input_image:
        raise gr.Error("Please upload a main image.")
    if not append_image:
        raise gr.Error("Please upload an object image.")

    # Letterbox the main image to the model input size
    img = resize_image(input_image)
    if img is None:
        raise gr.Error("Failed to decode resized image!")

    H, W, _ = img.shape

    # Pretrained YOLOv8m segmentation model
    model = YOLO('pretrained-model/yolov8m-seg.pt')

    # Run inference; keep detections with confidence >= 0.5
    results = model(img, imgsz=(W, H), conf=0.5)
    names = model.names

    kernel = np.ones((5, 5), np.uint8)  # 5x5 dilation kernel

    for result in results:
        for i, label in enumerate(result.boxes.cls):
            if names[int(label)] != default_class:
                continue

            # Extract the mask once (the previous version pulled the same
            # tensor twice). .cpu() is a no-op for CPU tensors but keeps
            # this working if inference ran on a GPU.
            mask = result.masks.data[i].cpu().numpy()
            # Dilate so the inpainting region fully covers the object edges
            dilated_mask = cv2.dilate(mask, kernel, iterations=2)

            # Resize the mask to image resolution BEFORE measuring the
            # bounding box, so the paste position below is expressed in
            # image coordinates rather than model-mask coordinates.
            resized_mask = cv2.resize(dilated_mask, (img.shape[1], img.shape[0]))

            # Bounding box of the largest connected region (the previous
            # loop silently kept only the last contour found)
            contours, _ = cv2.findContours((resized_mask > 0.5).astype(np.uint8),
                                           cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
            x_point, y_point, width, height = 0, 0, 1, 1
            if contours:
                largest = max(contours, key=cv2.contourArea)
                x_point, y_point, width, height = cv2.boundingRect(largest)

            # Inpaint the object away and merge in the replacement;
            # the inpainting mask is scaled to 0/255 as before.
            return repaitingAndMerge(append_image, width, height,
                                     x_point, y_point, img, resized_mask * 255)

    # No detection matched the requested class
    raise gr.Error(f'{default_class} object not found in the image')

def repaitingAndMerge(append_image_path, width, height, xposition, yposition, input_base, mask_base):
    """Inpaint the masked region of `input_base` with LaMa, then paste the
    background-removed append image at the given position.

    Args:
        append_image_path: Path to the image to paste in.
        width: Width (pixels) to resize the append image to.
        height: Height (pixels) to resize the append image to.
        xposition: X of the top-left paste position on the inpainted image.
        yposition: Y of the top-left paste position on the inpainted image.
        input_base: Base image as a numpy array.
        mask_base: Inpainting mask as a numpy array (non-zero = repaint).

    Returns:
        The final composited image as a numpy array.

    Raises:
        gr.Error: If the append image cannot be read from disk.
    """
    print("lama inpainting start")
    # Remove the detected object with the LaMa inpainting model on CPU
    inpaint_result_np = batch_inpaint_cv2('lama', 'cpu', input_base, mask_base)
    print("lama inpainting end")

    final_image = Image.fromarray(inpaint_result_np)

    print("merge start")
    # IMREAD_UNCHANGED preserves the alpha channel if the file has one
    append_image = cv2.imread(append_image_path, cv2.IMREAD_UNCHANGED)
    if append_image is None:
        # cv2.imread returns None on failure; fail with a clear message
        # instead of a cryptic cv2.resize assertion
        raise gr.Error("Could not read the object image file!")

    # Resize to the detected object's footprint
    resized_image = cv2.resize(append_image, (width, height), interpolation=cv2.INTER_AREA)
    # NOTE(review): assumes a 4-channel (BGRA) input — a 3-channel image
    # would make this conversion fail; confirm upstream guarantees alpha.
    resized_image = cv2.cvtColor(resized_image, cv2.COLOR_BGRA2RGBA)

    # Strip the background so only the object itself gets pasted
    append_image_pil = process(resized_image)

    # Use the append image's own alpha channel as the paste mask
    final_image.paste(append_image_pil, (xposition, yposition), append_image_pil)
    print("merge end")

    # The previous version encoded the result into an in-memory PNG buffer
    # and then discarded it; returning the array directly is equivalent.
    return np.array(final_image)