import os
import base64
import io
import uuid
from pathlib import Path

from ultralytics import YOLO
import cv2
import torch
import numpy as np
from PIL import Image
from torchvision import transforms
import imageio.v2 as imageio
import torch.nn.functional as F
import gradio as gr

from utils.tools import get_config
from iopaint.single_processing import batch_inpaint_cv2

# Use a cache directory under the current working directory instead of the
# default locations.
# NOTE(review): these are set after the imports above; if any imported package
# reads them at import time they would have no effect there — confirm.
os.environ["TORCH_HOME"] = "./pretrained-model"
os.environ["HUGGINGFACE_HUB_CACHE"] = "./pretrained-model"


def resize_image(input_image_path, width=640, height=640):
    """Load an image file and letterbox it to ``width`` x ``height``.

    The image is scaled to fit inside the target size while keeping its
    aspect ratio; the remaining area is filled with a constant gray border
    split evenly between the two sides.

    Args:
        input_image_path: Path of the image file, read with ``cv2.imread``.
        width: Target width in pixels.
        height: Target height in pixels.

    Returns:
        The resized and padded 3-channel image as loaded by
        ``cv2.imread(..., cv2.IMREAD_COLOR)``.

    Raises:
        gr.Error: If the image cannot be read or resized.
    """
    try:
        img = cv2.imread(input_image_path, cv2.IMREAD_COLOR)
        if img is None:
            raise ValueError(f"could not read image: {input_image_path!r}")

        src_h, src_w = img.shape[:2]

        # Scale ratio (new / old); the smaller ratio makes the image fit
        # inside the target on both axes.
        r = min(height / src_h, width / src_w)
        new_unpad = int(round(src_w * r)), int(round(src_h * r))  # (w, h)

        im = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)

        # Total padding per axis, divided between the two sides.
        dw = (width - new_unpad[0]) / 2
        dh = (height - new_unpad[1]) / 2

        # The -0.1 / +0.1 offsets make an odd total padding round to
        # (n, n + 1) so the output is exactly the requested size.
        top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
        left, right = int(round(dw - 0.1)), int(round(dw + 0.1))

        color = (114, 114, 114)  # color used for padding
        return cv2.copyMakeBorder(
            im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color
        )
    except Exception as e:
        raise gr.Error("Error in resizing image!") from e


def process_images(input_image, append_image, default_class="chair"):
    """Replace the first detected ``default_class`` object with another image.

    The main image is letterboxed, segmented with a YOLOv8 segmentation
    model, and the first instance whose class name equals ``default_class``
    is inpainted away and overlaid with ``append_image``.

    Args:
        input_image: Path of the main image.
        append_image: Path of the object image; must be a PNG in RGBA mode.
        default_class: Class name (as in ``model.names``) of the object to
            replace.

    Returns:
        The merged image as a numpy array, as returned by
        ``repaitingAndMerge``.

    Raises:
        gr.Error: If an input is missing, the object image is not an RGBA
            PNG or cannot be opened, the main image cannot be resized, or no
            ``default_class`` instance is detected.
    """
    if not input_image:
        raise gr.Error("Please upload a main image.")

    if not append_image:
        raise gr.Error("Please upload an object image.")

    # Check that append_image is a PNG file with RGBA mode. The format check
    # is done outside the try block so its specific message is not replaced
    # by the generic validation failure below.
    try:
        with Image.open(append_image) as obj_img:
            is_rgba_png = obj_img.format == 'PNG' and obj_img.mode == 'RGBA'
    except Exception as e:
        raise gr.Error("Failed to validate object image: Upload new image") from e
    if not is_rgba_png:
        raise gr.Error("Please upload a valid PNG file with RGBA mode for the object image.")

    # Static paths
    config_path = Path('configs/config.yaml')
    model_path = Path('pretrained-model/torch_model.p')

    img = resize_image(input_image)
    if img is None:
        raise gr.Error("Failed to decode resized image!")

    H, W, _ = img.shape

    # Load the pretrained YOLOv8m-seg model and run inference.
    model = YOLO('pretrained-model/yolov8m-seg.pt')
    # imgsz is given as (height, width).
    results = model(img, imgsz=(H, W), conf=0.5)
    names = model.names

    kernel = np.ones((5, 5), np.uint8)  # 5x5 kernel for dilation

    for result in results:
        for i, label in enumerate(result.boxes.cls):
            if names[int(label)] != default_class:
                continue

            # .cpu() is a no-op for CPU tensors and is required when
            # inference ran on another device.
            instance_mask = result.masks.data[i].cpu().numpy()
            # Grow the mask so the inpainted region and the pasted object
            # cover the object's border.
            dilated = cv2.dilate(instance_mask, kernel, iterations=2)

            contours, _ = cv2.findContours(
                (dilated == 1).astype(np.uint8),
                cv2.RETR_EXTERNAL,
                cv2.CHAIN_APPROX_SIMPLE,
            )

            # Fallback placement when the mask yields no contour.
            x_point, y_point, width, height = 0, 0, 1, 1
            if contours:
                # Use the largest region so a stray speck in the mask does
                # not decide where the object is pasted.
                largest = max(contours, key=cv2.contourArea)
                x_point, y_point, width, height = cv2.boundingRect(largest)

            # 0/255 mask resized to the dimensions of the resized image.
            resized_mask = cv2.resize(dilated * 255, (img.shape[1], img.shape[0]))

            # Only the first matching instance is processed.
            return repaitingAndMerge(
                append_image,
                str(model_path),
                str(config_path),
                width,
                height,
                x_point,
                y_point,
                img,
                resized_mask,
            )

    # No instance of the requested class was detected.
    raise gr.Error(f'{default_class} object not found in the image')


def repaitingAndMerge(append_image_path, model_path, config_path, width, height, xposition, yposition, input_base, mask_base):
    """Inpaint the masked region and paste the object image over it.

    Args:
        append_image_path: Path of the object image, read with
            ``cv2.IMREAD_UNCHANGED`` and converted from BGRA to RGBA.
        model_path: Not used by this function; kept for the existing call
            signature.
        config_path: Path passed to ``get_config``.
        width: Width the object image is resized to.
        height: Height the object image is resized to.
        xposition: X coordinate of the paste position in the inpainted image.
        yposition: Y coordinate of the paste position in the inpainted image.
        input_base: Image array passed to ``batch_inpaint_cv2``.
        mask_base: Mask array passed to ``batch_inpaint_cv2``.

    Returns:
        The merged image as a numpy array.

    Raises:
        gr.Error: If the object image cannot be read.
    """
    # NOTE(review): the parsed config is not used below; the call is kept so
    # a missing or invalid config file still fails here as before — confirm
    # whether it can be removed.
    get_config(config_path)

    # lama inpainting start
    print("lama inpainting start")
    inpaint_result_np = batch_inpaint_cv2('lama', 'cpu', input_base, mask_base)
    print("lama inpainting end")

    final_image = Image.fromarray(inpaint_result_np)

    print("merge start")
    # IMREAD_UNCHANGED keeps the alpha channel.
    append_image = cv2.imread(append_image_path, cv2.IMREAD_UNCHANGED)
    if append_image is None:
        raise gr.Error("Failed to read object image!")

    # Resize the object image to the target box while preserving transparency.
    resized_image = cv2.resize(append_image, (width, height), interpolation=cv2.INTER_AREA)
    # cv2 loads BGRA; PIL expects RGBA.
    resized_image = cv2.cvtColor(resized_image, cv2.COLOR_BGRA2RGBA)
    append_image_pil = Image.fromarray(resized_image)

    # The object image doubles as the paste mask so its alpha channel
    # controls blending.
    final_image.paste(append_image_pil, (xposition, yposition), append_image_pil)
    print("merge end")

    return np.array(final_image)