import os
import base64
import io
from ultralytics import YOLO
import cv2
import torch
import numpy as np
from PIL import Image
from trainer import Trainer
from utils.tools import get_config
from iopaint.single_processing import batch_inpaint_cv2
from pathlib import Path

# Cache pretrained models in the working directory instead of the default location
os.environ["TORCH_HOME"] = "./pretrained-model"
os.environ["HUGGINGFACE_HUB_CACHE"] = "./pretrained-model"
def resize_image(input_image_path, width=640, height=640):
    """Letterbox-resizes an image from disk to (width, height) and returns it."""
    try:
        img = cv2.imread(input_image_path, cv2.IMREAD_COLOR)
        if img is None:
            raise ValueError(f"Could not read image: {input_image_path}")
        shape = img.shape[:2]  # current shape [height, width]
        new_shape = (height, width)  # target shape [height, width]
        # Scale ratio (new / old), limited by the tighter dimension
        r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
        new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))  # (width, height)
        # Resize while maintaining the aspect ratio
        im = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)
        # Pad the remainder with a neutral gray border
        color = (114, 114, 114)
        dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding
        # Divide padding between the two sides
        dw /= 2
        dh /= 2
        top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
        left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
        im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)
        return im
    except Exception as e:
        print(f"Error resizing image: {e}")
        return None  # Or handle differently as needed
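# Worked example (illustrative numbers, not from the app): a 1280x720 source
# letterboxed to 640x640 gives r = min(640/720, 640/1280) = 0.5, so
# new_unpad = (640, 360), dw = 0, and dh = (640 - 360) / 2 = 140 px of gray
# padding on the top and bottom, with no horizontal padding.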
def load_weights(path, device):
    # map_location keeps CPU-only hosts from failing on CUDA-saved checkpoints
    model_weights = torch.load(path, map_location=device)
    return {
        k: v.to(device)
        for k, v in model_weights.items()
    }
# Function to convert an OpenCV image to base64
def convert_image_to_base64(image):
    # Encode the image to PNG bytes
    _, buffer = cv2.imencode('.png', image)
    # Convert bytes to base64
    image_base64 = base64.b64encode(buffer).decode('utf-8')
    return image_base64

def convert_to_base64(image):
    # Read the image file object as binary data
    image_data = image.read()
    # Encode the binary data as base64
    base64_encoded = base64.b64encode(image_data).decode('utf-8')
    return base64_encoded

def convert_to_base64_file(image):
    # Convert the image to PNG binary data
    image_data = cv2.imencode('.png', image)[1].tobytes()
    # Encode the binary data as base64
    base64_encoded = base64.b64encode(image_data).decode('utf-8')
    return base64_encoded
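# Round-trip sketch (illustrative, not part of the app): a string produced by the
# helpers above can be decoded back to an OpenCV image with
#   cv2.imdecode(np.frombuffer(base64.b64decode(s), np.uint8), cv2.IMREAD_COLOR)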
def process_images(input_image, append_image, default_class="chair"):
    # Static paths
    config_path = Path('configs/config.yaml')
    model_path = Path('pretrained-model/torch_model.p')
    # Letterbox-resize the input image to the detector's expected size
    img = resize_image(input_image)
    if img is None:
        return {'error': 'Failed to read or resize the input image'}, 419
    H, W, _ = img.shape
    x_point = 0
    y_point = 0
    width = 1
    height = 1
    # Load the pretrained YOLOv8m segmentation model
    model = YOLO('pretrained-model/yolov8m-seg.pt')
    # Run inference; imgsz expects (height, width)
    results = model(img, imgsz=(H, W), conf=0.5)  # confidence threshold 0.5
    names = model.names
    class_found = False
    for result in results:
        if result.masks is None:
            continue  # no segmentation masks in this result
        for i, label in enumerate(result.boxes.cls):
            # Check if the label matches the requested class
            if names[int(label)] == default_class:
                class_found = True
                # Convert the mask tensor to a numpy array
                chair_mask_np = result.masks.data[i].cpu().numpy()
                kernel = np.ones((5, 5), np.uint8)  # 5x5 kernel for dilation
                chair_mask_np = cv2.dilate(chair_mask_np, kernel, iterations=2)
                # Find contours to get the bounding box
                contours, _ = cv2.findContours((chair_mask_np > 0.5).astype(np.uint8),
                                               cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
                # Use the largest contour as the object's bounding box
                if contours:
                    largest = max(contours, key=cv2.contourArea)
                    x_point, y_point, width, height = cv2.boundingRect(largest)
                # Get the corresponding mask, scaled to 0-255
                mask = result.masks.data[i].cpu().numpy() * 255
                dilated_mask = cv2.dilate(mask, kernel, iterations=2)
                # Resize the mask to match the dimensions of the original image
                resized_mask = cv2.resize(dilated_mask, (img.shape[1], img.shape[0]))
                # Inpaint the detected object away, then paste the append image in its place
                output_base64 = repainting_and_merge(append_image, str(model_path), str(config_path),
                                                     width, height, x_point, y_point, img, resized_mask)
                # Return the base64-encoded output image in the API response
                return output_base64
    # The requested class was not found in the prediction
    if not class_found:
        return {'message': f'{default_class} object not found in the image'}, 200
def repainting_and_merge(append_image_path, model_path, config_path, width, height, xposition, yposition, input_base, mask_base):
    config = get_config(config_path)
    device = torch.device("cpu")
    trainer = Trainer(config)
    trainer.load_state_dict(load_weights(model_path, device), strict=False)
    trainer.eval()
    # LaMa inpainting: remove the masked object from the base image
    print("lama inpainting start")
    inpaint_result_np = batch_inpaint_cv2('lama', 'cpu', input_base, mask_base)
    print("lama inpainting end")
    # Create a PIL Image from the inpainted NumPy array
    final_image = Image.fromarray(inpaint_result_np)
    print("merge start")
    # Load the append image, keeping any alpha channel
    append_image = cv2.imread(append_image_path, cv2.IMREAD_UNCHANGED)
    cv2.imwrite('appended-image.png', append_image)
    # Resize the append image to the detected object's bounding box
    resized_image = cv2.resize(append_image, (width, height), interpolation=cv2.INTER_AREA)
    # Convert to RGBA, adding an opaque alpha channel if the source has none
    if resized_image.shape[2] == 4:
        resized_image = cv2.cvtColor(resized_image, cv2.COLOR_BGRA2RGBA)
    else:
        resized_image = cv2.cvtColor(resized_image, cv2.COLOR_BGR2RGBA)
    # Create a PIL Image from the resized image, preserving transparency
    append_image_pil = Image.fromarray(resized_image)
    # Paste the append image onto the inpainted image, using its alpha as the mask
    final_image.paste(append_image_pil, (xposition, yposition), append_image_pil)
    print("merge end")
    # Encode the final image as a base64 PNG, matching what process_images returns
    with io.BytesIO() as output_buffer:
        final_image.save(output_buffer, format='PNG')
        output_base64 = base64.b64encode(output_buffer.getvalue()).decode('utf-8')
    return output_base64
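# Minimal local usage sketch (assumed file names, for illustration only):
if __name__ == "__main__":
    # 'room.jpg' should contain a chair; 'new-chair.png' is the image to paste in
    result = process_images("room.jpg", "new-chair.png", default_class="chair")
    if isinstance(result, str):
        # Success: result is a base64-encoded PNG
        with open("output.png", "wb") as f:
            f.write(base64.b64decode(result))
    else:
        # Failure: result is a (payload, status) tuple
        print(result)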