Spaces:
Runtime error
Runtime error
import os | |
import base64 | |
import io | |
import uuid | |
from ultralytics import YOLO | |
import cv2 | |
import torch | |
import numpy as np | |
from PIL import Image | |
from torchvision import transforms | |
import imageio.v2 as imageio | |
from trainer import Trainer | |
from utils.tools import get_config | |
import torch.nn.functional as F | |
from iopaint.single_processing import batch_inpaint | |
from pathlib import Path | |
from flask import Flask, request, jsonify,render_template | |
from flask_cors import CORS | |
app = Flask(__name__) | |
CORS(app) | |
# set current working directory cache instead of default | |
os.environ["TORCH_HOME"] = "./pretrained-model" | |
os.environ["HUGGINGFACE_HUB_CACHE"] = "./pretrained-model" | |
def resize_image(input_image_base64, width=640, height=640): | |
"""Resizes an image from base64 data and returns the resized image as bytes.""" | |
try: | |
# Decode base64 string to bytes | |
input_image_data = base64.b64decode(input_image_base64) | |
# Convert bytes to NumPy array | |
img = np.frombuffer(input_image_data, dtype=np.uint8) | |
# Decode NumPy array as an image | |
img = cv2.imdecode(img, cv2.IMREAD_COLOR) | |
# Resize while maintaining the aspect ratio | |
shape = img.shape[:2] # current shape [height, width] | |
new_shape = (width, height) # the shape to resize to | |
# Scale ratio (new / old) | |
r = min(new_shape[0] / shape[0], new_shape[1] / shape[1]) | |
ratio = r, r # width, height ratios | |
new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r)) | |
# Resize the image | |
im = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR) | |
# Pad the image | |
color = (114, 114, 114) # color used for padding | |
dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding | |
# divide padding into 2 sides | |
dw /= 2 | |
dh /= 2 | |
# compute padding on all corners | |
top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1)) | |
left, right = int(round(dw - 0.1)), int(round(dw + 0.1)) | |
im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border | |
# Convert the resized and padded image to bytes | |
resized_image_bytes = cv2.imencode('.png', im)[1].tobytes() | |
return resized_image_bytes | |
except Exception as e: | |
print(f"Error resizing image: {e}") | |
return None # Or handle differently as needed | |
def load_weights(path, device): | |
model_weights = torch.load(path) | |
return { | |
k: v.to(device) | |
for k, v in model_weights.items() | |
} | |
# Function to convert image to base64 | |
def convert_image_to_base64(image): | |
# Convert image to bytes | |
_, buffer = cv2.imencode('.png', image) | |
# Convert bytes to base64 | |
image_base64 = base64.b64encode(buffer).decode('utf-8') | |
return image_base64 | |
def convert_to_base64(image): | |
# Read the image file as binary data | |
image_data = image.read() | |
# Encode the binary data as base64 | |
base64_encoded = base64.b64encode(image_data).decode('utf-8') | |
return base64_encoded | |
def index(): | |
return render_template('index.html') | |
def process_images(): | |
# Static paths | |
config_path = Path('configs/config.yaml') | |
model_path = Path('pretrained-model/torch_model.p') | |
# Check if the request contains files | |
if 'input_image' not in request.files or 'append_image' not in request.files: | |
return jsonify({'error': 'No files found'}), 419 | |
# Get the objectName from the request or use default "chair" if not provided | |
default_class = request.form.get('objectName', 'chair') | |
# Convert the images to base64 | |
try: | |
input_base64 = convert_to_base64(request.files['input_image']) | |
append_base64 = convert_to_base64(request.files['append_image']) | |
except Exception as e: | |
return jsonify({'error': 'Failed to read files'}), 419 | |
# Resize input image and get base64 data of resized image | |
input_resized_image_bytes = resize_image(input_base64) | |
# Convert resized image bytes to base64 | |
input_resized_base64 = base64.b64encode(input_resized_image_bytes).decode('utf-8') | |
# Decode the resized image from base64 data directly | |
img = cv2.imdecode(np.frombuffer(input_resized_image_bytes, np.uint8), cv2.IMREAD_COLOR) | |
if img is None: | |
return jsonify({'error': 'Failed to decode resized image'}), 419 | |
H, W, _ = img.shape | |
x_point = 0 | |
y_point = 0 | |
width = 1 | |
height = 1 | |
# Load a model | |
model = YOLO('pretrained-model/yolov8m-seg.pt') # pretrained YOLOv8m-seg model | |
# Run batched inference on a list of images | |
results = model(img, imgsz=(W,H), conf=0.5) # chair class 56 with confidence >= 0.5 | |
names = model.names | |
# print(names) | |
class_found = False | |
for result in results: | |
for i, label in enumerate(result.boxes.cls): | |
# Check if the label matches the chair label | |
if names[int(label)] == default_class: | |
class_found = True | |
# Convert the tensor to a numpy array | |
chair_mask_np = result.masks.data[i].numpy() | |
kernel = np.ones((5, 5), np.uint8) # Create a 5x5 kernel for dilation | |
chair_mask_np = cv2.dilate(chair_mask_np, kernel, iterations=2) # Apply dilation | |
# Find contours to get bounding box | |
contours, _ = cv2.findContours((chair_mask_np == 1).astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) | |
# Iterate over contours to find the bounding box of each object | |
for contour in contours: | |
x, y, w, h = cv2.boundingRect(contour) | |
x_point = x | |
y_point = y | |
width = w | |
height = h | |
# Get the corresponding mask | |
mask = result.masks.data[i].numpy() * 255 | |
dilated_mask = cv2.dilate(mask, kernel, iterations=2) # Apply dilation | |
# Resize the mask to match the dimensions of the original image | |
resized_mask = cv2.resize(dilated_mask, (img.shape[1], img.shape[0])) | |
# Convert mask to base64 | |
mask_base64 = convert_image_to_base64(resized_mask) | |
# call repainting and merge function | |
output_base64 = repaitingAndMerge(append_base64,str(model_path), str(config_path),width, height, x_point, y_point, input_resized_base64, mask_base64) | |
# Return the output base64 image in the API response | |
return jsonify({'output_base64': output_base64}), 200 | |
# return class not found in prediction | |
if not class_found: | |
return jsonify({'message': f'{default_class} object not found in the image'}), 200 | |
def repaitingAndMerge(append_image_base64_image, model_path, config_path, width, height, xposition, yposition, input_base64, mask_base64): | |
config = get_config(config_path) | |
device = torch.device("cpu") | |
trainer = Trainer(config) | |
trainer.load_state_dict(load_weights(model_path, device), strict=False) | |
trainer.eval() | |
# lama inpainting start | |
print("lama inpainting start") | |
inpaint_result_base64 = batch_inpaint('lama', 'cpu', input_base64, mask_base64) | |
print("lama inpainting end") | |
# Decode base64 to bytes | |
inpaint_result_bytes = base64.b64decode(inpaint_result_base64) | |
# Convert bytes to NumPy array | |
inpaint_result_np = np.array(Image.open(io.BytesIO(inpaint_result_bytes))) | |
# Create PIL Image from NumPy array | |
final_image = Image.fromarray(inpaint_result_np) | |
print("merge start") | |
# Decode base64 to binary data | |
decoded_image_data = base64.b64decode(append_image_base64_image) | |
# Convert binary data to a NumPy array | |
append_image = cv2.imdecode(np.frombuffer(decoded_image_data, np.uint8), cv2.IMREAD_UNCHANGED) | |
# Resize the append image while preserving transparency | |
resized_image = cv2.resize(append_image, (width, height), interpolation=cv2.INTER_AREA) | |
# Convert the resized image to RGBA format (assuming it's in BGRA format) | |
resized_image = cv2.cvtColor(resized_image, cv2.COLOR_BGRA2RGBA) | |
# Create a PIL Image from the resized image with transparent background | |
append_image_pil = Image.fromarray(resized_image) | |
# Paste the append image onto the final image | |
final_image.paste(append_image_pil, (xposition, yposition), append_image_pil) | |
# Save the resulting image | |
print("merge end") | |
# Convert the final image to base64 | |
with io.BytesIO() as output_buffer: | |
final_image.save(output_buffer, format='PNG') | |
output_base64 = base64.b64encode(output_buffer.getvalue()).decode('utf-8') | |
return output_base64 | |
if __name__ == '__main__': | |
app.run(host='0.0.0.0',debug=True) | |