import gradio as gr
import torch
# import wandb
from ultralytics import YOLO
import cv2
import numpy as np
from skimage.transform import resize
from skimage import img_as_bool
from skimage.morphology import convex_hull_image

# wandb.init(mode='disabled')


def tableConvexHull(img, masks):
    """Merge all table masks into one by taking the convex hull of each."""
    mask = np.zeros(masks[0].shape, dtype=bool)
    for msk in masks:
        chull = convex_hull_image(msk.cpu().detach().numpy())
        mask = np.bitwise_or(mask, chull)
    return mask


def cls_exists(clss, cls):
    """Return True if class id `cls` appears among the predicted classes."""
    indices = torch.where(clss == cls)
    return len(indices[0]) > 0


def empty_mask(img):
    """All-False boolean mask with the image's height and width."""
    return np.zeros(img.shape[:2], dtype=bool)


def extract_img_mask(img_model, img, config):
    """Segment figures with the dedicated image model (class 0 of that model)."""
    res_dict = {'status': 1}
    res = get_predictions(img_model, img, config)
    if res['status'] == -1:
        res_dict['status'] = -1
    elif res['status'] == 0:
        res_dict['mask'] = empty_mask(img)
    else:
        masks = res['masks']
        boxes = res['boxes']
        clss = boxes[:, 5]
        res_dict['mask'] = extract_mask(img, masks, boxes, clss, 0)
    return res_dict


def get_predictions(model, img2, config):
    """Run YOLO segmentation; status 1 = ok, 0 = no detections, -1 = inference error."""
    res_dict = {'status': 1}
    try:
        for result in model.predict(source=img2, verbose=False, retina_masks=config['rm'],
                                    imgsz=config['sz'], conf=config['conf'], stream=True,
                                    classes=config['classes']):
            try:
                res_dict['masks'] = result.masks.data
                res_dict['boxes'] = result.boxes.data
                del result
                return res_dict
            except Exception:
                # result.masks is None when nothing was detected
                res_dict['status'] = 0
                return res_dict
        # The stream yielded no results at all.
        res_dict['status'] = 0
        return res_dict
    except Exception:
        res_dict['status'] = -1
        return res_dict


def extract_mask(img, masks, boxes, clss, cls):
    """Union of all predicted masks belonging to class id `cls`."""
    if not cls_exists(clss, cls):
        return empty_mask(img)
    indices = torch.where(clss == cls)
    c_masks = masks[indices]
    mask_arr = torch.any(c_masks, dim=0).bool()
    return mask_arr.cpu().detach().numpy()


def get_masks(img, model, img_model, flags, configs):
    """Return masks for the four layout classes: paragraph, text, image, table."""
    response = {'status': 1}
    ans_masks = []
    img2 = img

    # ***** Getting paragraph and text masks (classes 0 and 1)
    res = get_predictions(model, img2, configs['paratext'])
    if res['status'] == -1:
        response['status'] = -1
        return response
    elif res['status'] == 0:
        for i in range(2):
            ans_masks.append(empty_mask(img))
    else:
        masks, boxes = res['masks'], res['boxes']
        clss = boxes[:, 5]
        for cls in range(2):
            ans_masks.append(extract_mask(img, masks, boxes, clss, cls))

    # ***** Getting image and table masks (classes 2 and 3)
    res2 = get_predictions(model, img2, configs['imgtab'])
    if res2['status'] == -1:
        response['status'] = -1
        return response
    elif res2['status'] == 0:
        for i in range(2):
            ans_masks.append(empty_mask(img))
    else:
        masks, boxes = res2['masks'], res2['boxes']
        clss = boxes[:, 5]
        # Images (class 2) are re-segmented with the dedicated image model.
        if cls_exists(clss, 2):
            img_res = extract_img_mask(img_model, img, configs['image'])
            if img_res['status'] == 1:
                img_mask = img_res['mask']
            else:
                response['status'] = -1
                return response
        else:
            img_mask = empty_mask(img)
        ans_masks.append(img_mask)

        # Tables (class 3) are filled in via their convex hulls.
        if cls_exists(clss, 3):
            indices = torch.where(clss == 3)
            tbl_mask = tableConvexHull(img, masks[indices])
        else:
            tbl_mask = empty_mask(img)
        ans_masks.append(tbl_mask)

    # Without retina masks, predictions come at model resolution; resize them
    # back to the original image size.
    if not configs['paratext']['rm']:
        h, w, c = img.shape
        for i in range(4):
            ans_masks[i] = img_as_bool(resize(ans_masks[i], (h, w)))

    response['masks'] = ans_masks
    return response


def overlay(image, mask, color, alpha, resize=None):
    """Combines image and its segmentation mask into a single image.
    https://www.kaggle.com/code/purplejester/showing-samples-with-segmentation-mask-overlay

    Params:
        image: Training image. np.ndarray,
        mask: Segmentation mask. np.ndarray,
        color: Color for segmentation mask rendering.
            tuple[int, int, int] = (255, 0, 0),
        alpha: Segmentation mask's transparency. float = 0.5,
        resize: If provided, both image and its mask are resized before blending
            them together. tuple[int, int] = (1024, 1024)

    Returns:
        image_combined: The combined image. np.ndarray
    """
    color = color[::-1]  # RGB -> BGR, since OpenCV images are BGR
    colored_mask = np.expand_dims(mask, 0).repeat(3, axis=0)
    colored_mask = np.moveaxis(colored_mask, 0, -1)
    masked = np.ma.MaskedArray(image, mask=colored_mask, fill_value=color)
    image_overlay = masked.filled()
    if resize is not None:
        image = cv2.resize(image.transpose(1, 2, 0), resize)
        image_overlay = cv2.resize(image_overlay.transpose(1, 2, 0), resize)
    image_combined = cv2.addWeighted(image, 1 - alpha, image_overlay, alpha, 0)
    return image_combined


general_model_path = 'e50_aug.pt'
image_model_path = 'e100_img.pt'
general_model = YOLO(general_model_path)
image_model = YOLO(image_model_path)

sample_path = ['0040da34-25c8-4a5a-a6aa-36733ea3b8eb.png']

flags = {
    'hist': False,
    'bz': False,
}

configs = {
    'paratext': {'sz': 640, 'conf': 0.25, 'rm': True, 'classes': [0, 1]},
    'imgtab':   {'sz': 640, 'conf': 0.35, 'rm': True, 'classes': [2, 3]},
    'image':    {'sz': 640, 'conf': 0.35, 'rm': True, 'classes': [0]},
}


def evaluate(img_path, model=general_model, img_model=image_model,
             configs=configs, flags=flags):
    print('starting')
    img = cv2.imread(img_path)
    res = get_masks(img, model, img_model, flags, configs)
    if res['status'] == -1:
        # Inference failed; retry with retina_masks disabled (get_masks then
        # resizes the low-resolution masks back to the image size).
        for idx in configs.keys():
            configs[idx]['rm'] = False
        return evaluate(img_path, model, img_model, configs, flags)
    masks = res['masks']
    color_map = {
        0: (255, 0, 0),    # paragraph
        1: (0, 255, 0),    # text
        2: (0, 0, 255),    # image
        3: (255, 255, 0),  # table
    }
    for i, mask in enumerate(masks):
        img = overlay(image=img, mask=mask, color=color_map[i], alpha=0.4)
    print('finishing')
    # OpenCV loads BGR; Gradio expects RGB numpy output.
    return cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

# output = evaluate(img_path=sample_path[0], model=general_model, img_model=image_model,
#                   configs=configs, flags=flags)

# Unused stubs for a video interface, kept for reference; note that gr.Video
# takes no `type` kwarg, so these were never wired into an Interface:
# inputs_img = [gr.components.Video(label="Input Video")]
# outputs_img = [gr.components.Image(type="numpy", label="Output Image")]

inputs_image = [
    gr.components.Image(type="filepath", label="Input Image"),
]
outputs_image = [
    gr.components.Image(type="numpy", label="Output Image"),
]

interface_image = gr.Interface(
    fn=evaluate,
    inputs=inputs_image,
    outputs=outputs_image,
    title="Document Layout Segmentor",
    examples=sample_path,
    cache_examples=True,
)
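
# The original snippet builds the Interface but never starts it; presumably the
# hosting runtime called launch() elsewhere. A minimal sketch, assuming default
# local launch settings:
if __name__ == "__main__":
    interface_image.launch()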