import os

import cv2
import gradio as gr
import numpy as np
import torch
from PIL import Image, ImageDraw
from ultralytics import YOLO

device = "cuda" if torch.cuda.is_available() else "cpu"

# Fetch the face-detection checkpoint used by `predict` below, skipping the
# download when a previous run already saved it.
os.makedirs("models", exist_ok=True)
if not os.path.exists("models/face_yolov8m.pt"):
    torch.hub.download_url_to_file(
        "https://resources.artworks.ai/ADetailer/face_yolov8m.pt",
        "models/face_yolov8m.pt",
    )

MODELS_CACHE = {}


def cache_models(model_path):
    """Load a YOLO model once and serve it from the cache afterwards."""
    if model_path not in MODELS_CACHE:
        MODELS_CACHE[model_path] = YOLO(model_path).to(device)
    return MODELS_CACHE[model_path]


def apply_convex_hull(mask):
    """Replace each connected region of a binary mask with its convex hull."""
    mask_array = np.array(mask)
    _, thresh = cv2.threshold(mask_array, 127, 255, 0)
    contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    for contour in contours:
        hull = cv2.convexHull(contour)
        cv2.fillPoly(mask_array, [hull], 255)
    return Image.fromarray(mask_array)


def apply_padding(padding, image, xxyxy):
    """Scale a bounding box by the `padding` factor around its center.

    `padding` is a scale factor (1 keeps the box unchanged, 2 doubles its
    width and height); the result is clamped to the image bounds.
    """
    image_width, image_height = image.size
    xyxy = [int(x) for x in xxyxy]
    width = xyxy[2] - xyxy[0]
    height = xyxy[3] - xyxy[1]
    padding_x = int((padding - 1) * width / 2)
    padding_y = int((padding - 1) * height / 2)
    return [
        max(0, min(xyxy[0] - padding_x, image_width)),
        max(0, min(xyxy[1] - padding_y, image_height)),
        min(image_width, xyxy[2] + padding_x),
        min(image_height, xyxy[3] + padding_y),
    ]
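# A quick worked example of `apply_padding` (the numbers are chosen for
# illustration and do not come from the app): in a 100x100 image, padding=2.0
# grows the box (40, 40, 60, 80) by (2 - 1) * 20 / 2 = 10 px per side
# horizontally and (2 - 1) * 40 / 2 = 20 px per side vertically, clamped to
# the image bounds:
#
#     box = apply_padding(2.0, Image.new("RGB", (100, 100)), [40, 40, 60, 80])
#     assert box == [30, 20, 70, 100]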
def create_mask_from_yolo(image, model_path, padding, convex_hull_required):
    """Run a YOLO model on `image` and build masks for every detection.

    Returns ([combined_mask_image, detections], message) on success, where
    each detection is (cropped_image, cropped_mask, confidence, class_name,
    (x, y) origin of the crop), or ([], message) when nothing is found.
    Segmentation masks are used when the model provides them; otherwise the
    bounding boxes are rasterized as rectangular masks.
    """
    combined_mask = None
    ret = []
    model = cache_models(model_path)
    results = model.predict(image)

    # First pass: use real segmentation masks if the model produces them.
    for result in results:
        masks = [] if result.masks is None else result.masks.data
        for index, mask in enumerate(masks):
            mask = mask.cpu().numpy()
            mask = (mask * 255).astype("uint8")
            mask = cv2.resize(mask, image.size)
            if combined_mask is None:
                combined_mask = mask
            else:
                combined_mask = np.maximum(combined_mask, mask)
            box = result.boxes[index]
            # @todo: apply `for` instead of `0 index`
            xxyxy = box.xyxy[0].tolist()
            xyxy_orig = apply_padding(padding, image, xxyxy)
            cropped_image = image.crop(xyxy_orig)
            cropped_mask = Image.fromarray(mask).crop(xyxy_orig)
            if convex_hull_required:
                cropped_mask = apply_convex_hull(cropped_mask)
            class_id = int(box.cls[0].item())
            class_name = model.names[class_id]
            confidence = box.conf[0].item()
            ret.append(
                (
                    cropped_image,
                    cropped_mask,
                    confidence,
                    class_name,
                    (xyxy_orig[0], xyxy_orig[1]),
                )
            )
    if combined_mask is not None:
        combined_mask_image = Image.fromarray(combined_mask)
        return [combined_mask_image, ret], "Operation completed successfully"

    # Fallback: detection-only models carry no masks, so rasterize the boxes.
    for result in results:
        for box in result.boxes:
            # @todo: apply `for` instead of `0 index`
            xxyxy = box.xyxy[0].tolist()
            xyxy = [int(x) for x in xxyxy]
            mask = Image.new("L", image.size, 0)
            draw = ImageDraw.Draw(mask)
            draw.rectangle(xyxy, fill=255)
            mask = np.array(mask)
            if combined_mask is None:
                combined_mask = mask
            else:
                combined_mask = np.maximum(combined_mask, mask)
            xyxy_orig = apply_padding(padding, image, xxyxy)
            cropped_mask = Image.new("L", image.size, 0)
            draw = ImageDraw.Draw(cropped_mask)
            draw.rectangle(xyxy, fill=255)
            cropped_mask = cropped_mask.crop(xyxy_orig)
            cropped_image = image.crop(xyxy_orig)
            class_id = int(box.cls[0].item())
            class_name = model.names[class_id]
            confidence = box.conf[0].item()
            ret.append(
                (
                    cropped_image,
                    cropped_mask,
                    confidence,
                    class_name,
                    (xyxy_orig[0], xyxy_orig[1]),
                )
            )
    if combined_mask is not None:
        combined_mask_image = Image.fromarray(combined_mask)
        return [combined_mask_image, ret], "Operation completed successfully"

    return [], "No masks have been found"


def predict(inp):
    """Gradio handler: return the first detected face crop and its mask."""
    result, message = create_mask_from_yolo(
        inp,
        "./models/face_yolov8m.pt",
        1,
        False,
    )
    print(message)
    if not result:
        return None, None
    # result[1] is the detection list; each entry is
    # (cropped_image, cropped_mask, confidence, class_name, origin).
    return result[1][0][0], result[1][0][1]
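# Illustrative standalone usage (not wired into the app; the sample path is a
# placeholder, not a file shipped with this repo). The same pipeline can be
# driven without Gradio:
#
#     img = Image.open("samples/portrait.jpg").convert("RGB")
#     result, message = create_mask_from_yolo(
#         img, "./models/face_yolov8m.pt", padding=1.2, convex_hull_required=True
#     )
#     if result:
#         combined_mask, detections = result
#         combined_mask.save("combined_mask.png")
#         for crop, mask, confidence, class_name, origin in detections:
#             print(f"{class_name} {confidence:.2f} at {origin}")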
def run() -> None:
    demo = gr.Interface(
        fn=predict,
        inputs=gr.Image(type="pil"),
        outputs=[
            gr.Image(type="pil", label="Image"),
            gr.Image(type="pil", label="Mask"),
        ],
    )
    demo.launch(server_name="0.0.0.0", server_port=7860)


if __name__ == "__main__":
    run()
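# A minimal client-side sketch (assumes the optional `gradio_client` package
# and a local instance of this app; "/predict" is Gradio's default api_name
# for a single-function Interface, and "face.jpg" is a placeholder):
#
#     from gradio_client import Client, handle_file
#
#     client = Client("http://localhost:7860")
#     image_path, mask_path = client.predict(
#         handle_file("face.jpg"), api_name="/predict"
#     )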