import torch
import supervision as sv
import cv2
import numpy as np
import os
from segment_anything import SamPredictor, sam_model_registry
from diffusers import StableDiffusionInpaintPipeline
from torchvision.ops import box_convert
from typing import List


class SelfSupervised:
    def __init__(self):
        from groundingdino.util.inference import load_model

        # ----- Device selection: use CUDA if available
        self.device = "cuda" if torch.cuda.is_available() else "cpu"

        # ----- SAM parameters
        self.model_type = "vit_h"
        self.predictor = SamPredictor(
            sam_model_registry[self.model_type](
                checkpoint="./weights/sam_vit_h_4b8939.pth"
            ).to(device=self.device)
        )

        # ----- Stable Diffusion inpainting pipeline
        self.pipe = StableDiffusionInpaintPipeline.from_pretrained(
            "stabilityai/stable-diffusion-2-inpainting",
            torch_dtype=torch.float16,
        ).to(self.device)

        # ----- Grounding DINO
        self.groundingdino_model = load_model(
            "fengxai/config/GroundingDINO_SwinT_OGC.py",
            "weights/groundingdino_swint_ogc.pth",
        )

    def checkAnnotate(self, image_source: np.ndarray, boxes: torch.Tensor,
                      logits: torch.Tensor, phrases: List[str]):
        # Draw the predicted boxes on the original image.
        h, w, _ = image_source.shape
        # Boxes are returned normalized in (cx, cy, w, h); scale to pixel coordinates.
        boxes = boxes * torch.Tensor([w, h, w, h])
        # Reference: https://pytorch.org/vision/main/generated/torchvision.ops.box_convert.html
        # xyxy: (x1, y1) is the top-left corner, (x2, y2) the bottom-right corner.
        # cxcywh: (cx, cy) is the box center, (w, h) its width and height.
        xyxy = box_convert(boxes=boxes, in_fmt="cxcywh", out_fmt="xyxy").numpy()
        detections = sv.Detections(xyxy=xyxy)

        # Width/height of the first detected box, shown in every label.
        boxesHeight = int(xyxy[0][3] - xyxy[0][1])
        boxesWidth = int(xyxy[0][2] - xyxy[0][0])
        labels = [
            f"{phrase} {logit:.2f} w:{boxesWidth} h:{boxesHeight}"
            for phrase, logit in zip(phrases, logits)
        ]

        box_annotator = sv.BoxAnnotator()
        # load_image returns RGB; convert to BGR for OpenCV drawing and saving.
        annotated_frame = cv2.cvtColor(image_source, cv2.COLOR_RGB2BGR)
        annotated_frame = box_annotator.annotate(
            scene=annotated_frame, detections=detections, labels=labels
        )
        return annotated_frame, xyxy

    # Run Grounding DINO detection on a single image and save the annotated result.
    def imagePredict(self, imageFile, item="clothing", boxThreshold=0.3, textThreshold=0.25):
        from groundingdino.util.inference import load_image, predict

        src, img = load_image(imageFile)
        h, w, _ = src.shape
        boxes, logits, phrases = predict(
            model=self.groundingdino_model,
            image=img,
            caption=item,
            box_threshold=boxThreshold,
            text_threshold=textThreshold,
        )

        # Inspect the annotation results.
        imgAnnotated, xyxy = self.checkAnnotate(
            image_source=src, boxes=boxes, logits=logits, phrases=phrases
        )
        # checkAnnotate already returns a BGR image, which is what cv2.imwrite expects,
        # so no further channel flip is needed here.
        boxesHeight = int(xyxy[0][3] - xyxy[0][1])
        boxesWidth = int(xyxy[0][2] - xyxy[0][0])

        imageOutputFile = "data/annotated_image.jpg"
        outputDir = imageOutputFile.split("/")[0]
        if not os.path.exists(outputDir):
            print("outputDir=", outputDir)
            os.mkdir(outputDir)
        cv2.imwrite(imageOutputFile, imgAnnotated)

        # Debug output: list the working directory and the output directory contents.
        print("os cwd=", os.getcwd())
        for root, dirs, files in os.walk(os.getcwd()):
            print("root=", root)
            print("files=", files)
        print("data=")
        for root, dirs, files in os.walk("data/"):
            print("root=", root)
            print("files=", files)

        return {
            "imageOutput": imageOutputFile,
            "imageHeight": h,
            "imageWidth": w,
            "objectHeight": boxesHeight,
            "objectWidth": boxesWidth,
        }
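

# ---------------------------------------------------------------------------
# Minimal usage sketch, not part of the original class. It assumes the weight
# files and config path referenced in __init__ exist, and that a test image is
# available at the hypothetical path "data/sample.jpg".
if __name__ == "__main__":
    detector = SelfSupervised()
    result = detector.imagePredict(
        imageFile="data/sample.jpg",   # hypothetical input image
        item="clothing",               # text prompt passed to Grounding DINO
        boxThreshold=0.3,
        textThreshold=0.25,
    )
    # result holds the path of the annotated image plus image/object dimensions.
    print(result)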