import torch
import supervision as sv
import cv2
import numpy as np
import os
from segment_anything import SamPredictor, sam_model_registry
from diffusers import StableDiffusionInpaintPipeline
from torchvision.ops import box_convert
from typing import List
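
# Expected local assets (paths are taken from the constructor below; adjust them
# if your checkpoints live elsewhere):
#   ./weights/sam_vit_h_4b8939.pth                -> SAM ViT-H checkpoint
#   weights/groundingdino_swint_ogc.pth           -> Grounding DINO SwinT-OGC checkpoint
#   fengxai/config/GroundingDINO_SwinT_OGC.py     -> Grounding DINO model config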
class SelfSupervised:
    def __init__(self):
        from groundingdino.util.inference import load_model

        # ----- Device selection (CUDA if available)
        self.device = "cuda" if torch.cuda.is_available() else "cpu"

        # ----- SAM predictor
        self.model_type = "vit_h"
        self.predictor = SamPredictor(sam_model_registry[self.model_type](
            checkpoint="./weights/sam_vit_h_4b8939.pth").to(device=self.device))

        # ----- Stable Diffusion inpainting pipeline
        self.pipe = StableDiffusionInpaintPipeline.from_pretrained(
            "stabilityai/stable-diffusion-2-inpainting",
            torch_dtype=torch.float16,
        ).to(self.device)

        # ----- Grounding DINO
        self.groundingdino_model = load_model(
            "fengxai/config/GroundingDINO_SwinT_OGC.py",
            "weights/groundingdino_swint_ogc.pth",
        )
    def checkAnnotate(self, image_source: np.ndarray, boxes: torch.Tensor, logits: torch.Tensor, phrases: List[str]):
        # Draw the detected boxes on the original image.
        h, w, _ = image_source.shape
        # Boxes come back normalized; scale them to pixel coordinates.
        boxes = boxes * torch.Tensor([w, h, w, h])
        # See: https://pytorch.org/vision/main/generated/torchvision.ops.box_convert.html
        # xyxy:   (x1, y1) is the top-left corner, (x2, y2) the bottom-right corner
        # cxcywh: (cx, cy) is the box center, (w, h) its width and height
        xyxy = box_convert(boxes=boxes, in_fmt="cxcywh", out_fmt="xyxy").numpy()
        detections = sv.Detections(xyxy=xyxy)
        # Label each detection with its phrase, confidence, and pixel size.
        labels = [
            f"{phrase} {logit:.2f} w:{int(box[2] - box[0])} h:{int(box[3] - box[1])}"
            for phrase, logit, box in zip(phrases, logits, xyxy)
        ]
        box_annotator = sv.BoxAnnotator()
        annotated_frame = cv2.cvtColor(image_source, cv2.COLOR_RGB2BGR)
        annotated_frame = box_annotator.annotate(scene=annotated_frame, detections=detections, labels=labels)
        return annotated_frame, xyxy
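
    # A minimal worked example of the conversion above (illustrative numbers only):
    # for a 640x480 image, a normalized cxcywh box (0.5, 0.5, 0.4, 0.6) scales to
    # (320, 240, 256, 288) in pixels and converts to xyxy = (192, 96, 448, 384),
    # i.e. a box 256 px wide and 288 px tall.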
    # Run Grounding DINO on an image and save an annotated copy.
    def imagePredict(self, imageFile, item="clothing", boxThreshold=0.3, textThreshold=0.25):
        from groundingdino.util.inference import load_image, predict

        src, img = load_image(imageFile)
        h, w, _ = src.shape
        boxes, logits, phrases = predict(
            model=self.groundingdino_model,
            image=img,
            caption=item,
            box_threshold=boxThreshold,
            text_threshold=textThreshold
        )
        # Inspect the annotated detections.
        imgAnnotated, xyxy = self.checkAnnotate(
            image_source=src, boxes=boxes, logits=logits, phrases=phrases
        )
        # checkAnnotate returns a BGR frame, which is what cv2.imwrite expects.
        # Report the dimensions of the first detected box.
        boxesHeight = int(xyxy[0][3] - xyxy[0][1])
        boxesWidth = int(xyxy[0][2] - xyxy[0][0])
        imageOutPutFile = "data/annotated_image.jpg"
        outputDir = imageOutPutFile.split("/")[0]
        if not os.path.exists(outputDir):
            print("outputDir=", outputDir)
            os.mkdir(outputDir)
        cv2.imwrite(imageOutPutFile, imgAnnotated)
        # Debug output: list the working directory and the data/ directory.
        print("os cwd=", os.getcwd())
        for root, dirs, files in os.walk(os.getcwd()):
            print("root=", root)
            print("files=", files)
        print("data=")
        for root, dirs, files in os.walk("data/"):
            print("root=", root)
            print("files=", files)
        return {
            "imageOutput": imageOutPutFile,
            "imageHeight": h,
            "imageWidth": w,
            "objectHeight": boxesHeight,
            "objectWidth": boxesWidth
        }
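

# A minimal usage sketch, not part of the original file: the image path below is a
# placeholder, and the weights listed at the top must already be downloaded.
if __name__ == "__main__":
    detector = SelfSupervised()
    # Detect the default "clothing" prompt in a local image and print the summary dict.
    result = detector.imagePredict("data/example.jpg", item="clothing")
    print(result)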