# efengx
# fix: update
# e1e83fc
import torch
import supervision as sv
import cv2
import numpy as np
import os
from segment_anything import SamPredictor, sam_model_registry
from diffusers import StableDiffusionInpaintPipeline
from torchvision.ops import box_convert
from typing import List
class SelfSupervised:
    """Holds SAM, Stable Diffusion inpainting and GroundingDINO models.

    The constructor loads all three models; ``imagePredict`` runs
    text-prompted detection with GroundingDINO and writes an annotated
    copy of the image to disk.
    """

    def __init__(self):
        """Load the SAM predictor, the inpainting pipeline and GroundingDINO."""
        # Imported here (not at module top) so that merely importing this
        # module does not require groundingdino to be importable.
        from groundingdino.util.inference import load_model

        # ----- Select CUDA when available, otherwise CPU
        self.device = "cuda" if torch.cuda.is_available() else "cpu"

        # ------ SAM parameters
        self.model_type = "vit_h"
        sam = sam_model_registry[self.model_type](
            checkpoint="./weights/sam_vit_h_4b8939.pth"
        ).to(device=self.device)
        self.predictor = SamPredictor(sam)

        # ------ Stable Diffusion
        # NOTE(review): float16 weights are requested even when the device is
        # "cpu" -- confirm the pipeline is only ever run on CUDA.
        self.pipe = StableDiffusionInpaintPipeline.from_pretrained(
            "stabilityai/stable-diffusion-2-inpainting",
            torch_dtype=torch.float16,
        ).to(self.device)

        # ---- Grounding DINO
        self.groundingdino_model = load_model(
            "fengxai/config/GroundingDINO_SwinT_OGC.py",
            "weights/groundingdino_swint_ogc.pth",
        )

    @staticmethod
    def _box_dimensions(xyxy):
        """Return ``(widths, heights)`` as lists of ints, one entry per box.

        ``xyxy`` is an array of ``[x1, y1, x2, y2]`` rows. Differences are
        truncated with ``int()``. An array with no rows yields two empty lists.
        """
        rows = np.asarray(xyxy).reshape(-1, 4)
        widths = [int(v) for v in rows[:, 2] - rows[:, 0]]
        heights = [int(v) for v in rows[:, 3] - rows[:, 1]]
        return widths, heights

    def checkAnnotate(self, image_source: np.ndarray, boxes: torch.Tensor, logits: torch.Tensor, phrases: List[str]):
        """Draw the detected boxes and their labels onto a copy of the image.

        Args:
            image_source: Image array of shape (height, width, channels); it is
                converted with ``cv2.COLOR_RGB2BGR`` before drawing.
            boxes: Boxes in ``cxcywh`` format; they are multiplied by the image
                width/height, so they are expected to be normalized.
            logits: One score per box, shown in the label with two decimals.
            phrases: One text phrase per box.

        Returns:
            A tuple ``(annotated_frame, xyxy)``: the annotated image in BGR
            channel order and the boxes as a NumPy array in pixel ``xyxy``
            format.
        """
        # Scale the boxes to pixel coordinates of the original image.
        h, w, _ = image_source.shape
        boxes = boxes * torch.Tensor([w, h, w, h])

        # Reference: https://pytorch.org/vision/main/generated/torchvision.ops.box_convert.html
        # xyxy:   (x1, y1) is the top-left corner, (x2, y2) the bottom-right one.
        # cxcywh: (cx, cy) is the box center, (w, h) its width and height.
        xyxy = box_convert(boxes=boxes, in_fmt="cxcywh", out_fmt="xyxy").numpy()
        detections = sv.Detections(xyxy=xyxy)

        # Each label carries the size of its own box (previously every label
        # showed the size of the first box, and an empty result raised
        # IndexError).
        widths, heights = SelfSupervised._box_dimensions(xyxy)
        labels = [
            f"{phrase} {logit:.2f} w:{boxWidth} h:{boxHeight}"
            for phrase, logit, boxWidth, boxHeight in zip(phrases, logits, widths, heights)
        ]

        box_annotator = sv.BoxAnnotator()
        annotated_frame = cv2.cvtColor(image_source, cv2.COLOR_RGB2BGR)
        annotated_frame = box_annotator.annotate(scene=annotated_frame, detections=detections, labels=labels)
        return annotated_frame, xyxy

    # Run detection on an image
    def imagePredict(self, imageFile, item="clothing", boxThreshold=0.3, textTreshold=0.25, *, debug=False):
        """Detect ``item`` in an image and save an annotated copy.

        Args:
            imageFile: Image location, forwarded to groundingdino's
                ``load_image``.
            item: Text caption describing what to detect.
            boxThreshold: Forwarded to ``predict`` as ``box_threshold``.
            textTreshold: Forwarded to ``predict`` as ``text_threshold``.
            debug: When true, print the working directory and the file
                listings of the working directory and of ``data/``.

        Returns:
            A dict with the output path (``imageOutput``), the source image
            size (``imageHeight``, ``imageWidth``) and the size of the first
            detected box (``objectHeight``, ``objectWidth``). Both object
            sizes are ``0`` when nothing was detected.

        Raises:
            OSError: If the annotated image could not be written.
        """
        from groundingdino.util.inference import load_image, predict

        src, img = load_image(imageFile)
        h, w, _ = src.shape
        boxes, logits, phrases = predict(
            model=self.groundingdino_model,
            image=img,
            caption=item,
            box_threshold=boxThreshold,
            text_threshold=textTreshold,
        )

        # Draw the detections and get the boxes in pixel xyxy coordinates.
        imgAnnotated, xyxy = self.checkAnnotate(
            image_source=src, boxes=boxes, logits=logits, phrases=phrases
        )

        # Report the size of the first box; fall back to 0 when there is none.
        widths, heights = SelfSupervised._box_dimensions(xyxy)
        boxesWidth = widths[0] if widths else 0
        boxesHeight = heights[0] if heights else 0

        imageOutPutFile = "data/annotated_image.jpg"
        outputDir = os.path.dirname(imageOutPutFile)
        if debug and not os.path.exists(outputDir):
            print("fileList=", outputDir)
        os.makedirs(outputDir, exist_ok=True)

        # checkAnnotate already returns a BGR frame, which is the channel
        # order cv2.imwrite expects, so it is written as-is. Reversing the
        # channels first would store the image with red and blue swapped.
        if not cv2.imwrite(imageOutPutFile, imgAnnotated):
            raise OSError(f"could not write annotated image to {imageOutPutFile}")

        if debug:
            print("os cwd=", os.getcwd())
            for root, dirs, files in os.walk(os.getcwd()):
                print("root=", root)
                print("files=", files)
            print("data=")
            for root, dirs, files in os.walk("data/"):
                print("root=", root)
                print("files=", files)

        return {
            "imageOutput": imageOutPutFile,
            "imageHeight": h,
            "imageWidth": w,
            "objectHeight": boxesHeight,
            "objectWidth": boxesWidth,
        }