import torch
import supervision as sv
import cv2
import numpy as np
import os
from segment_anything import SamPredictor, sam_model_registry
from diffusers import StableDiffusionInpaintPipeline
from torchvision.ops import box_convert
from typing import List
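
# Expected local assets (paths are taken from the constructor below; adjust them
# if your checkpoints live elsewhere):
#   ./weights/sam_vit_h_4b8939.pth                -> SAM ViT-H checkpoint
#   weights/groundingdino_swint_ogc.pth           -> Grounding DINO SwinT-OGC checkpoint
#   fengxai/config/GroundingDINO_SwinT_OGC.py     -> Grounding DINO model config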
class SelfSupervised:
    def __init__(self):
        from groundingdino.util.inference import load_model

        # ----- Device selection (CUDA if available)
        self.device = "cuda" if torch.cuda.is_available() else "cpu"

        # ----- SAM predictor
        self.model_type = "vit_h"
        self.predictor = SamPredictor(sam_model_registry[self.model_type](
            checkpoint="./weights/sam_vit_h_4b8939.pth").to(device=self.device))

        # ----- Stable Diffusion inpainting pipeline
        self.pipe = StableDiffusionInpaintPipeline.from_pretrained(
            "stabilityai/stable-diffusion-2-inpainting",
            torch_dtype=torch.float16,
        ).to(self.device)

        # ----- Grounding DINO
        self.groundingdino_model = load_model(
            "fengxai/config/GroundingDINO_SwinT_OGC.py",
            "weights/groundingdino_swint_ogc.pth",
        )
    def checkAnnotate(self, image_source: np.ndarray, boxes: torch.Tensor, logits: torch.Tensor, phrases: List[str]):
        # Draw the detected boxes on the original image.
        h, w, _ = image_source.shape
        # Boxes come back normalized; scale them to pixel coordinates.
        boxes = boxes * torch.Tensor([w, h, w, h])
        # See: https://pytorch.org/vision/main/generated/torchvision.ops.box_convert.html
        # xyxy:   (x1, y1) is the top-left corner, (x2, y2) the bottom-right corner
        # cxcywh: (cx, cy) is the box center, (w, h) its width and height
        xyxy = box_convert(boxes=boxes, in_fmt="cxcywh", out_fmt="xyxy").numpy()
        detections = sv.Detections(xyxy=xyxy)
        # Label each detection with its phrase, confidence, and pixel size.
        labels = [
            f"{phrase} {logit:.2f} w:{int(box[2] - box[0])} h:{int(box[3] - box[1])}"
            for phrase, logit, box in zip(phrases, logits, xyxy)
        ]
        box_annotator = sv.BoxAnnotator()
        annotated_frame = cv2.cvtColor(image_source, cv2.COLOR_RGB2BGR)
        annotated_frame = box_annotator.annotate(scene=annotated_frame, detections=detections, labels=labels)
        return annotated_frame, xyxy
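
    # A minimal worked example of the conversion above (illustrative numbers only):
    # for a 640x480 image, a normalized cxcywh box (0.5, 0.5, 0.4, 0.6) scales to
    # (320, 240, 256, 288) in pixels and converts to xyxy = (192, 96, 448, 384),
    # i.e. a box 256 px wide and 288 px tall.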
    # Run Grounding DINO on an image and save an annotated copy.
    def imagePredict(self, imageFile, item="clothing", boxThreshold=0.3, textThreshold=0.25):
        from groundingdino.util.inference import load_image, predict

        src, img = load_image(imageFile)
        h, w, _ = src.shape
        boxes, logits, phrases = predict(
            model=self.groundingdino_model,
            image=img,
            caption=item,
            box_threshold=boxThreshold,
            text_threshold=textThreshold
        )
        # Inspect the annotated detections.
        imgAnnotated, xyxy = self.checkAnnotate(
            image_source=src, boxes=boxes, logits=logits, phrases=phrases
        )
        # checkAnnotate returns a BGR frame, which is what cv2.imwrite expects.
        # Report the dimensions of the first detected box.
        boxesHeight = int(xyxy[0][3] - xyxy[0][1])
        boxesWidth = int(xyxy[0][2] - xyxy[0][0])
        imageOutPutFile = "data/annotated_image.jpg"
        outputDir = imageOutPutFile.split("/")[0]
        if not os.path.exists(outputDir):
            print("outputDir=", outputDir)
            os.mkdir(outputDir)
        cv2.imwrite(imageOutPutFile, imgAnnotated)
        # Debug output: list the working directory and the data/ directory.
        print("os cwd=", os.getcwd())
        for root, dirs, files in os.walk(os.getcwd()):
            print("root=", root)
            print("files=", files)
        print("data=")
        for root, dirs, files in os.walk("data/"):
            print("root=", root)
            print("files=", files)
        return {
            "imageOutput": imageOutPutFile,
            "imageHeight": h,
            "imageWidth": w,
            "objectHeight": boxesHeight,
            "objectWidth": boxesWidth
        }
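

# A minimal usage sketch, not part of the original file: the image path below is a
# placeholder, and the weights listed at the top must already be downloaded.
if __name__ == "__main__":
    detector = SelfSupervised()
    # Detect the default "clothing" prompt in a local image and print the summary dict.
    result = detector.imagePredict("data/example.jpg", item="clothing")
    print(result)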