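# Web-page residue captured with the file (not part of the program); kept as
# comments so the module parses:
#   mart9992's picture
#   m
#   2cd560a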
from segment_anything import build_sam, SamPredictor
import os
import cv2
import numpy as np
from collections import defaultdict
class Colors:
    """Fixed 20-entry colour palette addressed by integer index.

    The hex values are the Ultralytics color palette
    (https://ultralytics.com/).  Indices wrap around modulo the palette
    size, so any integer-like id maps to a colour.
    """

    def __init__(self):
        """Convert the palette's hex codes into ``(r, g, b)`` tuples."""
        hexs = ('FF3838', 'FF9D97', 'FF701F', 'FFB21D', 'CFD231', '48F90A', '92CC17', '3DDB86', '1A9334', '00D4BB',
                '2C99A8', '00C2FF', '344593', '6473FF', '0018EC', '8438FF', '520085', 'CB38FF', 'FF95C8', 'FF37C7')
        self.palette = [self.hex2rgb(f'#{c}') for c in hexs]
        self.n = len(self.palette)

    def __call__(self, i, bgr=False):
        """Return the colour for index ``i``.

        Args:
            i: Anything ``int()`` accepts; it is reduced modulo the palette size.
            bgr: When true, return the channels in ``(b, g, r)`` order.

        Returns:
            A 3-tuple of ints in the range 0-255.
        """
        c = self.palette[int(i) % self.n]
        return (c[2], c[1], c[0]) if bgr else c

    @staticmethod
    def hex2rgb(h):  # rgb order (PIL)
        """Convert ``'#RRGGBB'`` (or bare ``'RRGGBB'``) to an ``(r, g, b)`` tuple."""
        # Dropping the optional '#' first means a bare hex string is no longer
        # silently mis-parsed by a fixed one-character offset.
        digits = h[1:] if h.startswith('#') else h
        return tuple(int(digits[k:k + 2], 16) for k in (0, 2, 4))
# Module-wide palette instance (create instance for 'from utils.plots import colors').
colors = Colors()

# Build the SAM model from the local checkpoint file, wrap it in a predictor,
# then move the predictor's model onto the CUDA device.
sam_model = build_sam(checkpoint="sam_vit_h_4b8939.pth")
predictor = SamPredictor(sam_model)
_ = predictor.model.to(device='cuda')
# image = cv2.imread('/home/hadoop-vacv/yanfeng/data/dancetrack/train/dancetrack0001/img1/00000109.jpg')
# image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
# predictor.set_image(image)
# bbox = np.array([0,0,100,100], dtype=np.int32)
# masks, _, _ = predictor.predict(box=bbox)
# masks
# Sequence directory: frames are read from '<input_path>/img1' and annotations
# from '<input_path>/gt/gt.txt'.
input_path = '/home/hadoop-vacv/yanfeng/data/dancetrack/val/dancetrack0004'
img_dir = os.path.join(input_path, 'img1')

# Sorted full paths of every non-directory entry in the frame folder.
targets = sorted(
    os.path.join(img_dir, f)
    for f in os.listdir(img_dir)
    if not os.path.isdir(os.path.join(img_dir, f))
)

# frame number -> list of [x1, y1, x2, y2, track_id]
bboxes_all = defaultdict(list)
gt_path = os.path.join(input_path, 'gt', 'gt.txt')
# gt_path = os.path.join('/home/hadoop-vacv/yanfeng/project/MOTRv2/MOTRv3/exps/motrv2ch_uni5cost6g/run2/tracker0', 'dancetrack0004.txt')

# Labels that are skipped as non-person entries.
non_person_labels = {3, 4, 5, 6, 9, 10, 11}

# Each row is comma-separated: frame, id, x, y, w, h, mark, label[, ...].
# NOTE(review): this looks like the MOTChallenge gt layout -- confirm.
with open(gt_path) as gt_file:  # context manager so the handle is always closed
    for line in gt_file:
        line = line.strip()
        if not line:
            # Tolerate blank / trailing lines instead of failing the unpack below.
            continue
        t, i, *xywh, mark, label = line.split(',')[:8]
        t, i, mark, label = map(int, (t, i, mark, label))
        if mark == 0:
            continue
        if label in non_person_labels:  # Non-person
            continue
        # Coordinates may be written as floats; truncate to ints and convert
        # (x, y, w, h) to corner form (x1, y1, x2, y2).
        x, y, w, h = map(int, map(float, xywh))
        bboxes_all[t].append([x, y, x + w, y + h, i])
# Render every frame with its SAM masks and ground-truth boxes into 'tmp.avi'.
fps = 25
# NOTE(review): frames are written without resizing; this assumes every frame
# is 1920x1080 to match the writer -- confirm for other sequences.
size = (1920, 1080)
videowriter = cv2.VideoWriter('tmp.avi', cv2.VideoWriter_fourcc('M','J','P','G'), fps, size)
try:
    for t in targets:
        print(f"Processing '{t}'...")
        image = cv2.imread(t)
        if image is None:
            print(f"Could not load '{t}' as an image, skipping...")
            continue
        # cv2 loads BGR; the predictor is fed RGB.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # The frame number is the file name without its extension, whatever
        # the extension's length.
        frame_id = int(os.path.splitext(os.path.basename(t))[0])
        # .get() avoids inserting empty entries into the defaultdict.
        bboxes = np.array(bboxes_all.get(frame_id, []))

        masks_all = []
        if len(bboxes):
            # Only set the image on the predictor when there is a box to prompt with.
            predictor.set_image(image)
            for bbox in bboxes:
                masks, iou_predictions, _ = predictor.predict(box=bbox[:4])
                # Keep the candidate mask with the HIGHEST predicted IoU
                # (argsort()[0] would select the lowest-scoring one).
                best = int(iou_predictions.argmax())
                color = np.array(colors(bbox[4]))[:, None, None]
                # (1, H, W) mask broadcast against (3, 1, 1) colour -> (3, H, W).
                masks_all.append(masks[best:best + 1].astype(np.int32) * color)
            predictor.reset_image()

        if masks_all:
            masks_sum = masks_all[0].copy()
            for m in masks_all[1:]:
                masks_sum += m
        else:
            # Use the current frame for the shape; `img` is not assigned yet at
            # this point (and would be a stale frame on later iterations).
            masks_sum = np.zeros_like(image).transpose(2, 0, 1)

        # Back to BGR for OpenCV drawing / writing.
        # NOTE(review): the mask colours come from colors() in RGB order while
        # the frame is BGR here -- confirm whether that is intended.
        img = image[..., ::-1]
        # Half-intensity frame plus the scaled, wrapped mask values (< 128), so
        # the sum stays within uint8 range.
        img = (img * 0.5 + (masks_sum.transpose(1, 2, 0) * 30) % 128).astype(np.uint8)
        for bbox in bboxes:
            cv2.rectangle(img, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (0, 0, 255), thickness=3)
        # cv2.imwrite('tmp.jpg', img)
        videowriter.write(img)
finally:
    # Release the writer even if a frame fails part-way through.
    videowriter.release()