Spaces:
Sleeping
Sleeping
File size: 3,651 Bytes
5ceacf4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 |
import numpy as np
import pycocotools.mask as mask_util
from detectron2.structures import BoxMode
# MotionNet: based on instances_to_coco_json and the related code in densepose
def prediction_to_json(instances, img_id: str):
    """Convert the MotionNet predictions of one image into JSON-ready dicts.

    Args:
        instances (Instances): the output of the model. It must provide
            ``pred_boxes``, ``scores``, ``pred_classes``, ``pred_masks`` and
            the motion fields ``mtype``, ``morigin``, ``maxis``, ``mstate``
            and ``mstatemax``. When ``instances.has("pdim")`` is true, the
            fields ``pdim``, ``ptrans`` and ``prot`` are exported too;
            otherwise ``mextrinsic`` is exported when it is present.
        img_id (str): the image id in COCO

    Returns:
        list[dict]: one dict per instance with the keys ``image_id``,
        ``category_id``, ``bbox`` (converted from XYXY_ABS to XYWH_ABS),
        ``score``, ``segmentation`` (RLE with ``counts`` decoded to ``str``)
        followed by the motion fields listed above.
    """
    # NOTE(review): ``.numpy()`` presumably requires the predictions to be on
    # the CPU already -- confirm that the caller moves them before this call.
    boxes = instances.pred_boxes.tensor.numpy()
    boxes = BoxMode.convert(boxes, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS).tolist()
    scores = instances.scores.tolist()
    classes = instances.pred_classes.tolist()

    # MotionNet has masks in the annotation.
    # Use RLE to encode the masks, because they are too large and take memory
    # since this evaluator stores outputs of the entire dataset.
    rles = [
        mask_util.encode(np.array(mask[:, :, None], order="F", dtype="uint8"))[0]
        for mask in instances.pred_masks
    ]
    for rle in rles:
        # "counts" is an array encoded by mask_util as a byte-stream. Python3's
        # json writer which always produces strings cannot serialize a
        # bytestream unless you decode it. Thankfully, utf-8 works out (which
        # is also what the pycocotools/_mask.pyx does).
        rle["counts"] = rle["counts"].decode("utf-8")

    # Pick the optional field group once instead of once per instance. The
    # "pdim" group takes priority over "mextrinsic", and the position of each
    # optional key in the output dict is kept stable.
    if instances.has("pdim"):
        leading_fields = ("pdim", "ptrans", "prot")
        inner_fields = ()
    elif instances.has("mextrinsic"):
        leading_fields = ()
        inner_fields = ("mextrinsic",)
    else:
        leading_fields = ()
        inner_fields = ()
    field_names = (
        leading_fields
        + ("mtype", "morigin", "maxis")
        + inner_fields
        + ("mstate", "mstatemax")
    )

    # Every exported field is read here, up front, so a missing field fails at
    # the attribute access rather than as an unbound local inside the loop.
    columns = [getattr(instances, name).tolist() for name in field_names]

    # Assumes every field holds one entry per instance, so the lock-step
    # iteration below visits each prediction exactly once.
    results = []
    for category_id, bbox, score, rle, values in zip(
        classes, boxes, scores, rles, zip(*columns)
    ):
        result = {
            "image_id": img_id,
            "category_id": category_id,
            "bbox": bbox,
            "score": score,
            "segmentation": rle,
        }
        result.update(zip(field_names, values))
        results.append(result)
    return results
|