"""Run a YOLO detection model on one image and print the parsed predictions."""

from ultralytics import YOLO
import supervision as sv


def _value_for_detection(value, index):
    """Return the string form of an extra data field for one detection.

    Args:
        value: A value taken from ``detections.data``.
        index: Position of the detection being serialised.

    Returns:
        ``str(value[index])`` when ``value`` holds one entry per detection
        (an object exposing ``ndim`` with ``ndim != 0``, or a list/tuple);
        otherwise ``str(value)`` for the value as a whole.
    """
    ndim = getattr(value, "ndim", None)
    if ndim is not None:
        # Objects with ``ndim == 0`` are scalars and cannot be indexed.
        return str(value[index]) if ndim != 0 else str(value)
    if isinstance(value, (list, tuple)):
        # Plain sequences have no ``ndim``; index them per detection
        # instead of failing on the missing attribute.
        return str(value[index])
    return str(value)


def parse_detection(detections):
    """Convert a detections object into a list of plain dictionaries.

    Args:
        detections: Object exposing ``xyxy`` (one ``[x_min, y_min, x_max,
            y_max]`` box per detection), the optional per-detection
            sequences ``class_id``, ``confidence`` and ``tracker_id``
            (each may be ``None``), and optionally a ``data`` mapping of
            extra fields.

    Returns:
        One dict per detection with the keys ``x``, ``y``, ``width``,
        ``height``, ``class_id``, ``confidence`` and ``tracker_id``, plus
        one stringified entry per key of ``detections.data``. The
        ``class_name`` data key is emitted as ``class``. Optional columns
        that are ``None`` are emitted as ``""``.
    """
    class_ids = detections.class_id
    confidences = detections.confidence
    tracker_ids = detections.tracker_id
    # ``data`` may be missing or None; treat both as "no extra fields".
    extra_fields = getattr(detections, "data", None) or {}

    parsed_rows = []
    for i, box in enumerate(detections.xyxy):
        x_min, y_min, x_max, y_max = (float(box[k]) for k in range(4))

        row = {
            # ``x``/``y`` are the top-left corner of the box. The previous
            # version had the two swapped (x took y_min, y took x_min).
            # NOTE(review): if downstream consumers expect box-centre
            # coordinates instead, adjust here - confirm with the consumer.
            "x": int(x_min),
            "y": int(y_min),
            # int() truncates the float extent toward zero.
            "width": int(x_max - x_min),
            "height": int(y_max - y_min),
            "class_id": "" if class_ids is None else int(class_ids[i]),
            "confidence": "" if confidences is None else float(confidences[i]),
            "tracker_id": "" if tracker_ids is None else int(tracker_ids[i]),
        }

        for key, value in extra_fields.items():
            out_key = "class" if key == "class_name" else key
            row[out_key] = _value_for_detection(value, i)

        parsed_rows.append(row)

    return parsed_rows


model = YOLO("models/best_v2.pt", task="detect")
# The model is called with a one-element list; keep the single result.
results = model(["data/IMG_0050.jpg"])[0]
# orig_shape is indexed as (height, width).
width, height = results.orig_shape[1], results.orig_shape[0]
print(results.orig_shape)
print(results.speed)

detections = sv.Detections.from_ultralytics(results)
output = parse_detection(detections)
parse_result = {'predictions': output, 'image': {'width': width, 'height': height}}
print(parse_result)