Spaces:
Sleeping
Sleeping
import numpy as np | |
import cv2 | |
import matplotlib.pyplot as plt | |
from regionspot.modeling.regionspot import build_regionspot_model | |
from regionspot import RegionSpot_Predictor | |
# Overlay a segmentation mask on a matplotlib axes
def show_mask(mask, ax, random_color=False):
    """Draw *mask* on *ax* as a semi-transparent RGBA overlay.

    The last two dimensions of ``mask`` are taken as (height, width); the
    mask is reshaped to ``(height, width, 1)`` and multiplied by a
    4-component colour, so zero/False pixels end up fully transparent.

    Args:
        mask: Array whose trailing two dimensions are height and width and
            whose total size is ``height * width``.
        ax: Object exposing ``imshow`` (e.g. a matplotlib ``Axes``).
        random_color: When true, the RGB part is drawn from
            ``np.random.random``; otherwise a fixed blue is used. Alpha is
            0.6 in both cases.
    """
    alpha = np.array([0.6])
    if not random_color:
        rgba = np.array([30/255, 144/255, 255/255, 0.6])
    else:
        rgba = np.concatenate([np.random.random(3), alpha], axis=0)

    height, width = mask.shape[-2:]
    # Broadcast (H, W, 1) against (1, 1, 4) to obtain an (H, W, 4) image.
    overlay = mask.reshape(height, width, 1) * rgba[None, None, :]
    ax.imshow(overlay)
# Plot labelled prompt points on a matplotlib axes
def show_points(coords, labels, ax, marker_size=375):
    """Scatter prompt points on *ax*, coloured by label.

    Rows of ``coords`` whose label equals 1 are drawn as green stars, rows
    whose label equals 0 as red stars; both use a white edge.

    Args:
        coords: 2-D array indexed as ``coords[:, 0]`` (x) and
            ``coords[:, 1]`` (y).
        labels: Array compared element-wise against 1 and 0 to build the
            boolean selection over ``coords``.
        ax: Object exposing ``scatter`` (e.g. a matplotlib ``Axes``).
        marker_size: Value forwarded as the scatter ``s`` argument.
    """
    star_style = dict(marker='*', s=marker_size, edgecolor='white', linewidth=1.25)
    # Select both groups up front, then draw label-1 points before label-0 ones.
    groups = [
        (coords[labels == flag], colour)
        for flag, colour in ((1, 'green'), (0, 'red'))
    ]
    for pts, colour in groups:
        ax.scatter(pts[:, 0], pts[:, 1], color=colour, **star_style)
# Outline a bounding box on a matplotlib axes
def show_box(box, ax):
    """Add a green, unfilled rectangle for *box* to *ax*.

    Args:
        box: Indexable with at least four entries, read as
            ``(x0, y0, x1, y1)``: the rectangle is anchored at
            ``(x0, y0)`` with width ``x1 - x0`` and height ``y1 - y0``.
        ax: Object exposing ``add_patch`` (e.g. a matplotlib ``Axes``).
    """
    x_min, y_min = box[0], box[1]
    width = box[2] - x_min
    height = box[3] - y_min
    outline = plt.Rectangle(
        (x_min, y_min),
        width,
        height,
        edgecolor='green',
        facecolor='none',
        lw=2,
    )
    ax.add_patch(outline)
# Read image and set up model
image_path = 'assets/image.jpg'
image = cv2.imread(image_path)
# cv2.imread reports failure by returning None rather than raising, so check
# here to fail with a clear message instead of an opaque cvtColor error.
if image is None:
    raise FileNotFoundError("Could not read image: {}".format(image_path))
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # Convert image from BGR to RGB format

# Multiple boxes, one row per prompt, each read by show_box as [x0, y0, x1, y1]
box_prompt = np.array([[64, 926, 804, 1978], [1237, 490, 1615, 771.], [1510, 64, 1670, 167]])

# NOTE: placeholder path; point this at the actual checkpoint before running
ckpt_path = '/path/to/model_weights.pth'
clip_type = 'CLIP_400M_Large_336'
clip_input_size = 336
# Class names; the predicted class index is used to look up an entry in this list
custom_vocabulary = ["Smoothie bowl", "Banana", "Strawberry", "Chia seeds", "Shredded coconut", "Wooden spoons", "Grapefruit", "Goji berries", "Flaxseeds seeds"]

# Build and initialize the model
model, msg = build_regionspot_model(
    is_training=False,
    image_size=clip_input_size,
    clip_type=clip_type,
    pretrain_ckpt=ckpt_path,
    custom_vocabulary=custom_vocabulary,
)

# Create predictor and set image (the model is moved to the GPU via .cuda())
predictor = RegionSpot_Predictor(model.cuda())
predictor.set_image(image, clip_input_size=clip_input_size)

# Prediction based on box prompt
masks, mask_iou_score, class_score, class_index = predictor.predict(
    point_coords=None,
    point_labels=None,
    box=box_prompt,
    multimask_output=False,
)

# Display image with, for every prompt box, its mask, outline and class name
fig, ax = plt.subplots(figsize=(10, 10))
ax.imshow(image)
for idx, box in enumerate(box_prompt):
    show_mask(masks[idx], ax)
    show_box(box, ax)
    class_name = custom_vocabulary[int(class_index[idx])]
    # Place the label 10 units above the box's (x0, y0) corner
    ax.text(box[0], box[1] - 10, class_name, color='white', fontsize=14, bbox=dict(facecolor='green', edgecolor='green', alpha=0.6))
ax.axis('off')

# Save before showing: plt.show() may block until the window is closed, and
# writing the file first keeps the output independent of what the backend
# does to the figure afterwards.
fig.savefig('result.png')
plt.show()
plt.close(fig)