Update app.py
app.py
CHANGED
@@ -2,50 +2,74 @@ import gradio as gr
 import torch
 import cv2
 import numpy as np
-from transformers import SamModel, SamProcessor, BlipProcessor, BlipForConditionalGeneration
 from PIL import Image
+import matplotlib.pyplot as plt
+import io
+from ultralytics import FastSAM
+from ultralytics.models.fastsam import FastSAMPrompt

 # Set up device
 device = "cuda" if torch.cuda.is_available() else "cpu"

-# Load
-sam_processor = SamProcessor.from_pretrained("facebook/sam-vit-base")
+# Load FastSAM model
+model = FastSAM("FastSAM-s.pt")  # or FastSAM-x.pt

-def
+def fig2img(fig):
+    buf = io.BytesIO()
+    fig.savefig(buf)
+    buf.seek(0)
+    img = Image.open(buf)
+    return img
+
+def plot(annotations, prompt_process, mask_random_color=True, better_quality=True, retina=True, with_contours=True):
+    for ann in annotations:
+        image = ann.orig_img[..., ::-1]  # BGR to RGB
+        original_h, original_w = ann.orig_shape
+        fig = plt.figure(figsize=(original_w / 100, original_h / 100))
+        plt.subplots_adjust(top=1, bottom=0, right=1, left=0, hspace=0, wspace=0)
+        plt.margins(0, 0)
+        plt.gca().xaxis.set_major_locator(plt.NullLocator())
+        plt.gca().yaxis.set_major_locator(plt.NullLocator())
+        plt.imshow(image)
+
+        if ann.masks is not None:
+            masks = ann.masks.data
+            if better_quality:
+                if isinstance(masks[0], torch.Tensor):
+                    masks = np.array(masks.cpu())
+                for i, mask in enumerate(masks):
+                    mask = cv2.morphologyEx(mask.astype(np.uint8), cv2.MORPH_CLOSE, np.ones((3, 3), np.uint8))
+                    masks[i] = cv2.morphologyEx(mask.astype(np.uint8), cv2.MORPH_OPEN, np.ones((8, 8), np.uint8))
+
+            prompt_process.fast_show_mask(
+                masks,
+                plt.gca(),
+                random_color=mask_random_color,
+                bbox=None,
+                points=None,
+                pointlabel=None,
+                retinamask=retina,
+                target_height=original_h,
+                target_width=original_w,
+            )
+
+            if with_contours:
+                contour_all = []
+                temp = np.zeros((original_h, original_w, 1))
+                for i, mask in enumerate(masks):
+                    mask = mask.astype(np.uint8)
+                    if not retina:
+                        mask = cv2.resize(mask, (original_w, original_h), interpolation=cv2.INTER_NEAREST)
+                    contours, _ = cv2.findContours(mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
+                    contour_all.extend(iter(contours))
+                cv2.drawContours(temp, contour_all, -1, (255, 255, 255), 2)
+                color = np.array([0 / 255, 0 / 255, 1.0, 0.8])
+                contour_mask = temp / 255 * color.reshape(1, 1, -1)
+                plt.imshow(contour_mask)
+
+        plt.axis("off")
+        plt.close()
+        return fig2img(fig)

 def segment_image(input_image, object_name):
     try:
@@ -53,55 +77,21 @@ def segment_image(input_image, object_name):
             return None, "Please upload an image before submitting."

         input_image = Image.fromarray(input_image).convert("RGB")
-        original_size = input_image.size
-        if not original_size or 0 in original_size:
-            return None, "Invalid image size. Please upload a different image."
-
-        # Generate detailed image caption
-        blip_inputs = blip_processor(input_image, return_tensors="pt").to(device)
-        caption = blip_model.generate(**blip_inputs, max_length=50)
-        caption_text = blip_processor.decode(caption[0], skip_special_tokens=True)
-
-        # Process the image with SAM
-        sam_inputs = sam_processor(input_image, return_tensors="pt").to(device)
-
-        # Generate masks
-        with torch.no_grad():
-            sam_outputs = sam_model(**sam_inputs)
-
-        # Post-process masks
-        masks = sam_processor.image_processor.post_process_masks(
-            sam_outputs.pred_masks.cpu(),
-            sam_inputs["original_sizes"].cpu(),
-            sam_inputs["reshaped_input_sizes"].cpu()
-        )
-
-        # Find the mask that best matches the specified object
-        best_mask = None
-        best_score = -1
-        image_area = original_size[0] * original_size[1]
-
-            if is_cat_like(mask_binary, image_area) and caption_contains_cat:
-                mask_area = mask_binary.sum()
-                if mask_area > best_score:
-                    best_mask = mask_binary
-                    best_score = mask_area
-
-        #
-        result_image =
-        mask_rgb = np.zeros_like(result_image)
-        mask_rgb[combined_mask] = [255, 0, 0]  # Red color for the mask
-        result_image = cv2.addWeighted(result_image, 1, mask_rgb, 0.5, 0)
+
+        # Run FastSAM model
+        everything_results = model(input_image, retina_masks=True, imgsz=1024, conf=0.4, iou=0.9)
+
+        # Prepare a Prompt Process object
+        prompt_process = FastSAMPrompt(input_image, everything_results, device=device)
+
+        # Use text prompt to segment the specified object
+        results = prompt_process.text_prompt(text=object_name)
+
+        if not results:
+            return input_image, f"Could not find '{object_name}' in the image."
+
+        # Plot the results
+        result_image = plot(annotations=results, prompt_process=prompt_process)

         return result_image, f"Segmented '{object_name}' in the image."
@@ -116,11 +106,11 @@ iface = gr.Interface(
         gr.Textbox(label="Specify object to segment (e.g., dog, cat, grass)")
     ],
     outputs=[
-        gr.Image(type="
+        gr.Image(type="pil", label="Segmented Image"),
         gr.Textbox(label="Status")
     ],
-    title="
-    description="Upload an image and specify an object to segment."
+    title="FastSAM Segmentation with Object Specification",
+    description="Upload an image and specify an object to segment using FastSAM."
 )

 # Launch the interface
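The commit replaces the transformers SAM + BLIP pipeline with Ultralytics FastSAM driven by a text prompt. For reference, a minimal standalone sketch of that flow outside Gradio, reusing the checkpoint name, thresholds, and FastSAMPrompt calls shown in the diff; the input file name photo.jpg is only a placeholder, and the FastSAMPrompt import assumes an ultralytics version that still ships it.

import torch
from PIL import Image
from ultralytics import FastSAM
from ultralytics.models.fastsam import FastSAMPrompt

device = "cuda" if torch.cuda.is_available() else "cpu"
model = FastSAM("FastSAM-s.pt")  # same checkpoint the Space loads

# Placeholder input image, not part of the Space
image = Image.open("photo.jpg").convert("RGB")

# Segment everything first, then keep only the regions matching the text prompt
everything_results = model(image, retina_masks=True, imgsz=1024, conf=0.4, iou=0.9)
prompt_process = FastSAMPrompt(image, everything_results, device=device)
annotations = prompt_process.text_prompt(text="cat")

print("match found" if annotations else "no match for the prompt")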
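The iface = gr.Interface( block is only partially visible in the diff. A hedged reconstruction of how the updated interface would typically be assembled and launched: the textbox input, both outputs, the title, and the description come from the diff, while fn=segment_image, the gr.Image() input component, and iface.launch() are assumptions consistent with the rest of the file.

import gradio as gr

iface = gr.Interface(
    fn=segment_image,  # assumed: the handler defined in app.py
    inputs=[
        gr.Image(label="Input Image"),  # assumed: default numpy output, matching Image.fromarray(input_image)
        gr.Textbox(label="Specify object to segment (e.g., dog, cat, grass)")
    ],
    outputs=[
        gr.Image(type="pil", label="Segmented Image"),
        gr.Textbox(label="Status")
    ],
    title="FastSAM Segmentation with Object Specification",
    description="Upload an image and specify an object to segment using FastSAM."
)

# Launch the interface
iface.launch()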