Upload 50 files
- README.md +1 -1
- app.py +131 -0
- gradio_image_prompter-0.1.0-py3-none-any.whl +0 -0
- models/__init__.py +15 -0
- models/automatic_mask_generator.py +372 -0
- models/build_sam.py +107 -0
- models/grasp_mods.py +318 -0
- models/modeling/__init__.py +11 -0
- models/modeling/common.py +43 -0
- models/modeling/image_encoder.py +395 -0
- models/modeling/mask_decoder.py +176 -0
- models/modeling/prompt_encoder.py +214 -0
- models/modeling/sam.py +174 -0
- models/modeling/transformer.py +240 -0
- models/predictor.py +269 -0
- models/utils/__init__.py +5 -0
- models/utils/amg.py +346 -0
- models/utils/onnx.py +144 -0
- models/utils/transforms.py +102 -0
- requirements.txt +5 -0
- src/.gitignore +9 -0
- src/LICENSE +201 -0
- src/README.md +48 -0
- src/backend/gradio_image_prompter/__init__.py +3 -0
- src/backend/gradio_image_prompter/image_prompter.py +133 -0
- src/backend/gradio_image_prompter/image_prompter.pyi +134 -0
- src/backend/gradio_image_prompter/templates/component/__vite-browser-external-2447137e.js +4 -0
- src/backend/gradio_image_prompter/templates/component/index.js +0 -0
- src/backend/gradio_image_prompter/templates/component/style.css +1 -0
- src/backend/gradio_image_prompter/templates/component/wrapper-6f348d45-f837cf34.js +2455 -0
- src/backend/gradio_image_prompter/templates/example/index.js +263 -0
- src/backend/gradio_image_prompter/templates/example/style.css +1 -0
- src/demo/__init__.py +0 -0
- src/demo/app.py +9 -0
- src/frontend/Example.svelte +44 -0
- src/frontend/Index.svelte +167 -0
- src/frontend/package-lock.json +718 -0
- src/frontend/package.json +28 -0
- src/frontend/shared/BoxDrawer.svelte +237 -0
- src/frontend/shared/ClearImage.svelte +48 -0
- src/frontend/shared/Image.svelte +15 -0
- src/frontend/shared/ImagePreview.svelte +88 -0
- src/frontend/shared/ImageUploader.svelte +192 -0
- src/frontend/shared/utils.ts +24 -0
- src/pyproject.toml +43 -0
- structures/__init__.py +0 -0
- structures/bounding_box.py +323 -0
- structures/grasp_box.py +127 -0
- structures/image_list.py +67 -0
- structures/segmentation_mask.py +298 -0
README.md
CHANGED
@@ -1,6 +1,6 @@
 ---
 title: GraspAnything
-emoji:
+emoji: 🤖✊
 colorFrom: gray
 colorTo: purple
 sdk: gradio
app.py
ADDED
@@ -0,0 +1,131 @@
+import copy
+import numpy as np
+import torch
+
+import sys
+sys.path.append("./")
+from models import sam_model_registry
+from models.grasp_mods import modify_forward
+from models.utils.transforms import ResizeLongestSide
+
+from gradio_image_prompter import ImagePrompter
+from structures.grasp_box import GraspCoder
+img_resize = ResizeLongestSide(1024)
+import cv2
+
+import gradio as gr
+
+from models.grasp_mods import add_inference_method
+
+device = "cuda" if torch.cuda.is_available() else "cpu"
+model_type = "vit_b"
+
+mean = np.array([103.53, 116.28, 123.675])[:, np.newaxis, np.newaxis]
+std = np.array([57.375, 57.12, 58.395])[:, np.newaxis, np.newaxis]
+
+sam = sam_model_registry[model_type]()
+sam.to(device=device)
+
+sam.forward = modify_forward(sam)
+sam.infer = add_inference_method(sam)
+
+pretrained_model_path = "E:/epoch_9_step_535390.pth"
+
+if pretrained_model_path != "":
+    sd = torch.load(pretrained_model_path)
+    # strip prefix "module." from keys
+    new_sd = {}
+    for k, v in sd.items():
+        if k.startswith("module."):
+            k = k[7:]
+        new_sd[k] = v
+    sam.load_state_dict(new_sd)
+
+sam.eval()
+
+def predict(input, topk):
+    np_image = input["image"]
+    points = input["points"]
+    orig_size = np_image.shape[:2]
+    # normalize image
+    np_image = np_image.transpose(2, 0, 1)
+
+    image = (np_image - mean) / std
+    image = torch.tensor(image).float().to(device)
+    image = image.unsqueeze(0)
+    t_image = img_resize.apply_image_torch(image)
+    t_orig_size = t_image.shape[-2:]
+    # pad to 1024x1024
+    t_image = torch.nn.functional.pad(t_image, (0, 1024 - t_image.shape[-1], 0, 1024 - t_image.shape[-2]))
+
+    # get box prompt
+    valid_boxes = []
+    for point in points:
+        x1, y1, type1, x2, y2, type2 = point
+        if type1 == 2 and type2 == 3:
+            valid_boxes.append([x1, y1, x2, y2])
+    if len(valid_boxes) == 0:
+        return input["image"]  # no box drawn: return the original HWC image
+    t_boxes = np.array(valid_boxes)
+    t_boxes = img_resize.apply_boxes(t_boxes, orig_size)
+    box_torch = torch.as_tensor(t_boxes, dtype=torch.float, device=device)
+    batched_inputs = [{"image": t_image[0], "boxes": box_torch}]
+    with torch.no_grad():
+        outputs = sam.infer(batched_inputs, multimask_output=False)
+    # recover the normalized, padded input image for visualization
+    recovered_img = batched_inputs[0]['image'].cpu().numpy()
+    recovered_img = recovered_img * std + mean
+    recovered_img = recovered_img.transpose(1, 2, 0).clip(0, 255).astype(np.uint8)  # clip before casting to avoid uint8 overflow
+
+    for i in range(len(outputs.pred_masks)):
+        # get predicted mask
+        pred_mask = outputs.pred_masks[i].detach().sigmoid().cpu().numpy() > 0.5
+        pred_mask = pred_mask.transpose(1, 2, 0).repeat(3, axis=2)
+
+        # get predicted grasp
+        pred_logits = outputs.logits[i].detach().cpu().numpy()
+        top_ind = pred_logits[:, 0].argsort()[-topk:][::-1]
+        pred_grasp = outputs.pred_boxes[i].detach().cpu().numpy()[top_ind]
+        coded_grasp = GraspCoder(1024, 1024, None, grasp_annos_reformat=pred_grasp)
+        _ = coded_grasp.decode()
+        decoded_grasp = copy.deepcopy(coded_grasp.grasp_annos)
+
+        # draw mask
+        mask_color = np.array([0, 255, 0])[None, None, :]
+        recovered_img[pred_mask] = recovered_img[pred_mask] * 0.5 + (pred_mask * mask_color)[pred_mask] * 0.5
+
+        # draw grasp
+        recovered_img = np.ascontiguousarray(recovered_img)
+        for grasp in decoded_grasp:
+            grasp = grasp.astype(int)
+            cv2.line(recovered_img, tuple(grasp[0:2]), tuple(grasp[2:4]), (255, 0, 0), 1)
+            cv2.line(recovered_img, tuple(grasp[4:6]), tuple(grasp[6:8]), (255, 0, 0), 1)
+            cv2.line(recovered_img, tuple(grasp[2:4]), tuple(grasp[4:6]), (0, 0, 255), 2)
+            cv2.line(recovered_img, tuple(grasp[6:8]), tuple(grasp[0:2]), (0, 0, 255), 2)
+
+    recovered_img = recovered_img[:t_orig_size[0], :t_orig_size[1]]
+    # resize back to the original size; cv2.resize expects (width, height)
+    recovered_img = cv2.resize(recovered_img, (orig_size[1], orig_size[0]))
+    return recovered_img
+
+if __name__ == "__main__":
+    app = gr.Blocks(title="GraspAnything")
+    with app:
+        gr.Markdown("""
+        # GraspAnything <br>
+        Upload an image and draw a box around the object you want to grasp. Set top k to the number of grasps you want to predict for each object.
+        """)
+        with gr.Column():
+            prompter = ImagePrompter(show_label=False)
+            top_k = gr.Slider(minimum=1, maximum=20, step=1, value=3, label="Top K Grasps")
+        with gr.Column():
+            image_output = gr.Image()
+        btn = gr.Button("Generate!")
+        btn.click(predict,
+                  inputs=[prompter, top_k],
+                  outputs=[image_output])
+    app.launch()
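The predict() function above keys off the point encoding used by gradio_image_prompter: each box drawn in the UI arrives as a six-tuple (x1, y1, 2, x2, y2, 3), where type codes 2 and 3 mark the box's top-left and bottom-right corners. A minimal sketch of exercising predict() without the UI, assuming a valid checkpoint path above; the file names here are hypothetical:

import cv2
test_image = cv2.cvtColor(cv2.imread("test.jpg"), cv2.COLOR_BGR2RGB)  # hypothetical test image
prompt = {
    "image": test_image,
    "points": [[50, 60, 2, 200, 220, 3]],  # one box: corners (50, 60) and (200, 220)
}
vis = predict(prompt, topk=3)  # image with masks and top-3 grasps drawn per box
cv2.imwrite("out.jpg", cv2.cvtColor(vis, cv2.COLOR_RGB2BGR))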
gradio_image_prompter-0.1.0-py3-none-any.whl
ADDED
Binary file (96.2 kB).
models/__init__.py
ADDED
@@ -0,0 +1,15 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+
+# This source code is licensed under the license found in the
+# LICENSE file in the root directory of this source tree.
+
+from .build_sam import (
+    build_sam,
+    build_sam_vit_h,
+    build_sam_vit_l,
+    build_sam_vit_b,
+    sam_model_registry,
+)
+from .predictor import SamPredictor
+from .automatic_mask_generator import SamAutomaticMaskGenerator
models/automatic_mask_generator.py
ADDED
@@ -0,0 +1,372 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+
+# This source code is licensed under the license found in the
+# LICENSE file in the root directory of this source tree.
+
+import numpy as np
+import torch
+from torchvision.ops.boxes import batched_nms, box_area  # type: ignore
+
+from typing import Any, Dict, List, Optional, Tuple
+
+from .modeling import Sam
+from .predictor import SamPredictor
+from .utils.amg import (
+    MaskData,
+    area_from_rle,
+    batch_iterator,
+    batched_mask_to_box,
+    box_xyxy_to_xywh,
+    build_all_layer_point_grids,
+    calculate_stability_score,
+    coco_encode_rle,
+    generate_crop_boxes,
+    is_box_near_crop_edge,
+    mask_to_rle_pytorch,
+    remove_small_regions,
+    rle_to_mask,
+    uncrop_boxes_xyxy,
+    uncrop_masks,
+    uncrop_points,
+)
+
+
+class SamAutomaticMaskGenerator:
+    def __init__(
+        self,
+        model: Sam,
+        points_per_side: Optional[int] = 32,
+        points_per_batch: int = 64,
+        pred_iou_thresh: float = 0.88,
+        stability_score_thresh: float = 0.95,
+        stability_score_offset: float = 1.0,
+        box_nms_thresh: float = 0.7,
+        crop_n_layers: int = 0,
+        crop_nms_thresh: float = 0.7,
+        crop_overlap_ratio: float = 512 / 1500,
+        crop_n_points_downscale_factor: int = 1,
+        point_grids: Optional[List[np.ndarray]] = None,
+        min_mask_region_area: int = 0,
+        output_mode: str = "binary_mask",
+    ) -> None:
+        """
+        Using a SAM model, generates masks for the entire image.
+        Generates a grid of point prompts over the image, then filters
+        low quality and duplicate masks. The default settings are chosen
+        for SAM with a ViT-H backbone.
+
+        Arguments:
+          model (Sam): The SAM model to use for mask prediction.
+          points_per_side (int or None): The number of points to be sampled
+            along one side of the image. The total number of points is
+            points_per_side**2. If None, 'point_grids' must provide explicit
+            point sampling.
+          points_per_batch (int): Sets the number of points run simultaneously
+            by the model. Higher numbers may be faster but use more GPU memory.
+          pred_iou_thresh (float): A filtering threshold in [0,1], using the
+            model's predicted mask quality.
+          stability_score_thresh (float): A filtering threshold in [0,1], using
+            the stability of the mask under changes to the cutoff used to binarize
+            the model's mask predictions.
+          stability_score_offset (float): The amount to shift the cutoff when
+            calculating the stability score.
+          box_nms_thresh (float): The box IoU cutoff used by non-maximal
+            suppression to filter duplicate masks.
+          crop_n_layers (int): If >0, mask prediction will be run again on
+            crops of the image. Sets the number of layers to run, where each
+            layer has 2**i_layer number of image crops.
+          crop_nms_thresh (float): The box IoU cutoff used by non-maximal
+            suppression to filter duplicate masks between different crops.
+          crop_overlap_ratio (float): Sets the degree to which crops overlap.
+            In the first crop layer, crops will overlap by this fraction of
+            the image length. Later layers with more crops scale down this overlap.
+          crop_n_points_downscale_factor (int): The number of points-per-side
+            sampled in layer n is scaled down by crop_n_points_downscale_factor**n.
+          point_grids (list(np.ndarray) or None): A list over explicit grids
+            of points used for sampling, normalized to [0,1]. The nth grid in the
+            list is used in the nth crop layer. Exclusive with points_per_side.
+          min_mask_region_area (int): If >0, postprocessing will be applied
+            to remove disconnected regions and holes in masks with area smaller
+            than min_mask_region_area. Requires opencv.
+          output_mode (str): The form masks are returned in. Can be 'binary_mask',
+            'uncompressed_rle', or 'coco_rle'. 'coco_rle' requires pycocotools.
+            For large resolutions, 'binary_mask' may consume large amounts of
+            memory.
+        """
+
+        assert (points_per_side is None) != (
+            point_grids is None
+        ), "Exactly one of points_per_side or point_grid must be provided."
+        if points_per_side is not None:
+            self.point_grids = build_all_layer_point_grids(
+                points_per_side,
+                crop_n_layers,
+                crop_n_points_downscale_factor,
+            )
+        elif point_grids is not None:
+            self.point_grids = point_grids
+        else:
+            raise ValueError("Can't have both points_per_side and point_grid be None.")
+
+        assert output_mode in [
+            "binary_mask",
+            "uncompressed_rle",
+            "coco_rle",
+        ], f"Unknown output_mode {output_mode}."
+        if output_mode == "coco_rle":
+            from pycocotools import mask as mask_utils  # type: ignore # noqa: F401
+
+        if min_mask_region_area > 0:
+            import cv2  # type: ignore # noqa: F401
+
+        self.predictor = SamPredictor(model)
+        self.points_per_batch = points_per_batch
+        self.pred_iou_thresh = pred_iou_thresh
+        self.stability_score_thresh = stability_score_thresh
+        self.stability_score_offset = stability_score_offset
+        self.box_nms_thresh = box_nms_thresh
+        self.crop_n_layers = crop_n_layers
+        self.crop_nms_thresh = crop_nms_thresh
+        self.crop_overlap_ratio = crop_overlap_ratio
+        self.crop_n_points_downscale_factor = crop_n_points_downscale_factor
+        self.min_mask_region_area = min_mask_region_area
+        self.output_mode = output_mode
+
+    @torch.no_grad()
+    def generate(self, image: np.ndarray) -> List[Dict[str, Any]]:
+        """
+        Generates masks for the given image.
+
+        Arguments:
+          image (np.ndarray): The image to generate masks for, in HWC uint8 format.
+
+        Returns:
+          list(dict(str, any)): A list over records for masks. Each record is
+            a dict containing the following keys:
+              segmentation (dict(str, any) or np.ndarray): The mask. If
+                output_mode='binary_mask', is an array of shape HW. Otherwise,
+                is a dictionary containing the RLE.
+              bbox (list(float)): The box around the mask, in XYWH format.
+              area (int): The area in pixels of the mask.
+              predicted_iou (float): The model's own prediction of the mask's
+                quality. This is filtered by the pred_iou_thresh parameter.
+              point_coords (list(list(float))): The point coordinates input
+                to the model to generate this mask.
+              stability_score (float): A measure of the mask's quality. This
+                is filtered on using the stability_score_thresh parameter.
+              crop_box (list(float)): The crop of the image used to generate
+                the mask, given in XYWH format.
+        """
+
+        # Generate masks
+        mask_data = self._generate_masks(image)
+
+        # Filter small disconnected regions and holes in masks
+        if self.min_mask_region_area > 0:
+            mask_data = self.postprocess_small_regions(
+                mask_data,
+                self.min_mask_region_area,
+                max(self.box_nms_thresh, self.crop_nms_thresh),
+            )
+
+        # Encode masks
+        if self.output_mode == "coco_rle":
+            mask_data["segmentations"] = [coco_encode_rle(rle) for rle in mask_data["rles"]]
+        elif self.output_mode == "binary_mask":
+            mask_data["segmentations"] = [rle_to_mask(rle) for rle in mask_data["rles"]]
+        else:
+            mask_data["segmentations"] = mask_data["rles"]
+
+        # Write mask records
+        curr_anns = []
+        for idx in range(len(mask_data["segmentations"])):
+            ann = {
+                "segmentation": mask_data["segmentations"][idx],
+                "area": area_from_rle(mask_data["rles"][idx]),
+                "bbox": box_xyxy_to_xywh(mask_data["boxes"][idx]).tolist(),
+                "predicted_iou": mask_data["iou_preds"][idx].item(),
+                "point_coords": [mask_data["points"][idx].tolist()],
+                "stability_score": mask_data["stability_score"][idx].item(),
+                "crop_box": box_xyxy_to_xywh(mask_data["crop_boxes"][idx]).tolist(),
+            }
+            curr_anns.append(ann)
+
+        return curr_anns
+
+    def _generate_masks(self, image: np.ndarray) -> MaskData:
+        orig_size = image.shape[:2]
+        crop_boxes, layer_idxs = generate_crop_boxes(
+            orig_size, self.crop_n_layers, self.crop_overlap_ratio
+        )
+
+        # Iterate over image crops
+        data = MaskData()
+        for crop_box, layer_idx in zip(crop_boxes, layer_idxs):
+            crop_data = self._process_crop(image, crop_box, layer_idx, orig_size)
+            data.cat(crop_data)
+
+        # Remove duplicate masks between crops
+        if len(crop_boxes) > 1:
+            # Prefer masks from smaller crops
+            scores = 1 / box_area(data["crop_boxes"])
+            scores = scores.to(data["boxes"].device)
+            keep_by_nms = batched_nms(
+                data["boxes"].float(),
+                scores,
+                torch.zeros_like(data["boxes"][:, 0]),  # categories
+                iou_threshold=self.crop_nms_thresh,
+            )
+            data.filter(keep_by_nms)
+
+        data.to_numpy()
+        return data
+
+    def _process_crop(
+        self,
+        image: np.ndarray,
+        crop_box: List[int],
+        crop_layer_idx: int,
+        orig_size: Tuple[int, ...],
+    ) -> MaskData:
+        # Crop the image and calculate embeddings
+        x0, y0, x1, y1 = crop_box
+        cropped_im = image[y0:y1, x0:x1, :]
+        cropped_im_size = cropped_im.shape[:2]
+        self.predictor.set_image(cropped_im)
+
+        # Get points for this crop
+        points_scale = np.array(cropped_im_size)[None, ::-1]
+        points_for_image = self.point_grids[crop_layer_idx] * points_scale
+
+        # Generate masks for this crop in batches
+        data = MaskData()
+        for (points,) in batch_iterator(self.points_per_batch, points_for_image):
+            batch_data = self._process_batch(points, cropped_im_size, crop_box, orig_size)
+            data.cat(batch_data)
+            del batch_data
+        self.predictor.reset_image()
+
+        # Remove duplicates within this crop.
+        keep_by_nms = batched_nms(
+            data["boxes"].float(),
+            data["iou_preds"],
+            torch.zeros_like(data["boxes"][:, 0]),  # categories
+            iou_threshold=self.box_nms_thresh,
+        )
+        data.filter(keep_by_nms)
+
+        # Return to the original image frame
+        data["boxes"] = uncrop_boxes_xyxy(data["boxes"], crop_box)
+        data["points"] = uncrop_points(data["points"], crop_box)
+        data["crop_boxes"] = torch.tensor([crop_box for _ in range(len(data["rles"]))])
+
+        return data
+
+    def _process_batch(
+        self,
+        points: np.ndarray,
+        im_size: Tuple[int, ...],
+        crop_box: List[int],
+        orig_size: Tuple[int, ...],
+    ) -> MaskData:
+        orig_h, orig_w = orig_size
+
+        # Run model on this batch
+        transformed_points = self.predictor.transform.apply_coords(points, im_size)
+        in_points = torch.as_tensor(transformed_points, device=self.predictor.device)
+        in_labels = torch.ones(in_points.shape[0], dtype=torch.int, device=in_points.device)
+        masks, iou_preds, _ = self.predictor.predict_torch(
+            in_points[:, None, :],
+            in_labels[:, None],
+            multimask_output=True,
+            return_logits=True,
+        )
+
+        # Serialize predictions and store in MaskData
+        data = MaskData(
+            masks=masks.flatten(0, 1),
+            iou_preds=iou_preds.flatten(0, 1),
+            points=torch.as_tensor(points.repeat(masks.shape[1], axis=0)),
+        )
+        del masks
+
+        # Filter by predicted IoU
+        if self.pred_iou_thresh > 0.0:
+            keep_mask = data["iou_preds"] > self.pred_iou_thresh
+            data.filter(keep_mask)
+
+        # Calculate stability score
+        data["stability_score"] = calculate_stability_score(
+            data["masks"], self.predictor.model.mask_threshold, self.stability_score_offset
+        )
+        if self.stability_score_thresh > 0.0:
+            keep_mask = data["stability_score"] >= self.stability_score_thresh
+            data.filter(keep_mask)
+
+        # Threshold masks and calculate boxes
+        data["masks"] = data["masks"] > self.predictor.model.mask_threshold
+        data["boxes"] = batched_mask_to_box(data["masks"])
+
+        # Filter boxes that touch crop boundaries
+        keep_mask = ~is_box_near_crop_edge(data["boxes"], crop_box, [0, 0, orig_w, orig_h])
+        if not torch.all(keep_mask):
+            data.filter(keep_mask)
+
+        # Compress to RLE
+        data["masks"] = uncrop_masks(data["masks"], crop_box, orig_h, orig_w)
+        data["rles"] = mask_to_rle_pytorch(data["masks"])
+        del data["masks"]
+
+        return data
+
+    @staticmethod
+    def postprocess_small_regions(
+        mask_data: MaskData, min_area: int, nms_thresh: float
+    ) -> MaskData:
+        """
+        Removes small disconnected regions and holes in masks, then reruns
+        box NMS to remove any new duplicates.
+
+        Edits mask_data in place.
+
+        Requires open-cv as a dependency.
+        """
+        if len(mask_data["rles"]) == 0:
+            return mask_data
+
+        # Filter small disconnected regions and holes
+        new_masks = []
+        scores = []
+        for rle in mask_data["rles"]:
+            mask = rle_to_mask(rle)
+
+            mask, changed = remove_small_regions(mask, min_area, mode="holes")
+            unchanged = not changed
+            mask, changed = remove_small_regions(mask, min_area, mode="islands")
+            unchanged = unchanged and not changed
+
+            new_masks.append(torch.as_tensor(mask).unsqueeze(0))
+            # Give score=0 to changed masks and score=1 to unchanged masks
+            # so NMS will prefer ones that didn't need postprocessing
+            scores.append(float(unchanged))
+
+        # Recalculate boxes and remove any new duplicates
+        masks = torch.cat(new_masks, dim=0)
+        boxes = batched_mask_to_box(masks)
+        keep_by_nms = batched_nms(
+            boxes.float(),
+            torch.as_tensor(scores),
+            torch.zeros_like(boxes[:, 0]),  # categories
+            iou_threshold=nms_thresh,
+        )
+
+        # Only recalculate RLEs for masks that have changed
+        for i_mask in keep_by_nms:
+            if scores[i_mask] == 0.0:
+                mask_torch = masks[i_mask].unsqueeze(0)
+                mask_data["rles"][i_mask] = mask_to_rle_pytorch(mask_torch)[0]
+                mask_data["boxes"][i_mask] = boxes[i_mask]  # update res directly
+        mask_data.filter(keep_by_nms)
+
+        return mask_data
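SamAutomaticMaskGenerator is exported by models/__init__.py but not exercised by app.py. For reference, a minimal usage sketch, assuming a standard SAM ViT-B checkpoint (the path is hypothetical); generate() prompts the model with a point grid and returns one record per kept mask:

import numpy as np
from models import sam_model_registry, SamAutomaticMaskGenerator

sam = sam_model_registry["vit_b"]("sam_vit_b.pth")  # hypothetical checkpoint path
generator = SamAutomaticMaskGenerator(sam, points_per_side=16, pred_iou_thresh=0.9)
image_rgb = np.zeros((480, 640, 3), dtype=np.uint8)  # any HWC uint8 image
records = generator.generate(image_rgb)
for rec in records:
    print(rec["bbox"], rec["area"], rec["predicted_iou"])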
models/build_sam.py
ADDED
@@ -0,0 +1,107 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+
+# This source code is licensed under the license found in the
+# LICENSE file in the root directory of this source tree.
+
+import torch
+
+from functools import partial
+
+from .modeling import ImageEncoderViT, MaskDecoder, PromptEncoder, Sam, TwoWayTransformer
+
+
+def build_sam_vit_h(checkpoint=None):
+    return _build_sam(
+        encoder_embed_dim=1280,
+        encoder_depth=32,
+        encoder_num_heads=16,
+        encoder_global_attn_indexes=[7, 15, 23, 31],
+        checkpoint=checkpoint,
+    )
+
+
+build_sam = build_sam_vit_h
+
+
+def build_sam_vit_l(checkpoint=None):
+    return _build_sam(
+        encoder_embed_dim=1024,
+        encoder_depth=24,
+        encoder_num_heads=16,
+        encoder_global_attn_indexes=[5, 11, 17, 23],
+        checkpoint=checkpoint,
+    )
+
+
+def build_sam_vit_b(checkpoint=None):
+    return _build_sam(
+        encoder_embed_dim=768,
+        encoder_depth=12,
+        encoder_num_heads=12,
+        encoder_global_attn_indexes=[2, 5, 8, 11],
+        checkpoint=checkpoint,
+    )
+
+
+sam_model_registry = {
+    "default": build_sam_vit_h,
+    "vit_h": build_sam_vit_h,
+    "vit_l": build_sam_vit_l,
+    "vit_b": build_sam_vit_b,
+}
+
+
+def _build_sam(
+    encoder_embed_dim,
+    encoder_depth,
+    encoder_num_heads,
+    encoder_global_attn_indexes,
+    checkpoint=None,
+):
+    prompt_embed_dim = 256
+    image_size = 1024
+    vit_patch_size = 16
+    image_embedding_size = image_size // vit_patch_size
+    sam = Sam(
+        image_encoder=ImageEncoderViT(
+            depth=encoder_depth,
+            embed_dim=encoder_embed_dim,
+            img_size=image_size,
+            mlp_ratio=4,
+            norm_layer=partial(torch.nn.LayerNorm, eps=1e-6),
+            num_heads=encoder_num_heads,
+            patch_size=vit_patch_size,
+            qkv_bias=True,
+            use_rel_pos=True,
+            global_attn_indexes=encoder_global_attn_indexes,
+            window_size=14,
+            out_chans=prompt_embed_dim,
+        ),
+        prompt_encoder=PromptEncoder(
+            embed_dim=prompt_embed_dim,
+            image_embedding_size=(image_embedding_size, image_embedding_size),
+            input_image_size=(image_size, image_size),
+            mask_in_chans=16,
+        ),
+        mask_decoder=MaskDecoder(
+            num_multimask_outputs=3,
+            transformer=TwoWayTransformer(
+                depth=2,
+                embedding_dim=prompt_embed_dim,
+                mlp_dim=2048,
+                num_heads=8,
+            ),
+            transformer_dim=prompt_embed_dim,
+            iou_head_depth=3,
+            iou_head_hidden_dim=256,
+        ),
+        pixel_mean=[123.675, 116.28, 103.53],
+        pixel_std=[58.395, 57.12, 57.375],
+    )
+    sam.eval()
+    if checkpoint is not None:
+        with open(checkpoint, "rb") as f:
+            state_dict = torch.load(f)
+        sam.load_state_dict(state_dict)
+    return sam
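The registry above maps backbone names to builder functions, which is what lets app.py select a model with a single string. A quick sketch (weights are randomly initialized when checkpoint=None, exactly as app.py relies on before loading its own state dict):

from models.build_sam import sam_model_registry

sam = sam_model_registry["vit_b"](checkpoint=None)  # randomly initialized ViT-B SAM
n_params = sum(p.numel() for p in sam.parameters())
print(f"vit_b SAM: {n_params / 1e6:.1f}M parameters")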
models/grasp_mods.py
ADDED
@@ -0,0 +1,318 @@
+"""
+Add additional grasp decoder for Segment Anything model.
+The structure should follow the grasp decoder structure in GraspDETR.
+"""
+import torch
+import torch.nn as nn
+from transformers.models.detr.configuration_detr import DetrConfig
+from transformers.models.detr.modeling_detr import DetrHungarianMatcher, DetrLoss, DetrSegmentationOutput, DetrDecoder, sigmoid_focal_loss, dice_loss
+from typing import Any, Dict, List, Tuple
+from transformers.models.detr.modeling_detr import generalized_box_iou
+from transformers.image_transforms import center_to_corners_format
+from scipy.optimize import linear_sum_assignment
+
+def modify_matcher_forward(self):
+    @torch.no_grad()
+    def matcher_forward(outputs, targets):
+
+        batch_size, num_queries = outputs["logits"].shape[:2]
+
+        # We flatten to compute the cost matrices in a batch
+        out_prob = outputs["logits"].flatten(0, 1).softmax(-1)  # [batch_size * num_queries, num_classes]
+        out_bbox = outputs["pred_boxes"].flatten(0, 1)  # [batch_size * num_queries, 4]
+
+        # Also concat the target labels and boxes
+        target_ids = torch.cat([v["class_labels"] for v in targets])
+        target_bbox = torch.cat([v["boxes"] for v in targets])
+
+        # Compute the classification cost. Contrary to the loss, we don't use the NLL,
+        # but approximate it as 1 - proba[target class].
+        # The 1 is a constant that doesn't change the matching, so it can be omitted.
+        class_cost = -out_prob[:, target_ids]
+
+        # Compute the L1 cost between boxes
+        bbox_cost = torch.cdist(out_bbox, target_bbox, p=1)
+
+        # Compute the giou cost between boxes
+        giou_cost = -generalized_box_iou(center_to_corners_format(out_bbox[:, :4]), center_to_corners_format(target_bbox[:, :4]))
+
+        # Final cost matrix
+        cost_matrix = self.bbox_cost * bbox_cost + self.class_cost * class_cost + self.giou_cost * giou_cost
+        cost_matrix = cost_matrix.view(batch_size, num_queries, -1).cpu()
+
+        sizes = [len(v["boxes"]) for v in targets]
+        indices = [linear_sum_assignment(c[i]) for i, c in enumerate(cost_matrix.split(sizes, -1))]
+        return [(torch.as_tensor(i, dtype=torch.int64), torch.as_tensor(j, dtype=torch.int64)) for i, j in indices]
+    return matcher_forward
+
+def modify_grasp_loss_forward(self):
+    def modified_loss_labels(outputs, targets, indices, num_boxes):
+        """
+        Classification loss (NLL): targets dicts must contain the key "class_labels" containing a tensor of dim
+        [nb_target_boxes]
+        """
+        num_classes = 1  # model v9 always uses class-agnostic grasp
+        if "logits" not in outputs:
+            raise KeyError("No logits were found in the outputs")
+        source_logits = outputs["logits"]
+
+        idx = self._get_source_permutation_idx(indices)
+        target_classes_o = torch.cat([t["class_labels"][J] for t, (_, J) in zip(targets, indices)])
+        target_classes = torch.full(
+            source_logits.shape[:2], num_classes, dtype=torch.int64, device=source_logits.device
+        )
+        target_classes[idx] = target_classes_o
+
+        loss_ce = nn.functional.cross_entropy(source_logits.transpose(1, 2), target_classes)
+        losses = {"loss_ce": loss_ce}
+
+        return losses
+
+    def modified_loss_boxes(outputs, targets, indices, num_boxes):
+
+        if "pred_boxes" not in outputs:
+            raise KeyError("No predicted boxes found in outputs")
+        idx = self._get_source_permutation_idx(indices)
+        source_boxes = outputs["pred_boxes"][idx]
+        target_boxes = torch.cat([t["boxes"][i] for t, (_, i) in zip(targets, indices)], dim=0)
+
+        loss_bbox = nn.functional.l1_loss(source_boxes, target_boxes, reduction="none")
+
+        losses = {}
+        losses["loss_bbox"] = loss_bbox.sum() / num_boxes
+
+        loss_giou = 1 - torch.diag(
+            generalized_box_iou(center_to_corners_format(source_boxes[:, :4]), center_to_corners_format(target_boxes[:, :4]))
+        )
+        losses["loss_giou"] = loss_giou.sum() / num_boxes
+        return losses
+    return modified_loss_labels, modified_loss_boxes
+
+def modify_forward(self):
+    """
+    Modify the following methods to make SAM perform grasp detection after segmentation:
+    1. Add a parallel decoder for grasping detection: 1(+1) classes, 5 values to regress (bbox & rotation)
+    Returns:
+        Modified model
+    """
+    # 1. We instantiate a new module in self.base_model, as another decoder
+    self.grasp_decoder_config = DetrConfig()
+    self.grasp_decoder = DetrDecoder(self.grasp_decoder_config).to(self.device)
+    self.grasp_query_position_embeddings = nn.Embedding(20, 256).to(self.device)
+    # 2. Base model forward method is not directly used, no modification needs to be done
+    # self.detr.model.forward = modify_base_model_forward(self.detr.model)
+    # 3. Add additional classification head & bbox regression head for grasp_decoder output
+    self.grasp_predictor = torch.nn.Sequential(
+        torch.nn.Linear(256, 256),
+        torch.nn.Linear(256, 256),
+        torch.nn.Linear(256, 5)
+    ).to(self.device)
+    self.grasp_label_classifier = torch.nn.Linear(256, 2).to(self.device)
+    # 4. Add positional embedding
+    # name it as grasp_img_pos_embed to avoid name conflict
+    class ImagePosEmbed(nn.Module):
+        def __init__(self, img_size=64, hidden_dim=256):
+            super().__init__()
+            self.pos_embed = nn.Parameter(
+                torch.randn(1, img_size, img_size, hidden_dim)
+            )
+
+        def forward(self, x):
+            return x + self.pos_embed
+
+    self.grasp_img_pos_embed = ImagePosEmbed().to(self.device)
+
+    def modified_forward(
+        batched_input: List[Dict[str, Any]],
+        multimask_output: bool,
+    ):
+        input_images = torch.stack([x["image"] for x in batched_input], dim=0)
+        image_embeddings = self.image_encoder(input_images)
+
+        outputs = []
+        srcs = []
+        for image_record, curr_embedding in zip(batched_input, image_embeddings):
+            if "point_coords" in image_record:
+                points = (image_record["point_coords"], image_record["point_labels"])
+            else:
+                points = None
+            sparse_embeddings, dense_embeddings = self.prompt_encoder(
+                points=points,
+                boxes=image_record.get("boxes", None),
+                masks=image_record.get("mask_inputs", None),
+            )
+            low_res_masks, iou_predictions, src = self.mask_decoder(
+                image_embeddings=curr_embedding.unsqueeze(0),
+                image_pe=self.prompt_encoder.get_dense_pe(),
+                sparse_prompt_embeddings=sparse_embeddings,
+                dense_prompt_embeddings=dense_embeddings,
+                multimask_output=multimask_output,
+            )
+            outputs.append(
+                {
+                    "iou_predictions": iou_predictions,
+                    "low_res_logits": low_res_masks,
+                }
+            )
+            srcs.append(src[0])
+        srcs = torch.stack(srcs, dim=0)
+        # forward grasp decoder here
+        # 1. Get encoder hidden states
+        grasp_encoder_hidden_states = self.grasp_img_pos_embed(srcs.permute(0, 2, 3, 1))
+        # 2. Get query embeddings
+        grasp_query_pe = self.grasp_query_position_embeddings(torch.arange(20).to(self.device))
+        # repeat to batch size
+        grasp_query_pe = grasp_query_pe.repeat(len(batched_input), 1, 1)
+        grasp_decoder_outputs = self.grasp_decoder(
+            inputs_embeds=torch.zeros_like(grasp_query_pe),
+            attention_mask=None,
+            position_embeddings=torch.zeros_like(grasp_encoder_hidden_states),
+            query_position_embeddings=grasp_query_pe,
+            encoder_hidden_states=grasp_encoder_hidden_states,
+            encoder_attention_mask=None,
+            output_attentions=False,
+            output_hidden_states=False,
+            return_dict=True,
+        )
+        grasp_sequence_output = grasp_decoder_outputs[0]
+        grasp_logits = self.grasp_label_classifier(grasp_sequence_output)
+        pred_grasps = self.grasp_predictor(grasp_sequence_output).sigmoid()
+
+        # 3. Calculate loss
+        loss, loss_dict = 0, {}
+        if "grasp_labels" in batched_input[0]:
+            config = self.grasp_decoder_config
+            grasp_labels = [{
+                "class_labels": torch.zeros([len(x["grasp_labels"])], dtype=torch.long).to(self.device),
+                "boxes": x["grasp_labels"],
+            } for x in batched_input]
+            # First: create the matcher
+            matcher = DetrHungarianMatcher(
+                class_cost=config.class_cost, bbox_cost=config.bbox_cost, giou_cost=config.giou_cost
+            )
+            matcher.forward = modify_matcher_forward(matcher)
+            # Second: create the criterion
+            losses = ["labels", "boxes"]
+            criterion = DetrLoss(
+                matcher=matcher,
+                num_classes=config.num_labels,
+                eos_coef=config.eos_coefficient,
+                losses=losses,
+            )
+            criterion.loss_labels, criterion.loss_boxes = modify_grasp_loss_forward(criterion)
+            criterion.to(self.device)
+            # Third: compute the losses, based on outputs and labels
+            outputs_loss = {}
+            outputs_loss["logits"] = grasp_logits
+            outputs_loss["pred_boxes"] = pred_grasps
+
+            grasp_loss_dict = criterion(outputs_loss, grasp_labels)
+            # Fourth: compute total loss, as a weighted sum of the various losses
+            weight_dict = {"loss_ce": 1, "loss_bbox": config.bbox_loss_coefficient}
+            weight_dict["loss_giou"] = config.giou_loss_coefficient
+            if config.auxiliary_loss:
+                aux_weight_dict = {}
+                for i in range(config.decoder_layers - 1):
+                    aux_weight_dict.update({k + f"_{i}": v for k, v in weight_dict.items()})
+                weight_dict.update(aux_weight_dict)
+            grasp_loss = sum(grasp_loss_dict[k] * weight_dict[k] for k in grasp_loss_dict.keys() if k in weight_dict)
+
+            # merge grasp branch loss into variable loss & loss_dict
+            loss += grasp_loss
+            loss_dict.update(grasp_loss_dict)
+        pred_masks = self.postprocess_masks(
+            torch.cat([x['low_res_logits'] for x in outputs], dim=0),
+            input_size=image_record["image"].shape[-2:],
+            original_size=(1024, 1024),
+        )
+        if 'masks' in batched_input[0]:
+            # 4. Calculate segmentation loss
+            sf_loss = sigmoid_focal_loss(pred_masks.flatten(1),
+                                         torch.stack([x['masks'] for x in batched_input], dim=0).unsqueeze(1).type(torch.float32).flatten(1), len(batched_input))
+            d_loss = dice_loss(pred_masks.flatten(1),
+                               torch.stack([x['masks'] for x in batched_input], dim=0).unsqueeze(1).type(torch.float32).flatten(1), len(batched_input))
+            loss += sf_loss + d_loss
+            loss_dict["sf_loss"] = sf_loss
+            loss_dict["d_loss"] = d_loss
+        return DetrSegmentationOutput(
+            loss=loss,
+            loss_dict=loss_dict,
+            logits=grasp_logits,
+            pred_boxes=pred_grasps,
+            pred_masks=pred_masks,
+        )
+
+    return modified_forward
+
+def add_inference_method(self):
+    def infer(
+        batched_input: List[Dict[str, Any]],
+        multimask_output: bool,
+    ):
+        input_images = torch.stack([x["image"] for x in batched_input], dim=0)
+        image_embeddings = self.image_encoder(input_images)
+
+        outputs = []
+        srcs = []
+        curr_embedding = image_embeddings[0]
+        image_record = batched_input[0]
+
+        if "point_coords" in image_record:
+            points = (image_record["point_coords"], image_record["point_labels"])
+        else:
+            points = None
+        sparse_embeddings, dense_embeddings = self.prompt_encoder(
+            points=points,
+            boxes=image_record.get("boxes", None),
+            masks=image_record.get("mask_inputs", None),
+        )
+        low_res_masks, iou_predictions, src = self.mask_decoder(
+            image_embeddings=curr_embedding.unsqueeze(0),
+            image_pe=self.prompt_encoder.get_dense_pe(),
+            sparse_prompt_embeddings=sparse_embeddings,
+            dense_prompt_embeddings=dense_embeddings,
+            multimask_output=multimask_output,
+        )
+        outputs.append(
+            {
+                "iou_predictions": iou_predictions,
+                "low_res_logits": low_res_masks,
+            }
+        )
+        srcs.append(src[0])
+
+        n_queries = iou_predictions.size(0)
+
+        # forward grasp decoder here
+        # 1. Get encoder hidden states
+        grasp_encoder_hidden_states = self.grasp_img_pos_embed(src.permute(0, 2, 3, 1))
+        # 2. Get query embeddings
+        grasp_query_pe = self.grasp_query_position_embeddings(torch.arange(20).to(self.device))
+        # repeat to batch size
+        grasp_query_pe = grasp_query_pe.repeat(n_queries, 1, 1)
+        grasp_decoder_outputs = self.grasp_decoder(
+            inputs_embeds=torch.zeros_like(grasp_query_pe),
+            attention_mask=None,
+            position_embeddings=torch.zeros_like(grasp_encoder_hidden_states),
+            query_position_embeddings=grasp_query_pe,
+            encoder_hidden_states=grasp_encoder_hidden_states,
+            encoder_attention_mask=None,
+            output_attentions=False,
+            output_hidden_states=False,
+            return_dict=True,
+        )
+        grasp_sequence_output = grasp_decoder_outputs[0]
+        grasp_logits = self.grasp_label_classifier(grasp_sequence_output)
+        pred_grasps = self.grasp_predictor(grasp_sequence_output).sigmoid()
+        pred_masks = self.postprocess_masks(
+            torch.cat([x['low_res_logits'] for x in outputs], dim=0),
+            input_size=image_record["image"].shape[-2:],
+            original_size=(1024, 1024),
+        )
+        return DetrSegmentationOutput(
+            loss=0,
+            loss_dict={},
+            logits=grasp_logits,
+            pred_boxes=pred_grasps,
+            pred_masks=pred_masks,
+        )
+    return infer
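How the two patches are meant to be wired together (this mirrors app.py; the dummy input below is illustrative only, and note the mask decoder in this repo is modified to also return its pre-upscaling features src, which the grasp decoder consumes):

import torch
from models import sam_model_registry
from models.grasp_mods import modify_forward, add_inference_method

sam = sam_model_registry["vit_b"]()
sam.forward = modify_forward(sam)      # creates grasp decoder, heads, pos. embedding
sam.infer = add_inference_method(sam)  # box-prompted, single-image inference path

dummy = [{"image": torch.zeros(3, 1024, 1024),
          "boxes": torch.tensor([[100.0, 100.0, 400.0, 400.0]])}]
with torch.no_grad():
    out = sam.infer(dummy, multimask_output=False)
# out.pred_boxes holds the 20 five-value grasp encodings per box prompt,
# out.logits their scores, out.pred_masks the segmentation logits
print(out.logits.shape, out.pred_boxes.shape, out.pred_masks.shape)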
models/modeling/__init__.py
ADDED
@@ -0,0 +1,11 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+
+# This source code is licensed under the license found in the
+# LICENSE file in the root directory of this source tree.
+
+from .sam import Sam
+from .image_encoder import ImageEncoderViT
+from .mask_decoder import MaskDecoder
+from .prompt_encoder import PromptEncoder
+from .transformer import TwoWayTransformer
models/modeling/common.py
ADDED
@@ -0,0 +1,43 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+
+# This source code is licensed under the license found in the
+# LICENSE file in the root directory of this source tree.
+
+import torch
+import torch.nn as nn
+
+from typing import Type
+
+
+class MLPBlock(nn.Module):
+    def __init__(
+        self,
+        embedding_dim: int,
+        mlp_dim: int,
+        act: Type[nn.Module] = nn.GELU,
+    ) -> None:
+        super().__init__()
+        self.lin1 = nn.Linear(embedding_dim, mlp_dim)
+        self.lin2 = nn.Linear(mlp_dim, embedding_dim)
+        self.act = act()
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        return self.lin2(self.act(self.lin1(x)))
+
+
+# From https://github.com/facebookresearch/detectron2/blob/main/detectron2/layers/batch_norm.py # noqa
+# Itself from https://github.com/facebookresearch/ConvNeXt/blob/d1fa8f6fef0a165b27399986cc2bdacc92777e40/models/convnext.py#L119 # noqa
+class LayerNorm2d(nn.Module):
+    def __init__(self, num_channels: int, eps: float = 1e-6) -> None:
+        super().__init__()
+        self.weight = nn.Parameter(torch.ones(num_channels))
+        self.bias = nn.Parameter(torch.zeros(num_channels))
+        self.eps = eps
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        u = x.mean(1, keepdim=True)
+        s = (x - u).pow(2).mean(1, keepdim=True)
+        x = (x - u) / torch.sqrt(s + self.eps)
+        x = self.weight[:, None, None] * x + self.bias[:, None, None]
+        return x
models/modeling/image_encoder.py
ADDED
@@ -0,0 +1,395 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
2 |
+
# All rights reserved.
|
3 |
+
|
4 |
+
# This source code is licensed under the license found in the
|
5 |
+
# LICENSE file in the root directory of this source tree.
|
6 |
+
|
7 |
+
import torch
|
8 |
+
import torch.nn as nn
|
9 |
+
import torch.nn.functional as F
|
10 |
+
|
11 |
+
from typing import Optional, Tuple, Type
|
12 |
+
|
13 |
+
from .common import LayerNorm2d, MLPBlock
|
14 |
+
|
15 |
+
|
16 |
+
# This class and its supporting functions below lightly adapted from the ViTDet backbone available at: https://github.com/facebookresearch/detectron2/blob/main/detectron2/modeling/backbone/vit.py # noqa
|
17 |
+
class ImageEncoderViT(nn.Module):
|
18 |
+
def __init__(
|
19 |
+
self,
|
20 |
+
img_size: int = 1024,
|
21 |
+
patch_size: int = 16,
|
22 |
+
in_chans: int = 3,
|
23 |
+
embed_dim: int = 768,
|
24 |
+
depth: int = 12,
|
25 |
+
num_heads: int = 12,
|
26 |
+
mlp_ratio: float = 4.0,
|
27 |
+
out_chans: int = 256,
|
28 |
+
qkv_bias: bool = True,
|
29 |
+
norm_layer: Type[nn.Module] = nn.LayerNorm,
|
30 |
+
act_layer: Type[nn.Module] = nn.GELU,
|
31 |
+
use_abs_pos: bool = True,
|
32 |
+
use_rel_pos: bool = False,
|
33 |
+
rel_pos_zero_init: bool = True,
|
34 |
+
window_size: int = 0,
|
35 |
+
global_attn_indexes: Tuple[int, ...] = (),
|
36 |
+
) -> None:
|
37 |
+
"""
|
38 |
+
Args:
|
39 |
+
img_size (int): Input image size.
|
40 |
+
patch_size (int): Patch size.
|
41 |
+
in_chans (int): Number of input image channels.
|
42 |
+
embed_dim (int): Patch embedding dimension.
|
43 |
+
depth (int): Depth of ViT.
|
44 |
+
num_heads (int): Number of attention heads in each ViT block.
|
45 |
+
mlp_ratio (float): Ratio of mlp hidden dim to embedding dim.
|
46 |
+
qkv_bias (bool): If True, add a learnable bias to query, key, value.
|
47 |
+
norm_layer (nn.Module): Normalization layer.
|
48 |
+
act_layer (nn.Module): Activation layer.
|
49 |
+
use_abs_pos (bool): If True, use absolute positional embeddings.
|
50 |
+
use_rel_pos (bool): If True, add relative positional embeddings to the attention map.
|
51 |
+
rel_pos_zero_init (bool): If True, zero initialize relative positional parameters.
|
52 |
+
window_size (int): Window size for window attention blocks.
|
53 |
+
global_attn_indexes (list): Indexes for blocks using global attention.
|
54 |
+
"""
|
55 |
+
super().__init__()
|
56 |
+
self.img_size = img_size
|
57 |
+
|
58 |
+
self.patch_embed = PatchEmbed(
|
59 |
+
kernel_size=(patch_size, patch_size),
|
60 |
+
stride=(patch_size, patch_size),
|
61 |
+
in_chans=in_chans,
|
62 |
+
embed_dim=embed_dim,
|
63 |
+
)
|
64 |
+
|
65 |
+
self.pos_embed: Optional[nn.Parameter] = None
|
66 |
+
if use_abs_pos:
|
67 |
+
# Initialize absolute positional embedding with pretrain image size.
|
68 |
+
self.pos_embed = nn.Parameter(
|
69 |
+
torch.zeros(1, img_size // patch_size, img_size // patch_size, embed_dim)
|
70 |
+
)
|
71 |
+
|
72 |
+
self.blocks = nn.ModuleList()
|
73 |
+
for i in range(depth):
|
74 |
+
block = Block(
|
75 |
+
dim=embed_dim,
|
76 |
+
num_heads=num_heads,
|
77 |
+
mlp_ratio=mlp_ratio,
|
78 |
+
qkv_bias=qkv_bias,
|
79 |
+
norm_layer=norm_layer,
|
80 |
+
act_layer=act_layer,
|
81 |
+
use_rel_pos=use_rel_pos,
|
82 |
+
rel_pos_zero_init=rel_pos_zero_init,
|
83 |
+
window_size=window_size if i not in global_attn_indexes else 0,
                input_size=(img_size // patch_size, img_size // patch_size),
            )
            self.blocks.append(block)

        self.neck = nn.Sequential(
            nn.Conv2d(
                embed_dim,
                out_chans,
                kernel_size=1,
                bias=False,
            ),
            LayerNorm2d(out_chans),
            nn.Conv2d(
                out_chans,
                out_chans,
                kernel_size=3,
                padding=1,
                bias=False,
            ),
            LayerNorm2d(out_chans),
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x = self.patch_embed(x)
        if self.pos_embed is not None:
            x = x + self.pos_embed

        for blk in self.blocks:
            x = blk(x)

        x = self.neck(x.permute(0, 3, 1, 2))

        return x


class Block(nn.Module):
    """Transformer blocks with support of window attention and residual propagation blocks"""

    def __init__(
        self,
        dim: int,
        num_heads: int,
        mlp_ratio: float = 4.0,
        qkv_bias: bool = True,
        norm_layer: Type[nn.Module] = nn.LayerNorm,
        act_layer: Type[nn.Module] = nn.GELU,
        use_rel_pos: bool = False,
        rel_pos_zero_init: bool = True,
        window_size: int = 0,
        input_size: Optional[Tuple[int, int]] = None,
    ) -> None:
        """
        Args:
            dim (int): Number of input channels.
            num_heads (int): Number of attention heads in each ViT block.
            mlp_ratio (float): Ratio of mlp hidden dim to embedding dim.
            qkv_bias (bool): If True, add a learnable bias to query, key, value.
            norm_layer (nn.Module): Normalization layer.
            act_layer (nn.Module): Activation layer.
            use_rel_pos (bool): If True, add relative positional embeddings to the attention map.
            rel_pos_zero_init (bool): If True, zero initialize relative positional parameters.
            window_size (int): Window size for window attention blocks. If it equals 0, then
                use global attention.
            input_size (tuple(int, int) or None): Input resolution for calculating the relative
                positional parameter size.
        """
        super().__init__()
        self.norm1 = norm_layer(dim)
        self.attn = Attention(
            dim,
            num_heads=num_heads,
            qkv_bias=qkv_bias,
            use_rel_pos=use_rel_pos,
            rel_pos_zero_init=rel_pos_zero_init,
            input_size=input_size if window_size == 0 else (window_size, window_size),
        )

        self.norm2 = norm_layer(dim)
        self.mlp = MLPBlock(embedding_dim=dim, mlp_dim=int(dim * mlp_ratio), act=act_layer)

        self.window_size = window_size

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        shortcut = x
        x = self.norm1(x)
        # Window partition
        if self.window_size > 0:
            H, W = x.shape[1], x.shape[2]
            x, pad_hw = window_partition(x, self.window_size)

        x = self.attn(x)
        # Reverse window partition
        if self.window_size > 0:
            x = window_unpartition(x, self.window_size, pad_hw, (H, W))

        x = shortcut + x
        x = x + self.mlp(self.norm2(x))

        return x


class Attention(nn.Module):
    """Multi-head Attention block with relative position embeddings."""

    def __init__(
        self,
        dim: int,
        num_heads: int = 8,
        qkv_bias: bool = True,
        use_rel_pos: bool = False,
        rel_pos_zero_init: bool = True,
        input_size: Optional[Tuple[int, int]] = None,
    ) -> None:
        """
        Args:
            dim (int): Number of input channels.
            num_heads (int): Number of attention heads.
            qkv_bias (bool): If True, add a learnable bias to query, key, value.
            use_rel_pos (bool): If True, add relative positional embeddings to the attention map.
            rel_pos_zero_init (bool): If True, zero initialize relative positional parameters.
            input_size (tuple(int, int) or None): Input resolution for calculating the relative
                positional parameter size.
        """
        super().__init__()
        self.num_heads = num_heads
        head_dim = dim // num_heads
        self.scale = head_dim**-0.5

        self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)
        self.proj = nn.Linear(dim, dim)

        self.use_rel_pos = use_rel_pos
        if self.use_rel_pos:
            assert (
                input_size is not None
            ), "Input size must be provided if using relative positional encoding."
            # initialize relative positional embeddings
            self.rel_pos_h = nn.Parameter(torch.zeros(2 * input_size[0] - 1, head_dim))
            self.rel_pos_w = nn.Parameter(torch.zeros(2 * input_size[1] - 1, head_dim))

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        B, H, W, _ = x.shape
        # qkv with shape (3, B, nHead, H * W, C)
        qkv = self.qkv(x).reshape(B, H * W, 3, self.num_heads, -1).permute(2, 0, 3, 1, 4)
        # q, k, v with shape (B * nHead, H * W, C)
        q, k, v = qkv.reshape(3, B * self.num_heads, H * W, -1).unbind(0)

        attn = (q * self.scale) @ k.transpose(-2, -1)

        if self.use_rel_pos:
            attn = add_decomposed_rel_pos(attn, q, self.rel_pos_h, self.rel_pos_w, (H, W), (H, W))

        attn = attn.softmax(dim=-1)
        x = (attn @ v).view(B, self.num_heads, H, W, -1).permute(0, 2, 3, 1, 4).reshape(B, H, W, -1)
        x = self.proj(x)

        return x
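
def _attention_shape_sketch() -> None:
    """Illustrative sketch, not part of the uploaded file: the Attention block
    above is shape-preserving over a B x H x W x C token grid. All sizes here
    are assumed example values, not ones the model necessarily uses."""
    attn = Attention(dim=96, num_heads=8, use_rel_pos=True, input_size=(14, 14))
    x = torch.randn(4, 14, 14, 96)  # e.g. four 14x14 windows of 96-dim tokens
    y = attn(x)
    assert y.shape == x.shape  # qkv, rel-pos terms, softmax, and proj keep the grid shape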

def window_partition(x: torch.Tensor, window_size: int) -> Tuple[torch.Tensor, Tuple[int, int]]:
    """
    Partition into non-overlapping windows with padding if needed.
    Args:
        x (tensor): input tokens with [B, H, W, C].
        window_size (int): window size.

    Returns:
        windows: windows after partition with [B * num_windows, window_size, window_size, C].
        (Hp, Wp): padded height and width before partition
    """
    B, H, W, C = x.shape

    pad_h = (window_size - H % window_size) % window_size
    pad_w = (window_size - W % window_size) % window_size
    if pad_h > 0 or pad_w > 0:
        x = F.pad(x, (0, 0, 0, pad_w, 0, pad_h))
    Hp, Wp = H + pad_h, W + pad_w

    x = x.view(B, Hp // window_size, window_size, Wp // window_size, window_size, C)
    windows = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(-1, window_size, window_size, C)
    return windows, (Hp, Wp)


def window_unpartition(
    windows: torch.Tensor, window_size: int, pad_hw: Tuple[int, int], hw: Tuple[int, int]
) -> torch.Tensor:
    """
    Window unpartition into original sequences and removing padding.
    Args:
        windows (tensor): input tokens with [B * num_windows, window_size, window_size, C].
        window_size (int): window size.
        pad_hw (Tuple): padded height and width (Hp, Wp).
        hw (Tuple): original height and width (H, W) before padding.

    Returns:
        x: unpartitioned sequences with [B, H, W, C].
    """
    Hp, Wp = pad_hw
    H, W = hw
    B = windows.shape[0] // (Hp * Wp // window_size // window_size)
    x = windows.view(B, Hp // window_size, Wp // window_size, window_size, window_size, -1)
    x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, Hp, Wp, -1)

    if Hp > H or Wp > W:
        x = x[:, :H, :W, :].contiguous()
    return x


def get_rel_pos(q_size: int, k_size: int, rel_pos: torch.Tensor) -> torch.Tensor:
    """
    Get relative positional embeddings according to the relative positions of
    query and key sizes.
    Args:
        q_size (int): size of query q.
        k_size (int): size of key k.
        rel_pos (Tensor): relative position embeddings (L, C).

    Returns:
        Extracted positional embeddings according to relative positions.
    """
    max_rel_dist = int(2 * max(q_size, k_size) - 1)
    # Interpolate rel pos if needed.
    if rel_pos.shape[0] != max_rel_dist:
        # Interpolate rel pos.
        rel_pos_resized = F.interpolate(
            rel_pos.reshape(1, rel_pos.shape[0], -1).permute(0, 2, 1),
            size=max_rel_dist,
            mode="linear",
        )
        rel_pos_resized = rel_pos_resized.reshape(-1, max_rel_dist).permute(1, 0)
    else:
        rel_pos_resized = rel_pos

    # Scale the coords with short length if shapes for q and k are different.
    q_coords = torch.arange(q_size)[:, None] * max(k_size / q_size, 1.0)
    k_coords = torch.arange(k_size)[None, :] * max(q_size / k_size, 1.0)
    relative_coords = (q_coords - k_coords) + (k_size - 1) * max(q_size / k_size, 1.0)

    return rel_pos_resized[relative_coords.long()]


def add_decomposed_rel_pos(
    attn: torch.Tensor,
    q: torch.Tensor,
    rel_pos_h: torch.Tensor,
    rel_pos_w: torch.Tensor,
    q_size: Tuple[int, int],
    k_size: Tuple[int, int],
) -> torch.Tensor:
    """
    Calculate decomposed Relative Positional Embeddings from :paper:`mvitv2`.
    https://github.com/facebookresearch/mvit/blob/19786631e330df9f3622e5402b4a419a263a2c80/mvit/models/attention.py  # noqa B950
    Args:
        attn (Tensor): attention map.
        q (Tensor): query q in the attention layer with shape (B, q_h * q_w, C).
        rel_pos_h (Tensor): relative position embeddings (Lh, C) for height axis.
        rel_pos_w (Tensor): relative position embeddings (Lw, C) for width axis.
        q_size (Tuple): spatial sequence size of query q with (q_h, q_w).
        k_size (Tuple): spatial sequence size of key k with (k_h, k_w).

    Returns:
        attn (Tensor): attention map with added relative positional embeddings.
    """
    q_h, q_w = q_size
    k_h, k_w = k_size
    Rh = get_rel_pos(q_h, k_h, rel_pos_h)
    Rw = get_rel_pos(q_w, k_w, rel_pos_w)

    B, _, dim = q.shape
    r_q = q.reshape(B, q_h, q_w, dim)
    rel_h = torch.einsum("bhwc,hkc->bhwk", r_q, Rh)
    rel_w = torch.einsum("bhwc,wkc->bhwk", r_q, Rw)

    attn = (
        attn.view(B, q_h, q_w, k_h, k_w) + rel_h[:, :, :, :, None] + rel_w[:, :, :, None, :]
    ).view(B, q_h * q_w, k_h * k_w)

    return attn
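
def _window_roundtrip_sketch() -> None:
    """Illustrative sketch, not part of the uploaded file: window_partition pads
    H and W up to multiples of window_size, and window_unpartition crops the
    padding back off, so the pair round-trips exactly. Sizes are example values."""
    x = torch.randn(2, 50, 70, 96)  # B x H x W x C; 50 is not a multiple of 14
    windows, (Hp, Wp) = window_partition(x, 14)  # pads to Hp=56, Wp=70
    assert windows.shape == (2 * (Hp // 14) * (Wp // 14), 14, 14, 96)
    y = window_unpartition(windows, 14, (Hp, Wp), (50, 70))
    assert torch.equal(x, y)  # zero padding is removed without loss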

class PatchEmbed(nn.Module):
    """
    Image to Patch Embedding.
    """

    def __init__(
        self,
        kernel_size: Tuple[int, int] = (16, 16),
        stride: Tuple[int, int] = (16, 16),
        padding: Tuple[int, int] = (0, 0),
        in_chans: int = 3,
        embed_dim: int = 768,
    ) -> None:
        """
        Args:
            kernel_size (Tuple): kernel size of the projection layer.
            stride (Tuple): stride of the projection layer.
            padding (Tuple): padding size of the projection layer.
            in_chans (int): Number of input image channels.
            embed_dim (int): Patch embedding dimension.
        """
        super().__init__()

        self.proj = nn.Conv2d(
            in_chans, embed_dim, kernel_size=kernel_size, stride=stride, padding=padding
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x = self.proj(x)
        # B C H W -> B H W C
        x = x.permute(0, 2, 3, 1)
        return x
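A minimal usage sketch for PatchEmbed (illustrative only; the 1024-pixel input and 16-pixel patches are the usual SAM defaults assumed here, not values read from this upload):

    patch_embed = PatchEmbed(kernel_size=(16, 16), stride=(16, 16), in_chans=3, embed_dim=768)
    img = torch.randn(1, 3, 1024, 1024)  # B x C x H x W image batch
    tokens = patch_embed(img)            # permuted to B x H/16 x W/16 x embed_dim
    assert tokens.shape == (1, 64, 64, 768)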
models/modeling/mask_decoder.py
ADDED
@@ -0,0 +1,176 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.

# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.

import torch
from torch import nn
from torch.nn import functional as F

from typing import List, Tuple, Type

from .common import LayerNorm2d


class MaskDecoder(nn.Module):
    def __init__(
        self,
        *,
        transformer_dim: int,
        transformer: nn.Module,
        num_multimask_outputs: int = 3,
        activation: Type[nn.Module] = nn.GELU,
        iou_head_depth: int = 3,
        iou_head_hidden_dim: int = 256,
    ) -> None:
        """
        Predicts masks given an image and prompt embeddings, using a
        transformer architecture.

        Arguments:
          transformer_dim (int): the channel dimension of the transformer
          transformer (nn.Module): the transformer used to predict masks
          num_multimask_outputs (int): the number of masks to predict
            when disambiguating masks
          activation (nn.Module): the type of activation to use when
            upscaling masks
          iou_head_depth (int): the depth of the MLP used to predict
            mask quality
          iou_head_hidden_dim (int): the hidden dimension of the MLP
            used to predict mask quality
        """
        super().__init__()
        self.transformer_dim = transformer_dim
        self.transformer = transformer

        self.num_multimask_outputs = num_multimask_outputs

        self.iou_token = nn.Embedding(1, transformer_dim)
        self.num_mask_tokens = num_multimask_outputs + 1
        self.mask_tokens = nn.Embedding(self.num_mask_tokens, transformer_dim)

        self.output_upscaling = nn.Sequential(
            nn.ConvTranspose2d(transformer_dim, transformer_dim // 4, kernel_size=2, stride=2),
            LayerNorm2d(transformer_dim // 4),
            activation(),
            nn.ConvTranspose2d(transformer_dim // 4, transformer_dim // 8, kernel_size=2, stride=2),
            activation(),
        )
        self.output_hypernetworks_mlps = nn.ModuleList(
            [
                MLP(transformer_dim, transformer_dim, transformer_dim // 8, 3)
                for i in range(self.num_mask_tokens)
            ]
        )

        self.iou_prediction_head = MLP(
            transformer_dim, iou_head_hidden_dim, self.num_mask_tokens, iou_head_depth
        )

    def forward(
        self,
        image_embeddings: torch.Tensor,
        image_pe: torch.Tensor,
        sparse_prompt_embeddings: torch.Tensor,
        dense_prompt_embeddings: torch.Tensor,
        multimask_output: bool,
    ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
        """
        Predict masks given image and prompt embeddings.

        Arguments:
          image_embeddings (torch.Tensor): the embeddings from the image encoder
          image_pe (torch.Tensor): positional encoding with the shape of image_embeddings
          sparse_prompt_embeddings (torch.Tensor): the embeddings of the points and boxes
          dense_prompt_embeddings (torch.Tensor): the embeddings of the mask inputs
          multimask_output (bool): Whether to return multiple masks or a single
            mask.

        Returns:
          torch.Tensor: batched predicted masks
          torch.Tensor: batched predictions of mask quality
          torch.Tensor: the transformer-updated image embedding (src)
        """
        masks, iou_pred, src = self.predict_masks(
            image_embeddings=image_embeddings,
            image_pe=image_pe,
            sparse_prompt_embeddings=sparse_prompt_embeddings,
            dense_prompt_embeddings=dense_prompt_embeddings,
        )

        # Select the correct mask or masks for output
        if multimask_output:
            mask_slice = slice(1, None)
        else:
            mask_slice = slice(0, 1)
        masks = masks[:, mask_slice, :, :]
        iou_pred = iou_pred[:, mask_slice]

        # Prepare output
        return masks, iou_pred, src

    def predict_masks(
        self,
        image_embeddings: torch.Tensor,
        image_pe: torch.Tensor,
        sparse_prompt_embeddings: torch.Tensor,
        dense_prompt_embeddings: torch.Tensor,
    ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
        """Predicts masks. See 'forward' for more details."""
        # Concatenate output tokens
        output_tokens = torch.cat([self.iou_token.weight, self.mask_tokens.weight], dim=0)
        output_tokens = output_tokens.unsqueeze(0).expand(sparse_prompt_embeddings.size(0), -1, -1)
        tokens = torch.cat((output_tokens, sparse_prompt_embeddings), dim=1)

        # Expand per-image data in batch direction to be per-mask
        src = torch.repeat_interleave(image_embeddings, tokens.shape[0], dim=0)
        src = src + dense_prompt_embeddings
        pos_src = torch.repeat_interleave(image_pe, tokens.shape[0], dim=0)
        b, c, h, w = src.shape

        # Run the transformer
        hs, src = self.transformer(src, pos_src, tokens)
        iou_token_out = hs[:, 0, :]
        mask_tokens_out = hs[:, 1 : (1 + self.num_mask_tokens), :]

        # Upscale mask embeddings and predict masks using the mask tokens
        src = src.transpose(1, 2).view(b, c, h, w)
        upscaled_embedding = self.output_upscaling(src)
        hyper_in_list: List[torch.Tensor] = []
        for i in range(self.num_mask_tokens):
            hyper_in_list.append(self.output_hypernetworks_mlps[i](mask_tokens_out[:, i, :]))
        hyper_in = torch.stack(hyper_in_list, dim=1)
        b, c, h, w = upscaled_embedding.shape
        masks = (hyper_in @ upscaled_embedding.view(b, c, h * w)).view(b, -1, h, w)

        # Generate mask quality predictions
        iou_pred = self.iou_prediction_head(iou_token_out)

        return masks, iou_pred, src


# Lightly adapted from
# https://github.com/facebookresearch/MaskFormer/blob/main/mask_former/modeling/transformer/transformer_predictor.py # noqa
class MLP(nn.Module):
    def __init__(
        self,
        input_dim: int,
        hidden_dim: int,
        output_dim: int,
        num_layers: int,
        sigmoid_output: bool = False,
    ) -> None:
        super().__init__()
        self.num_layers = num_layers
        h = [hidden_dim] * (num_layers - 1)
        self.layers = nn.ModuleList(
            nn.Linear(n, k) for n, k in zip([input_dim] + h, h + [output_dim])
        )
        self.sigmoid_output = sigmoid_output

    def forward(self, x):
        for i, layer in enumerate(self.layers):
            x = F.relu(layer(x)) if i < self.num_layers - 1 else layer(x)
        if self.sigmoid_output:
            x = F.sigmoid(x)
        return x
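A hedged wiring sketch for the decoder above (illustrative; image_emb, sparse_emb, dense_emb, and pe are placeholder tensors/modules, and TwoWayTransformer comes from transformer.py in this upload). Note the local modification relative to upstream SAM: forward returns a third value, the transformer-updated image embedding src:

    decoder = MaskDecoder(
        transformer_dim=256,
        transformer=TwoWayTransformer(depth=2, embedding_dim=256, num_heads=8, mlp_dim=2048),
    )
    masks, iou_pred, src = decoder(
        image_embeddings=image_emb,           # 1 x 256 x 64 x 64 from the image encoder
        image_pe=pe.get_dense_pe(),           # matching dense positional encoding
        sparse_prompt_embeddings=sparse_emb,  # B x N x 256 point/box tokens
        dense_prompt_embeddings=dense_emb,    # B x 256 x 64 x 64 mask embedding
        multimask_output=True,                # keep mask channels 1..3, drop channel 0
    )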
models/modeling/prompt_encoder.py
ADDED
@@ -0,0 +1,214 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.

# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.

import numpy as np
import torch
from torch import nn

from typing import Any, Optional, Tuple, Type

from .common import LayerNorm2d


class PromptEncoder(nn.Module):
    def __init__(
        self,
        embed_dim: int,
        image_embedding_size: Tuple[int, int],
        input_image_size: Tuple[int, int],
        mask_in_chans: int,
        activation: Type[nn.Module] = nn.GELU,
    ) -> None:
        """
        Encodes prompts for input to SAM's mask decoder.

        Arguments:
          embed_dim (int): The prompts' embedding dimension
          image_embedding_size (tuple(int, int)): The spatial size of the
            image embedding, as (H, W).
          input_image_size (tuple(int, int)): The padded size of the image as input
            to the image encoder, as (H, W).
          mask_in_chans (int): The number of hidden channels used for
            encoding input masks.
          activation (nn.Module): The activation to use when encoding
            input masks.
        """
        super().__init__()
        self.embed_dim = embed_dim
        self.input_image_size = input_image_size
        self.image_embedding_size = image_embedding_size
        self.pe_layer = PositionEmbeddingRandom(embed_dim // 2)

        self.num_point_embeddings: int = 4  # pos/neg point + 2 box corners
        point_embeddings = [nn.Embedding(1, embed_dim) for i in range(self.num_point_embeddings)]
        self.point_embeddings = nn.ModuleList(point_embeddings)
        self.not_a_point_embed = nn.Embedding(1, embed_dim)

        self.mask_input_size = (4 * image_embedding_size[0], 4 * image_embedding_size[1])
        self.mask_downscaling = nn.Sequential(
            nn.Conv2d(1, mask_in_chans // 4, kernel_size=2, stride=2),
            LayerNorm2d(mask_in_chans // 4),
            activation(),
            nn.Conv2d(mask_in_chans // 4, mask_in_chans, kernel_size=2, stride=2),
            LayerNorm2d(mask_in_chans),
            activation(),
            nn.Conv2d(mask_in_chans, embed_dim, kernel_size=1),
        )
        self.no_mask_embed = nn.Embedding(1, embed_dim)

    def get_dense_pe(self) -> torch.Tensor:
        """
        Returns the positional encoding used to encode point prompts,
        applied to a dense set of points the shape of the image encoding.

        Returns:
          torch.Tensor: Positional encoding with shape
            1x(embed_dim)x(embedding_h)x(embedding_w)
        """
        return self.pe_layer(self.image_embedding_size).unsqueeze(0)

    def _embed_points(
        self,
        points: torch.Tensor,
        labels: torch.Tensor,
        pad: bool,
    ) -> torch.Tensor:
        """Embeds point prompts."""
        points = points + 0.5  # Shift to center of pixel
        if pad:
            padding_point = torch.zeros((points.shape[0], 1, 2), device=points.device)
            padding_label = -torch.ones((labels.shape[0], 1), device=labels.device)
            points = torch.cat([points, padding_point], dim=1)
            labels = torch.cat([labels, padding_label], dim=1)
        point_embedding = self.pe_layer.forward_with_coords(points, self.input_image_size)
        point_embedding[labels == -1] = 0.0
        point_embedding[labels == -1] += self.not_a_point_embed.weight
        point_embedding[labels == 0] += self.point_embeddings[0].weight
        point_embedding[labels == 1] += self.point_embeddings[1].weight
        return point_embedding

    def _embed_boxes(self, boxes: torch.Tensor) -> torch.Tensor:
        """Embeds box prompts."""
        boxes = boxes + 0.5  # Shift to center of pixel
        coords = boxes.reshape(-1, 2, 2)
        corner_embedding = self.pe_layer.forward_with_coords(coords, self.input_image_size)
        corner_embedding[:, 0, :] += self.point_embeddings[2].weight
        corner_embedding[:, 1, :] += self.point_embeddings[3].weight
        return corner_embedding

    def _embed_masks(self, masks: torch.Tensor) -> torch.Tensor:
        """Embeds mask inputs."""
        mask_embedding = self.mask_downscaling(masks)
        return mask_embedding

    def _get_batch_size(
        self,
        points: Optional[Tuple[torch.Tensor, torch.Tensor]],
        boxes: Optional[torch.Tensor],
        masks: Optional[torch.Tensor],
    ) -> int:
        """
        Gets the batch size of the output given the batch size of the input prompts.
        """
        if points is not None:
            return points[0].shape[0]
        elif boxes is not None:
            return boxes.shape[0]
        elif masks is not None:
            return masks.shape[0]
        else:
            return 1

    def _get_device(self) -> torch.device:
        return self.point_embeddings[0].weight.device

    def forward(
        self,
        points: Optional[Tuple[torch.Tensor, torch.Tensor]],
        boxes: Optional[torch.Tensor],
        masks: Optional[torch.Tensor],
    ) -> Tuple[torch.Tensor, torch.Tensor]:
        """
        Embeds different types of prompts, returning both sparse and dense
        embeddings.

        Arguments:
          points (tuple(torch.Tensor, torch.Tensor) or none): point coordinates
            and labels to embed.
          boxes (torch.Tensor or none): boxes to embed
          masks (torch.Tensor or none): masks to embed

        Returns:
          torch.Tensor: sparse embeddings for the points and boxes, with shape
            BxNx(embed_dim), where N is determined by the number of input points
            and boxes.
          torch.Tensor: dense embeddings for the masks, in the shape
            Bx(embed_dim)x(embed_H)x(embed_W)
        """
        bs = self._get_batch_size(points, boxes, masks)
        sparse_embeddings = torch.empty((bs, 0, self.embed_dim), device=self._get_device())
        if points is not None:
            coords, labels = points
            point_embeddings = self._embed_points(coords, labels, pad=(boxes is None))
            sparse_embeddings = torch.cat([sparse_embeddings, point_embeddings], dim=1)
        if boxes is not None:
            box_embeddings = self._embed_boxes(boxes)
            sparse_embeddings = torch.cat([sparse_embeddings, box_embeddings], dim=1)

        if masks is not None:
            dense_embeddings = self._embed_masks(masks)
        else:
            dense_embeddings = self.no_mask_embed.weight.reshape(1, -1, 1, 1).expand(
                bs, -1, self.image_embedding_size[0], self.image_embedding_size[1]
            )

        return sparse_embeddings, dense_embeddings


class PositionEmbeddingRandom(nn.Module):
    """
    Positional encoding using random spatial frequencies.
    """

    def __init__(self, num_pos_feats: int = 64, scale: Optional[float] = None) -> None:
        super().__init__()
        if scale is None or scale <= 0.0:
            scale = 1.0
        self.register_buffer(
            "positional_encoding_gaussian_matrix",
            scale * torch.randn((2, num_pos_feats)),
        )

    def _pe_encoding(self, coords: torch.Tensor) -> torch.Tensor:
        """Positionally encode points that are normalized to [0,1]."""
        # assuming coords are in [0, 1]^2 square and have d_1 x ... x d_n x 2 shape
        coords = 2 * coords - 1
        coords = coords @ self.positional_encoding_gaussian_matrix
        coords = 2 * np.pi * coords
        # outputs d_1 x ... x d_n x C shape
        return torch.cat([torch.sin(coords), torch.cos(coords)], dim=-1)

    def forward(self, size: Tuple[int, int]) -> torch.Tensor:
        """Generate positional encoding for a grid of the specified size."""
        h, w = size
        device: Any = self.positional_encoding_gaussian_matrix.device
        grid = torch.ones((h, w), device=device, dtype=torch.float32)
        y_embed = grid.cumsum(dim=0) - 0.5
        x_embed = grid.cumsum(dim=1) - 0.5
        y_embed = y_embed / h
        x_embed = x_embed / w

        pe = self._pe_encoding(torch.stack([x_embed, y_embed], dim=-1))
        return pe.permute(2, 0, 1)  # C x H x W

    def forward_with_coords(
        self, coords_input: torch.Tensor, image_size: Tuple[int, int]
    ) -> torch.Tensor:
        """Positionally encode points that are not normalized to [0,1]."""
        coords = coords_input.clone()
        coords[:, :, 0] = coords[:, :, 0] / image_size[1]
        coords[:, :, 1] = coords[:, :, 1] / image_size[0]
        return self._pe_encoding(coords.to(torch.float))  # B x N x C
models/modeling/sam.py
ADDED
@@ -0,0 +1,174 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.

# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.

import torch
from torch import nn
from torch.nn import functional as F

from typing import Any, Dict, List, Tuple

from .image_encoder import ImageEncoderViT
from .mask_decoder import MaskDecoder
from .prompt_encoder import PromptEncoder


class Sam(nn.Module):
    mask_threshold: float = 0.0
    image_format: str = "RGB"

    def __init__(
        self,
        image_encoder: ImageEncoderViT,
        prompt_encoder: PromptEncoder,
        mask_decoder: MaskDecoder,
        pixel_mean: List[float] = [123.675, 116.28, 103.53],
        pixel_std: List[float] = [58.395, 57.12, 57.375],
    ) -> None:
        """
        SAM predicts object masks from an image and input prompts.

        Arguments:
          image_encoder (ImageEncoderViT): The backbone used to encode the
            image into image embeddings that allow for efficient mask prediction.
          prompt_encoder (PromptEncoder): Encodes various types of input prompts.
          mask_decoder (MaskDecoder): Predicts masks from the image embeddings
            and encoded prompts.
          pixel_mean (list(float)): Mean values for normalizing pixels in the input image.
          pixel_std (list(float)): Std values for normalizing pixels in the input image.
        """
        super().__init__()
        self.image_encoder = image_encoder
        self.prompt_encoder = prompt_encoder
        self.mask_decoder = mask_decoder
        self.register_buffer("pixel_mean", torch.Tensor(pixel_mean).view(-1, 1, 1), False)
        self.register_buffer("pixel_std", torch.Tensor(pixel_std).view(-1, 1, 1), False)

    @property
    def device(self) -> Any:
        return self.pixel_mean.device

    @torch.no_grad()
    def forward(
        self,
        batched_input: List[Dict[str, Any]],
        multimask_output: bool,
    ) -> List[Dict[str, torch.Tensor]]:
        """
        Predicts masks end-to-end from provided images and prompts.
        If prompts are not known in advance, using SamPredictor is
        recommended over calling the model directly.

        Arguments:
          batched_input (list(dict)): A list over input images, each a
            dictionary with the following keys. A prompt key can be
            excluded if it is not present.
              'image': The image as a torch tensor in 3xHxW format,
                already transformed for input to the model.
              'original_size': (tuple(int, int)) The original size of
                the image before transformation, as (H, W).
              'point_coords': (torch.Tensor) Batched point prompts for
                this image, with shape BxNx2. Already transformed to the
                input frame of the model.
              'point_labels': (torch.Tensor) Batched labels for point prompts,
                with shape BxN.
              'boxes': (torch.Tensor) Batched box inputs, with shape Bx4.
                Already transformed to the input frame of the model.
              'mask_inputs': (torch.Tensor) Batched mask inputs to the model,
                in the form Bx1xHxW.
          multimask_output (bool): Whether the model should predict multiple
            disambiguating masks, or return a single mask.

        Returns:
          (list(dict)): A list over input images, where each element is
            a dictionary with the following keys.
              'masks': (torch.Tensor) Batched binary mask predictions,
                with shape BxCxHxW, where B is the number of input prompts,
                C is determined by multimask_output, and (H, W) is the
                original size of the image.
              'iou_predictions': (torch.Tensor) The model's predictions
                of mask quality, in shape BxC.
              'low_res_logits': (torch.Tensor) Low resolution logits with
                shape BxCxHxW, where H=W=256. Can be passed as mask input
                to subsequent iterations of prediction.
        """
        input_images = torch.stack([self.preprocess(x["image"]) for x in batched_input], dim=0)
        image_embeddings = self.image_encoder(input_images)

        outputs = []
        for image_record, curr_embedding in zip(batched_input, image_embeddings):
            if "point_coords" in image_record:
                points = (image_record["point_coords"], image_record["point_labels"])
            else:
                points = None
            sparse_embeddings, dense_embeddings = self.prompt_encoder(
                points=points,
                boxes=image_record.get("boxes", None),
                masks=image_record.get("mask_inputs", None),
            )
            # The modified MaskDecoder in this repo also returns the
            # transformer-updated image embedding (src), unused here.
            low_res_masks, iou_predictions, _ = self.mask_decoder(
                image_embeddings=curr_embedding.unsqueeze(0),
                image_pe=self.prompt_encoder.get_dense_pe(),
                sparse_prompt_embeddings=sparse_embeddings,
                dense_prompt_embeddings=dense_embeddings,
                multimask_output=multimask_output,
            )
            masks = self.postprocess_masks(
                low_res_masks,
                input_size=image_record["image"].shape[-2:],
                original_size=image_record["original_size"],
            )
            masks = masks > self.mask_threshold
            outputs.append(
                {
                    "masks": masks,
                    "iou_predictions": iou_predictions,
                    "low_res_logits": low_res_masks,
                }
            )
        return outputs

    def postprocess_masks(
        self,
        masks: torch.Tensor,
        input_size: Tuple[int, ...],
        original_size: Tuple[int, ...],
    ) -> torch.Tensor:
        """
        Remove padding and upscale masks to the original image size.

        Arguments:
          masks (torch.Tensor): Batched masks from the mask_decoder,
            in BxCxHxW format.
          input_size (tuple(int, int)): The size of the image input to the
            model, in (H, W) format. Used to remove padding.
          original_size (tuple(int, int)): The original size of the image
            before resizing for input to the model, in (H, W) format.

        Returns:
          (torch.Tensor): Batched masks in BxCxHxW format, where (H, W)
            is given by original_size.
        """
        masks = F.interpolate(
            masks,
            (self.image_encoder.img_size, self.image_encoder.img_size),
            mode="bilinear",
            align_corners=False,
        )
        masks = masks[..., : input_size[0], : input_size[1]]
        masks = F.interpolate(masks, original_size, mode="bilinear", align_corners=False)
        return masks

    def preprocess(self, x: torch.Tensor) -> torch.Tensor:
        """Normalize pixel values and pad to a square input."""
        # Normalize colors
        x = (x - self.pixel_mean) / self.pixel_std

        # Pad
        h, w = x.shape[-2:]
        padh = self.image_encoder.img_size - h
        padw = self.image_encoder.img_size - w
        x = F.pad(x, (0, padw, 0, padh))
        return x
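A hedged end-to-end sketch for Sam.forward (illustrative; sam, image_1024, and boxes_1024 are placeholders for a built model and prompts already transformed into its 1024-pixel input frame, e.g. via ResizeLongestSide from models/utils/transforms.py):

    batched_input = [
        {
            "image": image_1024,          # 3 x H x W tensor, already resized
            "original_size": (480, 640),  # (H, W) before the resize
            "boxes": boxes_1024,          # B x 4 XYXY boxes in the resized frame
        }
    ]
    outputs = sam(batched_input, multimask_output=False)
    masks = outputs[0]["masks"]           # B x 1 x 480 x 640 boolean masks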
models/modeling/transformer.py
ADDED
@@ -0,0 +1,240 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.

# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.

import torch
from torch import Tensor, nn

import math
from typing import Tuple, Type

from .common import MLPBlock


class TwoWayTransformer(nn.Module):
    def __init__(
        self,
        depth: int,
        embedding_dim: int,
        num_heads: int,
        mlp_dim: int,
        activation: Type[nn.Module] = nn.ReLU,
        attention_downsample_rate: int = 2,
    ) -> None:
        """
        A transformer decoder that attends to an input image using
        queries whose positional embedding is supplied.

        Args:
          depth (int): number of layers in the transformer
          embedding_dim (int): the channel dimension for the input embeddings
          num_heads (int): the number of heads for multihead attention. Must
            divide embedding_dim
          mlp_dim (int): the channel dimension internal to the MLP block
          activation (nn.Module): the activation to use in the MLP block
        """
        super().__init__()
        self.depth = depth
        self.embedding_dim = embedding_dim
        self.num_heads = num_heads
        self.mlp_dim = mlp_dim
        self.layers = nn.ModuleList()

        for i in range(depth):
            self.layers.append(
                TwoWayAttentionBlock(
                    embedding_dim=embedding_dim,
                    num_heads=num_heads,
                    mlp_dim=mlp_dim,
                    activation=activation,
                    attention_downsample_rate=attention_downsample_rate,
                    skip_first_layer_pe=(i == 0),
                )
            )

        self.final_attn_token_to_image = Attention(
            embedding_dim, num_heads, downsample_rate=attention_downsample_rate
        )
        self.norm_final_attn = nn.LayerNorm(embedding_dim)

    def forward(
        self,
        image_embedding: Tensor,
        image_pe: Tensor,
        point_embedding: Tensor,
    ) -> Tuple[Tensor, Tensor]:
        """
        Args:
          image_embedding (torch.Tensor): image to attend to. Should be shape
            B x embedding_dim x h x w for any h and w.
          image_pe (torch.Tensor): the positional encoding to add to the image. Must
            have the same shape as image_embedding.
          point_embedding (torch.Tensor): the embedding to add to the query points.
            Must have shape B x N_points x embedding_dim for any N_points.

        Returns:
          torch.Tensor: the processed point_embedding
          torch.Tensor: the processed image_embedding
        """
        # BxCxHxW -> BxHWxC == B x N_image_tokens x C
        bs, c, h, w = image_embedding.shape
        image_embedding = image_embedding.flatten(2).permute(0, 2, 1)
        image_pe = image_pe.flatten(2).permute(0, 2, 1)

        # Prepare queries
        queries = point_embedding
        keys = image_embedding

        # Apply transformer blocks and final layernorm
        for layer in self.layers:
            queries, keys = layer(
                queries=queries,
                keys=keys,
                query_pe=point_embedding,
                key_pe=image_pe,
            )

        # Apply the final attention layer from the points to the image
        q = queries + point_embedding
        k = keys + image_pe
        attn_out = self.final_attn_token_to_image(q=q, k=k, v=keys)
        queries = queries + attn_out
        queries = self.norm_final_attn(queries)

        return queries, keys


class TwoWayAttentionBlock(nn.Module):
    def __init__(
        self,
        embedding_dim: int,
        num_heads: int,
        mlp_dim: int = 2048,
        activation: Type[nn.Module] = nn.ReLU,
        attention_downsample_rate: int = 2,
        skip_first_layer_pe: bool = False,
    ) -> None:
        """
        A transformer block with four layers: (1) self-attention of sparse
        inputs, (2) cross attention of sparse inputs to dense inputs, (3) mlp
        block on sparse inputs, and (4) cross attention of dense inputs to sparse
        inputs.

        Arguments:
          embedding_dim (int): the channel dimension of the embeddings
          num_heads (int): the number of heads in the attention layers
          mlp_dim (int): the hidden dimension of the mlp block
          activation (nn.Module): the activation of the mlp block
          skip_first_layer_pe (bool): skip the PE on the first layer
        """
        super().__init__()
        self.self_attn = Attention(embedding_dim, num_heads)
        self.norm1 = nn.LayerNorm(embedding_dim)

        self.cross_attn_token_to_image = Attention(
            embedding_dim, num_heads, downsample_rate=attention_downsample_rate
        )
        self.norm2 = nn.LayerNorm(embedding_dim)

        self.mlp = MLPBlock(embedding_dim, mlp_dim, activation)
        self.norm3 = nn.LayerNorm(embedding_dim)

        self.norm4 = nn.LayerNorm(embedding_dim)
        self.cross_attn_image_to_token = Attention(
            embedding_dim, num_heads, downsample_rate=attention_downsample_rate
        )

        self.skip_first_layer_pe = skip_first_layer_pe

    def forward(
        self, queries: Tensor, keys: Tensor, query_pe: Tensor, key_pe: Tensor
    ) -> Tuple[Tensor, Tensor]:
        # Self attention block
        if self.skip_first_layer_pe:
            queries = self.self_attn(q=queries, k=queries, v=queries)
        else:
            q = queries + query_pe
            attn_out = self.self_attn(q=q, k=q, v=queries)
            queries = queries + attn_out
        queries = self.norm1(queries)

        # Cross attention block, tokens attending to image embedding
        q = queries + query_pe
        k = keys + key_pe
        attn_out = self.cross_attn_token_to_image(q=q, k=k, v=keys)
        queries = queries + attn_out
        queries = self.norm2(queries)

        # MLP block
        mlp_out = self.mlp(queries)
        queries = queries + mlp_out
        queries = self.norm3(queries)

        # Cross attention block, image embedding attending to tokens
        q = queries + query_pe
        k = keys + key_pe
        attn_out = self.cross_attn_image_to_token(q=k, k=q, v=queries)
        keys = keys + attn_out
        keys = self.norm4(keys)

        return queries, keys


class Attention(nn.Module):
    """
    An attention layer that allows for downscaling the size of the embedding
    after projection to queries, keys, and values.
    """

    def __init__(
        self,
        embedding_dim: int,
        num_heads: int,
        downsample_rate: int = 1,
    ) -> None:
        super().__init__()
        self.embedding_dim = embedding_dim
        self.internal_dim = embedding_dim // downsample_rate
        self.num_heads = num_heads
        assert self.internal_dim % num_heads == 0, "num_heads must divide embedding_dim."

        self.q_proj = nn.Linear(embedding_dim, self.internal_dim)
        self.k_proj = nn.Linear(embedding_dim, self.internal_dim)
        self.v_proj = nn.Linear(embedding_dim, self.internal_dim)
        self.out_proj = nn.Linear(self.internal_dim, embedding_dim)

    def _separate_heads(self, x: Tensor, num_heads: int) -> Tensor:
        b, n, c = x.shape
        x = x.reshape(b, n, num_heads, c // num_heads)
        return x.transpose(1, 2)  # B x N_heads x N_tokens x C_per_head

    def _recombine_heads(self, x: Tensor) -> Tensor:
        b, n_heads, n_tokens, c_per_head = x.shape
        x = x.transpose(1, 2)
        return x.reshape(b, n_tokens, n_heads * c_per_head)  # B x N_tokens x C

    def forward(self, q: Tensor, k: Tensor, v: Tensor) -> Tensor:
        # Input projections
        q = self.q_proj(q)
        k = self.k_proj(k)
        v = self.v_proj(v)

        # Separate into heads
        q = self._separate_heads(q, self.num_heads)
        k = self._separate_heads(k, self.num_heads)
        v = self._separate_heads(v, self.num_heads)

        # Attention
        _, _, _, c_per_head = q.shape
        attn = q @ k.permute(0, 1, 3, 2)  # B x N_heads x N_tokens x N_tokens
        attn = attn / math.sqrt(c_per_head)
        attn = torch.softmax(attn, dim=-1)

        # Get output
        out = attn @ v
        out = self._recombine_heads(out)
        out = self.out_proj(out)

        return out
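A quick shape sketch for TwoWayTransformer (illustrative; the sizes are the SAM-style defaults assumed throughout this upload):

    t = TwoWayTransformer(depth=2, embedding_dim=256, num_heads=8, mlp_dim=2048)
    img = torch.randn(1, 256, 64, 64)  # B x C x h x w image embedding
    pe = torch.randn(1, 256, 64, 64)   # matching positional encoding
    tokens = torch.randn(1, 7, 256)    # e.g. 1 IoU token + 4 mask tokens + 2 box corners
    queries, keys = t(image_embedding=img, image_pe=pe, point_embedding=tokens)
    assert queries.shape == (1, 7, 256)   # updated tokens
    assert keys.shape == (1, 4096, 256)   # updated, flattened image embedding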
models/predictor.py
ADDED
@@ -0,0 +1,269 @@
1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
2 |
+
# All rights reserved.
|
3 |
+
|
4 |
+
# This source code is licensed under the license found in the
|
5 |
+
# LICENSE file in the root directory of this source tree.
|
6 |
+
|
7 |
+
import numpy as np
|
8 |
+
import torch
|
9 |
+
|
10 |
+
from models.modeling import Sam
|
11 |
+
|
12 |
+
from typing import Optional, Tuple
|
13 |
+
|
14 |
+
from .utils.transforms import ResizeLongestSide
|
15 |
+
|
16 |
+
|
17 |
+
class SamPredictor:
|
18 |
+
def __init__(
|
19 |
+
self,
|
20 |
+
sam_model: Sam,
|
21 |
+
) -> None:
|
22 |
+
"""
|
23 |
+
Uses SAM to calculate the image embedding for an image, and then
|
24 |
+
allow repeated, efficient mask prediction given prompts.
|
25 |
+
|
26 |
+
Arguments:
|
27 |
+
sam_model (Sam): The model to use for mask prediction.
|
28 |
+
"""
|
29 |
+
super().__init__()
|
30 |
+
self.model = sam_model
|
31 |
+
self.transform = ResizeLongestSide(sam_model.image_encoder.img_size)
|
32 |
+
self.reset_image()
|
33 |
+
|
34 |
+
def set_image(
|
35 |
+
self,
|
36 |
+
image: np.ndarray,
|
37 |
+
image_format: str = "RGB",
|
38 |
+
) -> None:
|
39 |
+
"""
|
40 |
+
Calculates the image embeddings for the provided image, allowing
|
41 |
+
masks to be predicted with the 'predict' method.
|
42 |
+
|
43 |
+
Arguments:
|
44 |
+
image (np.ndarray): The image for calculating masks. Expects an
|
45 |
+
image in HWC uint8 format, with pixel values in [0, 255].
|
46 |
+
image_format (str): The color format of the image, in ['RGB', 'BGR'].
|
47 |
+
"""
|
48 |
+
assert image_format in [
|
49 |
+
"RGB",
|
50 |
+
"BGR",
|
51 |
+
], f"image_format must be in ['RGB', 'BGR'], is {image_format}."
|
52 |
+
if image_format != self.model.image_format:
|
53 |
+
image = image[..., ::-1]
|
54 |
+
|
55 |
+
# Transform the image to the form expected by the model
|
56 |
+
input_image = self.transform.apply_image(image)
|
57 |
+
input_image_torch = torch.as_tensor(input_image, device=self.device)
|
58 |
+
input_image_torch = input_image_torch.permute(2, 0, 1).contiguous()[None, :, :, :]
|
59 |
+
|
60 |
+
self.set_torch_image(input_image_torch, image.shape[:2])
|
61 |
+
|
62 |
+
@torch.no_grad()
|
63 |
+
def set_torch_image(
|
64 |
+
self,
|
65 |
+
transformed_image: torch.Tensor,
|
66 |
+
original_image_size: Tuple[int, ...],
|
67 |
+
) -> None:
|
68 |
+
"""
|
69 |
+
Calculates the image embeddings for the provided image, allowing
|
70 |
+
masks to be predicted with the 'predict' method. Expects the input
|
71 |
+
image to be already transformed to the format expected by the model.
|
72 |
+
|
73 |
+
Arguments:
|
74 |
+
transformed_image (torch.Tensor): The input image, with shape
|
75 |
+
1x3xHxW, which has been transformed with ResizeLongestSide.
|
76 |
+
original_image_size (tuple(int, int)): The size of the image
|
77 |
+
before transformation, in (H, W) format.
|
78 |
+
"""
|
79 |
+
assert (
|
80 |
+
len(transformed_image.shape) == 4
|
81 |
+
and transformed_image.shape[1] == 3
|
82 |
+
and max(*transformed_image.shape[2:]) == self.model.image_encoder.img_size
|
83 |
+
), f"set_torch_image input must be BCHW with long side {self.model.image_encoder.img_size}."
|
84 |
+
self.reset_image()
|
85 |
+
|
86 |
+
self.original_size = original_image_size
|
87 |
+
self.input_size = tuple(transformed_image.shape[-2:])
|
88 |
+
input_image = self.model.preprocess(transformed_image)
|
89 |
+
self.features = self.model.image_encoder(input_image)
|
90 |
+
self.is_image_set = True
|
91 |
+
|
92 |
+
def predict(
|
93 |
+
self,
|
94 |
+
point_coords: Optional[np.ndarray] = None,
|
95 |
+
point_labels: Optional[np.ndarray] = None,
|
96 |
+
box: Optional[np.ndarray] = None,
|
97 |
+
mask_input: Optional[np.ndarray] = None,
|
98 |
+
multimask_output: bool = True,
|
99 |
+
return_logits: bool = False,
|
100 |
+
) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
|
101 |
+
"""
|
102 |
+
Predict masks for the given input prompts, using the currently set image.
|
103 |
+
|
104 |
+
Arguments:
|
105 |
+
point_coords (np.ndarray or None): A Nx2 array of point prompts to the
|
106 |
+
model. Each point is in (X,Y) in pixels.
|
107 |
+
point_labels (np.ndarray or None): A length N array of labels for the
|
108 |
+
point prompts. 1 indicates a foreground point and 0 indicates a
|
109 |
+
background point.
|
110 |
+
box (np.ndarray or None): A length 4 array given a box prompt to the
|
111 |
+
model, in XYXY format.
|
112 |
+
mask_input (np.ndarray): A low resolution mask input to the model, typically
|
113 |
+
coming from a previous prediction iteration. Has form 1xHxW, where
|
114 |
+
for SAM, H=W=256.
|
115 |
+
multimask_output (bool): If true, the model will return three masks.
|
116 |
+
For ambiguous input prompts (such as a single click), this will often
|
117 |
+
produce better masks than a single prediction. If only a single
|
118 |
+
mask is needed, the model's predicted quality score can be used
|
119 |
+
to select the best mask. For non-ambiguous prompts, such as multiple
|
120 |
+
input prompts, multimask_output=False can give better results.
|
121 |
+
return_logits (bool): If true, returns un-thresholded masks logits
|
122 |
+
instead of a binary mask.
|
123 |
+
|
124 |
+
Returns:
|
125 |
+
(np.ndarray): The output masks in CxHxW format, where C is the
|
126 |
+
number of masks, and (H, W) is the original image size.
|
127 |
+
(np.ndarray): An array of length C containing the model's
|
128 |
+
predictions for the quality of each mask.
|
129 |
+
(np.ndarray): An array of shape CxHxW, where C is the number
|
130 |
+
of masks and H=W=256. These low resolution logits can be passed to
|
131 |
+
a subsequent iteration as mask input.
|
132 |
+
"""
|
133 |
+
if not self.is_image_set:
|
134 |
+
raise RuntimeError("An image must be set with .set_image(...) before mask prediction.")
|
135 |
+
|
136 |
+
# Transform input prompts
|
137 |
+
coords_torch, labels_torch, box_torch, mask_input_torch = None, None, None, None
|
138 |
+
if point_coords is not None:
|
139 |
+
assert (
|
140 |
+
point_labels is not None
|
141 |
+
), "point_labels must be supplied if point_coords is supplied."
|
142 |
+
point_coords = self.transform.apply_coords(point_coords, self.original_size)
|
143 |
+
coords_torch = torch.as_tensor(point_coords, dtype=torch.float, device=self.device)
|
144 |
+
labels_torch = torch.as_tensor(point_labels, dtype=torch.int, device=self.device)
|
145 |
+
coords_torch, labels_torch = coords_torch[None, :, :], labels_torch[None, :]
|
146 |
+
if box is not None:
|
147 |
+
box = self.transform.apply_boxes(box, self.original_size)
|
148 |
+
box_torch = torch.as_tensor(box, dtype=torch.float, device=self.device)
|
149 |
+
box_torch = box_torch[None, :]
|
150 |
+
if mask_input is not None:
|
151 |
+
mask_input_torch = torch.as_tensor(mask_input, dtype=torch.float, device=self.device)
|
152 |
+
mask_input_torch = mask_input_torch[None, :, :, :]
|
153 |
+
|
154 |
+
masks, iou_predictions, low_res_masks = self.predict_torch(
|
155 |
+
coords_torch,
|
156 |
+
labels_torch,
|
157 |
+
box_torch,
|
158 |
+
mask_input_torch,
|
159 |
+
multimask_output,
|
160 |
+
return_logits=return_logits,
|
161 |
+
)
|
162 |
+
|
163 |
+
masks_np = masks[0].detach().cpu().numpy()
|
164 |
+
iou_predictions_np = iou_predictions[0].detach().cpu().numpy()
|
165 |
+
low_res_masks_np = low_res_masks[0].detach().cpu().numpy()
|
166 |
+
return masks_np, iou_predictions_np, low_res_masks_np
|
167 |
+
|
168 |
+
@torch.no_grad()
|
169 |
+
def predict_torch(
|
170 |
+
self,
|
171 |
+
point_coords: Optional[torch.Tensor],
|
172 |
+
point_labels: Optional[torch.Tensor],
|
173 |
+
boxes: Optional[torch.Tensor] = None,
|
174 |
+
mask_input: Optional[torch.Tensor] = None,
|
175 |
+
multimask_output: bool = True,
|
176 |
+
        return_logits: bool = False,
    ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
        """
        Predict masks for the given input prompts, using the currently set image.
        Input prompts are batched torch tensors and are expected to already be
        transformed to the input frame using ResizeLongestSide.

        Arguments:
          point_coords (torch.Tensor or None): A BxNx2 array of point prompts to the
            model. Each point is in (X,Y) in pixels.
          point_labels (torch.Tensor or None): A BxN array of labels for the
            point prompts. 1 indicates a foreground point and 0 indicates a
            background point.
          boxes (np.ndarray or None): A Bx4 array given a box prompt to the
            model, in XYXY format.
          mask_input (np.ndarray): A low resolution mask input to the model, typically
            coming from a previous prediction iteration. Has form Bx1xHxW, where
            for SAM, H=W=256. Masks returned by a previous iteration of the
            predict method do not need further transformation.
          multimask_output (bool): If true, the model will return three masks.
            For ambiguous input prompts (such as a single click), this will often
            produce better masks than a single prediction. If only a single
            mask is needed, the model's predicted quality score can be used
            to select the best mask. For non-ambiguous prompts, such as multiple
            input prompts, multimask_output=False can give better results.
          return_logits (bool): If true, returns un-thresholded mask logits
            instead of a binary mask.

        Returns:
          (torch.Tensor): The output masks in BxCxHxW format, where C is the
            number of masks, and (H, W) is the original image size.
          (torch.Tensor): An array of shape BxC containing the model's
            predictions for the quality of each mask.
          (torch.Tensor): An array of shape BxCxHxW, where C is the number
            of masks and H=W=256. These low res logits can be passed to
            a subsequent iteration as mask input.
        """
        if not self.is_image_set:
            raise RuntimeError("An image must be set with .set_image(...) before mask prediction.")

        if point_coords is not None:
            points = (point_coords, point_labels)
        else:
            points = None

        # Embed prompts
        sparse_embeddings, dense_embeddings = self.model.prompt_encoder(
            points=points,
            boxes=boxes,
            masks=mask_input,
        )

        # Predict masks
        low_res_masks, iou_predictions = self.model.mask_decoder(
            image_embeddings=self.features,
            image_pe=self.model.prompt_encoder.get_dense_pe(),
            sparse_prompt_embeddings=sparse_embeddings,
            dense_prompt_embeddings=dense_embeddings,
            multimask_output=multimask_output,
        )

        # Upscale the masks to the original image resolution
        masks = self.model.postprocess_masks(low_res_masks, self.input_size, self.original_size)

        if not return_logits:
            masks = masks > self.model.mask_threshold

        return masks, iou_predictions, low_res_masks

    def get_image_embedding(self) -> torch.Tensor:
        """
        Returns the image embeddings for the currently set image, with
        shape 1xCxHxW, where C is the embedding dimension and (H,W) are
        the embedding spatial dimension of SAM (typically C=256, H=W=64).
        """
        if not self.is_image_set:
            raise RuntimeError(
                "An image must be set with .set_image(...) to generate an embedding."
            )
        assert self.features is not None, "Features must exist if an image has been set."
        return self.features

    @property
    def device(self) -> torch.device:
        return self.model.device

    def reset_image(self) -> None:
        """Resets the currently set image."""
        self.is_image_set = False
        self.features = None
        self.orig_h = None
        self.orig_w = None
        self.input_h = None
        self.input_w = None
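The predictor above keeps upstream SAM's two-stage flow: `set_image` runs the image encoder once and caches the embedding, after which prompts are decoded cheaply. A minimal usage sketch, assuming this file exports upstream's `SamPredictor` name with its usual interface (`.transform`, `.set_image`, `.predict_torch`), that `build_sam.py` keeps the `sam_model_registry` mapping, and that `sam_vit_b.pth` and `example.jpg` are hypothetical local files:

```python
import cv2
import torch

from models import sam_model_registry
from models.predictor import SamPredictor  # assumed export, as in upstream SAM

sam = sam_model_registry["vit_b"](checkpoint="sam_vit_b.pth")  # hypothetical path
predictor = SamPredictor(sam)

image = cv2.cvtColor(cv2.imread("example.jpg"), cv2.COLOR_BGR2RGB)
predictor.set_image(image)  # encodes once; later prompts reuse the cached features

# One XYXY box prompt, mapped into the 1024-long-side input frame first.
box = torch.as_tensor([[100, 100, 400, 400]], dtype=torch.float, device=predictor.device)
box = predictor.transform.apply_boxes_torch(box, image.shape[:2])

masks, scores, low_res = predictor.predict_torch(
    point_coords=None, point_labels=None, boxes=box, multimask_output=False
)
print(masks.shape, scores.shape)  # Bx1xHxW boolean masks, Bx1 quality scores
```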
models/utils/__init__.py
ADDED
@@ -0,0 +1,5 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.

# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
models/utils/amg.py
ADDED
@@ -0,0 +1,346 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.

# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.

import numpy as np
import torch

import math
from copy import deepcopy
from itertools import product
from typing import Any, Dict, Generator, ItemsView, List, Tuple


class MaskData:
    """
    A structure for storing masks and their related data in batched format.
    Implements basic filtering and concatenation.
    """

    def __init__(self, **kwargs) -> None:
        for v in kwargs.values():
            assert isinstance(
                v, (list, np.ndarray, torch.Tensor)
            ), "MaskData only supports list, numpy arrays, and torch tensors."
        self._stats = dict(**kwargs)

    def __setitem__(self, key: str, item: Any) -> None:
        assert isinstance(
            item, (list, np.ndarray, torch.Tensor)
        ), "MaskData only supports list, numpy arrays, and torch tensors."
        self._stats[key] = item

    def __delitem__(self, key: str) -> None:
        del self._stats[key]

    def __getitem__(self, key: str) -> Any:
        return self._stats[key]

    def items(self) -> ItemsView[str, Any]:
        return self._stats.items()

    def filter(self, keep: torch.Tensor) -> None:
        for k, v in self._stats.items():
            if v is None:
                self._stats[k] = None
            elif isinstance(v, torch.Tensor):
                self._stats[k] = v[torch.as_tensor(keep, device=v.device)]
            elif isinstance(v, np.ndarray):
                self._stats[k] = v[keep.detach().cpu().numpy()]
            elif isinstance(v, list) and keep.dtype == torch.bool:
                self._stats[k] = [a for i, a in enumerate(v) if keep[i]]
            elif isinstance(v, list):
                self._stats[k] = [v[i] for i in keep]
            else:
                raise TypeError(f"MaskData key {k} has an unsupported type {type(v)}.")

    def cat(self, new_stats: "MaskData") -> None:
        for k, v in new_stats.items():
            if k not in self._stats or self._stats[k] is None:
                self._stats[k] = deepcopy(v)
            elif isinstance(v, torch.Tensor):
                self._stats[k] = torch.cat([self._stats[k], v], dim=0)
            elif isinstance(v, np.ndarray):
                self._stats[k] = np.concatenate([self._stats[k], v], axis=0)
            elif isinstance(v, list):
                self._stats[k] = self._stats[k] + deepcopy(v)
            else:
                raise TypeError(f"MaskData key {k} has an unsupported type {type(v)}.")

    def to_numpy(self) -> None:
        for k, v in self._stats.items():
            if isinstance(v, torch.Tensor):
                self._stats[k] = v.detach().cpu().numpy()


def is_box_near_crop_edge(
    boxes: torch.Tensor, crop_box: List[int], orig_box: List[int], atol: float = 20.0
) -> torch.Tensor:
    """Filter masks at the edge of a crop, but not at the edge of the original image."""
    crop_box_torch = torch.as_tensor(crop_box, dtype=torch.float, device=boxes.device)
    orig_box_torch = torch.as_tensor(orig_box, dtype=torch.float, device=boxes.device)
    boxes = uncrop_boxes_xyxy(boxes, crop_box).float()
    near_crop_edge = torch.isclose(boxes, crop_box_torch[None, :], atol=atol, rtol=0)
    near_image_edge = torch.isclose(boxes, orig_box_torch[None, :], atol=atol, rtol=0)
    near_crop_edge = torch.logical_and(near_crop_edge, ~near_image_edge)
    return torch.any(near_crop_edge, dim=1)


def box_xyxy_to_xywh(box_xyxy: torch.Tensor) -> torch.Tensor:
    box_xywh = deepcopy(box_xyxy)
    box_xywh[2] = box_xywh[2] - box_xywh[0]
    box_xywh[3] = box_xywh[3] - box_xywh[1]
    return box_xywh


def batch_iterator(batch_size: int, *args) -> Generator[List[Any], None, None]:
    assert len(args) > 0 and all(
        len(a) == len(args[0]) for a in args
    ), "Batched iteration must have inputs of all the same size."
    n_batches = len(args[0]) // batch_size + int(len(args[0]) % batch_size != 0)
    for b in range(n_batches):
        yield [arg[b * batch_size : (b + 1) * batch_size] for arg in args]


def mask_to_rle_pytorch(tensor: torch.Tensor) -> List[Dict[str, Any]]:
    """
    Encodes masks to an uncompressed RLE, in the format expected by
    pycocotools.
    """
    # Put in fortran order and flatten h,w
    b, h, w = tensor.shape
    tensor = tensor.permute(0, 2, 1).flatten(1)

    # Compute change indices
    diff = tensor[:, 1:] ^ tensor[:, :-1]
    change_indices = diff.nonzero()

    # Encode run length
    out = []
    for i in range(b):
        cur_idxs = change_indices[change_indices[:, 0] == i, 1]
        cur_idxs = torch.cat(
            [
                torch.tensor([0], dtype=cur_idxs.dtype, device=cur_idxs.device),
                cur_idxs + 1,
                torch.tensor([h * w], dtype=cur_idxs.dtype, device=cur_idxs.device),
            ]
        )
        btw_idxs = cur_idxs[1:] - cur_idxs[:-1]
        counts = [] if tensor[i, 0] == 0 else [0]
        counts.extend(btw_idxs.detach().cpu().tolist())
        out.append({"size": [h, w], "counts": counts})
    return out


def rle_to_mask(rle: Dict[str, Any]) -> np.ndarray:
    """Compute a binary mask from an uncompressed RLE."""
    h, w = rle["size"]
    mask = np.empty(h * w, dtype=bool)
    idx = 0
    parity = False
    for count in rle["counts"]:
        mask[idx : idx + count] = parity
        idx += count
        parity ^= True
    mask = mask.reshape(w, h)
    return mask.transpose()  # Put in C order


def area_from_rle(rle: Dict[str, Any]) -> int:
    return sum(rle["counts"][1::2])
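As an aside, a small round-trip sketch of the RLE helpers just defined (self-contained; the tiny shapes are chosen purely for illustration):

```python
import torch
from models.utils.amg import mask_to_rle_pytorch, rle_to_mask, area_from_rle

# A 1x4x4 batch with a 2x2 foreground square.
mask = torch.zeros(1, 4, 4, dtype=torch.bool)
mask[0, 1:3, 1:3] = True

rles = mask_to_rle_pytorch(mask)      # [{"size": [4, 4], "counts": [...]}]
recovered = rle_to_mask(rles[0])      # back to a (4, 4) bool numpy array
assert (recovered == mask[0].numpy()).all()
print(area_from_rle(rles[0]))         # 4 foreground pixels
```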


def calculate_stability_score(
    masks: torch.Tensor, mask_threshold: float, threshold_offset: float
) -> torch.Tensor:
    """
    Computes the stability score for a batch of masks. The stability
    score is the IoU between the binary masks obtained by thresholding
    the predicted mask logits at high and low values.
    """
    # One mask is always contained inside the other.
    # Save memory by preventing unnecessary cast to torch.int64
    intersections = (
        (masks > (mask_threshold + threshold_offset))
        .sum(-1, dtype=torch.int16)
        .sum(-1, dtype=torch.int32)
    )
    unions = (
        (masks > (mask_threshold - threshold_offset))
        .sum(-1, dtype=torch.int16)
        .sum(-1, dtype=torch.int32)
    )
    return intersections / unions


def build_point_grid(n_per_side: int) -> np.ndarray:
    """Generates a 2D grid of points evenly spaced in [0,1]x[0,1]."""
    offset = 1 / (2 * n_per_side)
    points_one_side = np.linspace(offset, 1 - offset, n_per_side)
    points_x = np.tile(points_one_side[None, :], (n_per_side, 1))
    points_y = np.tile(points_one_side[:, None], (1, n_per_side))
    points = np.stack([points_x, points_y], axis=-1).reshape(-1, 2)
    return points


def build_all_layer_point_grids(
    n_per_side: int, n_layers: int, scale_per_layer: int
) -> List[np.ndarray]:
    """Generates point grids for all crop layers."""
    points_by_layer = []
    for i in range(n_layers + 1):
        n_points = int(n_per_side / (scale_per_layer**i))
        points_by_layer.append(build_point_grid(n_points))
    return points_by_layer


def generate_crop_boxes(
    im_size: Tuple[int, ...], n_layers: int, overlap_ratio: float
) -> Tuple[List[List[int]], List[int]]:
    """
    Generates a list of crop boxes of different sizes. Each layer
    has (2**i)**2 boxes for the ith layer.
    """
    crop_boxes, layer_idxs = [], []
    im_h, im_w = im_size
    short_side = min(im_h, im_w)

    # Original image
    crop_boxes.append([0, 0, im_w, im_h])
    layer_idxs.append(0)

    def crop_len(orig_len, n_crops, overlap):
        return int(math.ceil((overlap * (n_crops - 1) + orig_len) / n_crops))

    for i_layer in range(n_layers):
        n_crops_per_side = 2 ** (i_layer + 1)
        overlap = int(overlap_ratio * short_side * (2 / n_crops_per_side))

        crop_w = crop_len(im_w, n_crops_per_side, overlap)
        crop_h = crop_len(im_h, n_crops_per_side, overlap)

        crop_box_x0 = [int((crop_w - overlap) * i) for i in range(n_crops_per_side)]
        crop_box_y0 = [int((crop_h - overlap) * i) for i in range(n_crops_per_side)]

        # Crops in XYXY format
        for x0, y0 in product(crop_box_x0, crop_box_y0):
            box = [x0, y0, min(x0 + crop_w, im_w), min(y0 + crop_h, im_h)]
            crop_boxes.append(box)
            layer_idxs.append(i_layer + 1)

    return crop_boxes, layer_idxs
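Another quick sketch, showing how the point-grid and crop-box helpers above compose for a 600x800 (HxW) image with one extra crop layer (the overlap ratio is an illustrative value):

```python
from models.utils.amg import build_all_layer_point_grids, generate_crop_boxes

# One extra crop layer: the full image plus a 2x2 grid of overlapping crops.
crop_boxes, layer_idxs = generate_crop_boxes((600, 800), n_layers=1, overlap_ratio=0.5)
print(crop_boxes[0], layer_idxs)  # [0, 0, 800, 600], then four layer-1 crops

# Matching per-layer point grids: 32x32 points on layer 0, 16x16 on layer 1.
grids = build_all_layer_point_grids(n_per_side=32, n_layers=1, scale_per_layer=2)
print([g.shape for g in grids])   # [(1024, 2), (256, 2)]
```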


def uncrop_boxes_xyxy(boxes: torch.Tensor, crop_box: List[int]) -> torch.Tensor:
    x0, y0, _, _ = crop_box
    offset = torch.tensor([[x0, y0, x0, y0]], device=boxes.device)
    # Check if boxes has a channel dimension
    if len(boxes.shape) == 3:
        offset = offset.unsqueeze(1)
    return boxes + offset


def uncrop_points(points: torch.Tensor, crop_box: List[int]) -> torch.Tensor:
    x0, y0, _, _ = crop_box
    offset = torch.tensor([[x0, y0]], device=points.device)
    # Check if points has a channel dimension
    if len(points.shape) == 3:
        offset = offset.unsqueeze(1)
    return points + offset


def uncrop_masks(
    masks: torch.Tensor, crop_box: List[int], orig_h: int, orig_w: int
) -> torch.Tensor:
    x0, y0, x1, y1 = crop_box
    if x0 == 0 and y0 == 0 and x1 == orig_w and y1 == orig_h:
        return masks
    # Coordinate transform masks
    pad_x, pad_y = orig_w - (x1 - x0), orig_h - (y1 - y0)
    pad = (x0, pad_x - x0, y0, pad_y - y0)
    return torch.nn.functional.pad(masks, pad, value=0)


def remove_small_regions(
    mask: np.ndarray, area_thresh: float, mode: str
) -> Tuple[np.ndarray, bool]:
    """
    Removes small disconnected regions and holes in a mask. Returns the
    mask and an indicator of whether the mask has been modified.
    """
    import cv2  # type: ignore

    assert mode in ["holes", "islands"]
    correct_holes = mode == "holes"
    working_mask = (correct_holes ^ mask).astype(np.uint8)
    n_labels, regions, stats, _ = cv2.connectedComponentsWithStats(working_mask, 8)
    sizes = stats[:, -1][1:]  # Row 0 is background label
    small_regions = [i + 1 for i, s in enumerate(sizes) if s < area_thresh]
    if len(small_regions) == 0:
        return mask, False
    fill_labels = [0] + small_regions
    if not correct_holes:
        fill_labels = [i for i in range(n_labels) if i not in fill_labels]
        # If every region is below threshold, keep largest
        if len(fill_labels) == 0:
            fill_labels = [int(np.argmax(sizes)) + 1]
    mask = np.isin(regions, fill_labels)
    return mask, True


def coco_encode_rle(uncompressed_rle: Dict[str, Any]) -> Dict[str, Any]:
    from pycocotools import mask as mask_utils  # type: ignore

    h, w = uncompressed_rle["size"]
    rle = mask_utils.frPyObjects(uncompressed_rle, h, w)
    rle["counts"] = rle["counts"].decode("utf-8")  # Necessary to serialize with json
    return rle


def batched_mask_to_box(masks: torch.Tensor) -> torch.Tensor:
    """
    Calculates boxes in XYXY format around masks. Return [0,0,0,0] for
    an empty mask. For input shape C1xC2x...xHxW, the output shape is C1xC2x...x4.
    """
    # torch.max below raises an error on empty inputs, just skip in this case
    if torch.numel(masks) == 0:
        return torch.zeros(*masks.shape[:-2], 4, device=masks.device)

    # Normalize shape to CxHxW
    shape = masks.shape
    h, w = shape[-2:]
    if len(shape) > 2:
        masks = masks.flatten(0, -3)
    else:
        masks = masks.unsqueeze(0)

    # Get top and bottom edges
    in_height, _ = torch.max(masks, dim=-1)
    in_height_coords = in_height * torch.arange(h, device=in_height.device)[None, :]
    bottom_edges, _ = torch.max(in_height_coords, dim=-1)
    in_height_coords = in_height_coords + h * (~in_height)
    top_edges, _ = torch.min(in_height_coords, dim=-1)

    # Get left and right edges
    in_width, _ = torch.max(masks, dim=-2)
    in_width_coords = in_width * torch.arange(w, device=in_width.device)[None, :]
    right_edges, _ = torch.max(in_width_coords, dim=-1)
    in_width_coords = in_width_coords + w * (~in_width)
    left_edges, _ = torch.min(in_width_coords, dim=-1)

    # If the mask is empty the right edge will be to the left of the left edge.
    # Replace these boxes with [0, 0, 0, 0]
    empty_filter = (right_edges < left_edges) | (bottom_edges < top_edges)
    out = torch.stack([left_edges, top_edges, right_edges, bottom_edges], dim=-1)
    out = out * (~empty_filter).unsqueeze(-1)

    # Return to original shape
    if len(shape) > 2:
        out = out.reshape(*shape[:-2], 4)
    else:
        out = out[0]

    return out
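To close out this module, a minimal check of `batched_mask_to_box`'s XYXY output and its zero-box convention for empty masks (shapes are illustrative):

```python
import torch
from models.utils.amg import batched_mask_to_box

masks = torch.zeros(2, 8, 8, dtype=torch.bool)
masks[0, 2:5, 3:7] = True  # one occupied mask, one left empty

boxes = batched_mask_to_box(masks)
print(boxes[0].tolist())  # [3, 2, 6, 4]: XYXY box around the occupied pixels
print(boxes[1].tolist())  # [0, 0, 0, 0]: empty masks map to the zero box
```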
models/utils/onnx.py
ADDED
@@ -0,0 +1,144 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.

# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.

import torch
import torch.nn as nn
from torch.nn import functional as F

from typing import Tuple

from ..modeling import Sam
from .amg import calculate_stability_score


class SamOnnxModel(nn.Module):
    """
    This model should not be called directly, but is used in ONNX export.
    It combines the prompt encoder, mask decoder, and mask postprocessing of Sam,
    with some functions modified to enable model tracing. Also supports extra
    options controlling what information is returned. See the ONNX export script
    for details.
    """

    def __init__(
        self,
        model: Sam,
        return_single_mask: bool,
        use_stability_score: bool = False,
        return_extra_metrics: bool = False,
    ) -> None:
        super().__init__()
        self.mask_decoder = model.mask_decoder
        self.model = model
        self.img_size = model.image_encoder.img_size
        self.return_single_mask = return_single_mask
        self.use_stability_score = use_stability_score
        self.stability_score_offset = 1.0
        self.return_extra_metrics = return_extra_metrics

    @staticmethod
    def resize_longest_image_size(
        input_image_size: torch.Tensor, longest_side: int
    ) -> torch.Tensor:
        input_image_size = input_image_size.to(torch.float32)
        scale = longest_side / torch.max(input_image_size)
        transformed_size = scale * input_image_size
        transformed_size = torch.floor(transformed_size + 0.5).to(torch.int64)
        return transformed_size

    def _embed_points(self, point_coords: torch.Tensor, point_labels: torch.Tensor) -> torch.Tensor:
        point_coords = point_coords + 0.5
        point_coords = point_coords / self.img_size
        point_embedding = self.model.prompt_encoder.pe_layer._pe_encoding(point_coords)
        point_labels = point_labels.unsqueeze(-1).expand_as(point_embedding)

        point_embedding = point_embedding * (point_labels != -1)
        point_embedding = point_embedding + self.model.prompt_encoder.not_a_point_embed.weight * (
            point_labels == -1
        )

        for i in range(self.model.prompt_encoder.num_point_embeddings):
            point_embedding = point_embedding + self.model.prompt_encoder.point_embeddings[
                i
            ].weight * (point_labels == i)

        return point_embedding

    def _embed_masks(self, input_mask: torch.Tensor, has_mask_input: torch.Tensor) -> torch.Tensor:
        mask_embedding = has_mask_input * self.model.prompt_encoder.mask_downscaling(input_mask)
        mask_embedding = mask_embedding + (
            1 - has_mask_input
        ) * self.model.prompt_encoder.no_mask_embed.weight.reshape(1, -1, 1, 1)
        return mask_embedding

    def mask_postprocessing(self, masks: torch.Tensor, orig_im_size: torch.Tensor) -> torch.Tensor:
        masks = F.interpolate(
            masks,
            size=(self.img_size, self.img_size),
            mode="bilinear",
            align_corners=False,
        )

        prepadded_size = self.resize_longest_image_size(orig_im_size, self.img_size).to(torch.int64)
        masks = masks[..., : prepadded_size[0], : prepadded_size[1]]  # type: ignore

        orig_im_size = orig_im_size.to(torch.int64)
        h, w = orig_im_size[0], orig_im_size[1]
        masks = F.interpolate(masks, size=(h, w), mode="bilinear", align_corners=False)
        return masks

    def select_masks(
        self, masks: torch.Tensor, iou_preds: torch.Tensor, num_points: int
    ) -> Tuple[torch.Tensor, torch.Tensor]:
        # Determine if we should return the multiclick mask or not from the number of points.
        # The reweighting is used to avoid control flow.
        score_reweight = torch.tensor(
            [[1000] + [0] * (self.model.mask_decoder.num_mask_tokens - 1)]
        ).to(iou_preds.device)
        score = iou_preds + (num_points - 2.5) * score_reweight
        best_idx = torch.argmax(score, dim=1)
        masks = masks[torch.arange(masks.shape[0]), best_idx, :, :].unsqueeze(1)
        iou_preds = iou_preds[torch.arange(masks.shape[0]), best_idx].unsqueeze(1)

        return masks, iou_preds

    @torch.no_grad()
    def forward(
        self,
        image_embeddings: torch.Tensor,
        point_coords: torch.Tensor,
        point_labels: torch.Tensor,
        mask_input: torch.Tensor,
        has_mask_input: torch.Tensor,
        orig_im_size: torch.Tensor,
    ):
        sparse_embedding = self._embed_points(point_coords, point_labels)
        dense_embedding = self._embed_masks(mask_input, has_mask_input)

        masks, scores = self.model.mask_decoder.predict_masks(
            image_embeddings=image_embeddings,
            image_pe=self.model.prompt_encoder.get_dense_pe(),
            sparse_prompt_embeddings=sparse_embedding,
            dense_prompt_embeddings=dense_embedding,
        )

        if self.use_stability_score:
            scores = calculate_stability_score(
                masks, self.model.mask_threshold, self.stability_score_offset
            )

        if self.return_single_mask:
            masks, scores = self.select_masks(masks, scores, point_coords.shape[1])

        upscaled_masks = self.mask_postprocessing(masks, orig_im_size)

        if self.return_extra_metrics:
            stability_scores = calculate_stability_score(
                upscaled_masks, self.model.mask_threshold, self.stability_score_offset
            )
            areas = (upscaled_masks > self.model.mask_threshold).sum(-1).sum(-1)
            return upscaled_masks, scores, stability_scores, areas, masks

        return upscaled_masks, scores, masks
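`SamOnnxModel` is intended to be driven by an export script rather than called directly. The sketch below mirrors the shape conventions of upstream SAM's `export_onnx_model.py`; the checkpoint path and output filename are placeholders, and the dummy sizes assume the standard 1024-pixel input with a 64x64 image embedding and 256x256 mask input:

```python
import torch
from models import sam_model_registry
from models.utils.onnx import SamOnnxModel

sam = sam_model_registry["vit_b"](checkpoint="sam_vit_b.pth")  # hypothetical path
onnx_model = SamOnnxModel(sam, return_single_mask=True)

embed_dim = sam.prompt_encoder.embed_dim
embed_size = sam.prompt_encoder.image_embedding_size  # (64, 64) for 1024 inputs
dummy_inputs = {
    "image_embeddings": torch.randn(1, embed_dim, *embed_size, dtype=torch.float),
    "point_coords": torch.randint(0, 1024, (1, 5, 2), dtype=torch.float),
    "point_labels": torch.randint(0, 4, (1, 5), dtype=torch.float),
    "mask_input": torch.randn(1, 1, 256, 256, dtype=torch.float),
    "has_mask_input": torch.tensor([1], dtype=torch.float),
    "orig_im_size": torch.tensor([1500, 2250], dtype=torch.float),
}

with torch.no_grad():
    torch.onnx.export(
        onnx_model,
        tuple(dummy_inputs.values()),
        "sam_prompts.onnx",  # illustrative output name
        input_names=list(dummy_inputs.keys()),
        output_names=["masks", "iou_predictions", "low_res_masks"],
        dynamic_axes={
            "point_coords": {1: "num_points"},
            "point_labels": {1: "num_points"},
        },
        opset_version=17,
    )
```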
models/utils/transforms.py
ADDED
@@ -0,0 +1,102 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.

# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.

import numpy as np
import torch
from torch.nn import functional as F
from torchvision.transforms.functional import resize, to_pil_image  # type: ignore

from copy import deepcopy
from typing import Tuple


class ResizeLongestSide:
    """
    Resizes images to the longest side 'target_length', as well as provides
    methods for resizing coordinates and boxes. Provides methods for
    transforming both numpy array and batched torch tensors.
    """

    def __init__(self, target_length: int) -> None:
        self.target_length = target_length

    def apply_image(self, image: np.ndarray) -> np.ndarray:
        """
        Expects a numpy array with shape HxWxC in uint8 format.
        """
        target_size = self.get_preprocess_shape(image.shape[0], image.shape[1], self.target_length)
        return np.array(resize(to_pil_image(image), target_size))

    def apply_coords(self, coords: np.ndarray, original_size: Tuple[int, ...]) -> np.ndarray:
        """
        Expects a numpy array of length 2 in the final dimension. Requires the
        original image size in (H, W) format.
        """
        old_h, old_w = original_size
        new_h, new_w = self.get_preprocess_shape(
            original_size[0], original_size[1], self.target_length
        )
        coords = deepcopy(coords).astype(float)
        coords[..., 0] = coords[..., 0] * (new_w / old_w)
        coords[..., 1] = coords[..., 1] * (new_h / old_h)
        return coords

    def apply_boxes(self, boxes: np.ndarray, original_size: Tuple[int, ...]) -> np.ndarray:
        """
        Expects a numpy array shape Bx4. Requires the original image size
        in (H, W) format.
        """
        boxes = self.apply_coords(boxes.reshape(-1, 2, 2), original_size)
        return boxes.reshape(-1, 4)

    def apply_image_torch(self, image: torch.Tensor) -> torch.Tensor:
        """
        Expects batched images with shape BxCxHxW and float format. This
        transformation may not exactly match apply_image. apply_image is
        the transformation expected by the model.
        """
        # Expects an image in BCHW format. May not exactly match apply_image.
        target_size = self.get_preprocess_shape(image.shape[2], image.shape[3], self.target_length)
        return F.interpolate(
            image, target_size, mode="bilinear", align_corners=False, antialias=True
        )

    def apply_coords_torch(
        self, coords: torch.Tensor, original_size: Tuple[int, ...]
    ) -> torch.Tensor:
        """
        Expects a torch tensor with length 2 in the last dimension. Requires the
        original image size in (H, W) format.
        """
        old_h, old_w = original_size
        new_h, new_w = self.get_preprocess_shape(
            original_size[0], original_size[1], self.target_length
        )
        coords = deepcopy(coords).to(torch.float)
        coords[..., 0] = coords[..., 0] * (new_w / old_w)
        coords[..., 1] = coords[..., 1] * (new_h / old_h)
        return coords

    def apply_boxes_torch(
        self, boxes: torch.Tensor, original_size: Tuple[int, ...]
    ) -> torch.Tensor:
        """
        Expects a torch tensor with shape Bx4. Requires the original image
        size in (H, W) format.
        """
        boxes = self.apply_coords_torch(boxes.reshape(-1, 2, 2), original_size)
        return boxes.reshape(-1, 4)

    @staticmethod
    def get_preprocess_shape(oldh: int, oldw: int, long_side_length: int) -> Tuple[int, int]:
        """
        Compute the output size given input size and target long side length.
        """
        scale = long_side_length * 1.0 / max(oldh, oldw)
        newh, neww = oldh * scale, oldw * scale
        neww = int(neww + 0.5)
        newh = int(newh + 0.5)
        return (newh, neww)
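A short sketch of `ResizeLongestSide`'s contract with a 1024 target and illustrative sizes; note that prompt coordinates are mapped with the original (H, W), not the resized one:

```python
import numpy as np
from models.utils.transforms import ResizeLongestSide

transform = ResizeLongestSide(1024)

image = np.zeros((600, 800, 3), dtype=np.uint8)      # HxWxC uint8
resized = transform.apply_image(image)
print(resized.shape)                                  # (768, 1024, 3): long side -> 1024

# Boxes scale by the same factor, 1024 / 800 = 1.28 on both axes here.
boxes = np.array([[100.0, 50.0, 400.0, 300.0]])
print(transform.apply_boxes(boxes, image.shape[:2]))  # [[128., 64., 512., 384.]]
```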
requirements.txt
ADDED
@@ -0,0 +1,5 @@
torch
torchvision
pycocotools
transformers
gradio_image_prompter-0.1.0-py3-none-any.whl
src/.gitignore
ADDED
@@ -0,0 +1,9 @@
.eggs/
dist/
*.pyc
__pycache__/
*.py[cod]
*$py.class
__tmp/*
*.pyi
node_modules
src/LICENSE
ADDED
@@ -0,0 +1,201 @@
                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS

   APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "[]"
      replaced with your own identifying information. (Don't include
      the brackets!) The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

   Copyright [yyyy] [name of copyright owner]

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
src/README.md
ADDED
@@ -0,0 +1,48 @@
# Image Prompter for Gradio
A gradio component to upload images and process point/box prompts.

This custom component is developed for the [Tokenize Anything](https://github.com/baaivision/tokenize-anything) gradio demo.

## Installation

### Preliminaries

``gradio`` >= 4.0.0

### Installing Package

```bash
pip install gradio-image-prompter
```

## Quick Start

### Development

```bash
cd gradio-image-prompter
gradio cc install
gradio cc dev
```

### Example

```python
import gradio as gr
from gradio_image_prompter import ImagePrompter

demo = gr.Interface(
    lambda prompts: (prompts["image"], prompts["points"]),
    ImagePrompter(show_label=False),
    [gr.Image(show_label=False), gr.Dataframe(label="Points")],
)
demo.launch()
```

## License
[Apache License 2.0](LICENSE)

## Acknowledgement

We thank the repositories: [SAM](https://github.com/facebookresearch/segment-anything), [GradioBox](https://github.com/ShoufaChen/gradio-box) and [Gradio](https://github.com/gradio-app/gradio).
src/backend/gradio_image_prompter/__init__.py
ADDED
@@ -0,0 +1,3 @@
from .image_prompter import ImagePrompter

__all__ = ["ImagePrompter"]
src/backend/gradio_image_prompter/image_prompter.py
ADDED
@@ -0,0 +1,133 @@
# ------------------------------------------------------------------------
# Copyright (c) 2023-present, PhyscalX. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ------------------------------------------------------------------------
"""Gradio ``ImagePrompter`` component."""

from __future__ import annotations

from typing import Optional, List, TypedDict, Union, Literal

import numpy as np
import gradio
from gradio.data_classes import FileData, GradioModel
from gradio_client.documentation import document, set_documentation_group
from PIL import Image as _Image  # using _ to minimize namespace pollution

set_documentation_group("component")


class PromptData(GradioModel):
    image: FileData
    points: List[List[float]]


class PromptValue(TypedDict):
    image: Optional[Union[np.ndarray, _Image.Image, str]]
    points: Optional[List[List[float]]]


@document()
class ImagePrompter(gradio.Image):
    """Create an image prompter to upload images and process point/box prompts."""

    data_model = PromptData

    def __init__(
        self,
        value: str | _Image.Image | np.ndarray | None = None,
        *,
        height: int | None = None,
        width: int | None = None,
        image_mode: Literal[
            "1", "L", "P", "RGB", "RGBA", "CMYK", "YCbCr", "LAB", "HSV", "I", "F"
        ] = "RGB",
        sources: list[Literal["upload", "clipboard"]] | None = None,
        type: Literal["numpy", "pil", "filepath"] = "numpy",
        label: str | None = None,
        every: float | None = None,
        show_label: bool | None = None,
        show_download_button: bool = True,
        container: bool = True,
        scale: int | None = None,
        min_width: int = 160,
        interactive: bool | None = None,
        visible: bool = True,
        elem_id: str | None = None,
        elem_classes: list[str] | str | None = None,
        render: bool = True,
        show_share_button: bool | None = None,
    ):
        """
        Parameters:
            value: A PIL Image, numpy array, path or URL for the default value. If callable, it will be called to set the initial value.
            height: Height of the displayed image in pixels.
            width: Width of the displayed image in pixels.
            image_mode: "RGB" if color, or "L" if black and white. See https://pillow.readthedocs.io/en/stable/handbook/concepts.html.
            sources: List of sources for the image.
            type: The format the image is converted to before being passed into the prediction function.
            label: The label for this component.
            every: If `value` is a callable, run the function 'every' number of seconds while the client connection is open.
            show_label: If True, will display label.
            show_download_button: If True, will display button to download image.
            container: If True, will place the component in a container - providing some extra padding around the border.
            scale: Relative width compared to adjacent Components in a Row. Should be an integer.
            min_width: Minimum pixel width, will wrap if not sufficient screen space to satisfy this value.
            interactive: If True, will allow users to upload and edit an image; if False, can only be used to display images.
            visible: If False, component will be hidden.
            elem_id: An optional string that is assigned as the id of this component in the HTML DOM.
            elem_classes: An optional list of strings that are assigned as the classes of this component in the HTML DOM.
            render: If False, component will not be rendered in the Blocks context.
            show_share_button: If True, show a share icon that allows user to share outputs to Hugging Face Spaces Discussions.
        """
        super(ImagePrompter, self).__init__(
            value=value,
            height=height,
            width=width,
            image_mode=image_mode,
            sources=["upload", "clipboard"] if sources is None else sources,
            type=type,
            label=label,
            every=every,
            show_label=show_label,
            show_download_button=show_download_button,
            container=container,
            scale=scale,
            min_width=min_width,
            interactive=interactive,
            visible=visible,
            elem_id=elem_id,
            elem_classes=elem_classes,
            render=render,
            show_share_button=show_share_button,
        )

    def preprocess(self, x: PromptData) -> PromptValue | None:
        if x is None:
            return x
        im = super().preprocess(x.image)
        return {"image": im, "points": x.points}

    def postprocess(self, y: PromptValue) -> PromptData | None:
        if y is None:
            return None
        image, points = y.get("image", None), y.get("points", [])
        return PromptData(image=super().postprocess(image), points=points)

    def as_example(self, y: PromptValue) -> str | None:
        if y is None:
            return None
        return self.move_resource_to_block_cache(y.get("image", None))
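A minimal Blocks sketch of wiring the component up (component and callback names are illustrative; the `points` payload is whatever the frontend recorded for the drawn points and boxes):

```python
import gradio as gr
from gradio_image_prompter import ImagePrompter

def inspect(prompts):
    # prompts is the PromptValue dict above: {"image": ..., "points": ...}
    return prompts["image"], prompts["points"]

with gr.Blocks() as demo:
    prompter = ImagePrompter(show_label=False)
    button = gr.Button("Read prompts")
    image_out = gr.Image(show_label=False)
    points_out = gr.Dataframe(label="Points")
    button.click(inspect, prompter, [image_out, points_out])

demo.launch()
```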
src/backend/gradio_image_prompter/image_prompter.pyi
ADDED
@@ -0,0 +1,134 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# ------------------------------------------------------------------------
|
2 |
+
# Copyright (c) 2023-present, PhyscalX. All Rights Reserved.
|
3 |
+
#
|
4 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
5 |
+
# you may not use this file except in compliance with the License.
|
6 |
+
# You may obtain a copy of the License at
|
7 |
+
#
|
8 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9 |
+
#
|
10 |
+
# Unless required by applicable law or agreed to in writing, software
|
11 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13 |
+
# See the License for the specific language governing permissions and
|
14 |
+
# limitations under the License.
|
15 |
+
# ------------------------------------------------------------------------
|
16 |
+
"""Gradio ``ImagePrompter`` component."""
|
17 |
+
|
18 |
+
from __future__ import annotations
|
19 |
+
|
20 |
+
from typing import Optional, List, TypedDict, Union, Literal
|
21 |
+
|
22 |
+
import numpy as np
|
23 |
+
import gradio
|
24 |
+
from gradio.data_classes import FileData, GradioModel
|
25 |
+
from gradio_client.documentation import document, set_documentation_group
|
26 |
+
from PIL import Image as _Image # using _ to minimize namespace pollution
|
27 |
+
|
28 |
+
set_documentation_group("component")
|
29 |
+
|
30 |
+
|
31 |
+
class PromptData(GradioModel):
|
32 |
+
image: FileData
|
33 |
+
points: List[List[float]]
|
34 |
+
|
35 |
+
|
36 |
+
class PromptValue(TypedDict):
|
37 |
+
image: Optional[Union[np.ndarray, _Image.Image, str]]
|
38 |
+
points: Optional[list[list[float]]]
|
39 |
+
|
40 |
+
from gradio.events import Dependency
|
41 |
+
|
42 |
+
@document()
|
43 |
+
class ImagePrompter(gradio.Image):
|
44 |
+
"""Create an image prompter to upload images and process point/box prompts."""
|
45 |
+
|
46 |
+
data_model = PromptData
|
47 |
+
|
48 |
+
def __init__(
|
49 |
+
self,
|
50 |
+
value: str | _Image.Image | np.ndarray | None = None,
|
51 |
+
*,
|
52 |
+
height: int | None = None,
|
53 |
+
        width: int | None = None,
        image_mode: Literal[
            "1", "L", "P", "RGB", "RGBA", "CMYK", "YCbCr", "LAB", "HSV", "I", "F"
        ] = "RGB",
        sources: list[Literal["upload", "clipboard"]] | None = None,
        type: Literal["numpy", "pil", "filepath"] = "numpy",
        label: str | None = None,
        every: float | None = None,
        show_label: bool | None = None,
        show_download_button: bool = True,
        container: bool = True,
        scale: int | None = None,
        min_width: int = 160,
        interactive: bool | None = None,
        visible: bool = True,
        elem_id: str | None = None,
        elem_classes: list[str] | str | None = None,
        render: bool = True,
        show_share_button: bool | None = None,
    ):
        """
        Parameters:
            value: A PIL Image, numpy array, path or URL for the default value. If callable, it will be called to set the initial value.
            height: Height of the displayed image in pixels.
            width: Width of the displayed image in pixels.
            image_mode: "RGB" if color, or "L" if black and white. See https://pillow.readthedocs.io/en/stable/handbook/concepts.html.
            sources: List of sources for the image.
            type: The format the image is converted to before being passed into the prediction function.
            label: The label for this component.
            every: If `value` is a callable, run the function 'every' number of seconds while the client connection is open.
            show_label: If True, will display the label.
            show_download_button: If True, will display a button to download the image.
            container: If True, will place the component in a container, providing some extra padding around the border.
            scale: Relative width compared to adjacent Components in a Row. Should be an integer.
            min_width: Minimum pixel width; will wrap if there is not sufficient screen space to satisfy this value.
            interactive: If True, will allow users to upload and edit an image; if False, can only be used to display images.
            visible: If False, the component will be hidden.
            elem_id: An optional string that is assigned as the id of this component in the HTML DOM.
            elem_classes: An optional list of strings that are assigned as the classes of this component in the HTML DOM.
            render: If False, the component will not be rendered in the Blocks context.
            show_share_button: If True, show a share icon that allows users to share outputs to Hugging Face Spaces Discussions.
        """
        super(ImagePrompter, self).__init__(
            value=value,
            height=height,
            width=width,
            image_mode=image_mode,
            sources=["upload", "clipboard"] if sources is None else sources,
            type=type,
            label=label,
            every=every,
            show_label=show_label,
            show_download_button=show_download_button,
            container=container,
            scale=scale,
            min_width=min_width,
            interactive=interactive,
            visible=visible,
            elem_id=elem_id,
            elem_classes=elem_classes,
            render=render,
            show_share_button=show_share_button,
        )

    def preprocess(self, x: PromptData) -> PromptValue | None:
        if x is None:
            return x
        im = super().preprocess(x.image)
        return {"image": im, "points": x.points}

    def postprocess(self, y: PromptValue) -> PromptData | None:
        if y is None:
            return None
        image, points = y.get("image", None), y.get("points", [])
        return PromptData(image=super().postprocess(image), points=points)

    def as_example(self, y: PromptValue) -> str | None:
        if y is None:
            return None
        return self.move_resource_to_block_cache(y.get("image", None))
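
A minimal usage sketch may help here: preprocess above is what hands the prediction function its input, a dict with "image" and "points" keys. The snippet below wires the component into a small gr.Interface; the handler name and output labels are illustrative (the repository ships its own demo in src/demo/app.py), not taken verbatim from this upload.

import gradio as gr
from gradio_image_prompter import ImagePrompter

def echo_prompts(prompts):
    # `prompts` is the dict produced by ImagePrompter.preprocess:
    # {"image": <image in the configured `type`>, "points": <prompt points>}
    return prompts["image"], prompts["points"]

demo = gr.Interface(
    fn=echo_prompts,
    inputs=ImagePrompter(show_label=False),
    outputs=[gr.Image(show_label=False), gr.Dataframe(label="Points")],
)

if __name__ == "__main__":
    demo.launch()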
src/backend/gradio_image_prompter/templates/component/__vite-browser-external-2447137e.js
ADDED
@@ -0,0 +1,4 @@
const e = {};
export {
  e as default
};
src/backend/gradio_image_prompter/templates/component/index.js
ADDED
The diff for this file is too large to render. See raw diff.
src/backend/gradio_image_prompter/templates/component/style.css
ADDED
@@ -0,0 +1 @@
.block.svelte-1t38q2d{position:relative;margin:0;box-shadow:var(--block-shadow);border-width:var(--block-border-width);border-color:var(--block-border-color);border-radius:var(--block-radius);background:var(--block-background-fill);width:100%;line-height:var(--line-sm)}.block.border_focus.svelte-1t38q2d{border-color:var(--color-accent)}.padded.svelte-1t38q2d{padding:var(--block-padding)}.hidden.svelte-1t38q2d{display:none}.hide-container.svelte-1t38q2d{margin:0;box-shadow:none;--block-border-width:0;background:transparent;padding:0;overflow:visible}div.svelte-1hnfib2{margin-bottom:var(--spacing-lg);color:var(--block-info-text-color);font-weight:var(--block-info-text-weight);font-size:var(--block-info-text-size);line-height:var(--line-sm)}span.has-info.svelte-22c38v{margin-bottom:var(--spacing-xs)}span.svelte-22c38v:not(.has-info){margin-bottom:var(--spacing-lg)}span.svelte-22c38v{display:inline-block;position:relative;z-index:var(--layer-4);border:solid var(--block-title-border-width) var(--block-title-border-color);border-radius:var(--block-title-radius);background:var(--block-title-background-fill);padding:var(--block-title-padding);color:var(--block-title-text-color);font-weight:var(--block-title-text-weight);font-size:var(--block-title-text-size);line-height:var(--line-sm)}.hide.svelte-22c38v{margin:0;height:0}label.svelte-9gxdi0{display:inline-flex;align-items:center;z-index:var(--layer-2);box-shadow:var(--block-label-shadow);border:var(--block-label-border-width) solid var(--border-color-primary);border-top:none;border-left:none;border-radius:var(--block-label-radius);background:var(--block-label-background-fill);padding:var(--block-label-padding);pointer-events:none;color:var(--block-label-text-color);font-weight:var(--block-label-text-weight);font-size:var(--block-label-text-size);line-height:var(--line-sm)}.gr-group label.svelte-9gxdi0{border-top-left-radius:0}label.float.svelte-9gxdi0{position:absolute;top:var(--block-label-margin);left:var(--block-label-margin)}label.svelte-9gxdi0:not(.float){position:static;margin-top:var(--block-label-margin);margin-left:var(--block-label-margin)}.hide.svelte-9gxdi0{height:0}span.svelte-9gxdi0{opacity:.8;margin-right:var(--size-2);width:calc(var(--block-label-text-size) - 1px);height:calc(var(--block-label-text-size) - 1px)}.hide-label.svelte-9gxdi0{box-shadow:none;border-width:0;background:transparent;overflow:visible}button.svelte-lpi64a{display:flex;justify-content:center;align-items:center;gap:1px;z-index:var(--layer-2);border-radius:var(--radius-sm);color:var(--block-label-text-color);border:1px solid transparent}button[disabled].svelte-lpi64a{opacity:.5;box-shadow:none}button[disabled].svelte-lpi64a:hover{cursor:not-allowed}.padded.svelte-lpi64a{padding:2px;background:var(--bg-color);box-shadow:var(--shadow-drop);border:1px solid var(--button-secondary-border-color)}button.svelte-lpi64a:hover,button.highlight.svelte-lpi64a{cursor:pointer;color:var(--color-accent)}.padded.svelte-lpi64a:hover{border:2px solid var(--button-secondary-border-color-hover);padding:1px;color:var(--block-label-text-color)}span.svelte-lpi64a{padding:0 1px;font-size:10px}div.svelte-lpi64a{padding:2px;display:flex;align-items:flex-end}.small.svelte-lpi64a{width:14px;height:14px}.large.svelte-lpi64a{width:22px;height:22px}.pending.svelte-lpi64a{animation:svelte-lpi64a-flash .5s infinite}@keyframes 
svelte-lpi64a-flash{0%{opacity:.5}50%{opacity:1}to{opacity:.5}}.transparent.svelte-lpi64a{background:transparent;border:none;box-shadow:none}.empty.svelte-3w3rth{display:flex;justify-content:center;align-items:center;margin-top:calc(0px - var(--size-6));height:var(--size-full)}.icon.svelte-3w3rth{opacity:.5;height:var(--size-5);color:var(--body-text-color)}.small.svelte-3w3rth{min-height:calc(var(--size-32) - 20px)}.large.svelte-3w3rth{min-height:calc(var(--size-64) - 20px)}.unpadded_box.svelte-3w3rth{margin-top:0}.small_parent.svelte-3w3rth{min-height:100%!important}.dropdown-arrow.svelte-145leq6{fill:currentColor}.wrap.svelte-kzcjhc{display:flex;flex-direction:column;justify-content:center;align-items:center;min-height:var(--size-60);color:var(--block-label-text-color);line-height:var(--line-md);height:100%;padding-top:var(--size-3)}.or.svelte-kzcjhc{color:var(--body-text-color-subdued);display:flex}.icon-wrap.svelte-kzcjhc{width:30px;margin-bottom:var(--spacing-lg)}@media (--screen-md){.wrap.svelte-kzcjhc{font-size:var(--text-lg)}}.hovered.svelte-kzcjhc{color:var(--color-accent)}div.svelte-ipfyu7{border-top:1px solid transparent;display:flex;max-height:100%;justify-content:center;gap:var(--spacing-sm);height:auto;align-items:flex-end;padding-bottom:var(--spacing-xl);color:var(--block-label-text-color);flex-shrink:0;width:95%}.show_border.svelte-ipfyu7{border-top:1px solid var(--block-border-color);margin-top:var(--spacing-xxl);box-shadow:var(--shadow-drop)}.source-selection.svelte-lde7lt{display:flex;align-items:center;justify-content:center;border-top:1px solid var(--border-color-primary);width:95%;bottom:0;left:0;right:0;margin-left:auto;margin-right:auto;align-self:flex-end}.icon.svelte-lde7lt{width:22px;height:22px;margin:var(--spacing-lg) var(--spacing-xs);padding:var(--spacing-xs);color:var(--neutral-400);border-radius:var(--radius-md)}.selected.svelte-lde7lt{color:var(--color-accent)}.icon.svelte-lde7lt:hover,.icon.svelte-lde7lt:focus{color:var(--color-accent)}img.svelte-1e0ed51,button.svelte-1e0ed51{width:var(--size-full);height:var(--size-full);object-fit:contain;display:block;border-radius:var(--radius-lg)}.selectable.svelte-1e0ed51{cursor:crosshair}.icon-buttons.svelte-1e0ed51{display:flex;position:absolute;top:6px;right:6px;gap:var(--size-1)}.wrap.svelte-12ckl9l.svelte-12ckl9l{overflow-y:auto;transition:opacity .5s ease-in-out;background:var(--block-background-fill);position:relative;display:flex;flex-direction:column;align-items:center;justify-content:center;min-height:var(--size-40)}.wrap.svelte-12ckl9l.svelte-12ckl9l:after{content:"";position:absolute;top:0;left:0;width:var(--upload-progress-width);height:100%;transition:all .5s ease-in-out;z-index:1}.uploading.svelte-12ckl9l.svelte-12ckl9l{font-size:var(--text-lg);font-family:var(--font);z-index:2}.file-name.svelte-12ckl9l.svelte-12ckl9l{margin:var(--spacing-md);font-size:var(--text-lg);color:var(--body-text-color-subdued)}.file.svelte-12ckl9l.svelte-12ckl9l{font-size:var(--text-md);z-index:2;display:flex;align-items:center}.file.svelte-12ckl9l progress.svelte-12ckl9l{display:inline;height:var(--size-1);width:100%;transition:all .5s ease-in-out;color:var(--color-accent);border:none}.file.svelte-12ckl9l progress[value].svelte-12ckl9l::-webkit-progress-value{background-color:var(--color-accent);border-radius:20px}.file.svelte-12ckl9l 
progress[value].svelte-12ckl9l::-webkit-progress-bar{background-color:var(--border-color-accent);border-radius:20px}.progress-bar.svelte-12ckl9l.svelte-12ckl9l{width:14px;height:14px;border-radius:50%;background:radial-gradient(closest-side,var(--block-background-fill) 64%,transparent 53% 100%),conic-gradient(var(--color-accent) var(--upload-progress-width),var(--border-color-accent) 0);transition:all .5s ease-in-out}button.svelte-1aq8tno{cursor:pointer;width:var(--size-full)}.hidden.svelte-1aq8tno{display:none;height:0!important;position:absolute;width:0;flex-grow:0}.center.svelte-1aq8tno{display:flex;justify-content:center}.flex.svelte-1aq8tno{display:flex;justify-content:center;align-items:center}input.svelte-1aq8tno{display:none}div.svelte-1wj0ocy{display:flex;top:var(--size-2);right:var(--size-2);justify-content:flex-end;gap:var(--spacing-sm);z-index:var(--layer-1)}.not-absolute.svelte-1wj0ocy{margin:var(--size-1)}div.svelte-1o7cyxy{display:flex;position:absolute;top:var(--size-2);right:var(--size-2);justify-content:flex-end;gap:var(--spacing-sm);z-index:var(--layer-5)}canvas.svelte-1mnpmgt{display:block;position:absolute;top:0;right:0;bottom:0;left:0;margin:auto}.wrap.svelte-1mnpmgt{position:relative;width:var(--size-full);height:var(--size-full);touch-action:none}img.svelte-1qm7xww{width:var(--size-full);height:var(--size-full)}.upload-container.svelte-1qm7xww{height:100%;flex-shrink:1;max-height:100%}.image-container.svelte-1qm7xww{display:flex;height:100%;flex-direction:column;justify-content:center;align-items:center;max-height:100%}svg.svelte-43sxxs.svelte-43sxxs{width:var(--size-20);height:var(--size-20)}svg.svelte-43sxxs path.svelte-43sxxs{fill:var(--loader-color)}div.svelte-43sxxs.svelte-43sxxs{z-index:var(--layer-2)}.margin.svelte-43sxxs.svelte-43sxxs{margin:var(--size-4)}.wrap.svelte-1txqlrd.svelte-1txqlrd{display:flex;flex-direction:column;justify-content:center;align-items:center;z-index:var(--layer-top);transition:opacity .1s ease-in-out;border-radius:var(--block-radius);background:var(--block-background-fill);padding:0 var(--size-6);max-height:var(--size-screen-h);overflow:hidden;pointer-events:none}.wrap.center.svelte-1txqlrd.svelte-1txqlrd{top:0;right:0;left:0}.wrap.default.svelte-1txqlrd.svelte-1txqlrd{top:0;right:0;bottom:0;left:0}.hide.svelte-1txqlrd.svelte-1txqlrd{opacity:0;pointer-events:none}.generating.svelte-1txqlrd.svelte-1txqlrd{animation:svelte-1txqlrd-pulse 2s cubic-bezier(.4,0,.6,1) infinite;border:2px solid var(--color-accent);background:transparent}.translucent.svelte-1txqlrd.svelte-1txqlrd{background:none}@keyframes svelte-1txqlrd-pulse{0%,to{opacity:1}50%{opacity:.5}}.loading.svelte-1txqlrd.svelte-1txqlrd{z-index:var(--layer-2);color:var(--body-text-color)}.eta-bar.svelte-1txqlrd.svelte-1txqlrd{position:absolute;top:0;right:0;bottom:0;left:0;transform-origin:left;opacity:.8;z-index:var(--layer-1);transition:10ms;background:var(--background-fill-secondary)}.progress-bar-wrap.svelte-1txqlrd.svelte-1txqlrd{border:1px solid var(--border-color-primary);background:var(--background-fill-primary);width:55.5%;height:var(--size-4)}.progress-bar.svelte-1txqlrd.svelte-1txqlrd{transform-origin:left;background-color:var(--loader-color);width:var(--size-full);height:var(--size-full)}.progress-level.svelte-1txqlrd.svelte-1txqlrd{display:flex;flex-direction:column;align-items:center;gap:1;z-index:var(--layer-2);width:var(--size-full)}.progress-level-inner.svelte-1txqlrd.svelte-1txqlrd{margin:var(--size-2) 
auto;color:var(--body-text-color);font-size:var(--text-sm);font-family:var(--font-mono)}.meta-text.svelte-1txqlrd.svelte-1txqlrd{position:absolute;top:0;right:0;z-index:var(--layer-2);padding:var(--size-1) var(--size-2);font-size:var(--text-sm);font-family:var(--font-mono)}.meta-text-center.svelte-1txqlrd.svelte-1txqlrd{display:flex;position:absolute;top:0;right:0;justify-content:center;align-items:center;transform:translateY(var(--size-6));z-index:var(--layer-2);padding:var(--size-1) var(--size-2);font-size:var(--text-sm);font-family:var(--font-mono);text-align:center}.error.svelte-1txqlrd.svelte-1txqlrd{box-shadow:var(--shadow-drop);border:solid 1px var(--error-border-color);border-radius:var(--radius-full);background:var(--error-background-fill);padding-right:var(--size-4);padding-left:var(--size-4);color:var(--error-text-color);font-weight:var(--weight-semibold);font-size:var(--text-lg);line-height:var(--line-lg);font-family:var(--font)}.minimal.svelte-1txqlrd .progress-text.svelte-1txqlrd{background:var(--block-background-fill)}.border.svelte-1txqlrd.svelte-1txqlrd{border:1px solid var(--border-color-primary)}.toast-body.svelte-solcu7{display:flex;position:relative;right:0;left:0;align-items:center;margin:var(--size-6) var(--size-4);margin:auto;border-radius:var(--container-radius);overflow:hidden;pointer-events:auto}.toast-body.error.svelte-solcu7{border:1px solid var(--color-red-700);background:var(--color-red-50)}.dark .toast-body.error.svelte-solcu7{border:1px solid var(--color-red-500);background-color:var(--color-grey-950)}.toast-body.warning.svelte-solcu7{border:1px solid var(--color-yellow-700);background:var(--color-yellow-50)}.dark .toast-body.warning.svelte-solcu7{border:1px solid var(--color-yellow-500);background-color:var(--color-grey-950)}.toast-body.info.svelte-solcu7{border:1px solid var(--color-grey-700);background:var(--color-grey-50)}.dark .toast-body.info.svelte-solcu7{border:1px solid var(--color-grey-500);background-color:var(--color-grey-950)}.toast-title.svelte-solcu7{display:flex;align-items:center;font-weight:var(--weight-bold);font-size:var(--text-lg);line-height:var(--line-sm);text-transform:capitalize}.toast-title.error.svelte-solcu7{color:var(--color-red-700)}.dark .toast-title.error.svelte-solcu7{color:var(--color-red-50)}.toast-title.warning.svelte-solcu7{color:var(--color-yellow-700)}.dark .toast-title.warning.svelte-solcu7{color:var(--color-yellow-50)}.toast-title.info.svelte-solcu7{color:var(--color-grey-700)}.dark .toast-title.info.svelte-solcu7{color:var(--color-grey-50)}.toast-close.svelte-solcu7{margin:0 var(--size-3);border-radius:var(--size-3);padding:0px var(--size-1-5);font-size:var(--size-5);line-height:var(--size-5)}.toast-close.error.svelte-solcu7{color:var(--color-red-700)}.dark .toast-close.error.svelte-solcu7{color:var(--color-red-500)}.toast-close.warning.svelte-solcu7{color:var(--color-yellow-700)}.dark .toast-close.warning.svelte-solcu7{color:var(--color-yellow-500)}.toast-close.info.svelte-solcu7{color:var(--color-grey-700)}.dark .toast-close.info.svelte-solcu7{color:var(--color-grey-500)}.toast-text.svelte-solcu7{font-size:var(--text-lg)}.toast-text.error.svelte-solcu7{color:var(--color-red-700)}.dark .toast-text.error.svelte-solcu7{color:var(--color-red-50)}.toast-text.warning.svelte-solcu7{color:var(--color-yellow-700)}.dark .toast-text.warning.svelte-solcu7{color:var(--color-yellow-50)}.toast-text.info.svelte-solcu7{color:var(--color-grey-700)}.dark 
.toast-text.info.svelte-solcu7{color:var(--color-grey-50)}.toast-details.svelte-solcu7{margin:var(--size-3) var(--size-3) var(--size-3) 0;width:100%}.toast-icon.svelte-solcu7{display:flex;position:absolute;position:relative;flex-shrink:0;justify-content:center;align-items:center;margin:var(--size-2);border-radius:var(--radius-full);padding:var(--size-1);padding-left:calc(var(--size-1) - 1px);width:35px;height:35px}.toast-icon.error.svelte-solcu7{color:var(--color-red-700)}.dark .toast-icon.error.svelte-solcu7{color:var(--color-red-500)}.toast-icon.warning.svelte-solcu7{color:var(--color-yellow-700)}.dark .toast-icon.warning.svelte-solcu7{color:var(--color-yellow-500)}.toast-icon.info.svelte-solcu7{color:var(--color-grey-700)}.dark .toast-icon.info.svelte-solcu7{color:var(--color-grey-500)}@keyframes svelte-solcu7-countdown{0%{transform:scaleX(1)}to{transform:scaleX(0)}}.timer.svelte-solcu7{position:absolute;bottom:0;left:0;transform-origin:0 0;animation:svelte-solcu7-countdown 10s linear forwards;width:100%;height:var(--size-1)}.timer.error.svelte-solcu7{background:var(--color-red-700)}.dark .timer.error.svelte-solcu7{background:var(--color-red-500)}.timer.warning.svelte-solcu7{background:var(--color-yellow-700)}.dark .timer.warning.svelte-solcu7{background:var(--color-yellow-500)}.timer.info.svelte-solcu7{background:var(--color-grey-700)}.dark .timer.info.svelte-solcu7{background:var(--color-grey-500)}.toast-wrap.svelte-gatr8h{display:flex;position:fixed;top:var(--size-4);right:var(--size-4);flex-direction:column;align-items:end;gap:var(--size-2);z-index:var(--layer-top);width:calc(100% - var(--size-8))}@media (--screen-sm){.toast-wrap.svelte-gatr8h{width:calc(var(--size-96) + var(--size-10))}}.container.svelte-h11ksk img{width:100%;height:100%}.container.selected.svelte-h11ksk{border-color:var(--border-color-accent)}.container.table.svelte-h11ksk{margin:0 auto;border:2px solid var(--border-color-primary);border-radius:var(--radius-lg);overflow:hidden;width:var(--size-20);height:var(--size-20);object-fit:cover}.container.gallery.svelte-h11ksk{height:var(--size-20);max-height:var(--size-20);object-fit:cover}
src/backend/gradio_image_prompter/templates/component/wrapper-6f348d45-f837cf34.js
ADDED
@@ -0,0 +1,2455 @@
import S from "./__vite-browser-external-2447137e.js";
function z(s) {
  return s && s.__esModule && Object.prototype.hasOwnProperty.call(s, "default") ? s.default : s;
}
function gt(s) {
  if (s.__esModule)
    return s;
  var e = s.default;
  if (typeof e == "function") {
    var t = function r() {
      if (this instanceof r) {
        var i = [null];
        i.push.apply(i, arguments);
        var n = Function.bind.apply(e, i);
        return new n();
      }
      return e.apply(this, arguments);
    };
    t.prototype = e.prototype;
  } else
    t = {};
  return Object.defineProperty(t, "__esModule", { value: !0 }), Object.keys(s).forEach(function(r) {
    var i = Object.getOwnPropertyDescriptor(s, r);
    Object.defineProperty(t, r, i.get ? i : {
      enumerable: !0,
      get: function() {
        return s[r];
      }
    });
  }), t;
}
const { Duplex: yt } = S;
function Oe(s) {
  s.emit("close");
}
function vt() {
  !this.destroyed && this._writableState.finished && this.destroy();
}
function Qe(s) {
  this.removeListener("error", Qe), this.destroy(), this.listenerCount("error") === 0 && this.emit("error", s);
}
function St(s, e) {
  let t = !0;
  const r = new yt({
    ...e,
    autoDestroy: !1,
    emitClose: !1,
    objectMode: !1,
    writableObjectMode: !1
  });
  return s.on("message", function(n, o) {
    const l = !o && r._readableState.objectMode ? n.toString() : n;
    r.push(l) || s.pause();
  }), s.once("error", function(n) {
    r.destroyed || (t = !1, r.destroy(n));
  }), s.once("close", function() {
    r.destroyed || r.push(null);
  }), r._destroy = function(i, n) {
    if (s.readyState === s.CLOSED) {
      n(i), process.nextTick(Oe, r);
      return;
    }
    let o = !1;
    s.once("error", function(f) {
      o = !0, n(f);
    }), s.once("close", function() {
      o || n(i), process.nextTick(Oe, r);
    }), t && s.terminate();
  }, r._final = function(i) {
    if (s.readyState === s.CONNECTING) {
      s.once("open", function() {
        r._final(i);
      });
      return;
    }
    s._socket !== null && (s._socket._writableState.finished ? (i(), r._readableState.endEmitted && r.destroy()) : (s._socket.once("finish", function() {
      i();
    }), s.close()));
  }, r._read = function() {
    s.isPaused && s.resume();
  }, r._write = function(i, n, o) {
    if (s.readyState === s.CONNECTING) {
      s.once("open", function() {
        r._write(i, n, o);
      });
      return;
    }
    s.send(i, o);
  }, r.on("end", vt), r.on("error", Qe), r;
}
var Et = St;
const Vs = /* @__PURE__ */ z(Et);
var te = { exports: {} }, U = {
  BINARY_TYPES: ["nodebuffer", "arraybuffer", "fragments"],
  EMPTY_BUFFER: Buffer.alloc(0),
  GUID: "258EAFA5-E914-47DA-95CA-C5AB0DC85B11",
  kForOnEventAttribute: Symbol("kIsForOnEventAttribute"),
  kListener: Symbol("kListener"),
  kStatusCode: Symbol("status-code"),
  kWebSocket: Symbol("websocket"),
  NOOP: () => {
  }
}, bt, xt;
const { EMPTY_BUFFER: kt } = U, Se = Buffer[Symbol.species];
function wt(s, e) {
  if (s.length === 0)
    return kt;
  if (s.length === 1)
    return s[0];
  const t = Buffer.allocUnsafe(e);
  let r = 0;
  for (let i = 0; i < s.length; i++) {
    const n = s[i];
    t.set(n, r), r += n.length;
  }
  return r < e ? new Se(t.buffer, t.byteOffset, r) : t;
}
function Je(s, e, t, r, i) {
  for (let n = 0; n < i; n++)
    t[r + n] = s[n] ^ e[n & 3];
}
function et(s, e) {
  for (let t = 0; t < s.length; t++)
    s[t] ^= e[t & 3];
}
function Ot(s) {
  return s.length === s.buffer.byteLength ? s.buffer : s.buffer.slice(s.byteOffset, s.byteOffset + s.length);
}
function Ee(s) {
  if (Ee.readOnly = !0, Buffer.isBuffer(s))
    return s;
  let e;
  return s instanceof ArrayBuffer ? e = new Se(s) : ArrayBuffer.isView(s) ? e = new Se(s.buffer, s.byteOffset, s.byteLength) : (e = Buffer.from(s), Ee.readOnly = !1), e;
}
te.exports = {
  concat: wt,
  mask: Je,
  toArrayBuffer: Ot,
  toBuffer: Ee,
  unmask: et
};
if (!process.env.WS_NO_BUFFER_UTIL)
  try {
    const s = require("bufferutil");
    xt = te.exports.mask = function(e, t, r, i, n) {
      n < 48 ? Je(e, t, r, i, n) : s.mask(e, t, r, i, n);
    }, bt = te.exports.unmask = function(e, t) {
      e.length < 32 ? et(e, t) : s.unmask(e, t);
    };
  } catch {
  }
var ne = te.exports;
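
The exported mask and unmask helpers above implement RFC 6455 frame masking: every payload byte is XORed with one of the four masking-key bytes, chosen by index & 3, and the optional bufferutil native module is only used for larger buffers. A short Python sketch of the same operation, for illustration only (not part of this upload):

def xor_mask(payload: bytes, key: bytes) -> bytes:
    # XOR each payload byte with key[i % 4]; XOR is its own inverse,
    # so the same function both masks and unmasks (RFC 6455, section 5.3).
    assert len(key) == 4
    return bytes(b ^ key[i & 3] for i, b in enumerate(payload))

masked = xor_mask(b"hello", b"\x01\x02\x03\x04")
assert xor_mask(masked, b"\x01\x02\x03\x04") == b"hello"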
const Ce = Symbol("kDone"), ue = Symbol("kRun");
let Ct = class {
  /**
   * Creates a new `Limiter`.
   *
   * @param {Number} [concurrency=Infinity] The maximum number of jobs allowed
   *     to run concurrently
   */
  constructor(e) {
    this[Ce] = () => {
      this.pending--, this[ue]();
    }, this.concurrency = e || 1 / 0, this.jobs = [], this.pending = 0;
  }
  /**
   * Adds a job to the queue.
   *
   * @param {Function} job The job to run
   * @public
   */
  add(e) {
    this.jobs.push(e), this[ue]();
  }
  /**
   * Removes a job from the queue and runs it if possible.
   *
   * @private
   */
  [ue]() {
    if (this.pending !== this.concurrency && this.jobs.length) {
      const e = this.jobs.shift();
      this.pending++, e(this[Ce]);
    }
  }
};
var Tt = Ct;
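
The Limiter class above is a callback-style concurrency gate: jobs wait in a queue, at most `concurrency` of them run at once, and each job receives a done callback that frees its slot and pulls the next job. A rough Python rendering of the same pattern, illustrative only:

from collections import deque

class Limiter:
    # Run callback-style jobs with at most `concurrency` in flight.
    def __init__(self, concurrency=float("inf")):
        self.concurrency = concurrency
        self.jobs = deque()
        self.pending = 0

    def add(self, job):
        # `job` is a callable that accepts a single `done` callback.
        self.jobs.append(job)
        self._run()

    def _done(self):
        self.pending -= 1
        self._run()

    def _run(self):
        if self.pending < self.concurrency and self.jobs:
            self.pending += 1
            self.jobs.popleft()(self._done)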
const W = S, Te = ne, Lt = Tt, { kStatusCode: tt } = U, Nt = Buffer[Symbol.species], Pt = Buffer.from([0, 0, 255, 255]), se = Symbol("permessage-deflate"), w = Symbol("total-length"), V = Symbol("callback"), C = Symbol("buffers"), J = Symbol("error");
let K, Rt = class {
  /**
   * Creates a PerMessageDeflate instance.
   *
   * @param {Object} [options] Configuration options
   * @param {(Boolean|Number)} [options.clientMaxWindowBits] Advertise support
   *     for, or request, a custom client window size
   * @param {Boolean} [options.clientNoContextTakeover=false] Advertise/
   *     acknowledge disabling of client context takeover
   * @param {Number} [options.concurrencyLimit=10] The number of concurrent
   *     calls to zlib
   * @param {(Boolean|Number)} [options.serverMaxWindowBits] Request/confirm the
   *     use of a custom server window size
   * @param {Boolean} [options.serverNoContextTakeover=false] Request/accept
   *     disabling of server context takeover
   * @param {Number} [options.threshold=1024] Size (in bytes) below which
   *     messages should not be compressed if context takeover is disabled
   * @param {Object} [options.zlibDeflateOptions] Options to pass to zlib on
   *     deflate
   * @param {Object} [options.zlibInflateOptions] Options to pass to zlib on
   *     inflate
   * @param {Boolean} [isServer=false] Create the instance in either server or
   *     client mode
   * @param {Number} [maxPayload=0] The maximum allowed message length
   */
  constructor(e, t, r) {
    if (this._maxPayload = r | 0, this._options = e || {}, this._threshold = this._options.threshold !== void 0 ? this._options.threshold : 1024, this._isServer = !!t, this._deflate = null, this._inflate = null, this.params = null, !K) {
      const i = this._options.concurrencyLimit !== void 0 ? this._options.concurrencyLimit : 10;
      K = new Lt(i);
    }
  }
  /**
   * @type {String}
   */
  static get extensionName() {
    return "permessage-deflate";
  }
  /**
   * Create an extension negotiation offer.
   *
   * @return {Object} Extension parameters
   * @public
   */
  offer() {
    const e = {};
    return this._options.serverNoContextTakeover && (e.server_no_context_takeover = !0), this._options.clientNoContextTakeover && (e.client_no_context_takeover = !0), this._options.serverMaxWindowBits && (e.server_max_window_bits = this._options.serverMaxWindowBits), this._options.clientMaxWindowBits ? e.client_max_window_bits = this._options.clientMaxWindowBits : this._options.clientMaxWindowBits == null && (e.client_max_window_bits = !0), e;
  }
  /**
   * Accept an extension negotiation offer/response.
   *
   * @param {Array} configurations The extension negotiation offers/response
   * @return {Object} Accepted configuration
   * @public
   */
  accept(e) {
    return e = this.normalizeParams(e), this.params = this._isServer ? this.acceptAsServer(e) : this.acceptAsClient(e), this.params;
  }
  /**
   * Releases all resources used by the extension.
   *
   * @public
   */
  cleanup() {
    if (this._inflate && (this._inflate.close(), this._inflate = null), this._deflate) {
      const e = this._deflate[V];
      this._deflate.close(), this._deflate = null, e && e(
        new Error(
          "The deflate stream was closed while data was being processed"
        )
      );
    }
  }
  /**
   * Accept an extension negotiation offer.
   *
   * @param {Array} offers The extension negotiation offers
   * @return {Object} Accepted configuration
   * @private
   */
  acceptAsServer(e) {
    const t = this._options, r = e.find((i) => !(t.serverNoContextTakeover === !1 && i.server_no_context_takeover || i.server_max_window_bits && (t.serverMaxWindowBits === !1 || typeof t.serverMaxWindowBits == "number" && t.serverMaxWindowBits > i.server_max_window_bits) || typeof t.clientMaxWindowBits == "number" && !i.client_max_window_bits));
    if (!r)
      throw new Error("None of the extension offers can be accepted");
    return t.serverNoContextTakeover && (r.server_no_context_takeover = !0), t.clientNoContextTakeover && (r.client_no_context_takeover = !0), typeof t.serverMaxWindowBits == "number" && (r.server_max_window_bits = t.serverMaxWindowBits), typeof t.clientMaxWindowBits == "number" ? r.client_max_window_bits = t.clientMaxWindowBits : (r.client_max_window_bits === !0 || t.clientMaxWindowBits === !1) && delete r.client_max_window_bits, r;
  }
  /**
   * Accept the extension negotiation response.
   *
   * @param {Array} response The extension negotiation response
   * @return {Object} Accepted configuration
   * @private
   */
  acceptAsClient(e) {
    const t = e[0];
    if (this._options.clientNoContextTakeover === !1 && t.client_no_context_takeover)
      throw new Error('Unexpected parameter "client_no_context_takeover"');
    if (!t.client_max_window_bits)
      typeof this._options.clientMaxWindowBits == "number" && (t.client_max_window_bits = this._options.clientMaxWindowBits);
    else if (this._options.clientMaxWindowBits === !1 || typeof this._options.clientMaxWindowBits == "number" && t.client_max_window_bits > this._options.clientMaxWindowBits)
      throw new Error(
        'Unexpected or invalid parameter "client_max_window_bits"'
      );
    return t;
  }
  /**
   * Normalize parameters.
   *
   * @param {Array} configurations The extension negotiation offers/response
   * @return {Array} The offers/response with normalized parameters
   * @private
   */
  normalizeParams(e) {
    return e.forEach((t) => {
      Object.keys(t).forEach((r) => {
        let i = t[r];
        if (i.length > 1)
          throw new Error(`Parameter "${r}" must have only a single value`);
        if (i = i[0], r === "client_max_window_bits") {
          if (i !== !0) {
            const n = +i;
            if (!Number.isInteger(n) || n < 8 || n > 15)
              throw new TypeError(
                `Invalid value for parameter "${r}": ${i}`
              );
            i = n;
          } else if (!this._isServer)
            throw new TypeError(
              `Invalid value for parameter "${r}": ${i}`
            );
        } else if (r === "server_max_window_bits") {
          const n = +i;
          if (!Number.isInteger(n) || n < 8 || n > 15)
            throw new TypeError(
              `Invalid value for parameter "${r}": ${i}`
            );
          i = n;
        } else if (r === "client_no_context_takeover" || r === "server_no_context_takeover") {
          if (i !== !0)
            throw new TypeError(
              `Invalid value for parameter "${r}": ${i}`
            );
        } else
          throw new Error(`Unknown parameter "${r}"`);
        t[r] = i;
      });
    }), e;
  }
  /**
   * Decompress data. Concurrency limited.
   *
   * @param {Buffer} data Compressed data
   * @param {Boolean} fin Specifies whether or not this is the last fragment
   * @param {Function} callback Callback
   * @public
   */
  decompress(e, t, r) {
    K.add((i) => {
      this._decompress(e, t, (n, o) => {
        i(), r(n, o);
      });
    });
  }
  /**
   * Compress data. Concurrency limited.
   *
   * @param {(Buffer|String)} data Data to compress
   * @param {Boolean} fin Specifies whether or not this is the last fragment
   * @param {Function} callback Callback
   * @public
   */
  compress(e, t, r) {
    K.add((i) => {
      this._compress(e, t, (n, o) => {
        i(), r(n, o);
      });
    });
  }
  /**
   * Decompress data.
   *
   * @param {Buffer} data Compressed data
   * @param {Boolean} fin Specifies whether or not this is the last fragment
   * @param {Function} callback Callback
   * @private
   */
  _decompress(e, t, r) {
    const i = this._isServer ? "client" : "server";
    if (!this._inflate) {
      const n = `${i}_max_window_bits`, o = typeof this.params[n] != "number" ? W.Z_DEFAULT_WINDOWBITS : this.params[n];
      this._inflate = W.createInflateRaw({
        ...this._options.zlibInflateOptions,
        windowBits: o
      }), this._inflate[se] = this, this._inflate[w] = 0, this._inflate[C] = [], this._inflate.on("error", Bt), this._inflate.on("data", st);
    }
    this._inflate[V] = r, this._inflate.write(e), t && this._inflate.write(Pt), this._inflate.flush(() => {
      const n = this._inflate[J];
      if (n) {
        this._inflate.close(), this._inflate = null, r(n);
        return;
      }
      const o = Te.concat(
        this._inflate[C],
        this._inflate[w]
      );
      this._inflate._readableState.endEmitted ? (this._inflate.close(), this._inflate = null) : (this._inflate[w] = 0, this._inflate[C] = [], t && this.params[`${i}_no_context_takeover`] && this._inflate.reset()), r(null, o);
    });
  }
  /**
   * Compress data.
   *
   * @param {(Buffer|String)} data Data to compress
   * @param {Boolean} fin Specifies whether or not this is the last fragment
   * @param {Function} callback Callback
   * @private
   */
  _compress(e, t, r) {
    const i = this._isServer ? "server" : "client";
    if (!this._deflate) {
      const n = `${i}_max_window_bits`, o = typeof this.params[n] != "number" ? W.Z_DEFAULT_WINDOWBITS : this.params[n];
      this._deflate = W.createDeflateRaw({
        ...this._options.zlibDeflateOptions,
        windowBits: o
      }), this._deflate[w] = 0, this._deflate[C] = [], this._deflate.on("data", Ut);
    }
    this._deflate[V] = r, this._deflate.write(e), this._deflate.flush(W.Z_SYNC_FLUSH, () => {
      if (!this._deflate)
        return;
      let n = Te.concat(
        this._deflate[C],
        this._deflate[w]
      );
      t && (n = new Nt(n.buffer, n.byteOffset, n.length - 4)), this._deflate[V] = null, this._deflate[w] = 0, this._deflate[C] = [], t && this.params[`${i}_no_context_takeover`] && this._deflate.reset(), r(null, n);
    });
  }
};
var oe = Rt;
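
PerMessageDeflate.offer above collects the permessage-deflate parameters one side is willing to use; on the wire they travel in the Sec-WebSocket-Extensions header (RFC 7692), where a parameter whose value is true is rendered as a bare token. A hedged Python sketch of rendering such an offer, purely illustrative and not part of this upload:

def format_extension_offer(name: str, params: dict) -> str:
    # Valueless parameters (True) become bare tokens, e.g.
    # "permessage-deflate; client_max_window_bits".
    parts = [name]
    for key, value in params.items():
        parts.append(key if value is True else f"{key}={value}")
    return "; ".join(parts)

print(format_extension_offer(
    "permessage-deflate",
    {"client_no_context_takeover": True, "server_max_window_bits": 10},
))
# permessage-deflate; client_no_context_takeover; server_max_window_bits=10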
function Ut(s) {
  this[C].push(s), this[w] += s.length;
}
function st(s) {
  if (this[w] += s.length, this[se]._maxPayload < 1 || this[w] <= this[se]._maxPayload) {
    this[C].push(s);
    return;
  }
  this[J] = new RangeError("Max payload size exceeded"), this[J].code = "WS_ERR_UNSUPPORTED_MESSAGE_LENGTH", this[J][tt] = 1009, this.removeListener("data", st), this.reset();
}
function Bt(s) {
  this[se]._inflate = null, s[tt] = 1007, this[V](s);
}
var re = { exports: {} };
const $t = {}, Mt = /* @__PURE__ */ Object.freeze(/* @__PURE__ */ Object.defineProperty({
  __proto__: null,
  default: $t
}, Symbol.toStringTag, { value: "Module" })), It = /* @__PURE__ */ gt(Mt);
var Le;
const { isUtf8: Ne } = S, Dt = [
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0 - 15
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 16 - 31
  0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, // 32 - 47
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, // 48 - 63
  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 64 - 79
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, // 80 - 95
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 96 - 111
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0 // 112 - 127
];
function Wt(s) {
  return s >= 1e3 && s <= 1014 && s !== 1004 && s !== 1005 && s !== 1006 || s >= 3e3 && s <= 4999;
}
function be(s) {
  const e = s.length;
  let t = 0;
  for (; t < e; )
    if (!(s[t] & 128))
      t++;
    else if ((s[t] & 224) === 192) {
      if (t + 1 === e || (s[t + 1] & 192) !== 128 || (s[t] & 254) === 192)
        return !1;
      t += 2;
    } else if ((s[t] & 240) === 224) {
      if (t + 2 >= e || (s[t + 1] & 192) !== 128 || (s[t + 2] & 192) !== 128 || s[t] === 224 && (s[t + 1] & 224) === 128 || // Overlong
      s[t] === 237 && (s[t + 1] & 224) === 160)
        return !1;
      t += 3;
    } else if ((s[t] & 248) === 240) {
      if (t + 3 >= e || (s[t + 1] & 192) !== 128 || (s[t + 2] & 192) !== 128 || (s[t + 3] & 192) !== 128 || s[t] === 240 && (s[t + 1] & 240) === 128 || // Overlong
      s[t] === 244 && s[t + 1] > 143 || s[t] > 244)
        return !1;
      t += 4;
    } else
      return !1;
  return !0;
}
re.exports = {
  isValidStatusCode: Wt,
  isValidUTF8: be,
  tokenChars: Dt
};
if (Ne)
  Le = re.exports.isValidUTF8 = function(s) {
    return s.length < 24 ? be(s) : Ne(s);
  };
else if (!process.env.WS_NO_UTF_8_VALIDATE)
  try {
    const s = It;
    Le = re.exports.isValidUTF8 = function(e) {
      return e.length < 32 ? be(e) : s(e);
    };
  } catch {
  }
var ae = re.exports;
const { Writable: At } = S, Pe = oe, {
  BINARY_TYPES: Ft,
  EMPTY_BUFFER: Re,
  kStatusCode: jt,
  kWebSocket: Gt
} = U, { concat: de, toArrayBuffer: Vt, unmask: Ht } = ne, { isValidStatusCode: zt, isValidUTF8: Ue } = ae, X = Buffer[Symbol.species], A = 0, Be = 1, $e = 2, Me = 3, _e = 4, Yt = 5;
let qt = class extends At {
  /**
   * Creates a Receiver instance.
   *
   * @param {Object} [options] Options object
   * @param {String} [options.binaryType=nodebuffer] The type for binary data
   * @param {Object} [options.extensions] An object containing the negotiated
   *     extensions
   * @param {Boolean} [options.isServer=false] Specifies whether to operate in
   *     client or server mode
   * @param {Number} [options.maxPayload=0] The maximum allowed message length
   * @param {Boolean} [options.skipUTF8Validation=false] Specifies whether or
   *     not to skip UTF-8 validation for text and close messages
   */
  constructor(e = {}) {
    super(), this._binaryType = e.binaryType || Ft[0], this._extensions = e.extensions || {}, this._isServer = !!e.isServer, this._maxPayload = e.maxPayload | 0, this._skipUTF8Validation = !!e.skipUTF8Validation, this[Gt] = void 0, this._bufferedBytes = 0, this._buffers = [], this._compressed = !1, this._payloadLength = 0, this._mask = void 0, this._fragmented = 0, this._masked = !1, this._fin = !1, this._opcode = 0, this._totalPayloadLength = 0, this._messageLength = 0, this._fragments = [], this._state = A, this._loop = !1;
  }
  /**
   * Implements `Writable.prototype._write()`.
   *
   * @param {Buffer} chunk The chunk of data to write
   * @param {String} encoding The character encoding of `chunk`
   * @param {Function} cb Callback
   * @private
   */
  _write(e, t, r) {
    if (this._opcode === 8 && this._state == A)
      return r();
    this._bufferedBytes += e.length, this._buffers.push(e), this.startLoop(r);
  }
  /**
   * Consumes `n` bytes from the buffered data.
   *
   * @param {Number} n The number of bytes to consume
   * @return {Buffer} The consumed bytes
   * @private
   */
  consume(e) {
    if (this._bufferedBytes -= e, e === this._buffers[0].length)
      return this._buffers.shift();
    if (e < this._buffers[0].length) {
      const r = this._buffers[0];
      return this._buffers[0] = new X(
        r.buffer,
        r.byteOffset + e,
        r.length - e
      ), new X(r.buffer, r.byteOffset, e);
    }
    const t = Buffer.allocUnsafe(e);
    do {
      const r = this._buffers[0], i = t.length - e;
      e >= r.length ? t.set(this._buffers.shift(), i) : (t.set(new Uint8Array(r.buffer, r.byteOffset, e), i), this._buffers[0] = new X(
        r.buffer,
        r.byteOffset + e,
        r.length - e
      )), e -= r.length;
    } while (e > 0);
    return t;
  }
  /**
   * Starts the parsing loop.
   *
   * @param {Function} cb Callback
   * @private
   */
  startLoop(e) {
    let t;
    this._loop = !0;
    do
      switch (this._state) {
        case A:
          t = this.getInfo();
          break;
        case Be:
          t = this.getPayloadLength16();
          break;
        case $e:
          t = this.getPayloadLength64();
          break;
        case Me:
          this.getMask();
          break;
        case _e:
          t = this.getData(e);
          break;
        default:
          this._loop = !1;
          return;
      }
    while (this._loop);
    e(t);
  }
  /**
   * Reads the first two bytes of a frame.
   *
   * @return {(RangeError|undefined)} A possible error
   * @private
   */
  getInfo() {
    if (this._bufferedBytes < 2) {
      this._loop = !1;
      return;
    }
    const e = this.consume(2);
    if (e[0] & 48)
      return this._loop = !1, g(
        RangeError,
        "RSV2 and RSV3 must be clear",
        !0,
        1002,
        "WS_ERR_UNEXPECTED_RSV_2_3"
      );
    const t = (e[0] & 64) === 64;
    if (t && !this._extensions[Pe.extensionName])
      return this._loop = !1, g(
        RangeError,
        "RSV1 must be clear",
        !0,
        1002,
        "WS_ERR_UNEXPECTED_RSV_1"
      );
    if (this._fin = (e[0] & 128) === 128, this._opcode = e[0] & 15, this._payloadLength = e[1] & 127, this._opcode === 0) {
      if (t)
        return this._loop = !1, g(
          RangeError,
          "RSV1 must be clear",
          !0,
          1002,
          "WS_ERR_UNEXPECTED_RSV_1"
        );
      if (!this._fragmented)
        return this._loop = !1, g(
          RangeError,
          "invalid opcode 0",
          !0,
          1002,
          "WS_ERR_INVALID_OPCODE"
        );
      this._opcode = this._fragmented;
    } else if (this._opcode === 1 || this._opcode === 2) {
      if (this._fragmented)
        return this._loop = !1, g(
          RangeError,
          `invalid opcode ${this._opcode}`,
          !0,
          1002,
          "WS_ERR_INVALID_OPCODE"
        );
      this._compressed = t;
    } else if (this._opcode > 7 && this._opcode < 11) {
      if (!this._fin)
        return this._loop = !1, g(
          RangeError,
          "FIN must be set",
          !0,
          1002,
          "WS_ERR_EXPECTED_FIN"
        );
      if (t)
        return this._loop = !1, g(
          RangeError,
          "RSV1 must be clear",
          !0,
          1002,
          "WS_ERR_UNEXPECTED_RSV_1"
        );
      if (this._payloadLength > 125 || this._opcode === 8 && this._payloadLength === 1)
        return this._loop = !1, g(
          RangeError,
          `invalid payload length ${this._payloadLength}`,
          !0,
          1002,
          "WS_ERR_INVALID_CONTROL_PAYLOAD_LENGTH"
        );
    } else
      return this._loop = !1, g(
        RangeError,
        `invalid opcode ${this._opcode}`,
        !0,
        1002,
        "WS_ERR_INVALID_OPCODE"
      );
    if (!this._fin && !this._fragmented && (this._fragmented = this._opcode), this._masked = (e[1] & 128) === 128, this._isServer) {
      if (!this._masked)
        return this._loop = !1, g(
          RangeError,
          "MASK must be set",
          !0,
          1002,
          "WS_ERR_EXPECTED_MASK"
        );
    } else if (this._masked)
      return this._loop = !1, g(
        RangeError,
        "MASK must be clear",
        !0,
        1002,
        "WS_ERR_UNEXPECTED_MASK"
      );
    if (this._payloadLength === 126)
      this._state = Be;
    else if (this._payloadLength === 127)
      this._state = $e;
    else
      return this.haveLength();
  }
  /**
   * Gets extended payload length (7+16).
   *
   * @return {(RangeError|undefined)} A possible error
   * @private
   */
  getPayloadLength16() {
    if (this._bufferedBytes < 2) {
      this._loop = !1;
      return;
    }
    return this._payloadLength = this.consume(2).readUInt16BE(0), this.haveLength();
  }
  /**
   * Gets extended payload length (7+64).
   *
   * @return {(RangeError|undefined)} A possible error
   * @private
   */
  getPayloadLength64() {
    if (this._bufferedBytes < 8) {
      this._loop = !1;
      return;
    }
    const e = this.consume(8), t = e.readUInt32BE(0);
    return t > Math.pow(2, 53 - 32) - 1 ? (this._loop = !1, g(
      RangeError,
      "Unsupported WebSocket frame: payload length > 2^53 - 1",
      !1,
      1009,
      "WS_ERR_UNSUPPORTED_DATA_PAYLOAD_LENGTH"
    )) : (this._payloadLength = t * Math.pow(2, 32) + e.readUInt32BE(4), this.haveLength());
  }
  /**
   * Payload length has been read.
   *
   * @return {(RangeError|undefined)} A possible error
   * @private
   */
  haveLength() {
    if (this._payloadLength && this._opcode < 8 && (this._totalPayloadLength += this._payloadLength, this._totalPayloadLength > this._maxPayload && this._maxPayload > 0))
      return this._loop = !1, g(
        RangeError,
        "Max payload size exceeded",
        !1,
        1009,
        "WS_ERR_UNSUPPORTED_MESSAGE_LENGTH"
      );
    this._masked ? this._state = Me : this._state = _e;
  }
  /**
   * Reads mask bytes.
   *
   * @private
   */
  getMask() {
    if (this._bufferedBytes < 4) {
      this._loop = !1;
      return;
    }
    this._mask = this.consume(4), this._state = _e;
  }
  /**
   * Reads data bytes.
   *
   * @param {Function} cb Callback
   * @return {(Error|RangeError|undefined)} A possible error
   * @private
   */
  getData(e) {
    let t = Re;
    if (this._payloadLength) {
      if (this._bufferedBytes < this._payloadLength) {
        this._loop = !1;
        return;
      }
      t = this.consume(this._payloadLength), this._masked && this._mask[0] | this._mask[1] | this._mask[2] | this._mask[3] && Ht(t, this._mask);
    }
    if (this._opcode > 7)
      return this.controlMessage(t);
    if (this._compressed) {
      this._state = Yt, this.decompress(t, e);
      return;
    }
    return t.length && (this._messageLength = this._totalPayloadLength, this._fragments.push(t)), this.dataMessage();
  }
  /**
   * Decompresses data.
   *
   * @param {Buffer} data Compressed data
   * @param {Function} cb Callback
   * @private
   */
  decompress(e, t) {
    this._extensions[Pe.extensionName].decompress(e, this._fin, (i, n) => {
      if (i)
        return t(i);
      if (n.length) {
        if (this._messageLength += n.length, this._messageLength > this._maxPayload && this._maxPayload > 0)
          return t(
            g(
              RangeError,
              "Max payload size exceeded",
              !1,
              1009,
              "WS_ERR_UNSUPPORTED_MESSAGE_LENGTH"
            )
          );
        this._fragments.push(n);
      }
      const o = this.dataMessage();
      if (o)
        return t(o);
      this.startLoop(t);
    });
  }
  /**
   * Handles a data message.
   *
   * @return {(Error|undefined)} A possible error
   * @private
   */
  dataMessage() {
    if (this._fin) {
      const e = this._messageLength, t = this._fragments;
      if (this._totalPayloadLength = 0, this._messageLength = 0, this._fragmented = 0, this._fragments = [], this._opcode === 2) {
        let r;
        this._binaryType === "nodebuffer" ? r = de(t, e) : this._binaryType === "arraybuffer" ? r = Vt(de(t, e)) : r = t, this.emit("message", r, !0);
      } else {
        const r = de(t, e);
        if (!this._skipUTF8Validation && !Ue(r))
          return this._loop = !1, g(
            Error,
            "invalid UTF-8 sequence",
            !0,
            1007,
            "WS_ERR_INVALID_UTF8"
          );
        this.emit("message", r, !1);
      }
    }
    this._state = A;
|
981 |
+
}
|
982 |
+
/**
|
983 |
+
* Handles a control message.
|
984 |
+
*
|
985 |
+
* @param {Buffer} data Data to handle
|
986 |
+
* @return {(Error|RangeError|undefined)} A possible error
|
987 |
+
* @private
|
988 |
+
*/
|
989 |
+
controlMessage(e) {
|
990 |
+
if (this._opcode === 8)
|
991 |
+
if (this._loop = !1, e.length === 0)
|
992 |
+
this.emit("conclude", 1005, Re), this.end();
|
993 |
+
else {
|
994 |
+
const t = e.readUInt16BE(0);
|
995 |
+
if (!zt(t))
|
996 |
+
return g(
|
997 |
+
RangeError,
|
998 |
+
`invalid status code ${t}`,
|
999 |
+
!0,
|
1000 |
+
1002,
|
1001 |
+
"WS_ERR_INVALID_CLOSE_CODE"
|
1002 |
+
);
|
1003 |
+
const r = new X(
|
1004 |
+
e.buffer,
|
1005 |
+
e.byteOffset + 2,
|
1006 |
+
e.length - 2
|
1007 |
+
);
|
1008 |
+
if (!this._skipUTF8Validation && !Ue(r))
|
1009 |
+
return g(
|
1010 |
+
Error,
|
1011 |
+
"invalid UTF-8 sequence",
|
1012 |
+
!0,
|
1013 |
+
1007,
|
1014 |
+
"WS_ERR_INVALID_UTF8"
|
1015 |
+
);
|
1016 |
+
this.emit("conclude", t, r), this.end();
|
1017 |
+
}
|
1018 |
+
else
|
1019 |
+
this._opcode === 9 ? this.emit("ping", e) : this.emit("pong", e);
|
1020 |
+
this._state = A;
|
1021 |
+
}
|
1022 |
+
};
|
1023 |
+
var rt = qt;
|
1024 |
+
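// For orientation, a minimal sketch of the header fields getInfo() reads and
// validates above (field names are assumed; the bundle works on the raw buffer):
function parseFrameHeader(b) {
  return {
    fin: (b[0] & 0x80) === 0x80,     // final fragment of a message
    rsv1: (b[0] & 0x40) === 0x40,    // set only when permessage-deflate was negotiated
    opcode: b[0] & 0x0f,             // 0 continuation, 1 text, 2 binary, 8 close, 9 ping, 10 pong
    masked: (b[1] & 0x80) === 0x80,  // required client→server, forbidden server→client
    payloadLength: b[1] & 0x7f       // 126 → 16-bit length follows, 127 → 64-bit length follows
  };
}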
function g(s, e, t, r, i) {
  const n = new s(
    t ? `Invalid WebSocket frame: ${e}` : e
  );
  return Error.captureStackTrace(n, g), n.code = i, n[jt] = r, n;
}
/* … Sender: frames outgoing data per the HyBi protocol. The static frame()
   method builds the 2–14-byte header (FIN, RSV1, opcode, 7/16/64-bit payload
   length, optional 4-byte masking key) and XOR-masks client payloads;
   close()/ping()/pong() enforce the 125-byte control-payload limit (123 bytes
   for the close reason after the 2-byte status code); send() negotiates
   permessage-deflate through dispatch(), queueing frames while a deflate is
   in flight (enqueue()/dequeue()) and writing them with sendFrame(), which
   corks the socket so header and payload go out together. Exported as `it`. … */
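// A minimal sketch of the 4-byte XOR masking Sender.frame() applies to
// client-to-server payloads (RFC 6455 §5.3); applyMask is a hypothetical
// stand-in for the bundled mask helper, and unmasking is the same XOR:
function applyMask(source, mask, output, offset, length) {
  for (let i = 0; i < length; i++) {
    output[offset + i] = source[i] ^ mask[i & 3];
  }
}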
/* … WHATWG-style event shims: Event, CloseEvent (code/reason/wasClean),
   ErrorEvent (error/message) and MessageEvent (data) classes, plus an
   EventTarget mixin whose addEventListener()/removeEventListener() wrap
   listeners for the "open", "message", "error" and "close" events; Z()
   invokes either a plain listener function or an object's handleEvent().
   Exported as `ts`. … */
/* … Sec-WebSocket-Extensions header handling: ss() parses an offer/response
   string into per-extension parameter objects (quoted values and backslash
   escapes included), and rs() serializes such objects back into a header
   value. Exported as `nt` ({ format, parse }). … */
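// Hypothetical round-trip through the parser/formatter exported above as
// `nt` — the exact result shape is inferred from the code, not documented:
const offer = nt.parse("permessage-deflate; client_max_window_bits");
// offer ≈ { "permessage-deflate": [{ client_max_window_bits: [true] }] }
nt.format(offer); // "permessage-deflate; client_max_window_bits"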
/* … the WebSocket class itself: binaryType/bufferedAmount/extensions/
   isPaused/protocol/readyState/url accessors; setSocket() wires a Receiver
   and Sender to the upgraded socket and emits "open"; close() runs the
   closing handshake with a 30-second destroy timer; pause()/resume() gate
   the underlying socket; ping()/pong()/send() throw while CONNECTING and
   account for bytes sent after close; terminate() destroys the socket.
   CONNECTING/OPEN/CLOSING/CLOSED are defined as 0–3, the
   on{open,error,close,message} attribute accessors are installed, and the
   class is exported as `ft`. … */
/* … ht() (client connection setup): validates the ws:/wss:/ws+unix: URL and
   the protocol version (8 or 13), builds the HTTP upgrade request — a random
   Sec-WebSocket-Key, an optional permessage-deflate offer, the subprotocol
   list, Basic auth taken from the URL — follows up to maxRedirects redirects
   (dropping authorization/cookie headers on cross-origin hops), then checks
   the 101 response: the Upgrade header, the Sec-WebSocket-Accept digest, and
   the negotiated subprotocol and extensions, before handing the socket to
   setSocket(). … */
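// The upgrade request assembled by ht() boils down to headers like these
// (a sketch; the key is freshly random for each connection):
const { randomBytes } = require("crypto");
const upgradeHeaders = {
  "Sec-WebSocket-Version": 13,
  "Sec-WebSocket-Key": randomBytes(16).toString("base64"),
  Connection: "Upgrade",
  Upgrade: "websocket"
};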
/* … connection teardown helpers: ee() emits "error" then "close"; bs()/xs()
   create the plain-net and TLS connections; b() aborts the opening
   handshake; ve() accounts for data sent after close and surfaces
   "WebSocket is not open" errors; plus the receiver/socket listeners for
   "conclude", "drain", "error", "message", "ping", "pong" and the socket's
   "close"/"data"/"end"/"error" events that flush the receiver and finally
   emit "close". … */
/* … Ps(): parses a Sec-WebSocket-Protocol header into a Set of subprotocol
   names, rejecting invalid characters and duplicates. Exported as
   `Rs` ({ parse }). … */
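// Hypothetical usage of the subprotocol parser exported above as `Rs`:
Rs.parse("chat, superchat"); // → Set { "chat", "superchat" }
Rs.parse("chat, chat");      // → SyntaxError: The "chat" subprotocol is duplicated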
/* … WebSocketServer (class As): accepts exactly one of the port/server/
   noServer modes, answers plain HTTP requests on its own server with
   426 Upgrade Required, optionally tracks clients, and implements
   address(), close(), shouldHandle() (path matching) and handleUpgrade(),
   which validates the GET method, the Upgrade and
   Sec-WebSocket-Key/-Version headers, parses subprotocols and
   permessage-deflate offers, runs the optional verifyClient hook, and then
   calls completeUpgrade() … */
|
2374 |
+
* @private
|
2375 |
+
*/
|
2376 |
+
completeUpgrade(e, t, r, i, n, o, l) {
|
2377 |
+
if (!n.readable || !n.writable)
|
2378 |
+
return n.destroy();
|
2379 |
+
if (n[Ds])
|
2380 |
+
throw new Error(
|
2381 |
+
"server.handleUpgrade() was called more than once with the same socket, possibly due to a misconfiguration"
|
2382 |
+
);
|
2383 |
+
if (this._state > Ke)
|
2384 |
+
return H(n, 503);
|
2385 |
+
const a = [
|
2386 |
+
"HTTP/1.1 101 Switching Protocols",
|
2387 |
+
"Upgrade: websocket",
|
2388 |
+
"Connection: Upgrade",
|
2389 |
+
`Sec-WebSocket-Accept: ${Bs("sha1").update(t + Is).digest("base64")}`
|
2390 |
+
], c = new this.options.WebSocket(null);
|
2391 |
+
if (r.size) {
|
2392 |
+
const h = this.options.handleProtocols ? this.options.handleProtocols(r, i) : r.values().next().value;
|
2393 |
+
h && (a.push(`Sec-WebSocket-Protocol: ${h}`), c._protocol = h);
|
2394 |
+
}
|
2395 |
+
if (e[N.extensionName]) {
|
2396 |
+
const h = e[N.extensionName].params, p = qe.format({
|
2397 |
+
[N.extensionName]: [h]
|
2398 |
+
});
|
2399 |
+
a.push(`Sec-WebSocket-Extensions: ${p}`), c._extensions = e;
|
2400 |
+
}
|
2401 |
+
this.emit("headers", a, i), n.write(a.concat(`\r
|
2402 |
+
`).join(`\r
|
2403 |
+
`)), n.removeListener("error", Ze), c.setSocket(n, o, {
|
2404 |
+
maxPayload: this.options.maxPayload,
|
2405 |
+
skipUTF8Validation: this.options.skipUTF8Validation
|
2406 |
+
}), this.clients && (this.clients.add(c), c.on("close", () => {
|
2407 |
+
this.clients.delete(c), this._shouldEmitClose && !this.clients.size && process.nextTick(G, this);
|
2408 |
+
})), l(c, i);
|
2409 |
+
}
|
2410 |
+
}
|
2411 |
+
var Fs = As;
|
2412 |
+
function js(s, e) {
|
2413 |
+
for (const t of Object.keys(e))
|
2414 |
+
s.on(t, e[t]);
|
2415 |
+
return function() {
|
2416 |
+
for (const r of Object.keys(e))
|
2417 |
+
s.removeListener(r, e[r]);
|
2418 |
+
};
|
2419 |
+
}
|
2420 |
+
function G(s) {
|
2421 |
+
s._state = pt, s.emit("close");
|
2422 |
+
}
|
2423 |
+
function Ze() {
|
2424 |
+
this.destroy();
|
2425 |
+
}
|
2426 |
+
function H(s, e, t, r) {
|
2427 |
+
t = t || ie.STATUS_CODES[e], r = {
|
2428 |
+
Connection: "close",
|
2429 |
+
"Content-Type": "text/html",
|
2430 |
+
"Content-Length": Buffer.byteLength(t),
|
2431 |
+
...r
|
2432 |
+
}, s.once("finish", s.destroy), s.end(
|
2433 |
+
`HTTP/1.1 ${e} ${ie.STATUS_CODES[e]}\r
|
2434 |
+
` + Object.keys(r).map((i) => `${i}: ${r[i]}`).join(`\r
|
2435 |
+
`) + `\r
|
2436 |
+
\r
|
2437 |
+
` + t
|
2438 |
+
);
|
2439 |
+
}
|
2440 |
+
function R(s, e, t, r, i) {
|
2441 |
+
if (s.listenerCount("wsClientError")) {
|
2442 |
+
const n = new Error(i);
|
2443 |
+
Error.captureStackTrace(n, R), s.emit("wsClientError", n, t, e);
|
2444 |
+
} else
|
2445 |
+
H(t, r, i);
|
2446 |
+
}
|
2447 |
+
const Zs = /* @__PURE__ */ z(Fs);
|
2448 |
+
export {
|
2449 |
+
qs as Receiver,
|
2450 |
+
Ks as Sender,
|
2451 |
+
Xs as WebSocket,
|
2452 |
+
Zs as WebSocketServer,
|
2453 |
+
Vs as createWebSocketStream,
|
2454 |
+
Xs as default
|
2455 |
+
};
|
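The block above closes out the component's prebuilt `wrapper-*.js` bundle; it is the compiled `ws`-style WebSocket server (note the `Receiver`/`Sender`/`WebSocketServer` exports). The `Sec-WebSocket-Accept` value it computes via `Bs("sha1").update(t + Is).digest("base64")` is the standard RFC 6455 handshake, with `Is` presumably holding the RFC's magic GUID. As a minimal sketch, the same computation in Python (`websocket_accept` is an illustrative name, not from the bundle):

# --- illustrative sketch, not part of the uploaded files ---
# Reproduces the RFC 6455 Sec-WebSocket-Accept computation performed above.
import base64
import hashlib

GUID = "258EAFA5-E914-47DA-95CA-C5AB0DC85B11"  # RFC 6455 magic string

def websocket_accept(sec_websocket_key: str) -> str:
    digest = hashlib.sha1((sec_websocket_key + GUID).encode("ascii")).digest()
    return base64.b64encode(digest).decode("ascii")

# Known test vector from RFC 6455, section 1.3:
assert websocket_accept("dGhlIHNhbXBsZSBub25jZQ==") == "s3pPLMBiTxaQ9kYGzzhZRbK+xOo="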
src/backend/gradio_image_prompter/templates/example/index.js
ADDED
@@ -0,0 +1,263 @@
const { setContext: ee, getContext: p } = window.__gradio__svelte__internal, v = "WORKER_PROXY_CONTEXT_KEY";
function y() {
  return p(v);
}
function k(l) {
  return l.host === window.location.host || l.host === "localhost:7860" || l.host === "127.0.0.1:7860" || // Ref: https://github.com/gradio-app/gradio/blob/v3.32.0/js/app/src/Index.svelte#L194
  l.host === "lite.local";
}
async function f(l) {
  if (l == null)
    return l;
  const e = new URL(l);
  if (!k(e) || e.protocol !== "http:" && e.protocol !== "https:")
    return l;
  const r = y();
  if (r == null)
    return l;
  const n = e.pathname;
  return r.httpRequest({
    method: "GET",
    path: n,
    headers: {},
    query_string: ""
  }).then((t) => {
    if (t.status !== 200)
      throw new Error(`Failed to get file ${n} from the Wasm worker.`);
    const o = new Blob([t.body], {
      type: t.headers["Content-Type"]
    });
    return URL.createObjectURL(o);
  });
}
const {
  SvelteComponent: w,
  append: C,
  assign: _,
  compute_rest_props: d,
  detach: u,
  element: b,
  empty: E,
  exclude_internal_props: R,
  get_spread_update: O,
  handle_promise: h,
  init: q,
  insert: m,
  noop: c,
  safe_not_equal: T,
  set_attributes: g,
  set_data: P,
  set_style: U,
  src_url_equal: W,
  text: K,
  update_await_block_branch: X
} = window.__gradio__svelte__internal;
function Y(l) {
  let e, r = (
    /*error*/
    l[3].message + ""
  ), n;
  return {
    c() {
      e = b("p"), n = K(r), U(e, "color", "red");
    },
    m(t, o) {
      m(t, e, o), C(e, n);
    },
    p(t, o) {
      o & /*src*/
      1 && r !== (r = /*error*/
      t[3].message + "") && P(n, r);
    },
    d(t) {
      t && u(e);
    }
  };
}
function L(l) {
  let e, r, n = [
    {
      src: r = /*resolved_src*/
      l[2]
    },
    /*$$restProps*/
    l[1]
  ], t = {};
  for (let o = 0; o < n.length; o += 1)
    t = _(t, n[o]);
  return {
    c() {
      e = b("img"), g(e, t);
    },
    m(o, s) {
      m(o, e, s);
    },
    p(o, s) {
      g(e, t = O(n, [
        s & /*src*/
        1 && !W(e.src, r = /*resolved_src*/
        o[2]) && { src: r },
        s & /*$$restProps*/
        2 && /*$$restProps*/
        o[1]
      ]));
    },
    d(o) {
      o && u(e);
    }
  };
}
function N(l) {
  return { c, m: c, p: c, d: c };
}
function S(l) {
  let e, r, n = {
    ctx: l,
    current: null,
    token: null,
    hasCatch: !0,
    pending: N,
    then: L,
    catch: Y,
    value: 2,
    error: 3
  };
  return h(r = f(
    /*src*/
    l[0]
  ), n), {
    c() {
      e = E(), n.block.c();
    },
    m(t, o) {
      m(t, e, o), n.block.m(t, n.anchor = o), n.mount = () => e.parentNode, n.anchor = e;
    },
    p(t, [o]) {
      l = t, n.ctx = l, o & /*src*/
      1 && r !== (r = f(
        /*src*/
        l[0]
      )) && h(r, n) || X(n, l, o);
    },
    i: c,
    o: c,
    d(t) {
      t && u(e), n.block.d(t), n.token = null, n = null;
    }
  };
}
function j(l, e, r) {
  const n = ["src"];
  let t = d(e, n), { src: o = void 0 } = e;
  return l.$$set = (s) => {
    e = _(_({}, e), R(s)), r(1, t = d(e, n)), "src" in s && r(0, o = s.src);
  }, [o, t];
}
class B extends w {
  constructor(e) {
    super(), q(this, e, j, S, T, { src: 0 });
  }
}
const {
  SvelteComponent: F,
  attr: G,
  create_component: I,
  destroy_component: z,
  detach: A,
  element: D,
  init: H,
  insert: J,
  mount_component: M,
  safe_not_equal: Q,
  toggle_class: i,
  transition_in: V,
  transition_out: Z
} = window.__gradio__svelte__internal;
function x(l) {
  let e, r, n;
  return r = new B({
    props: {
      src: (
        /*samples_dir*/
        l[1] + /*value*/
        l[0]
      ),
      alt: ""
    }
  }), {
    c() {
      e = D("div"), I(r.$$.fragment), G(e, "class", "container svelte-h11ksk"), i(
        e,
        "table",
        /*type*/
        l[2] === "table"
      ), i(
        e,
        "gallery",
        /*type*/
        l[2] === "gallery"
      ), i(
        e,
        "selected",
        /*selected*/
        l[3]
      );
    },
    m(t, o) {
      J(t, e, o), M(r, e, null), n = !0;
    },
    p(t, [o]) {
      const s = {};
      o & /*samples_dir, value*/
      3 && (s.src = /*samples_dir*/
      t[1] + /*value*/
      t[0]), r.$set(s), (!n || o & /*type*/
      4) && i(
        e,
        "table",
        /*type*/
        t[2] === "table"
      ), (!n || o & /*type*/
      4) && i(
        e,
        "gallery",
        /*type*/
        t[2] === "gallery"
      ), (!n || o & /*selected*/
      8) && i(
        e,
        "selected",
        /*selected*/
        t[3]
      );
    },
    i(t) {
      n || (V(r.$$.fragment, t), n = !0);
    },
    o(t) {
      Z(r.$$.fragment, t), n = !1;
    },
    d(t) {
      t && A(e), z(r);
    }
  };
}
function $(l, e, r) {
  let { value: n } = e, { samples_dir: t } = e, { type: o } = e, { selected: s = !1 } = e;
  return l.$$set = (a) => {
    "value" in a && r(0, n = a.value), "samples_dir" in a && r(1, t = a.samples_dir), "type" in a && r(2, o = a.type), "selected" in a && r(3, s = a.selected);
  }, [n, t, o, s];
}
class te extends F {
  constructor(e) {
    super(), H(this, e, $, x, Q, {
      value: 0,
      samples_dir: 1,
      type: 2,
      selected: 3
    });
  }
}
export {
  te as default
};
src/backend/gradio_image_prompter/templates/example/style.css
ADDED
@@ -0,0 +1 @@
.container.svelte-h11ksk img{width:100%;height:100%}.container.selected.svelte-h11ksk{border-color:var(--border-color-accent)}.container.table.svelte-h11ksk{margin:0 auto;border:2px solid var(--border-color-primary);border-radius:var(--radius-lg);overflow:hidden;width:var(--size-20);height:var(--size-20);object-fit:cover}.container.gallery.svelte-h11ksk{height:var(--size-20);max-height:var(--size-20);object-fit:cover}
src/demo/__init__.py
ADDED
File without changes
src/demo/app.py
ADDED
@@ -0,0 +1,9 @@
import gradio as gr
from gradio_image_prompter import ImagePrompter

demo = gr.Interface(
    lambda prompts: (prompts["image"], prompts["points"]),
    ImagePrompter(show_label=False),
    [gr.Image(show_label=False), gr.Dataframe(label="Points")],
)
demo.launch()
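Each row of `prompts["points"]` that this demo echoes into the dataframe carries six numbers; the frontend types the field as `number[][6]` in `Index.svelte` further down. A small sketch of splitting such rows into click prompts and box prompts, assuming the SAM-style row convention this component appears to use (a positive click stored as `[x, y, 1, 0, 0, 4]`, a box stored as `[x1, y1, 2, x2, y2, 3]`); `split_prompts` is an illustrative helper, not part of the package:

# --- illustrative sketch, not part of the uploaded files ---
def split_prompts(rows):
    """Split six-number prompt rows into point clicks and boxes.

    Assumes SAM-style labels: 2/3 mark the two corners of a box,
    1 marks a positive point click (padded with 0, 0, 4).
    """
    points, boxes = [], []
    for x1, y1, kind, x2, y2, _tail in rows or []:
        if kind == 2:
            boxes.append([x1, y1, x2, y2])
        else:
            points.append([x1, y1])
    return points, boxes

# Example: one click at (40, 52) and one box from (10, 10) to (200, 160).
pts, bxs = split_prompts([[40, 52, 1, 0, 0, 4], [10, 10, 2, 200, 160, 3]])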
src/frontend/Example.svelte
ADDED
@@ -0,0 +1,44 @@
<script lang="ts">
  import Image from "./shared/Image.svelte";

  export let value: string;
  export let samples_dir: string;
  export let type: "gallery" | "table";
  export let selected = false;
</script>

<div
  class="container"
  class:table={type === "table"}
  class:gallery={type === "gallery"}
  class:selected
>
  <Image src={samples_dir + value} alt="" />
</div>

<style>
  .container :global(img) {
    width: 100%;
    height: 100%;
  }

  .container.selected {
    border-color: var(--border-color-accent);
  }

  .container.table {
    margin: 0 auto;
    border: 2px solid var(--border-color-primary);
    border-radius: var(--radius-lg);
    overflow: hidden;
    width: var(--size-20);
    height: var(--size-20);
    object-fit: cover;
  }

  .container.gallery {
    height: var(--size-20);
    max-height: var(--size-20);
    object-fit: cover;
  }
</style>
src/frontend/Index.svelte
ADDED
@@ -0,0 +1,167 @@
<svelte:options accessors={true} />

<script context="module" lang="ts">
  export { default as BaseImageUploader } from "./shared/ImageUploader.svelte";
  export { default as BaseStaticImage } from "./shared/ImagePreview.svelte";
  export { default as BaseExample } from "./Example.svelte";
  export { default as BaseImage } from "./shared/Image.svelte";
  export { default as BoxDrawer } from "./shared/BoxDrawer.svelte";
</script>

<script lang="ts">
  import type { Gradio, SelectData } from "@gradio/utils";
  import StaticImage from "./shared/ImagePreview.svelte";
  import ImageUploader from "./shared/ImageUploader.svelte";

  import { Block, Empty, UploadText } from "@gradio/atoms";
  import { Image } from "@gradio/icons";
  import { StatusTracker } from "@gradio/statustracker";
  import type { FileData } from "@gradio/client";
  import type { LoadingStatus } from "@gradio/statustracker";
  import { normalise_file } from "@gradio/client";

  export let elem_id = "";
  export let elem_classes: string[] = [];
  export let visible = true;

  export let value: { image: FileData; points: number[][6] } | null = null;
  $: _image = value && normalise_file(value.image, root, proxy_url);
  $: _points = value && value.points;

  export let label: string;
  export let show_label: boolean;
  export let show_download_button: boolean;
  export let root: string;
  export let proxy_url: null | string;

  export let height: number | undefined;
  export let width: number | undefined;

  export let _selectable = false;
  export let container = true;
  export let scale: number | null = null;
  export let min_width: number | undefined = undefined;
  export let loading_status: LoadingStatus;
  export let show_share_button = false;
  export let sources: "upload"[] = ["upload"];
  export let interactive: boolean;
  export let streaming: boolean;

  export let gradio: Gradio<{
    change: never;
    error: string;
    edit: never;
    stream: never;
    drag: never;
    upload: never;
    clear: never;
    select: SelectData;
    share: ShareData;
  }>;

  $: url = _image?.url;
  $: url && gradio.dispatch("change");

  let dragging: boolean;
  let active_tool: null | "webcam" = null;
</script>

{#if !interactive}
  <Block
    {visible}
    variant={"solid"}
    border_mode={dragging ? "focus" : "base"}
    padding={false}
    {elem_id}
    {elem_classes}
    height={height || undefined}
    {width}
    allow_overflow={false}
    {container}
    {scale}
    {min_width}
  >
    <StatusTracker
      autoscroll={gradio.autoscroll}
      i18n={gradio.i18n}
      {...loading_status}
    />
    <StaticImage
      on:select={({ detail }) => gradio.dispatch("select", detail)}
      on:share={({ detail }) => gradio.dispatch("share", detail)}
      on:error={({ detail }) => gradio.dispatch("error", detail)}
      value={_image}
      {label}
      {show_label}
      {show_download_button}
      selectable={_selectable}
      {show_share_button}
      i18n={gradio.i18n}
    />
  </Block>
{:else}
  <Block
    {visible}
    variant={_image === null ? "dashed" : "solid"}
    border_mode={dragging ? "focus" : "base"}
    padding={false}
    {elem_id}
    {elem_classes}
    height={height || undefined}
    {width}
    allow_overflow={false}
    {container}
    {scale}
    {min_width}
  >
    <StatusTracker
      autoscroll={gradio.autoscroll}
      i18n={gradio.i18n}
      {...loading_status}
    />

    <ImageUploader
      bind:active_tool
      bind:value={_image}
      bind:points={_points}
      {root}
      {sources}
      on:points_change={({ detail }) => (value.points = detail)}
      on:edit={() => gradio.dispatch("edit")}
      on:clear={() => {
        value = null;
        gradio.dispatch("clear");
        gradio.dispatch("change");
      }}
      on:stream={() => gradio.dispatch("stream")}
      on:drag={({ detail }) => (dragging = detail)}
      on:upload={({ detail }) => {
        if (value == null) {
          value = { image: detail, points: null };
        } else {
          value.image = detail;
        }
        gradio.dispatch("upload");
      }}
      on:select={({ detail }) => gradio.dispatch("select", detail)}
      on:share={({ detail }) => gradio.dispatch("share", detail)}
      on:error={({ detail }) => {
        loading_status = loading_status;
        loading_status.status = "error";
        gradio.dispatch("error", detail);
      }}
      on:click={() => gradio.dispatch("error", "bad thing happened")}
      on:error
      {label}
      {show_label}
      {streaming}
      i18n={gradio.i18n}
    >
      {#if sources.includes("upload")}
        <UploadText i18n={gradio.i18n} type="image" mode="short" />
      {:else}
        <Empty unpadded_box={true} size="large"><Image /></Empty>
      {/if}
    </ImageUploader>
  </Block>
{/if}
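Two details of the contract above matter for backend code: `value` is `{ image: FileData; points: number[][6] } | null`, and the `on:upload` handler initializes `points: null`, so `points` can arrive as `None` before anything has been drawn. A defensive sketch under those assumptions (`normalize_value` is an illustrative name, not part of the package):

# --- illustrative sketch, not part of the uploaded files ---
def normalize_value(value):
    """Return (image, rows) from an ImagePrompter payload, tolerating nulls."""
    if value is None or value.get("image") is None:
        return None, []
    rows = value.get("points") or []  # "points" is null until the user draws
    if any(len(row) != 6 for row in rows):
        raise ValueError("each prompt row is expected to carry six numbers")
    return value["image"], rows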
src/frontend/package-lock.json
ADDED
@@ -0,0 +1,718 @@
{
  "name": "gradio_image_prompter",
  "version": "0.4.2",
  "lockfileVersion": 3,
  "requires": true,
  "packages": {
    "": {
      "name": "gradio_image_prompter",
      "version": "0.4.2",
      "license": "ISC",
      "dependencies": {
        "@gradio/atoms": "0.3.1",
        "@gradio/client": "0.8.2",
        "@gradio/icons": "0.3.1",
        "@gradio/statustracker": "0.4.1",
        "@gradio/upload": "0.5.2",
        "@gradio/utils": "0.2.0",
        "@gradio/wasm": "0.3.0",
        "cropperjs": "^1.5.12",
        "lazy-brush": "^1.0.1",
        "resize-observer-polyfill": "^1.5.1"
      }
    },
    "node_modules/@ampproject/remapping": {
      "version": "2.2.1",
      "resolved": "https://registry.npmjs.org/@ampproject/remapping/-/remapping-2.2.1.tgz",
      "integrity": "sha512-lFMjJTrFL3j7L9yBxwYfCq2k6qqwHyzuUl/XBnif78PWTJYyL/dfowQHWE3sp6U6ZzqWiiIZnpTMO96zhkjwtg==",
      "peer": true,
      "dependencies": {
        "@jridgewell/gen-mapping": "^0.3.0",
        "@jridgewell/trace-mapping": "^0.3.9"
      },
      "engines": {
        "node": ">=6.0.0"
      }
    },
    "node_modules/@esbuild/darwin-arm64": {
      "version": "0.19.8",
      "resolved": "https://registry.npmjs.org/@esbuild/darwin-arm64/-/darwin-arm64-0.19.8.tgz",
      "integrity": "sha512-RQw9DemMbIq35Bprbboyf8SmOr4UXsRVxJ97LgB55VKKeJOOdvsIPy0nFyF2l8U+h4PtBx/1kRf0BelOYCiQcw==",
      "cpu": [
        "arm64"
      ],
      "optional": true,
      "os": [
        "darwin"
      ],
      "engines": {
        "node": ">=12"
      }
    },
    "node_modules/@formatjs/ecma402-abstract": {
      "version": "1.11.4",
      "resolved": "https://registry.npmjs.org/@formatjs/ecma402-abstract/-/ecma402-abstract-1.11.4.tgz",
      "integrity": "sha512-EBikYFp2JCdIfGEb5G9dyCkTGDmC57KSHhRQOC3aYxoPWVZvfWCDjZwkGYHN7Lis/fmuWl906bnNTJifDQ3sXw==",
      "dependencies": {
        "@formatjs/intl-localematcher": "0.2.25",
        "tslib": "^2.1.0"
      }
    },
    "node_modules/@formatjs/fast-memoize": {
      "version": "1.2.1",
      "resolved": "https://registry.npmjs.org/@formatjs/fast-memoize/-/fast-memoize-1.2.1.tgz",
      "integrity": "sha512-Rg0e76nomkz3vF9IPlKeV+Qynok0r7YZjL6syLz4/urSg0IbjPZCB/iYUMNsYA643gh4mgrX3T7KEIFIxJBQeg==",
      "dependencies": {
        "tslib": "^2.1.0"
      }
    },
    "node_modules/@formatjs/icu-messageformat-parser": {
      "version": "2.1.0",
      "resolved": "https://registry.npmjs.org/@formatjs/icu-messageformat-parser/-/icu-messageformat-parser-2.1.0.tgz",
      "integrity": "sha512-Qxv/lmCN6hKpBSss2uQ8IROVnta2r9jd3ymUEIjm2UyIkUCHVcbUVRGL/KS/wv7876edvsPe+hjHVJ4z8YuVaw==",
      "dependencies": {
        "@formatjs/ecma402-abstract": "1.11.4",
        "@formatjs/icu-skeleton-parser": "1.3.6",
        "tslib": "^2.1.0"
      }
    },
    "node_modules/@formatjs/icu-skeleton-parser": {
      "version": "1.3.6",
      "resolved": "https://registry.npmjs.org/@formatjs/icu-skeleton-parser/-/icu-skeleton-parser-1.3.6.tgz",
      "integrity": "sha512-I96mOxvml/YLrwU2Txnd4klA7V8fRhb6JG/4hm3VMNmeJo1F03IpV2L3wWt7EweqNLES59SZ4d6hVOPCSf80Bg==",
      "dependencies": {
        "@formatjs/ecma402-abstract": "1.11.4",
        "tslib": "^2.1.0"
      }
    },
    "node_modules/@formatjs/intl-localematcher": {
      "version": "0.2.25",
      "resolved": "https://registry.npmjs.org/@formatjs/intl-localematcher/-/intl-localematcher-0.2.25.tgz",
      "integrity": "sha512-YmLcX70BxoSopLFdLr1Ds99NdlTI2oWoLbaUW2M406lxOIPzE1KQhRz2fPUkq34xVZQaihCoU29h0KK7An3bhA==",
      "dependencies": {
        "tslib": "^2.1.0"
      }
    },
    "node_modules/@gradio/atoms": {
      "version": "0.3.1",
      "resolved": "https://registry.npmjs.org/@gradio/atoms/-/atoms-0.3.1.tgz",
      "integrity": "sha512-P2u1Qud/EmwfGMD9HZdSkw4L3RznGUE3owBx4lRY7JP/1J3sDqy/wN8pZFex+kPKripX29+IiH6+4TRqSs2zFw==",
      "dependencies": {
        "@gradio/icons": "^0.3.1",
        "@gradio/utils": "^0.2.0"
      }
    },
    "node_modules/@gradio/client": {
      "version": "0.8.2",
      "resolved": "https://registry.npmjs.org/@gradio/client/-/client-0.8.2.tgz",
      "integrity": "sha512-ZWrkJBsVg7ioIHhGV1pqIo4MBL0GPn0SHLeA04cqrsxkWiZHZz9CB5wFtm1kaFtd68ERAgEzR8OYVzzlBd2pyQ==",
      "dependencies": {
        "bufferutil": "^4.0.7",
        "semiver": "^1.1.0",
        "ws": "^8.13.0"
      },
      "engines": {
        "node": ">=18.0.0"
      }
    },
    "node_modules/@gradio/column": {
      "version": "0.1.0",
      "resolved": "https://registry.npmjs.org/@gradio/column/-/column-0.1.0.tgz",
      "integrity": "sha512-P24nqqVnMXBaDA1f/zSN5HZRho4PxP8Dq+7VltPHlmxIEiZYik2AJ4J0LeuIha34FDO0guu/16evdrpvGIUAfw=="
    },
    "node_modules/@gradio/icons": {
      "version": "0.3.1",
      "resolved": "https://registry.npmjs.org/@gradio/icons/-/icons-0.3.1.tgz",
      "integrity": "sha512-ZwgXODKa7irD+spE0RCae8fyixgwKOtds6wHL300n9pIRYzL9QkvS1cQJbz0C6NupFCYRSGTQrV5hoLo7yQCew=="
    },
    "node_modules/@gradio/statustracker": {
      "version": "0.4.1",
      "resolved": "https://registry.npmjs.org/@gradio/statustracker/-/statustracker-0.4.1.tgz",
      "integrity": "sha512-6YV5UDzau/nNid5D25YLZyPGm/tFd9b0a+x0OCHY+aE3cez7PD4v6hWGuQXPNwa/69viRm8YyoQ2Vex7/3updA==",
      "dependencies": {
        "@gradio/atoms": "^0.3.1",
        "@gradio/column": "^0.1.0",
        "@gradio/icons": "^0.3.1",
        "@gradio/utils": "^0.2.0"
      }
    },
    "node_modules/@gradio/theme": {
      "version": "0.2.0",
      "resolved": "https://registry.npmjs.org/@gradio/theme/-/theme-0.2.0.tgz",
      "integrity": "sha512-33c68Nk7oRXLn08OxPfjcPm7S4tXGOUV1I1bVgzdM2YV5o1QBOS1GEnXPZPu/CEYPePLMB6bsDwffrLEyLGWVQ=="
    },
    "node_modules/@gradio/upload": {
      "version": "0.5.2",
      "resolved": "https://registry.npmjs.org/@gradio/upload/-/upload-0.5.2.tgz",
      "integrity": "sha512-IXQZ/+0TG/FSOSjJKE28lUG+vGGboD+YQswyvSK6lOpRHvixiqK+eJo0g3jHvmWO9wZLBrEx3XRv8LSgnVHHzw==",
      "dependencies": {
        "@gradio/atoms": "^0.3.1",
        "@gradio/client": "^0.8.2",
        "@gradio/icons": "^0.3.1",
        "@gradio/upload": "^0.5.2",
        "@gradio/utils": "^0.2.0"
      }
    },
    "node_modules/@gradio/utils": {
      "version": "0.2.0",
      "resolved": "https://registry.npmjs.org/@gradio/utils/-/utils-0.2.0.tgz",
      "integrity": "sha512-YkwzXufi6IxQrlMW+1sFo8Yn6F9NLL69ZoBsbo7QEhms0v5L7pmOTw+dfd7M3dwbRP2lgjrb52i1kAIN3n6aqQ==",
      "dependencies": {
        "@gradio/theme": "^0.2.0",
        "svelte-i18n": "^3.6.0"
      }
    },
    "node_modules/@gradio/wasm": {
      "version": "0.3.0",
      "resolved": "https://registry.npmjs.org/@gradio/wasm/-/wasm-0.3.0.tgz",
      "integrity": "sha512-avgMFBrHUUDzQraBMW9mNgiQMMkObsPzDap0PZV6FgzfDpW8K+R4BBcl+gClq82jRi3ulDjtISTXriUrNNfkrg==",
      "dependencies": {
        "@types/path-browserify": "^1.0.0",
        "path-browserify": "^1.0.1"
      }
    },
    "node_modules/@jridgewell/gen-mapping": {
      "version": "0.3.3",
      "resolved": "https://registry.npmjs.org/@jridgewell/gen-mapping/-/gen-mapping-0.3.3.tgz",
      "integrity": "sha512-HLhSWOLRi875zjjMG/r+Nv0oCW8umGb0BgEhyX3dDX3egwZtB8PqLnjz3yedt8R5StBrzcg4aBpnh8UA9D1BoQ==",
      "peer": true,
      "dependencies": {
        "@jridgewell/set-array": "^1.0.1",
        "@jridgewell/sourcemap-codec": "^1.4.10",
        "@jridgewell/trace-mapping": "^0.3.9"
      },
      "engines": {
        "node": ">=6.0.0"
      }
    },
    "node_modules/@jridgewell/resolve-uri": {
      "version": "3.1.1",
      "resolved": "https://registry.npmjs.org/@jridgewell/resolve-uri/-/resolve-uri-3.1.1.tgz",
      "integrity": "sha512-dSYZh7HhCDtCKm4QakX0xFpsRDqjjtZf/kjI/v3T3Nwt5r8/qz/M19F9ySyOqU94SXBmeG9ttTul+YnR4LOxFA==",
      "peer": true,
      "engines": {
        "node": ">=6.0.0"
      }
    },
    "node_modules/@jridgewell/set-array": {
      "version": "1.1.2",
      "resolved": "https://registry.npmjs.org/@jridgewell/set-array/-/set-array-1.1.2.tgz",
      "integrity": "sha512-xnkseuNADM0gt2bs+BvhO0p78Mk762YnZdsuzFV018NoG1Sj1SCQvpSqa7XUaTam5vAGasABV9qXASMKnFMwMw==",
      "peer": true,
      "engines": {
        "node": ">=6.0.0"
      }
    },
    "node_modules/@jridgewell/sourcemap-codec": {
      "version": "1.4.15",
      "resolved": "https://registry.npmjs.org/@jridgewell/sourcemap-codec/-/sourcemap-codec-1.4.15.tgz",
      "integrity": "sha512-eF2rxCRulEKXHTRiDrDy6erMYWqNw4LPdQ8UQA4huuxaQsVeRPFl2oM8oDGxMFhJUWZf9McpLtJasDDZb/Bpeg==",
      "peer": true
    },
    "node_modules/@jridgewell/trace-mapping": {
      "version": "0.3.20",
      "resolved": "https://registry.npmjs.org/@jridgewell/trace-mapping/-/trace-mapping-0.3.20.tgz",
      "integrity": "sha512-R8LcPeWZol2zR8mmH3JeKQ6QRCFb7XgUhV9ZlGhHLGyg4wpPiPZNQOOWhFZhxKw8u//yTbNGI42Bx/3paXEQ+Q==",
      "peer": true,
      "dependencies": {
        "@jridgewell/resolve-uri": "^3.1.0",
        "@jridgewell/sourcemap-codec": "^1.4.14"
      }
    },
    "node_modules/@types/estree": {
      "version": "1.0.5",
      "resolved": "https://registry.npmjs.org/@types/estree/-/estree-1.0.5.tgz",
      "integrity": "sha512-/kYRxGDLWzHOB7q+wtSUQlFrtcdUccpfy+X+9iMBpHK8QLLhx2wIPYuS5DYtR9Wa/YlZAbIovy7qVdB1Aq6Lyw==",
      "peer": true
    },
    "node_modules/@types/path-browserify": {
      "version": "1.0.2",
      "resolved": "https://registry.npmjs.org/@types/path-browserify/-/path-browserify-1.0.2.tgz",
      "integrity": "sha512-ZkC5IUqqIFPXx3ASTTybTzmQdwHwe2C0u3eL75ldQ6T9E9IWFJodn6hIfbZGab73DfyiHN4Xw15gNxUq2FbvBA=="
    },
    "node_modules/acorn": {
      "version": "8.11.2",
      "resolved": "https://registry.npmjs.org/acorn/-/acorn-8.11.2.tgz",
      "integrity": "sha512-nc0Axzp/0FILLEVsm4fNwLCwMttvhEI263QtVPQcbpfZZ3ts0hLsZGOpE6czNlid7CJ9MlyH8reXkpsf3YUY4w==",
      "peer": true,
      "bin": {
        "acorn": "bin/acorn"
      },
      "engines": {
        "node": ">=0.4.0"
      }
    },
    "node_modules/aria-query": {
      "version": "5.3.0",
      "resolved": "https://registry.npmjs.org/aria-query/-/aria-query-5.3.0.tgz",
      "integrity": "sha512-b0P0sZPKtyu8HkeRAfCq0IfURZK+SuwMjY1UXGBU27wpAiTwQAIlq56IbIO+ytk/JjS1fMR14ee5WBBfKi5J6A==",
      "peer": true,
      "dependencies": {
        "dequal": "^2.0.3"
      }
    },
    "node_modules/axobject-query": {
      "version": "3.2.1",
      "resolved": "https://registry.npmjs.org/axobject-query/-/axobject-query-3.2.1.tgz",
      "integrity": "sha512-jsyHu61e6N4Vbz/v18DHwWYKK0bSWLqn47eeDSKPB7m8tqMHF9YJ+mhIk2lVteyZrY8tnSj/jHOv4YiTCuCJgg==",
      "peer": true,
      "dependencies": {
        "dequal": "^2.0.3"
      }
    },
    "node_modules/bufferutil": {
      "version": "4.0.8",
      "resolved": "https://registry.npmjs.org/bufferutil/-/bufferutil-4.0.8.tgz",
      "integrity": "sha512-4T53u4PdgsXqKaIctwF8ifXlRTTmEPJ8iEPWFdGZvcf7sbwYo6FKFEX9eNNAnzFZ7EzJAQ3CJeOtCRA4rDp7Pw==",
      "hasInstallScript": true,
      "dependencies": {
        "node-gyp-build": "^4.3.0"
      },
      "engines": {
        "node": ">=6.14.2"
      }
    },
    "node_modules/cli-color": {
      "version": "2.0.3",
      "resolved": "https://registry.npmjs.org/cli-color/-/cli-color-2.0.3.tgz",
      "integrity": "sha512-OkoZnxyC4ERN3zLzZaY9Emb7f/MhBOIpePv0Ycok0fJYT+Ouo00UBEIwsVsr0yoow++n5YWlSUgST9GKhNHiRQ==",
      "dependencies": {
        "d": "^1.0.1",
        "es5-ext": "^0.10.61",
        "es6-iterator": "^2.0.3",
        "memoizee": "^0.4.15",
        "timers-ext": "^0.1.7"
      },
      "engines": {
        "node": ">=0.10"
      }
    },
    "node_modules/code-red": {
      "version": "1.0.4",
      "resolved": "https://registry.npmjs.org/code-red/-/code-red-1.0.4.tgz",
      "integrity": "sha512-7qJWqItLA8/VPVlKJlFXU+NBlo/qyfs39aJcuMT/2ere32ZqvF5OSxgdM5xOfJJ7O429gg2HM47y8v9P+9wrNw==",
      "peer": true,
      "dependencies": {
        "@jridgewell/sourcemap-codec": "^1.4.15",
        "@types/estree": "^1.0.1",
        "acorn": "^8.10.0",
        "estree-walker": "^3.0.3",
        "periscopic": "^3.1.0"
      }
    },
    "node_modules/cropperjs": {
      "version": "1.6.1",
      "resolved": "https://registry.npmjs.org/cropperjs/-/cropperjs-1.6.1.tgz",
      "integrity": "sha512-F4wsi+XkDHCOMrHMYjrTEE4QBOrsHHN5/2VsVAaRq8P7E5z7xQpT75S+f/9WikmBEailas3+yo+6zPIomW+NOA=="
    },
    "node_modules/css-tree": {
      "version": "2.3.1",
      "resolved": "https://registry.npmjs.org/css-tree/-/css-tree-2.3.1.tgz",
      "integrity": "sha512-6Fv1DV/TYw//QF5IzQdqsNDjx/wc8TrMBZsqjL9eW01tWb7R7k/mq+/VXfJCl7SoD5emsJop9cOByJZfs8hYIw==",
      "peer": true,
      "dependencies": {
        "mdn-data": "2.0.30",
        "source-map-js": "^1.0.1"
      },
      "engines": {
        "node": "^10 || ^12.20.0 || ^14.13.0 || >=15.0.0"
      }
    },
    "node_modules/d": {
      "version": "1.0.1",
      "resolved": "https://registry.npmjs.org/d/-/d-1.0.1.tgz",
      "integrity": "sha512-m62ShEObQ39CfralilEQRjH6oAMtNCV1xJyEx5LpRYUVN+EviphDgUc/F3hnYbADmkiNs67Y+3ylmlG7Lnu+FA==",
      "dependencies": {
        "es5-ext": "^0.10.50",
        "type": "^1.0.1"
      }
    },
    "node_modules/deepmerge": {
      "version": "4.3.1",
      "resolved": "https://registry.npmjs.org/deepmerge/-/deepmerge-4.3.1.tgz",
      "integrity": "sha512-3sUqbMEc77XqpdNO7FRyRog+eW3ph+GYCbj+rK+uYyRMuwsVy0rMiVtPn+QJlKFvWP/1PYpapqYn0Me2knFn+A==",
      "engines": {
        "node": ">=0.10.0"
      }
    },
    "node_modules/dequal": {
      "version": "2.0.3",
      "resolved": "https://registry.npmjs.org/dequal/-/dequal-2.0.3.tgz",
      "integrity": "sha512-0je+qPKHEMohvfRTCEo3CrPG6cAzAYgmzKyxRiYSSDkS6eGJdyVJm7WaYA5ECaAD9wLB2T4EEeymA5aFVcYXCA==",
      "peer": true,
      "engines": {
        "node": ">=6"
      }
    },
    "node_modules/es5-ext": {
      "version": "0.10.62",
      "resolved": "https://registry.npmjs.org/es5-ext/-/es5-ext-0.10.62.tgz",
      "integrity": "sha512-BHLqn0klhEpnOKSrzn/Xsz2UIW8j+cGmo9JLzr8BiUapV8hPL9+FliFqjwr9ngW7jWdnxv6eO+/LqyhJVqgrjA==",
      "hasInstallScript": true,
      "dependencies": {
        "es6-iterator": "^2.0.3",
        "es6-symbol": "^3.1.3",
        "next-tick": "^1.1.0"
      },
      "engines": {
        "node": ">=0.10"
      }
    },
    "node_modules/es6-iterator": {
      "version": "2.0.3",
      "resolved": "https://registry.npmjs.org/es6-iterator/-/es6-iterator-2.0.3.tgz",
      "integrity": "sha512-zw4SRzoUkd+cl+ZoE15A9o1oQd920Bb0iOJMQkQhl3jNc03YqVjAhG7scf9C5KWRU/R13Orf588uCC6525o02g==",
      "dependencies": {
        "d": "1",
        "es5-ext": "^0.10.35",
        "es6-symbol": "^3.1.1"
      }
    },
    "node_modules/es6-symbol": {
      "version": "3.1.3",
      "resolved": "https://registry.npmjs.org/es6-symbol/-/es6-symbol-3.1.3.tgz",
      "integrity": "sha512-NJ6Yn3FuDinBaBRWl/q5X/s4koRHBrgKAu+yGI6JCBeiu3qrcbJhwT2GeR/EXVfylRk8dpQVJoLEFhK+Mu31NA==",
      "dependencies": {
        "d": "^1.0.1",
        "ext": "^1.1.2"
      }
    },
    "node_modules/es6-weak-map": {
      "version": "2.0.3",
      "resolved": "https://registry.npmjs.org/es6-weak-map/-/es6-weak-map-2.0.3.tgz",
      "integrity": "sha512-p5um32HOTO1kP+w7PRnB+5lQ43Z6muuMuIMffvDN8ZB4GcnjLBV6zGStpbASIMk4DCAvEaamhe2zhyCb/QXXsA==",
      "dependencies": {
        "d": "1",
        "es5-ext": "^0.10.46",
        "es6-iterator": "^2.0.3",
        "es6-symbol": "^3.1.1"
      }
    },
    "node_modules/esbuild": {
      "version": "0.19.8",
      "resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.19.8.tgz",
      "integrity": "sha512-l7iffQpT2OrZfH2rXIp7/FkmaeZM0vxbxN9KfiCwGYuZqzMg/JdvX26R31Zxn/Pxvsrg3Y9N6XTcnknqDyyv4w==",
      "hasInstallScript": true,
      "bin": {
        "esbuild": "bin/esbuild"
      },
      "engines": {
        "node": ">=12"
      },
      "optionalDependencies": {
        "@esbuild/android-arm": "0.19.8",
        "@esbuild/android-arm64": "0.19.8",
        "@esbuild/android-x64": "0.19.8",
        "@esbuild/darwin-arm64": "0.19.8",
        "@esbuild/darwin-x64": "0.19.8",
        "@esbuild/freebsd-arm64": "0.19.8",
        "@esbuild/freebsd-x64": "0.19.8",
        "@esbuild/linux-arm": "0.19.8",
        "@esbuild/linux-arm64": "0.19.8",
        "@esbuild/linux-ia32": "0.19.8",
        "@esbuild/linux-loong64": "0.19.8",
        "@esbuild/linux-mips64el": "0.19.8",
        "@esbuild/linux-ppc64": "0.19.8",
        "@esbuild/linux-riscv64": "0.19.8",
        "@esbuild/linux-s390x": "0.19.8",
        "@esbuild/linux-x64": "0.19.8",
        "@esbuild/netbsd-x64": "0.19.8",
        "@esbuild/openbsd-x64": "0.19.8",
        "@esbuild/sunos-x64": "0.19.8",
        "@esbuild/win32-arm64": "0.19.8",
        "@esbuild/win32-ia32": "0.19.8",
        "@esbuild/win32-x64": "0.19.8"
      }
    },
    "node_modules/estree-walker": {
      "version": "3.0.3",
      "resolved": "https://registry.npmjs.org/estree-walker/-/estree-walker-3.0.3.tgz",
      "integrity": "sha512-7RUKfXgSMMkzt6ZuXmqapOurLGPPfgj6l9uRZ7lRGolvk0y2yocc35LdcxKC5PQZdn2DMqioAQ2NoWcrTKmm6g==",
      "peer": true,
      "dependencies": {
        "@types/estree": "^1.0.0"
      }
    },
    "node_modules/event-emitter": {
      "version": "0.3.5",
      "resolved": "https://registry.npmjs.org/event-emitter/-/event-emitter-0.3.5.tgz",
      "integrity": "sha512-D9rRn9y7kLPnJ+hMq7S/nhvoKwwvVJahBi2BPmx3bvbsEdK3W9ii8cBSGjP+72/LnM4n6fo3+dkCX5FeTQruXA==",
      "dependencies": {
        "d": "1",
        "es5-ext": "~0.10.14"
      }
    },
    "node_modules/ext": {
      "version": "1.7.0",
      "resolved": "https://registry.npmjs.org/ext/-/ext-1.7.0.tgz",
      "integrity": "sha512-6hxeJYaL110a9b5TEJSj0gojyHQAmA2ch5Os+ySCiA1QGdS697XWY1pzsrSjqA9LDEEgdB/KypIlR59RcLuHYw==",
      "dependencies": {
        "type": "^2.7.2"
      }
    },
    "node_modules/ext/node_modules/type": {
      "version": "2.7.2",
      "resolved": "https://registry.npmjs.org/type/-/type-2.7.2.tgz",
      "integrity": "sha512-dzlvlNlt6AXU7EBSfpAscydQ7gXB+pPGsPnfJnZpiNJBDj7IaJzQlBZYGdEi4R9HmPdBv2XmWJ6YUtoTa7lmCw=="
    },
    "node_modules/globalyzer": {
      "version": "0.1.0",
      "resolved": "https://registry.npmjs.org/globalyzer/-/globalyzer-0.1.0.tgz",
      "integrity": "sha512-40oNTM9UfG6aBmuKxk/giHn5nQ8RVz/SS4Ir6zgzOv9/qC3kKZ9v4etGTcJbEl/NyVQH7FGU7d+X1egr57Md2Q=="
    },
    "node_modules/globrex": {
      "version": "0.1.2",
      "resolved": "https://registry.npmjs.org/globrex/-/globrex-0.1.2.tgz",
      "integrity": "sha512-uHJgbwAMwNFf5mLst7IWLNg14x1CkeqglJb/K3doi4dw6q2IvAAmM/Y81kevy83wP+Sst+nutFTYOGg3d1lsxg=="
    },
    "node_modules/intl-messageformat": {
      "version": "9.13.0",
      "resolved": "https://registry.npmjs.org/intl-messageformat/-/intl-messageformat-9.13.0.tgz",
      "integrity": "sha512-7sGC7QnSQGa5LZP7bXLDhVDtQOeKGeBFGHF2Y8LVBwYZoQZCgWeKoPGTa5GMG8g/TzDgeXuYJQis7Ggiw2xTOw==",
      "dependencies": {
        "@formatjs/ecma402-abstract": "1.11.4",
        "@formatjs/fast-memoize": "1.2.1",
        "@formatjs/icu-messageformat-parser": "2.1.0",
        "tslib": "^2.1.0"
      }
    },
    "node_modules/is-promise": {
      "version": "2.2.2",
      "resolved": "https://registry.npmjs.org/is-promise/-/is-promise-2.2.2.tgz",
      "integrity": "sha512-+lP4/6lKUBfQjZ2pdxThZvLUAafmZb8OAxFb8XXtiQmS35INgr85hdOGoEs124ez1FCnZJt6jau/T+alh58QFQ=="
    },
    "node_modules/is-reference": {
      "version": "3.0.2",
      "resolved": "https://registry.npmjs.org/is-reference/-/is-reference-3.0.2.tgz",
      "integrity": "sha512-v3rht/LgVcsdZa3O2Nqs+NMowLOxeOm7Ay9+/ARQ2F+qEoANRcqrjAZKGN0v8ymUetZGgkp26LTnGT7H0Qo9Pg==",
      "peer": true,
      "dependencies": {
        "@types/estree": "*"
      }
    },
    "node_modules/lazy-brush": {
      "version": "1.0.1",
      "resolved": "https://registry.npmjs.org/lazy-brush/-/lazy-brush-1.0.1.tgz",
      "integrity": "sha512-xT/iSClTVi7vLoF8dCWTBhCuOWqsLXCMPa6ucVmVAk6hyNCM5JeS1NLhXqIrJktUg+caEYKlqSOUU4u3cpXzKg=="
    },
    "node_modules/locate-character": {
      "version": "3.0.0",
      "resolved": "https://registry.npmjs.org/locate-character/-/locate-character-3.0.0.tgz",
      "integrity": "sha512-SW13ws7BjaeJ6p7Q6CO2nchbYEc3X3J6WrmTTDto7yMPqVSZTUyY5Tjbid+Ab8gLnATtygYtiDIJGQRRn2ZOiA==",
      "peer": true
    },
    "node_modules/lru-queue": {
      "version": "0.1.0",
      "resolved": "https://registry.npmjs.org/lru-queue/-/lru-queue-0.1.0.tgz",
      "integrity": "sha512-BpdYkt9EvGl8OfWHDQPISVpcl5xZthb+XPsbELj5AQXxIC8IriDZIQYjBJPEm5rS420sjZ0TLEzRcq5KdBhYrQ==",
      "dependencies": {
        "es5-ext": "~0.10.2"
      }
    },
    "node_modules/magic-string": {
      "version": "0.30.5",
      "resolved": "https://registry.npmjs.org/magic-string/-/magic-string-0.30.5.tgz",
      "integrity": "sha512-7xlpfBaQaP/T6Vh8MO/EqXSW5En6INHEvEXQiuff7Gku0PWjU3uf6w/j9o7O+SpB5fOAkrI5HeoNgwjEO0pFsA==",
      "peer": true,
      "dependencies": {
        "@jridgewell/sourcemap-codec": "^1.4.15"
      },
      "engines": {
        "node": ">=12"
      }
    },
    "node_modules/mdn-data": {
      "version": "2.0.30",
      "resolved": "https://registry.npmjs.org/mdn-data/-/mdn-data-2.0.30.tgz",
      "integrity": "sha512-GaqWWShW4kv/G9IEucWScBx9G1/vsFZZJUO+tD26M8J8z3Kw5RDQjaoZe03YAClgeS/SWPOcb4nkFBTEi5DUEA==",
      "peer": true
    },
    "node_modules/memoizee": {
      "version": "0.4.15",
      "resolved": "https://registry.npmjs.org/memoizee/-/memoizee-0.4.15.tgz",
      "integrity": "sha512-UBWmJpLZd5STPm7PMUlOw/TSy972M+z8gcyQ5veOnSDRREz/0bmpyTfKt3/51DhEBqCZQn1udM/5flcSPYhkdQ==",
      "dependencies": {
        "d": "^1.0.1",
        "es5-ext": "^0.10.53",
        "es6-weak-map": "^2.0.3",
        "event-emitter": "^0.3.5",
        "is-promise": "^2.2.2",
        "lru-queue": "^0.1.0",
        "next-tick": "^1.1.0",
        "timers-ext": "^0.1.7"
      }
    },
    "node_modules/mri": {
      "version": "1.2.0",
      "resolved": "https://registry.npmjs.org/mri/-/mri-1.2.0.tgz",
      "integrity": "sha512-tzzskb3bG8LvYGFF/mDTpq3jpI6Q9wc3LEmBaghu+DdCssd1FakN7Bc0hVNmEyGq1bq3RgfkCb3cmQLpNPOroA==",
      "engines": {
        "node": ">=4"
      }
    },
    "node_modules/next-tick": {
      "version": "1.1.0",
      "resolved": "https://registry.npmjs.org/next-tick/-/next-tick-1.1.0.tgz",
      "integrity": "sha512-CXdUiJembsNjuToQvxayPZF9Vqht7hewsvy2sOWafLvi2awflj9mOC6bHIg50orX8IJvWKY9wYQ/zB2kogPslQ=="
    },
    "node_modules/node-gyp-build": {
      "version": "4.7.1",
      "resolved": "https://registry.npmjs.org/node-gyp-build/-/node-gyp-build-4.7.1.tgz",
      "integrity": "sha512-wTSrZ+8lsRRa3I3H8Xr65dLWSgCvY2l4AOnaeKdPA9TB/WYMPaTcrzf3rXvFoVvjKNVnu0CcWSx54qq9GKRUYg==",
      "bin": {
        "node-gyp-build": "bin.js",
        "node-gyp-build-optional": "optional.js",
        "node-gyp-build-test": "build-test.js"
      }
    },
    "node_modules/path-browserify": {
      "version": "1.0.1",
      "resolved": "https://registry.npmjs.org/path-browserify/-/path-browserify-1.0.1.tgz",
      "integrity": "sha512-b7uo2UCUOYZcnF/3ID0lulOJi/bafxa1xPe7ZPsammBSpjSWQkjNxlt635YGS2MiR9GjvuXCtz2emr3jbsz98g=="
    },
    "node_modules/periscopic": {
      "version": "3.1.0",
      "resolved": "https://registry.npmjs.org/periscopic/-/periscopic-3.1.0.tgz",
      "integrity": "sha512-vKiQ8RRtkl9P+r/+oefh25C3fhybptkHKCZSPlcXiJux2tJF55GnEj3BVn4A5gKfq9NWWXXrxkHBwVPUfH0opw==",
      "peer": true,
      "dependencies": {
        "@types/estree": "^1.0.0",
        "estree-walker": "^3.0.0",
        "is-reference": "^3.0.0"
      }
    },
    "node_modules/resize-observer-polyfill": {
      "version": "1.5.1",
      "resolved": "https://registry.npmjs.org/resize-observer-polyfill/-/resize-observer-polyfill-1.5.1.tgz",
      "integrity": "sha512-LwZrotdHOo12nQuZlHEmtuXdqGoOD0OhaxopaNFxWzInpEgaLWoVuAMbTzixuosCx2nEG58ngzW3vxdWoxIgdg=="
    },
    "node_modules/sade": {
      "version": "1.8.1",
      "resolved": "https://registry.npmjs.org/sade/-/sade-1.8.1.tgz",
      "integrity": "sha512-xal3CZX1Xlo/k4ApwCFrHVACi9fBqJ7V+mwhBsuf/1IOKbBy098Fex+Wa/5QMubw09pSZ/u8EY8PWgevJsXp1A==",
      "dependencies": {
        "mri": "^1.1.0"
      },
      "engines": {
        "node": ">=6"
      }
    },
    "node_modules/semiver": {
      "version": "1.1.0",
      "resolved": "https://registry.npmjs.org/semiver/-/semiver-1.1.0.tgz",
      "integrity": "sha512-QNI2ChmuioGC1/xjyYwyZYADILWyW6AmS1UH6gDj/SFUUUS4MBAWs/7mxnkRPc/F4iHezDP+O8t0dO8WHiEOdg==",
      "engines": {
        "node": ">=6"
      }
    },
    "node_modules/source-map-js": {
      "version": "1.0.2",
      "resolved": "https://registry.npmjs.org/source-map-js/-/source-map-js-1.0.2.tgz",
      "integrity": "sha512-R0XvVJ9WusLiqTCEiGCmICCMplcCkIwwR11mOSD9CR5u+IXYdiseeEuXCVAjS54zqwkLcPNnmU4OeJ6tUrWhDw==",
      "peer": true,
      "engines": {
        "node": ">=0.10.0"
      }
    },
    "node_modules/svelte": {
      "version": "4.2.8",
      "resolved": "https://registry.npmjs.org/svelte/-/svelte-4.2.8.tgz",
      "integrity": "sha512-hU6dh1MPl8gh6klQZwK/n73GiAHiR95IkFsesLPbMeEZi36ydaXL/ZAb4g9sayT0MXzpxyZjR28yderJHxcmYA==",
      "peer": true,
      "dependencies": {
        "@ampproject/remapping": "^2.2.1",
        "@jridgewell/sourcemap-codec": "^1.4.15",
        "@jridgewell/trace-mapping": "^0.3.18",
        "acorn": "^8.9.0",
        "aria-query": "^5.3.0",
        "axobject-query": "^3.2.1",
        "code-red": "^1.0.3",
        "css-tree": "^2.3.1",
        "estree-walker": "^3.0.3",
        "is-reference": "^3.0.1",
        "locate-character": "^3.0.0",
        "magic-string": "^0.30.4",
        "periscopic": "^3.1.0"
      },
      "engines": {
        "node": ">=16"
      }
    },
    "node_modules/svelte-i18n": {
      "version": "3.7.4",
      "resolved": "https://registry.npmjs.org/svelte-i18n/-/svelte-i18n-3.7.4.tgz",
      "integrity": "sha512-yGRCNo+eBT4cPuU7IVsYTYjxB7I2V8qgUZPlHnNctJj5IgbJgV78flsRzpjZ/8iUYZrS49oCt7uxlU3AZv/N5Q==",
      "dependencies": {
        "cli-color": "^2.0.3",
        "deepmerge": "^4.2.2",
        "esbuild": "^0.19.2",
        "estree-walker": "^2",
        "intl-messageformat": "^9.13.0",
        "sade": "^1.8.1",
        "tiny-glob": "^0.2.9"
      },
      "bin": {
        "svelte-i18n": "dist/cli.js"
      },
      "engines": {
        "node": ">= 16"
      },
      "peerDependencies": {
        "svelte": "^3 || ^4"
      }
    },
    "node_modules/svelte-i18n/node_modules/estree-walker": {
      "version": "2.0.2",
      "resolved": "https://registry.npmjs.org/estree-walker/-/estree-walker-2.0.2.tgz",
      "integrity": "sha512-Rfkk/Mp/DL7JVje3u18FxFujQlTNR2q6QfMSMB7AvCBx91NGj/ba3kCfza0f6dVDbw7YlRf/nDrn7pQrCCyQ/w=="
    },
    "node_modules/timers-ext": {
      "version": "0.1.7",
      "resolved": "https://registry.npmjs.org/timers-ext/-/timers-ext-0.1.7.tgz",
      "integrity": "sha512-b85NUNzTSdodShTIbky6ZF02e8STtVVfD+fu4aXXShEELpozH+bCpJLYMPZbsABN2wDH7fJpqIoXxJpzbf0NqQ==",
      "dependencies": {
        "es5-ext": "~0.10.46",
        "next-tick": "1"
      }
    },
    "node_modules/tiny-glob": {
      "version": "0.2.9",
      "resolved": "https://registry.npmjs.org/tiny-glob/-/tiny-glob-0.2.9.tgz",
      "integrity": "sha512-g/55ssRPUjShh+xkfx9UPDXqhckHEsHr4Vd9zX55oSdGZc/MD0m3sferOkwWtp98bv+kcVfEHtRJgBVJzelrzg==",
      "dependencies": {
        "globalyzer": "0.1.0",
        "globrex": "^0.1.2"
      }
    },
    "node_modules/tslib": {
      "version": "2.6.2",
      "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.6.2.tgz",
      "integrity": "sha512-AEYxH93jGFPn/a2iVAwW87VuUIkR1FVUKB77NwMF7nBTDkDrrT/Hpt/IrCJ0QXhW27jTBDcf5ZY7w6RiqTMw2Q=="
    },
    "node_modules/type": {
      "version": "1.2.0",
      "resolved": "https://registry.npmjs.org/type/-/type-1.2.0.tgz",
      "integrity": "sha512-+5nt5AAniqsCnu2cEQQdpzCAh33kVx8n0VoFidKpB1dVVLAN/F+bgVOqOJqOnEnrhp222clB5p3vUlD+1QAnfg=="
    },
    "node_modules/ws": {
      "version": "8.14.2",
      "resolved": "https://registry.npmjs.org/ws/-/ws-8.14.2.tgz",
      "integrity": "sha512-wEBG1ftX4jcglPxgFCMJmZ2PLtSbJ2Peg6TmpJFTbe9GZYOQCDPdMYu/Tm0/bGZkw8paZnJY45J4K2PZrLYq8g==",
      "engines": {
        "node": ">=10.0.0"
      },
      "peerDependencies": {
        "bufferutil": "^4.0.1",
        "utf-8-validate": ">=5.0.2"
      },
      "peerDependenciesMeta": {
        "bufferutil": {
          "optional": true
        },
        "utf-8-validate": {
          "optional": true
        }
      }
    }
  }
}
src/frontend/package.json
ADDED
@@ -0,0 +1,28 @@
{
  "name": "gradio_image_prompter",
  "version": "0.4.2",
  "description": "Gradio UI packages",
  "type": "module",
  "author": "",
  "license": "ISC",
  "private": false,
  "dependencies": {
    "@gradio/atoms": "0.3.1",
    "@gradio/client": "0.8.2",
    "@gradio/icons": "0.3.1",
    "@gradio/statustracker": "0.4.1",
    "@gradio/upload": "0.5.2",
    "@gradio/utils": "0.2.0",
    "@gradio/wasm": "0.3.0",
    "cropperjs": "^1.5.12",
    "lazy-brush": "^1.0.1",
    "resize-observer-polyfill": "^1.5.1"
  },
  "main_changeset": true,
  "main": "./Index.svelte",
  "exports": {
    ".": "./Index.svelte",
    "./example": "./Example.svelte",
    "./package.json": "./package.json"
  }
}
src/frontend/shared/BoxDrawer.svelte
ADDED
@@ -0,0 +1,237 @@
<svelte:options accessors={true} />

<script lang="ts">
  import { createEventDispatcher, onDestroy, onMount, tick } from "svelte";

  const dispatch = createEventDispatcher();

  export let width = 0;
  export let height = 0;
  export let natural_width = 0;
  export let natural_height = 0;

  let boxes: Array<Array<number>> = [];
  let points: Array<Array<number>> = [];

  let canvas_container: HTMLElement;
  let canvas: HTMLCanvasElement;
  let ctx: CanvasRenderingContext2D | null;

  let mouse_pressing: boolean = false;
  let mouse_button: number;
  let prev_x: number, prev_y: number;
  let cur_x: number, cur_y: number;

  let old_width = 0;
  let old_height = 0;
  let canvasObserver: ResizeObserver;

  async function set_canvas_size(dimensions: {
    width: number;
    height: number;
  }) {
    await tick();
    canvas.width = dimensions.width;
    canvas.height = dimensions.height;
    canvas.style.width = `${dimensions.width}px`;
    canvas.style.height = `${dimensions.height}px`;
    canvas.style.marginTop = `-${dimensions.height}px`;
  }

  export async function resize_canvas() {
    if (width === old_width && height === old_height) return;
    await set_canvas_size({ width: width, height: height });
    draw_canvas();
    setTimeout(() => {
      old_height = height;
      old_width = width;
    }, 100);
    clear();
  }

  export function clear() {
    boxes = [];
    points = [];
    draw_canvas();
    dispatch("change", points);
    return true;
  }

  export function undo() {
    boxes.pop();
    points.pop();
    draw_canvas();
    dispatch("change", points);
    return true;
  }

  onMount(async () => {
    ctx = canvas.getContext("2d");
    if (ctx) {
      (ctx.lineJoin = "round"), (ctx.lineCap = "round");
      ctx.strokeStyle = "#000";
    }
    canvasObserver = new ResizeObserver(() => {
      resize_canvas();
    });
    canvasObserver.observe(canvas_container);
    draw_loop();
    clear();
  });

  onDestroy(() => {
    canvasObserver.unobserve(canvas_container);
  });

  function get_mouse_pos(e: MouseEvent | TouchEvent | FocusEvent) {
    const rect = canvas.getBoundingClientRect();
    let screenX, screenY: number;
    if (e instanceof MouseEvent) {
      screenX = e.clientX;
      screenY = e.clientY;
    } else if (e instanceof TouchEvent) {
      screenX = e.changedTouches[0].clientX;
      screenY = e.changedTouches[0].clientY;
    } else {
      return { x: prev_x, y: prev_y };
    }
    return { x: screenX - rect.left, y: screenY - rect.top };
  }

  function handle_draw_start(e: MouseEvent | TouchEvent) {
    e.preventDefault();
    (mouse_pressing = true), (mouse_button = 0);
    if (e instanceof MouseEvent) mouse_button = e.button;
    const { x, y } = get_mouse_pos(e);
    (prev_x = x), (prev_y = y);
  }

  function handle_draw_move(e: MouseEvent | TouchEvent) {
    e.preventDefault();
    const { x, y } = get_mouse_pos(e);
    (cur_x = x), (cur_y = y);
  }

  function handle_draw_end(e: MouseEvent | TouchEvent | FocusEvent) {
    e.preventDefault();
    if (mouse_pressing) {
      const { x, y } = get_mouse_pos(e);
      let x1 = Math.min(prev_x, x);
      let y1 = Math.min(prev_y, y);
      let x2 = Math.max(prev_x, x);
      let y2 = Math.max(prev_y, y);
      boxes.push([x1, y1, x2, y2]);
      let scale_x = natural_width / width;
      let scale_y = natural_height / height;
      let is_point = x1 == x2 && y1 == y2;
      points.push([
        Math.round(x1 * scale_x),
        Math.round(y1 * scale_y),
        is_point ? (mouse_button == 0 ? 1 : 0) : 2, // label1
        is_point ? 0 : Math.round(x2 * scale_x),
        is_point ? 0 : Math.round(y2 * scale_y),
        is_point ? 4 : 3, // label2
      ]);
      dispatch("change", points);
    }
    mouse_pressing = false;
  }

  function draw_loop() {
    draw_canvas();
    window.requestAnimationFrame(() => {
      draw_loop();
    });
  }

  function draw_canvas() {
    if (!ctx) return;
    ctx.clearRect(0, 0, width, height);
    if (mouse_pressing && cur_x != prev_x && prev_y != cur_y) {
      let boxes_temp = boxes.slice();
      boxes_temp.push([prev_x, prev_y, cur_x, cur_y]);
      draw_boxes(boxes_temp);
      draw_points(boxes);
    } else {
      draw_boxes(boxes);
      draw_points(boxes);
    }
  }

  function draw_boxes(boxes: Array<Array<number>>) {
    if (!ctx) return;
    ctx.fillStyle = "rgba(0, 0, 0, 0.1)";
    ctx.beginPath();
    boxes.forEach((box: Array<number>) => {
      if (box[0] != box[2] && box[1] != box[3]) {
        ctx.rect(box[0], box[1], box[2] - box[0], box[3] - box[1]);
      }
    });
    ctx.fill();
    ctx.stroke();
  }

  function draw_points(boxes: Array<Array<number>>) {
    if (!ctx) return;
    // Draw foreground points.
    ctx.beginPath();
    ctx.fillStyle = "rgba(0, 255, 255, 1.0)"; // Cyan.
    boxes.forEach((box: Array<number>, index: number) => {
      if (points[index][2] == 1) {
        let radius = Math.sqrt(width * height) * 0.01;
        ctx.moveTo(box[0] + radius, box[1]);
        ctx.arc(box[0], box[1], radius, 0, 2 * Math.PI, false);
      }
    });
    ctx.fill();
    ctx.stroke();
    // Draw background points.
    ctx.beginPath();
    ctx.fillStyle = "rgba(255, 192, 203, 1.0)"; // Pink.
    boxes.forEach((box: Array<number>, index: number) => {
      if (points[index][2] == 0) {
        let radius = Math.sqrt(width * height) * 0.01;
        ctx.moveTo(box[0] + radius, box[1]);
        ctx.arc(box[0], box[1], radius, 0, 2 * Math.PI, false);
      }
    });
    ctx.fill();
    ctx.stroke();
  }
</script>

<div class="wrap" bind:this={canvas_container}>
  <canvas
    bind:this={canvas}
    on:mousedown={handle_draw_start}
    on:mousemove={handle_draw_move}
    on:mouseout={handle_draw_move}
    on:mouseup={handle_draw_end}
    on:touchstart={handle_draw_start}
    on:touchmove={handle_draw_move}
    on:touchend={handle_draw_end}
    on:touchcancel={handle_draw_end}
    on:blur={handle_draw_end}
    on:click|stopPropagation
    style=" z-index: 15"
  />
</div>

<style>
  canvas {
    display: block;
    position: absolute;
    top: 0;
    right: 0;
    bottom: 0;
    left: 0;
    margin: auto;
  }

  .wrap {
    position: relative;
    width: var(--size-full);
    height: var(--size-full);
    touch-action: none;
  }
</style>
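A note on the prompt rows produced by handle_draw_end above: each interaction becomes six numbers in natural-image pixel coordinates. A plain click is stored as (x, y, label, 0, 0, 4), where label is 1 for a left-button (foreground) click and 0 for any other button (background), while a dragged box is stored as (x1, y1, 2, x2, y2, 3). The Python sketch below is not part of the uploaded files (split_prompts is a hypothetical helper); it just shows one way a consumer such as a SAM-style predictor could split these rows back into point and box prompts.

# Minimal sketch (illustrative only) for splitting BoxDrawer's 6-number rows.
import numpy as np

def split_prompts(rows):
    rows = np.asarray(rows, dtype=np.float32).reshape(-1, 6)
    is_box = rows[:, 2] == 2                 # label1 == 2 marks a box row
    boxes = rows[is_box][:, [0, 1, 3, 4]]    # (x1, y1, x2, y2)
    clicks = rows[~is_box]
    point_coords = clicks[:, :2]             # (x, y) in natural-image pixels
    point_labels = clicks[:, 2]              # 1 = foreground, 0 = background
    return point_coords, point_labels, boxes

coords, labels, boxes = split_prompts([[120, 80, 1, 0, 0, 4],
                                       [40, 30, 2, 200, 160, 3]])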
src/frontend/shared/ClearImage.svelte
ADDED
@@ -0,0 +1,48 @@
<script lang="ts">
  import { createEventDispatcher } from "svelte";
  import { IconButton } from "@gradio/atoms";
  import { Undo, Erase, Clear } from "@gradio/icons";

  const dispatch = createEventDispatcher();
</script>

<div>
  <IconButton
    Icon={Undo}
    label="Remove Last Box"
    on:click={(event) => {
      dispatch("remove_box");
      event.stopPropagation();
    }}
  />

  <IconButton
    Icon={Erase}
    label="Remove All Boxes"
    on:click={(event) => {
      dispatch("remove_boxes");
      event.stopPropagation();
    }}
  />

  <IconButton
    Icon={Clear}
    label="Remove Image"
    on:click={(event) => {
      dispatch("remove_image");
      event.stopPropagation();
    }}
  />
</div>

<style>
  div {
    display: flex;
    position: absolute;
    top: var(--size-2);
    right: var(--size-2);
    justify-content: flex-end;
    gap: var(--spacing-sm);
    z-index: var(--layer-5);
  }
</style>
src/frontend/shared/Image.svelte
ADDED
@@ -0,0 +1,15 @@
<script lang="ts">
  import type { HTMLImgAttributes } from "svelte/elements";
  type $$Props = HTMLImgAttributes;

  import { resolve_wasm_src } from "@gradio/wasm/svelte";

  export let src: HTMLImgAttributes["src"] = undefined;
</script>

{#await resolve_wasm_src(src) then resolved_src}
  <!-- svelte-ignore a11y-missing-attribute -->
  <img src={resolved_src} {...$$restProps} />
{:catch error}
  <p style="color: red;">{error.message}</p>
{/await}
src/frontend/shared/ImagePreview.svelte
ADDED
@@ -0,0 +1,88 @@
<script lang="ts">
  import { createEventDispatcher } from "svelte";
  import type { SelectData } from "@gradio/utils";
  import { uploadToHuggingFace } from "@gradio/utils";
  import { BlockLabel, Empty, IconButton, ShareButton } from "@gradio/atoms";
  import { Download } from "@gradio/icons";
  import { get_coordinates_of_clicked_image } from "./utils";

  import { Image } from "@gradio/icons";
  import { type FileData } from "@gradio/client";
  import type { I18nFormatter } from "@gradio/utils";

  export let value: null | FileData;
  export let label: string | undefined = undefined;
  export let show_label: boolean;
  export let show_download_button = true;
  export let selectable = false;
  export let show_share_button = false;
  export let i18n: I18nFormatter;

  const dispatch = createEventDispatcher<{
    change: string;
    select: SelectData;
  }>();

  const handle_click = (evt: MouseEvent): void => {
    let coordinates = get_coordinates_of_clicked_image(evt);
    if (coordinates) {
      dispatch("select", { index: coordinates, value: null });
    }
  };
</script>

<BlockLabel {show_label} Icon={Image} label={label || i18n("image.image")} />
{#if value === null || !value.url}
  <Empty unpadded_box={true} size="large"><Image /></Empty>
{:else}
  <div class="icon-buttons">
    {#if show_download_button}
      <a
        href={value.url}
        target={window.__is_colab__ ? "_blank" : null}
        download={value.orig_name || "image"}
      >
        <IconButton Icon={Download} label={i18n("common.download")} />
      </a>
    {/if}
    {#if show_share_button}
      <ShareButton
        {i18n}
        on:share
        on:error
        formatter={async (value) => {
          if (!value) return "";
          let url = await uploadToHuggingFace(value, "base64");
          return `<img src="${url}" />`;
        }}
        {value}
      />
    {/if}
  </div>
  <button on:click={handle_click}>
    <img src={value.url} alt="" class:selectable loading="lazy" />
  </button>
{/if}

<style>
  img,
  button {
    width: var(--size-full);
    height: var(--size-full);
    object-fit: contain;
    display: block;
    border-radius: var(--radius-lg);
  }

  .selectable {
    cursor: crosshair;
  }

  .icon-buttons {
    display: flex;
    position: absolute;
    top: 6px;
    right: 6px;
    gap: var(--size-1);
  }
</style>
src/frontend/shared/ImageUploader.svelte
ADDED
@@ -0,0 +1,192 @@
<script lang="ts">
  import { createEventDispatcher } from "svelte";
  import { BlockLabel } from "@gradio/atoms";
  import { Image } from "@gradio/icons";
  import type { I18nFormatter } from "@gradio/utils";
  import { get_coordinates_of_clicked_image } from "./utils";
  import { ImagePaste, Upload as UploadIcon } from "@gradio/icons";
  import { Toolbar, IconButton } from "@gradio/atoms";

  import { Upload } from "@gradio/upload";
  import { type FileData, normalise_file } from "@gradio/client";
  import ClearImage from "./ClearImage.svelte";
  import BoxDrawer from "./BoxDrawer.svelte";

  const dispatch = createEventDispatcher();
  let box_drawer: BoxDrawer;

  export let value: null | FileData;
  export let points: null | number[][6];
  export let label: string | undefined = undefined;
  export let show_label: boolean;

  function handle_image_load(event: Event) {
    const element = event.currentTarget as HTMLImageElement;
    box_drawer.width = element.width;
    box_drawer.height = element.height;
    box_drawer.natural_width = element.naturalWidth;
    box_drawer.natural_height = element.naturalHeight;
    box_drawer.resize_canvas();
  }

  function handle_points_change({ detail }: { detail: number[][6] }) {
    points = detail;
    dispatch("points_change", detail);
  }

  export let sources: ("clipboard" | "upload")[] = ["upload", "clipboard"];
  export let streaming = false;
  export let root: string;
  export let i18n: I18nFormatter;

  let upload: Upload;
  let uploading = false;
  export let active_tool: "webcam" | null = null;

  function handle_upload({ detail }: CustomEvent<FileData>): void {
    value = normalise_file(detail, root, null);
    dispatch("upload", detail);
  }

  $: if (uploading) value = null;
  $: value && !value.url && (value = normalise_file(value, root, null));

  let dragging = false;
  $: dispatch("drag", dragging);

  function handle_click(evt: MouseEvent): void {
    let coordinates = get_coordinates_of_clicked_image(evt);
    if (coordinates) {
      dispatch("select", { index: coordinates, value: null });
    }
  }

  const sources_meta = {
    upload: {
      icon: UploadIcon,
      label: i18n("Upload"),
      order: 0,
    },
    clipboard: {
      icon: ImagePaste,
      label: i18n("Paste"),
      order: 2,
    },
  };

  $: sources_list = sources.sort(
    (a, b) => sources_meta[a].order - sources_meta[b].order,
  );

  async function handle_toolbar(
    source: (typeof sources)[number],
  ): Promise<void> {
    switch (source) {
      case "clipboard":
        navigator.clipboard.read().then(async (items) => {
          for (let i = 0; i < items.length; i++) {
            const type = items[i].types.find((t) => t.startsWith("image/"));
            if (type) {
              value = null;
              items[i].getType(type).then(async (blob) => {
                const f = await upload.load_files([
                  new File([blob], `clipboard.${type.replace("image/", "")}`),
                ]);
                value = f?.[0] || null;
              });
              break;
            }
          }
        });
        break;
      case "upload":
        upload.open_file_upload();
        break;
      default:
        break;
    }
  }
</script>

<BlockLabel {show_label} Icon={Image} label={label || "Image"} />

<div data-testid="image" class="image-container">
  {#if value?.url}
    <ClearImage
      on:remove_box={() => {
        box_drawer.undo();
      }}
      on:remove_boxes={() => {
        box_drawer.clear();
      }}
      on:remove_image={() => {
        value = null;
        dispatch("clear");
      }}
    />
  {/if}
  <div class="upload-container">
    <Upload
      hidden={value !== null || active_tool === "webcam"}
      bind:this={upload}
      bind:uploading
      bind:dragging
      filetype="image/*"
      on:load={handle_upload}
      on:error
      {root}
      disable_click={!sources.includes("upload")}
    >
      {#if value === null && !active_tool}
        <slot />
      {/if}
    </Upload>
    {#if value !== null && !streaming}
      <!-- svelte-ignore a11y-click-events-have-key-events-->
      <!-- svelte-ignore a11y-no-noninteractive-element-interactions-->
      <img
        src={value.url}
        alt={value.alt_text}
        on:click={handle_click}
        on:load={handle_image_load}
      />
      <BoxDrawer bind:this={box_drawer} on:change={handle_points_change} />
    {/if}
  </div>
  {#if sources.length > 1 || sources.includes("clipboard")}
    <Toolbar show_border={!value?.url}>
      {#each sources_list as source}
        <IconButton
          on:click={() => handle_toolbar(source)}
          Icon={sources_meta[source].icon}
          size="large"
          label="{source}-image-toolbar-btn"
          padded={false}
        />
      {/each}
    </Toolbar>
  {/if}
</div>

<style>
  img {
    width: var(--size-full);
    height: var(--size-full);
  }

  .upload-container {
    height: 100%;
    flex-shrink: 1;
    max-height: 100%;
  }

  .image-container {
    display: flex;
    height: 100%;
    flex-direction: column;
    justify-content: center;
    align-items: center;
    max-height: 100%;
  }
</style>
src/frontend/shared/utils.ts
ADDED
@@ -0,0 +1,24 @@
export const get_coordinates_of_clicked_image = (
  evt: MouseEvent
): [number, number] | null => {
  let image = evt.currentTarget as HTMLImageElement;

  const imageRect = image.getBoundingClientRect();
  const xScale = image.naturalWidth / imageRect.width;
  const yScale = image.naturalHeight / imageRect.height;
  if (xScale > yScale) {
    // Image is letterboxed vertically: it fills the element's width,
    // leaving empty bands above and below the drawn pixels.
    const displayed_height = image.naturalHeight / xScale;
    const y_offset = (imageRect.height - displayed_height) / 2;
    var x = Math.round((evt.clientX - imageRect.left) * xScale);
    var y = Math.round((evt.clientY - imageRect.top - y_offset) * xScale);
  } else {
    // Image is letterboxed horizontally: empty bands to the left and right.
    const displayed_width = image.naturalWidth / yScale;
    const x_offset = (imageRect.width - displayed_width) / 2;
    var x = Math.round((evt.clientX - imageRect.left - x_offset) * yScale);
    var y = Math.round((evt.clientY - imageRect.top) * yScale);
  }
  // Clicks on the letterbox bands fall outside the natural image and return null.
  if (x < 0 || x >= image.naturalWidth || y < 0 || y >= image.naturalHeight) {
    return null;
  }
  return [x, y];
};
src/pyproject.toml
ADDED
@@ -0,0 +1,43 @@
[build-system]
requires = [
  "hatchling",
  "hatch-requirements-txt",
  "hatch-fancy-pypi-readme>=22.5.0",
]
build-backend = "hatchling.build"

[project]
name = "gradio_image_prompter"
version = "0.1.0"
description = "A gradio component to upload images and process point/box prompts."
readme = "README.md"
license = "apache-2.0"
requires-python = ">=3.8"
url = "https://github.com/PhyscalX/gradio-image-prompter"
authors = [{ name = "PhyscalX", email = "neopenx@gmail.com" }]
keywords = ["gradio-custom-component", "gradio-template-Image"]
# Add dependencies here
dependencies = ["gradio>=4.0,<5.0"]
classifiers = [
  'Development Status :: 3 - Alpha',
  'License :: OSI Approved :: Apache Software License',
  'Operating System :: OS Independent',
  'Programming Language :: Python :: 3',
  'Programming Language :: Python :: 3 :: Only',
  'Programming Language :: Python :: 3.8',
  'Programming Language :: Python :: 3.9',
  'Programming Language :: Python :: 3.10',
  'Programming Language :: Python :: 3.11',
  'Topic :: Scientific/Engineering',
  'Topic :: Scientific/Engineering :: Artificial Intelligence',
  'Topic :: Scientific/Engineering :: Visualization',
]

[project.optional-dependencies]
dev = ["build", "twine"]

[tool.hatch.build]
artifacts = ["/backend/gradio_image_prompter/templates", "*.pyi", "backend/gradio_image_prompter/templates"]

[tool.hatch.build.targets.wheel]
packages = ["/backend/gradio_image_prompter"]
structures/__init__.py
ADDED
File without changes
structures/bounding_box.py
ADDED
@@ -0,0 +1,323 @@
import torch

# transpose
FLIP_LEFT_RIGHT = 0
FLIP_TOP_BOTTOM = 1


class BoxList(object):
    """
    This class represents a set of bounding boxes.
    The bounding boxes are represented as a Nx4 Tensor.
    In order to uniquely determine the bounding boxes with respect
    to an image, we also store the corresponding image dimensions.
    They can contain extra information that is specific to each bounding box,
    such as labels.
    """

    def __init__(self, bbox, image_size, mode="xyxy"):
        device = bbox.device if isinstance(bbox, torch.Tensor) else torch.device("cpu")
        # only do as_tensor if it isn't a "no-op", because it hurts JIT tracing
        if (not isinstance(bbox, torch.Tensor)
                or bbox.dtype != torch.float32 or bbox.device != device):
            bbox = torch.as_tensor(bbox, dtype=torch.float32, device=device)
        if bbox.ndimension() == 1 and bbox.size(-1) == 4:
            bbox = bbox.unsqueeze(0)
        if bbox.ndimension() != 2:
            raise ValueError(
                "bbox should have 2 dimensions, got {}".format(bbox.ndimension())
            )
        if bbox.size(-1) != 4:
            raise ValueError(
                "last dimension of bbox should have a "
                "size of 4, got {}".format(bbox.size(-1))
            )
        if mode not in ("xyxy", "xywh"):
            raise ValueError("mode should be 'xyxy' or 'xywh'")

        self.bbox = bbox
        self.size = image_size  # (image_width, image_height)
        self.mode = mode
        self.extra_fields = {}

    # note: _jit_wrap/_jit_unwrap only work if the keys and the sizes don't change in between
    def _jit_unwrap(self):
        return (self.bbox,) + tuple(f for f in (self.get_field(field)
                                                for field in sorted(self.fields()))
                                    if isinstance(f, torch.Tensor))

    def _jit_wrap(self, input_stream):
        self.bbox = input_stream[0]
        num_consumed = 1
        for f in sorted(self.fields()):
            if isinstance(self.extra_fields[f], torch.Tensor):
                self.extra_fields[f] = input_stream[num_consumed]
                num_consumed += 1
        return self, input_stream[num_consumed:]

    def add_field(self, field, field_data):
        self.extra_fields[field] = field_data

    def get_field(self, field):
        return self.extra_fields[field]

    def has_field(self, field):
        return field in self.extra_fields

    def fields(self):
        return list(self.extra_fields.keys())

    def _copy_extra_fields(self, bbox):
        for k, v in bbox.extra_fields.items():
            self.extra_fields[k] = v

    def convert(self, mode):
        if mode not in ("xyxy", "xywh"):
            raise ValueError("mode should be 'xyxy' or 'xywh'")
        if mode == self.mode:
            return self
        # we only have two modes, so don't need to check
        # self.mode
        xmin, ymin, xmax, ymax = self._split_into_xyxy()
        if mode == "xyxy":
            bbox = torch.cat((xmin, ymin, xmax, ymax), dim=-1)
            bbox = BoxList(bbox, self.size, mode=mode)
        else:
            TO_REMOVE = 1
            # NOTE: explicitly specify dim to avoid tracing error in GPU
            bbox = torch.cat(
                (xmin, ymin, xmax - xmin + TO_REMOVE, ymax - ymin + TO_REMOVE), dim=1
            )
            bbox = BoxList(bbox, self.size, mode=mode)
        bbox._copy_extra_fields(self)
        return bbox

    def _split_into_xyxy(self):
        if self.mode == "xyxy":
            xmin, ymin, xmax, ymax = self.bbox.split(1, dim=-1)
            return xmin, ymin, xmax, ymax
        elif self.mode == "xywh":
            TO_REMOVE = 1
            xmin, ymin, w, h = self.bbox.split(1, dim=-1)
            return (
                xmin,
                ymin,
                xmin + (w - TO_REMOVE).clamp(min=0),
                ymin + (h - TO_REMOVE).clamp(min=0),
            )
        else:
            raise RuntimeError("Should not be here")

    def resize(self, size, *args, **kwargs):
        """
        Returns a resized copy of this bounding box

        :param size: The requested size in pixels, as a 2-tuple:
            (width, height).
        """

        ratios = tuple(float(s) / float(s_orig) for s, s_orig in zip(size, self.size))
        if ratios[0] == ratios[1]:
            ratio = ratios[0]
            scaled_box = self.bbox * ratio
            bbox = BoxList(scaled_box, size, mode=self.mode)
            # bbox._copy_extra_fields(self)
            for k, v in self.extra_fields.items():
                if not isinstance(v, torch.Tensor):
                    v = v.resize(size, *args, **kwargs)
                bbox.add_field(k, v)
            return bbox

        ratio_width, ratio_height = ratios
        xmin, ymin, xmax, ymax = self._split_into_xyxy()
        scaled_xmin = xmin * ratio_width
        scaled_xmax = xmax * ratio_width
        scaled_ymin = ymin * ratio_height
        scaled_ymax = ymax * ratio_height
        scaled_box = torch.cat(
            (scaled_xmin, scaled_ymin, scaled_xmax, scaled_ymax), dim=-1
        )
        bbox = BoxList(scaled_box, size, mode="xyxy")
        # bbox._copy_extra_fields(self)
        for k, v in self.extra_fields.items():
            if not isinstance(v, torch.Tensor):
                v = v.resize(size, *args, **kwargs)
            bbox.add_field(k, v)

        return bbox.convert(self.mode)

    def transpose(self, method):
        """
        Transpose bounding box (flip or rotate in 90 degree steps)
        :param method: One of :py:attr:`PIL.Image.FLIP_LEFT_RIGHT`,
          :py:attr:`PIL.Image.FLIP_TOP_BOTTOM`, :py:attr:`PIL.Image.ROTATE_90`,
          :py:attr:`PIL.Image.ROTATE_180`, :py:attr:`PIL.Image.ROTATE_270`,
          :py:attr:`PIL.Image.TRANSPOSE` or :py:attr:`PIL.Image.TRANSVERSE`.
        """
        if method not in (FLIP_LEFT_RIGHT, FLIP_TOP_BOTTOM):
            raise NotImplementedError(
                "Only FLIP_LEFT_RIGHT and FLIP_TOP_BOTTOM implemented"
            )

        image_width, image_height = self.size
        xmin, ymin, xmax, ymax = self._split_into_xyxy()
        if method == FLIP_LEFT_RIGHT:
            TO_REMOVE = 1
            transposed_xmin = image_width - xmax - TO_REMOVE
            transposed_xmax = image_width - xmin - TO_REMOVE
            transposed_ymin = ymin
            transposed_ymax = ymax
        elif method == FLIP_TOP_BOTTOM:
            transposed_xmin = xmin
            transposed_xmax = xmax
            transposed_ymin = image_height - ymax
            transposed_ymax = image_height - ymin

        transposed_boxes = torch.cat(
            (transposed_xmin, transposed_ymin, transposed_xmax, transposed_ymax), dim=-1
        )
        bbox = BoxList(transposed_boxes, self.size, mode="xyxy")
        # bbox._copy_extra_fields(self)
        for k, v in self.extra_fields.items():
            if not isinstance(v, torch.Tensor):
                v = v.transpose(method)
            bbox.add_field(k, v)
        return bbox.convert(self.mode)

    def crop(self, box):
        """
        Crops a rectangular region from this bounding box. The box is a
        4-tuple defining the left, upper, right, and lower pixel
        coordinate.
        """
        xmin, ymin, xmax, ymax = self._split_into_xyxy()
        w, h = box[2] - box[0], box[3] - box[1]
        cropped_xmin = (xmin - box[0]).clamp(min=0, max=w)
        cropped_ymin = (ymin - box[1]).clamp(min=0, max=h)
        cropped_xmax = (xmax - box[0]).clamp(min=0, max=w)
        cropped_ymax = (ymax - box[1]).clamp(min=0, max=h)

        # TODO should I filter empty boxes here?
        cropped_box = torch.cat(
            (cropped_xmin, cropped_ymin, cropped_xmax, cropped_ymax), dim=-1
        )
        bbox = BoxList(cropped_box, (w, h), mode="xyxy")
        # bbox._copy_extra_fields(self)
        for k, v in self.extra_fields.items():
            if not isinstance(v, torch.Tensor):
                v = v.crop(box)
            bbox.add_field(k, v)
        return bbox.convert(self.mode)

    # Tensor-like methods

    def to(self, device):
        bbox = BoxList(self.bbox.to(device), self.size, self.mode)
        for k, v in self.extra_fields.items():
            if hasattr(v, "to"):
                v = v.to(device)
            bbox.add_field(k, v)
        return bbox

    def __getitem__(self, item):
        bbox = BoxList(self.bbox[item], self.size, self.mode)
        for k, v in self.extra_fields.items():
            bbox.add_field(k, v[item])
        return bbox

    def __len__(self):
        return self.bbox.shape[0]

    def clip_to_image(self, remove_empty=True):
        TO_REMOVE = 1
        x1s = self.bbox[:, 0].clamp(min=0, max=self.size[0] - TO_REMOVE)
        y1s = self.bbox[:, 1].clamp(min=0, max=self.size[1] - TO_REMOVE)
        x2s = self.bbox[:, 2].clamp(min=0, max=self.size[0] - TO_REMOVE)
        y2s = self.bbox[:, 3].clamp(min=0, max=self.size[1] - TO_REMOVE)
        self.bbox = torch.stack((x1s, y1s, x2s, y2s), dim=-1)
        if remove_empty:
            box = self.bbox
            keep = (box[:, 3] > box[:, 1]) & (box[:, 2] > box[:, 0])
            return self[keep]
        return self

    def area(self):
        if self.mode == 'xyxy':
            TO_REMOVE = 1
            box = self.bbox
            area = (box[:, 2] - box[:, 0] + TO_REMOVE) * (box[:, 3] - box[:, 1] + TO_REMOVE)
        elif self.mode == 'xywh':
            box = self.bbox
            area = box[:, 2] * box[:, 3]
        else:
            raise RuntimeError("Should not be here")

        return area

    def copy_with_fields(self, fields):
        bbox = BoxList(self.bbox, self.size, self.mode)
        if not isinstance(fields, (list, tuple)):
            fields = [fields]
        for field in fields:
            bbox.add_field(field, self.get_field(field))
        return bbox

    def __repr__(self):
        s = self.__class__.__name__ + "("
        s += "num_boxes={}, ".format(len(self))
        s += "image_width={}, ".format(self.size[0])
        s += "image_height={}, ".format(self.size[1])
        s += "mode={})".format(self.mode)
        return s

    @staticmethod
    def concate_box_list(list_of_boxes):
        boxes = torch.cat([i.bbox for i in list_of_boxes], dim=0)
        extra_fields_keys = list(list_of_boxes[0].extra_fields.keys())
        extra_fields = {}
        for key in extra_fields_keys:
            extra_fields[key] = torch.cat([i.extra_fields[key] for i in list_of_boxes], dim=0)

        final = list_of_boxes[0].copy_with_fields(extra_fields_keys)

        final.bbox = boxes
        final.extra_fields = extra_fields
        return final


@torch.jit.unused
def _onnx_clip_boxes_to_image(boxes, size):
    # type: (Tensor, Tuple[int, int])
    """
    Clip boxes so that they lie inside an image of size `size`.
    Clip's min/max are traced as constants. Use torch.min/max to WAR this issue.
    Arguments:
        boxes (Tensor[N, 4]): boxes in (x1, y1, x2, y2) format
        size (Tuple[height, width]): size of the image
    Returns:
        clipped_boxes (Tensor[N, 4])
    """
    TO_REMOVE = 1
    device = boxes.device
    dim = boxes.dim()
    boxes_x = boxes[..., 0::2]
    boxes_y = boxes[..., 1::2]

    boxes_x = torch.max(boxes_x, torch.tensor(0., dtype=torch.float).to(device))
    boxes_x = torch.min(boxes_x, torch.tensor(size[1] - TO_REMOVE, dtype=torch.float).to(device))
    boxes_y = torch.max(boxes_y, torch.tensor(0., dtype=torch.float).to(device))
    boxes_y = torch.min(boxes_y, torch.tensor(size[0] - TO_REMOVE, dtype=torch.float).to(device))

    clipped_boxes = torch.stack((boxes_x, boxes_y), dim=dim)
    return clipped_boxes.reshape(boxes.shape)


if __name__ == "__main__":
    bbox = BoxList([[0, 0, 10, 10], [0, 0, 5, 5]], (10, 10))
    s_bbox = bbox.resize((5, 5))
    print(s_bbox)
    print(s_bbox.bbox)

    t_bbox = bbox.transpose(0)
    print(t_bbox)
    print(t_bbox.bbox)
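As a quick illustration of the xyxy/xywh convention above (a hedged sketch, not part of the file): widths and heights carry the inclusive-pixel TO_REMOVE = 1 correction inherited from maskrcnn-benchmark, so a box spanning pixel columns 0 through 10 has width 11 in xywh mode.

# Illustrative only: mode conversion and area under the TO_REMOVE convention.
import torch

box = BoxList(torch.tensor([[0., 0., 10., 10.]]), image_size=(20, 20), mode="xyxy")
print(box.convert("xywh").bbox)  # tensor([[ 0.,  0., 11., 11.]]) -- inclusive pixel count
print(box.area())                # tensor([121.]) = (10 - 0 + 1) ** 2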
structures/grasp_box.py
ADDED
@@ -0,0 +1,127 @@
import numpy as np


class GraspCoder:
    """
    This class encodes grasp annotations, similar to the BoxCoder class.
    It is supposed to support the following functions:
    1. Encode grasp annotations:
        (x1, y1, x2, y2, x3, y3, x4, y4) -> (x_center, y_center, width, height, sine(theta))
    2. Decode grasp annotations:
        (x_center, y_center, width, height, sine(theta)) -> (x1, y1, x2, y2, x3, y3, x4, y4)
    3. Resize grasp annotations when resizing the image
    4. Transform grasp annotations according to various image augmentations
    One GraspCoder instance should encode the annotations of one image only.
    """

    def __init__(self, height, width, grasp_annos, grasp_annos_reformat=None):
        """
        Args:
            height: height of image
            width: width of image
            grasp_annos: list of numpy.arrays, each of length 8, in format of (x1, y1, x2, y2, x3, y3, x4, y4)
        """
        self.height = height
        self.width = width
        self.grasp_annos = grasp_annos
        self.grasp_annos_reformat = grasp_annos_reformat

    def __len__(self):
        return len(self.grasp_annos)

    def encode(self, normalize=True):
        """
        (x1, y1, x2, y2, x3, y3, x4, y4) -> (x_center, y_center, width, height, sine(theta))
        Args:
            normalize -> bool: return values normalized to 0~1 or not
        Returns:
            grasp_annos_reformat: List of numpy.array
        """
        grasp_annos_reformat = []
        for grasp in self.grasp_annos:
            x1, y1, x2, y2, x3, y3, x4, y4 = tuple(grasp)
            if (x1 + x2) < (x3 + x4):
                x1, y1, x2, y2, x3, y3, x4, y4 = x3, y3, x4, y4, x1, y1, x2, y2
            x_center = (x1 + x3) / 2
            y_center = (y1 + y3) / 2
            width = np.sqrt((x1 - x2) ** 2 + (y1 - y2) ** 2)
            height = np.sqrt((x2 - x3) ** 2 + (y2 - y3) ** 2)
            sine = ((y1 + y2) / 2 - y_center) / (height / 2)
            if normalize:
                x_center /= self.width
                y_center /= self.height
                width /= self.width
                height /= self.height
                sine = (sine + 1) / 2
            grasp_annos_reformat.append(np.array([x_center, y_center, width, height, sine]))
        self.grasp_annos_reformat = grasp_annos_reformat
        return grasp_annos_reformat

    def decode(self):
        """
        Decode normalized grasp_annos_reformat; this overwrites self.grasp_annos and returns the overwritten value.
        (x_center, y_center, width, height, sine(theta)) -> (x1, y1, x2, y2, x3, y3, x4, y4)
        Returns:
            grasp_annos: List of numpy.array
        """
        grasp_annos = []
        for grasp in self.grasp_annos_reformat:
            x_center, y_center, width, height, sine = tuple(grasp)
            x_center *= self.width
            y_center *= self.height
            width *= self.width
            height *= self.height
            sine = sine * 2 - 1
            cosine = np.sqrt(1 - sine ** 2)
            angle = np.arcsin(sine)
            x1 = x_center + cosine * height / 2 + sine * width / 2
            x2 = x_center + cosine * height / 2 - sine * width / 2
            y1 = y_center + sine * height / 2 - cosine * width / 2
            y2 = y_center + sine * height / 2 + cosine * width / 2
            x3 = x_center * 2 - x1
            x4 = x_center * 2 - x2
            y3 = y_center * 2 - y1
            y4 = y_center * 2 - y2
            grasp_annos.append(np.array([x1, y1, x2, y2, x3, y3, x4, y4]))
        self.grasp_annos = grasp_annos
        return grasp_annos

    def resize(self, new_size):
        """
        Resize the grasp annotations according to the resized image
        Args:
            new_size -> Tuple: (new_width, new_height)

        Returns:
            self
        """
        new_width, new_height = new_size
        grasp_annos = self.grasp_annos
        old_height, old_width = self.height, self.width
        resized_grasp_annos = []
        for grasp in grasp_annos:
            grasp[0::2] = grasp[0::2] / old_width * new_width
            grasp[1::2] = grasp[1::2] / old_height * new_height
            resized_grasp_annos.append(grasp)
        self.grasp_annos = resized_grasp_annos
        self.height, self.width = new_height, new_width

        return self

    def transpose(self, axis):
        """
        For horizontal/vertical flip
        Args:
            axis: 0 represents the X axis, 1 represents the Y axis

        Returns:
            self
        """
        grasp_annos = self.grasp_annos
        flipped_grasp_annos = []
        if axis == 0:
            for grasp in grasp_annos:
                grasp[0::2] = self.width - grasp[0::2]
                flipped_grasp_annos.append(grasp)
        elif axis == 1:
            for grasp in grasp_annos:
                grasp[1::2] = self.height - grasp[1::2]
                flipped_grasp_annos.append(grasp)
        self.grasp_annos = flipped_grasp_annos
        return self
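A short round-trip sketch for GraspCoder (illustrative values, not from the repo): encode() maps the four rectangle corners to the normalized 5-number (x_c, y_c, w, h, sine) form, and decode() recovers the corners. For the axis-aligned rectangle below, sine encodes to 0.5 (i.e. angle 0 after normalization) and decode() reproduces the input exactly.

# Illustrative only: encode/decode round trip on an axis-aligned grasp
# rectangle centred at (50, 40) in a 100x80 image.
import numpy as np

rect = np.array([60., 35., 60., 45., 40., 45., 40., 35.])  # (x1, y1, ..., x4, y4)
coder = GraspCoder(height=80, width=100, grasp_annos=[rect])
encoded = coder.encode(normalize=True)  # [[0.5, 0.5, 0.1, 0.25, 0.5]]
decoded = coder.decode()                # [[60., 35., 60., 45., 40., 45., 40., 35.]]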
structures/image_list.py
ADDED
@@ -0,0 +1,67 @@
import torch


class ImageList(object):
    """
    Structure that holds a list of images (of possibly
    varying sizes) as a single tensor.
    This works by padding the images to the same size,
    and storing in a field the original sizes of each image
    """

    def __init__(self, tensors, image_sizes):
        """
        Arguments:
            tensors (tensor)
            image_sizes (list[tuple[int, int]])
        """
        self.tensors = tensors
        self.image_sizes = image_sizes

    def to(self, *args, **kwargs):
        cast_tensor = self.tensors.to(*args, **kwargs)
        return ImageList(cast_tensor, self.image_sizes)


def to_image_list(tensors, size_divisible=0):
    """
    tensors can be an ImageList, a torch.Tensor or
    an iterable of Tensors. It can't be a numpy array.
    When tensors is an iterable of Tensors, it pads
    the Tensors with zeros so that they have the same shape.
    """
    if isinstance(tensors, torch.Tensor) and size_divisible > 0:
        tensors = [tensors]

    if isinstance(tensors, ImageList):
        return tensors
    elif isinstance(tensors, torch.Tensor):
        # single tensor shape can be inferred
        assert tensors.dim() == 4
        image_sizes = [tensor.shape[-2:] for tensor in tensors]
        return ImageList(tensors, image_sizes)
    elif isinstance(tensors, (tuple, list)):
        max_size = tuple(max(s) for s in zip(*[img.shape for img in tensors]))

        # TODO Ideally, just remove this and let the model handle arbitrary
        # input sizes
        if size_divisible > 0:
            import math

            stride = size_divisible
            max_size = list(max_size)
            max_size[1] = int(math.ceil(max_size[1] / stride) * stride)
            max_size[2] = int(math.ceil(max_size[2] / stride) * stride)
            max_size = tuple(max_size)

        batch_shape = (len(tensors),) + max_size
        batched_imgs = tensors[0].new(*batch_shape).zero_()
        for img, pad_img in zip(tensors, batched_imgs):
            pad_img[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img)

        image_sizes = [im.shape[-2:] for im in tensors]

        return ImageList(batched_imgs, image_sizes)
    else:
        raise TypeError("Unsupported type for to_image_list: {}".format(type(tensors)))
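A minimal usage sketch (illustrative values): two CHW images of different sizes are zero-padded into one (N, C, H_max, W_max) batch, with size_divisible rounding H and W up to a multiple of the stride so downsampling layers divide evenly.

# Illustrative only: batch two differently-sized images with stride-32 padding.
import torch

imgs = [torch.rand(3, 37, 50), torch.rand(3, 44, 41)]
batch = to_image_list(imgs, size_divisible=32)
print(batch.tensors.shape)  # torch.Size([2, 3, 64, 64])
print(batch.image_sizes)    # original (H, W) per image: [(37, 50), (44, 41)]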
structures/segmentation_mask.py
ADDED
@@ -0,0 +1,298 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import cv2
|
2 |
+
import torch
|
3 |
+
import numpy as np
|
4 |
+
import pycocotools.mask as mask_utils
|
5 |
+
|
6 |
+
# transpose
|
7 |
+
FLIP_LEFT_RIGHT = 0
|
8 |
+
FLIP_TOP_BOTTOM = 1
|
9 |
+
|
10 |
+
|
11 |
+
class MaskList(object):
|
12 |
+
"""
|
13 |
+
This class is unfinished and not meant for use yet
|
14 |
+
It is supposed to contain the binary masks for all instances in a list of 2D tensors (H, W)
|
15 |
+
"""
|
16 |
+
|
17 |
+
def __init__(self, masks, size, mode):
|
18 |
+
assert(isinstance(masks, list))
|
19 |
+
assert(mode in ['mask', 'rle'])
|
20 |
+
self.masks = masks
|
21 |
+
self.size = size # (image_width, image_height)
|
22 |
+
self.mode = mode
|
23 |
+
|
24 |
+
def transpose(self, method):
|
25 |
+
assert (self.mode == "mask"), "RLE masks cannot be transposed. Please convert them to binary first."
|
26 |
+
if method not in (FLIP_LEFT_RIGHT, FLIP_TOP_BOTTOM):
|
27 |
+
raise NotImplementedError(
|
28 |
+
"Only FLIP_LEFT_RIGHT and FLIP_TOP_BOTTOM implemented"
|
29 |
+
)
|
30 |
+
|
31 |
+
# width, height = self.size
|
32 |
+
masks = np.array(self.masks)
|
33 |
+
if masks.ndim == 2:
|
34 |
+
masks = np.expand_dims(masks, axis=0)
|
35 |
+
if method == FLIP_LEFT_RIGHT:
|
36 |
+
masks = np.flip(masks, axis=2)
|
37 |
+
elif method == FLIP_TOP_BOTTOM:
|
38 |
+
masks = np.flip(masks, axis=1)
|
39 |
+
flipped_masks = np.split(masks, masks.shape[0])
|
40 |
+
flipped_masks = [mask.squeeze(0) for mask in flipped_masks]
|
41 |
+
return MaskList(flipped_masks, self.size, self.mode)
|
42 |
+
|
43 |
+
    def resize(self, size, *args, **kwargs):
        """
        Resize the binary masks.
        :param size: tuple, (image_width, image_height)
        :return: a resized MaskList in "mask" mode
        """
        assert (self.mode == "mask"), "RLE masks cannot be resized. Please convert them to binary first."
        # stack instances as channels, (N, H, W) -> (H, W, N), so cv2.resize
        # can resize all masks at once
        cat_mask = np.array(self.masks)
        cat_mask = cat_mask.transpose(1, 2, 0)
        cat_mask = (cat_mask * 255).astype(np.uint8)
        resized_mask = cv2.resize(cat_mask, size)
        # cv2.resize drops the channel axis when there is a single mask
        if resized_mask.ndim == 2:
            resized_mask = np.expand_dims(resized_mask, axis=2)
        resized_mask = resized_mask.transpose(2, 0, 1)
        # threshold back to binary {0, 1}
        resized_mask = resized_mask.astype(int) // 255
        mask_list = np.split(resized_mask, resized_mask.shape[0])
        mask_list = [mask.squeeze(0) for mask in mask_list]
        return MaskList(mask_list, size, "mask")

    def pad(self, size):
        """
        Pad the binary masks to a new size. The new size must be at least as
        large as the original size in both dimensions.
        :param size: new image size, (image_width, image_height)
        :return: a padded MaskList in "mask" mode
        """
        assert (size[0] >= self.size[0] and size[1] >= self.size[1]), "New size must be larger than original size in all dimensions"
        cat_mask = np.array(self.masks)
        if cat_mask.ndim == 2:
            cat_mask = np.expand_dims(cat_mask, axis=0)
        # place the original masks in the top-left corner of a zero canvas
        padded_mask = np.zeros([len(self.masks), size[1], size[0]])
        padded_mask[:, :cat_mask.shape[1], :cat_mask.shape[2]] = cat_mask
        mask_list = np.split(padded_mask, padded_mask.shape[0])
        mask_list = [mask.squeeze(0) for mask in mask_list]
        return MaskList(mask_list, size, "mask")

    def convert(self, mode):
        """
        Convert the masks between mode "mask" and mode "rle".
        :param mode: target mode, "mask" or "rle"
        :return: a MaskList in the target mode
        """
        if mode == self.mode:
            return self
        elif mode == "rle" and self.mode == "mask":
            # use pycocotools to encode binary masks to RLE; encode expects
            # a Fortran-ordered (H, W, N) uint8 array
            rle_mask_list = mask_utils.encode(
                np.asfortranarray(np.array(self.masks).transpose(1, 2, 0).astype(np.uint8))
            )
            return MaskList(rle_mask_list, self.size, "rle")
        elif mode == "mask" and self.mode == "rle":
            # use pycocotools to decode RLE back to binary masks
            bimasks = mask_utils.decode(self.masks)
            mask_list = np.split(bimasks.transpose(2, 0, 1), bimasks.shape[2])
            mask_list = [mask.squeeze(0) for mask in mask_list]
            return MaskList(mask_list, self.size, "mask")
        else:
            raise ValueError("Unsupported conversion from {} to {}".format(self.mode, mode))

    def bbox(self, bbox_mode="xyxy"):
        """
        Generate a bounding box per instance from its binary mask.
        :param bbox_mode: only "xyxy" is implemented
        :return: a list of [x1, y1, x2, y2] boxes, one per mask
        """
        # minimal completion of what was a stub; assumes every mask has at
        # least one nonzero pixel
        assert self.mode == "mask" and bbox_mode == "xyxy"
        boxes = []
        for mask in self.masks:
            ys, xs = np.nonzero(mask)
            boxes.append([xs.min(), ys.min(), xs.max(), ys.max()])
        return boxes

    def __len__(self):
        return len(self.masks)

    def __repr__(self):
        s = self.__class__.__name__ + "("
        s += "num_masks={}, ".format(len(self))
        s += "image_width={}, ".format(self.size[0])
        s += "image_height={}, ".format(self.size[1])
        s += "mode={})".format(self.mode)
        return s

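# Illustrative round trip between the two MaskList modes (not part of the
# original file; mask values and size are made up). Encoding to RLE goes
# through pycocotools, and decoding recovers the same binary masks:
#
#   masks = [np.eye(4, dtype=np.uint8), np.ones((4, 4), dtype=np.uint8)]
#   ml = MaskList(masks, size=(4, 4), mode="mask")
#   rle = ml.convert("rle")        # compressed COCO-style RLE dicts
#   back = rle.convert("mask")     # binary masks again
#   assert np.array_equal(np.array(back.masks), np.array(masks))
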
class Polygons(object):
    """
    This class holds a set of polygons that represents a single instance
    of an object mask. The object can be represented as a set of polygons.
    """

    def __init__(self, polygons, size, mode):
        if isinstance(polygons, list):
            polygons = [torch.as_tensor(p, dtype=torch.float32) for p in polygons]
        elif isinstance(polygons, Polygons):
            polygons = polygons.polygons

        self.polygons = polygons
        self.size = size
        self.mode = mode

    def transpose(self, method):
        if method not in (FLIP_LEFT_RIGHT, FLIP_TOP_BOTTOM):
            raise NotImplementedError(
                "Only FLIP_LEFT_RIGHT and FLIP_TOP_BOTTOM implemented"
            )

        flipped_polygons = []
        width, height = self.size
        if method == FLIP_LEFT_RIGHT:
            dim = width
            idx = 0
        elif method == FLIP_TOP_BOTTOM:
            dim = height
            idx = 1

        for poly in self.polygons:
            p = poly.clone()
            TO_REMOVE = 1
            p[idx::2] = dim - poly[idx::2] - TO_REMOVE
            flipped_polygons.append(p)

        return Polygons(flipped_polygons, size=self.size, mode=self.mode)

    def crop(self, box):
        w, h = box[2] - box[0], box[3] - box[1]

        # TODO check if necessary
        w = max(w, 1)
        h = max(h, 1)

        cropped_polygons = []
        for poly in self.polygons:
            p = poly.clone()
            # shift coordinates into the crop's frame of reference
            p[0::2] = p[0::2] - box[0]  # .clamp(min=0, max=w)
            p[1::2] = p[1::2] - box[1]  # .clamp(min=0, max=h)
            cropped_polygons.append(p)

        return Polygons(cropped_polygons, size=(w, h), mode=self.mode)

    def resize(self, size, *args, **kwargs):
        ratios = tuple(float(s) / float(s_orig) for s, s_orig in zip(size, self.size))
        if ratios[0] == ratios[1]:
            ratio = ratios[0]
            scaled_polys = [p * ratio for p in self.polygons]
            return Polygons(scaled_polys, size, mode=self.mode)

        ratio_w, ratio_h = ratios
        scaled_polygons = []
        for poly in self.polygons:
            p = poly.clone()
            p[0::2] *= ratio_w
            p[1::2] *= ratio_h
            scaled_polygons.append(p)

        return Polygons(scaled_polygons, size=size, mode=self.mode)

    def convert(self, mode):
        width, height = self.size
        if mode == "mask":
            # rasterize the polygons with pycocotools: frPyObjects builds the
            # RLEs, merge unions them, decode yields the binary mask
            rles = mask_utils.frPyObjects(
                [p.detach().numpy() for p in self.polygons], height, width
            )
            rle = mask_utils.merge(rles)
            mask = mask_utils.decode(rle)
            mask = torch.from_numpy(mask)
            # TODO add squeeze?
            return mask
        raise NotImplementedError("Only conversion to 'mask' is implemented")

    def __repr__(self):
        s = self.__class__.__name__ + "("
        s += "num_polygons={}, ".format(len(self.polygons))
        s += "image_width={}, ".format(self.size[0])
        s += "image_height={}, ".format(self.size[1])
        s += "mode={})".format(self.mode)
        return s

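# Illustrative only (not part of the original file): rasterizing one square
# polygon to an 8x8 binary mask via Polygons.convert. Coordinates follow the
# COCO polygon format [x1, y1, x2, y2, ...]; the values are made up:
#
#   square = Polygons([[1.0, 1.0, 6.0, 1.0, 6.0, 6.0, 1.0, 6.0]],
#                     size=(8, 8), mode=None)
#   mask = square.convert("mask")  # torch.uint8 tensor of shape (8, 8)
#   print(mask.sum())              # number of pixels inside the square
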
class SegmentationMask(object):
    """
    This class stores the segmentations for all objects in the image
    """

    def __init__(self, polygons, size, mode=None):
        """
        Arguments:
            polygons: a list of lists of lists of numbers. The first
                level of the list corresponds to individual instances,
                the second level to all the polygons that compose the
                object, and the third level to the polygon coordinates.
        """
        assert isinstance(polygons, list)

        self.polygons = [Polygons(p, size, mode) for p in polygons]
        self.size = size
        self.mode = mode

    def transpose(self, method):
        if method not in (FLIP_LEFT_RIGHT, FLIP_TOP_BOTTOM):
            raise NotImplementedError(
                "Only FLIP_LEFT_RIGHT and FLIP_TOP_BOTTOM implemented"
            )

        flipped = []
        for polygon in self.polygons:
            flipped.append(polygon.transpose(method))
        return SegmentationMask(flipped, size=self.size, mode=self.mode)

    def crop(self, box):
        w, h = box[2] - box[0], box[3] - box[1]
        cropped = []
        for polygon in self.polygons:
            cropped.append(polygon.crop(box))
        return SegmentationMask(cropped, size=(w, h), mode=self.mode)

    def resize(self, size, *args, **kwargs):
        scaled = []
        for polygon in self.polygons:
            scaled.append(polygon.resize(size, *args, **kwargs))
        return SegmentationMask(scaled, size=size, mode=self.mode)

    def to(self, *args, **kwargs):
        return self

    def __getitem__(self, item):
        if isinstance(item, (int, slice)):
            selected_polygons = [self.polygons[item]]
        else:
            # advanced indexing on a single dimension
            selected_polygons = []
            if isinstance(item, torch.Tensor) and item.dtype == torch.bool:
                item = item.nonzero()
                item = item.squeeze(1) if item.numel() > 0 else item
                item = item.tolist()
            for i in item:
                selected_polygons.append(self.polygons[i])
        return SegmentationMask(selected_polygons, size=self.size, mode=self.mode)

    def __iter__(self):
        return iter(self.polygons)

    def __repr__(self):
        s = self.__class__.__name__ + "("
        s += "num_instances={}, ".format(len(self.polygons))
        s += "image_width={}, ".format(self.size[0])
        s += "image_height={})".format(self.size[1])
        return s
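
A minimal usage sketch for SegmentationMask (illustrative only; the coordinates, image sizes, and `structures.segmentation_mask` import path are assumptions): build it from COCO-style nested polygon lists, then flip, resize, and index it, with each operation delegating to the per-instance Polygons objects.

import torch
from structures.segmentation_mask import SegmentationMask, FLIP_LEFT_RIGHT

# two instances, one polygon each, on a 10x10 image
polys = [
    [[1.0, 1.0, 4.0, 1.0, 4.0, 4.0]],
    [[5.0, 5.0, 9.0, 5.0, 9.0, 9.0, 5.0, 9.0]],
]
segm = SegmentationMask(polys, size=(10, 10))
print(segm)                                # SegmentationMask(num_instances=2, ...)
flipped = segm.transpose(FLIP_LEFT_RIGHT)  # mirror every x coordinate
resized = segm.resize((20, 20))            # doubles every coordinate
first = segm[torch.tensor([True, False])]  # boolean-mask indexing keeps instance 0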