myn0908 committed on
Commit 55a3c9a
1 Parent(s): 74ce519

init sketch2image

.gitignore ADDED
@@ -0,0 +1,5 @@
+ __pycache__
+ .idea
+ *.pyc
+ debug
+ .DS_Store
S2I/__init__.py ADDED
@@ -0,0 +1,2 @@
+ from .modules import Sketch2ImagePipeline
+ from .commons import Sketch2ImageController, css, scripts
S2I/commons/__init__.py ADDED
@@ -0,0 +1,2 @@
+ from .controller import Sketch2ImageController
+ from .css import css, scripts
S2I/commons/controller.py ADDED
@@ -0,0 +1,96 @@
+ from PIL import Image
+ from io import BytesIO
+ import numpy as np
+ import base64
+ import torch
+ import torchvision.transforms.functional as F
+ from S2I import Sketch2ImagePipeline
+
+
+
+ class Sketch2ImageController():
+     def __init__(self, gr):
+         super().__init__()
+         self.gr = gr
+         self.style_list = [
+             {"name": "Comic",
+              "prompt": "comic {prompt} . graphic illustration, comic art, graphic novel art, vibrant, highly detailed"},
+             {"name": "Cinematic",
+              "prompt": "cinematic still {prompt} . emotional, harmonious, vignette, highly detailed, high budget, bokeh, cinemascope, moody, epic, gorgeous, film grain, grainy"},
+             {"name": "3D Model",
+              "prompt": "professional 3d model {prompt} . octane render, highly detailed, volumetric, dramatic lighting"},
+             {"name": "Anime",
+              "prompt": "anime artwork {prompt} . anime style, key visual, vibrant, studio anime, highly detailed"},
+             {"name": "Digital Art",
+              "prompt": "concept art {prompt} . digital artwork, illustrative, painterly, matte painting, highly detailed"},
+             {"name": "Photographic",
+              "prompt": "cinematic photo {prompt} . 35mm photograph, film, bokeh, professional, 4k, highly detailed"},
+             {"name": "Pixel art", "prompt": "pixel-art {prompt} . low-res, blocky, pixel art style, 8-bit graphics"},
+             {"name": "Fantasy art",
+              "prompt": "ethereal fantasy concept art of {prompt} . magnificent, celestial, ethereal, painterly, epic, majestic, magical, fantasy art, cover art, dreamy"},
+             {"name": "Neonpunk",
+              "prompt": "neonpunk style {prompt} . cyberpunk, vaporwave, neon, vibes, vibrant, stunningly beautiful, crisp, detailed, sleek, ultramodern, magenta highlights, dark purple shadows, high contrast, cinematic, ultra detailed, intricate, professional"},
+             {"name": "Manga",
+              "prompt": "manga style {prompt} . vibrant, high-energy, detailed, iconic, Japanese comic style"},
+         ]
+
+         self.styles = {k["name"]: k["prompt"] for k in self.style_list}
+         self.STYLE_NAMES = list(self.styles.keys())
+         self.DEFAULT_STYLE_NAME = "Fantasy art"
+         self.MAX_SEED = np.iinfo(np.int32).max
+
+         # Initialize the model once here
+         self.pipe = None
+         self.zero_options = None
+     def load_pipeline(self, zero_options):
+         if self.pipe is None or zero_options != self.zero_options:
+             self.pipe = Sketch2ImagePipeline()
+             self.zero_options = zero_options
+
+     def update_canvas(self, use_line, use_eraser):
+         brush_size = 20 if use_eraser else 4
+         _color = "#ffffff" if use_eraser else "#000000"
+         return self.gr.update(brush_radius=brush_size, brush_color=_color, interactive=True)
+
+     def upload_sketch(self, file):
+         _img = Image.open(file.name).convert("L")
+         return self.gr.update(value=_img, source="upload", interactive=True)
+
+     @staticmethod
+     def pil_image_to_data_uri(img, format="PNG"):
+         buffered = BytesIO()
+         img.save(buffered, format=format)
+         img_str = base64.b64encode(buffered.getvalue()).decode()
+         return f"data:image/{format.lower()};base64,{img_str}"
+
+     def artwork(self, options, image, prompt, prompt_template, style_name, seed, val_r, faster, model_name, type_flag):
+         self.load_pipeline(zero_options=options)
+
+         prompt = prompt_template.replace("{prompt}", prompt)
+
+         if type_flag == 'live-sketch':
+             img = Image.fromarray(np.array(image["composite"])[:, :, -1])
+         elif type_flag == 'upload':
+             img = image["composite"]
+
+         img = img.convert("RGB")
+         img = img.resize((512, 512))
+
+         image_t = F.to_tensor(img) > 0.5
+         c_t = image_t.unsqueeze(0).cuda().float()
+
+         torch.manual_seed(seed)
+         _, _, H, W = c_t.shape
+         noise = torch.randn((1, 4, H // 8, W // 8), device=c_t.device)
+
+         with torch.no_grad():
+             output_image = self.pipe.generate(c_t, prompt, r=val_r, noise_map=noise, half_model=faster, model_name=model_name)
+
+         output_pil = F.to_pil_image(output_image[0].cpu() * 0.5 + 0.5)
+
+         if type_flag == 'live-sketch':
+             input_uri = self.pil_image_to_data_uri(Image.fromarray(255 - np.array(img)))
+         else:
+             input_uri = self.pil_image_to_data_uri(img)
+
+         return output_pil, self.gr.update(link=input_uri)
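
Note: the controller is UI-agnostic apart from the gr handle it receives; the real wiring lives in app.py (changed further below). As a rough illustration only, with widget choices and default argument values that are assumptions rather than part of this commit, artwork() could be driven from a minimal Gradio Blocks app like this:

import gradio as gr
from S2I import Sketch2ImageController

controller = Sketch2ImageController(gr)

def run(image, prompt, style_name, seed):
    # Remaining artwork() arguments are filled with plausible defaults here;
    # the real app.py reads them from its own UI widgets.
    template = controller.styles[style_name]
    output, _ = controller.artwork(
        options=None, image=image, prompt=prompt, prompt_template=template,
        style_name=style_name, seed=seed, val_r=0.4, faster='float16',
        model_name='350k', type_flag='upload',
    )
    return output

with gr.Blocks() as demo:
    # artwork() indexes image["composite"], so an editor-style component is assumed
    image = gr.ImageEditor(label="Sketch", type="pil")
    prompt = gr.Textbox(label="Prompt")
    style = gr.Dropdown(controller.STYLE_NAMES, value=controller.DEFAULT_STYLE_NAME, label="Style")
    seed = gr.Slider(0, controller.MAX_SEED, value=42, step=1, label="Seed")
    result = gr.Image(label="Output")
    gr.Button("Generate").click(run, [image, prompt, style, seed], result)

demo.launch()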
S2I/commons/css.py ADDED
@@ -0,0 +1,196 @@
1
+ css = """
2
+ @import url('https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.15.1/css/all.min.css');
3
+
4
+ /* Outer container */
5
+ .main {
6
+ display: flex;
7
+ justify-content: center;
8
+ align-items: flex-start;
9
+ width: 100%;
10
+ max-width: 1200px;
11
+ margin: 0 auto;
12
+ padding: 10px;
+ /* background: linear-gradient(to right, #6a11cb, #2575fc); */
+ /* animation: diffusionArtAnimation 10s infinite alternate; */
15
+ }
16
+
17
+ @keyframes diffusionArtAnimation {
18
+ 0% {
19
+ background: linear-gradient(135deg, #ff9a9e, #fad0c4);
20
+ }
21
+ 20% {
22
+ background: linear-gradient(135deg, #a1c4fd, #c2e9fb);
23
+ }
24
+ 40% {
25
+ background: linear-gradient(135deg, #fbc2eb, #a6c1ee);
26
+ }
27
+ 60% {
28
+ background: linear-gradient(135deg, #ffecd2, #fcb69f);
29
+ }
30
+ 80% {
31
+ background: linear-gradient(135deg, #cfd9df, #e2ebf0);
32
+ }
33
+ 100% {
34
+ background: linear-gradient(135deg, #ff9a9e, #fad0c4);
35
+ }
36
+ }
37
+ #main_row{
38
+ justify-content: center;
39
+ }
40
+ /* Hide class */
41
+ .svelte-p4aq0j {
42
+ display: none;
43
+ }
44
+
45
+ .wrap.svelte-p4aq0j.svelte-p4aq0j {
46
+ display: none;
47
+ }
48
+
49
+ #download_sketch {
50
+ display: none;
51
+ }
52
+
53
+ #download_output {
54
+ display: none;
55
+ }
56
+
57
+ #column_input, #column_output {
58
+ width: 100%;
59
+ max-width: 500px;
60
+ display: flex;
61
+ flex-direction: column;
62
+ align-items: center;
63
+ padding: 10px;
64
+ }
65
+
66
+ #tools_header, #input_header, #output_header, #process_header {
67
+ display: flex;
68
+ justify-content: center;
69
+ align-items: center;
70
+ width: 100%;
71
+ max-width: 400px;
72
+ font-size: 1.2em;
73
+ color: #fff;
74
+ text-shadow: 1px 1px 2px #000;
75
+ }
76
+
77
+ #nn {
78
+ width: 100px;
79
+ height: 100px;
80
+ }
81
+
82
+ #column_process {
83
+ display: flex;
84
+ justify-content: center;
85
+ align-items: center;
86
+ height: 600px;
87
+ }
88
+
89
+ #output_image, #input_image {
90
+ border-radius: 10px;
91
+ border: 5px solid #fff;
92
+ width: 100%;
93
+ max-width: 500px;
94
+ height: 500px;
95
+ box-sizing: border-box;
96
+ display: flex;
97
+ justify-content: center;
98
+ align-items: center;
99
+ background: rgba(255, 255, 255, 0.1);
100
+ animation: zoomInOut 5s infinite alternate;
101
+ }
102
+
103
+ @keyframes zoomInOut {
104
+ 0% {
105
+ transform: scale(1);
106
+ }
107
+ 50% {
108
+ transform: scale(1.05);
109
+ }
110
+ 100% {
111
+ transform: scale(1);
112
+ }
113
+ }
114
+
115
+ #output_image > img {
116
+ border: 5px solid #fff;
117
+ border-radius: 10px;
118
+ width: 100%;
119
+ height: 100%;
120
+ box-sizing: border-box;
121
+ }
122
+
123
+ #input_image > div.image-container.svelte-p3y7hu > div.wrap.svelte-yigbas > canvas:nth-child(1) {
124
+ border: 5px solid #fff;
125
+ border-radius: 10px;
126
+ width: 100%;
127
+ height: 100%;
128
+ box-sizing: border-box;
129
+ }
130
+
131
+ /* Responsive styles */
132
+ @media (max-width: 768px) {
133
+ .main {
134
+ flex-direction: column;
135
+ width: 100%;
136
+ }
137
+
138
+ #column_input, #column_output {
139
+ width: 100%;
140
+ max-width: 100%;
141
+ padding: 10px 0;
142
+ }
143
+
144
+ #tools_header, #input_header, #output_header, #process_header {
145
+ width: 100%;
146
+ }
147
+
148
+ #column_process {
149
+ height: auto;
150
+ }
151
+
152
+ #output_image, #input_image {
153
+ max-width: 100%;
154
+ height: auto;
155
+ }
156
+ }
157
+
158
+ @media (max-width: 480px) {
159
+ #nn {
160
+ width: 80px;
161
+ height: 80px;
162
+ }
163
+
164
+ #tools_header, #input_header, #output_header, #process_header {
165
+ max-width: 100%;
166
+ font-size: 14px;
167
+ }
168
+
169
+ #column_input, #column_output {
170
+ max-width: 100%;
171
+ padding: 10px;
172
+ }
173
+ }
+ /* .flex { background-color: #0b0f19; } */
177
+ """
178
+
179
+ scripts = """
180
+ async () => {
181
+ globalThis.theSketchDownloadFunction = () => {
182
+ console.log("test")
183
+ var link = document.createElement("a");
184
+ dataUri = document.getElementById('download_sketch').href
185
+ link.setAttribute("href", dataUri)
186
+ link.setAttribute("download", "sketch.png")
187
+ document.body.appendChild(link); // Required for Firefox
188
+ link.click();
189
+ document.body.removeChild(link); // Clean up
190
+
191
+ // also call the output download function
192
+ theOutputDownloadFunction();
193
+ return false
194
+ }
195
+ }
196
+ """
S2I/logger.py ADDED
@@ -0,0 +1,4 @@
+ import logging
+ logging.basicConfig(level=logging.INFO,
+                     format='%(asctime)s - %(levelname)s - %(message)s')
+ logger = logging.getLogger()
S2I/modules/__init__.py ADDED
@@ -0,0 +1 @@
+ from .sketch2image import *
S2I/modules/models.py ADDED
@@ -0,0 +1,91 @@
+ import torch
+ import copy
+ from diffusers import DDPMScheduler
+ from transformers import AutoTokenizer, CLIPTextModel
+ from diffusers import AutoencoderKL, UNet2DConditionModel
+ from peft import LoraConfig
+ from S2I.modules.utils import sc_vae_encoder_fwd, sc_vae_decoder_fwd, download_models, get_model_path
+
+
+ class RelationShipConvolution(torch.nn.Module):
+     def __init__(self, conv_in_pretrained, conv_in_curr, r):
+         super(RelationShipConvolution, self).__init__()
+         self.conv_in_pretrained = copy.deepcopy(conv_in_pretrained)
+         self.conv_in_curr = copy.deepcopy(conv_in_curr)
+         self.r = r
+
+     def forward(self, x):
+         x1 = self.conv_in_pretrained(x).detach()
+         x2 = self.conv_in_curr(x)
+         return x1 * (1 - self.r) + x2 * self.r
+
+
+ class PrimaryModel:
+     def __init__(self, backbone_diffusion_path='stabilityai/sd-turbo'):
+         self.backbone_diffusion_path = backbone_diffusion_path
+         self.global_unet = None
+         self.global_vae = None
+         self.global_tokenizer = None
+         self.global_text_encoder = None
+         self.global_scheduler = None
+
+     @staticmethod
+     def _load_model(path, model_class, unet_mode=False):
+         model = model_class.from_pretrained(path, subfolder='unet' if unet_mode else 'vae').to('cuda')
+         return model
+
+
+     def one_step_scheduler(self):
+         noise_scheduler_1step = DDPMScheduler.from_pretrained(self.backbone_diffusion_path, subfolder="scheduler")
+         noise_scheduler_1step.set_timesteps(1, device="cuda")
+         noise_scheduler_1step.alphas_cumprod = noise_scheduler_1step.alphas_cumprod.cuda()
+         return noise_scheduler_1step
+
+     def skip_connections(self, vae):
+         vae.encoder.forward = sc_vae_encoder_fwd.__get__(vae.encoder, vae.encoder.__class__)
+         vae.decoder.forward = sc_vae_decoder_fwd.__get__(vae.decoder, vae.decoder.__class__)
+         vae.decoder.skip_conv_1 = torch.nn.Conv2d(512, 512, kernel_size=(1, 1), stride=(1, 1), bias=False).cuda()
+         vae.decoder.skip_conv_2 = torch.nn.Conv2d(256, 512, kernel_size=(1, 1), stride=(1, 1), bias=False).cuda()
+         vae.decoder.skip_conv_3 = torch.nn.Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False).cuda()
+         vae.decoder.skip_conv_4 = torch.nn.Conv2d(128, 256, kernel_size=(1, 1), stride=(1, 1), bias=False).cuda()
+         vae.decoder.ignore_skip = False
+         return vae
+
+     def from_pretrained(self, model_name, r):
+         if self.global_tokenizer is None:
+             # self.global_tokenizer = AutoTokenizer.from_pretrained(self.backbone_diffusion_path,
+             #                                                       subfolder="tokenizer")
+             self.global_tokenizer = AutoTokenizer.from_pretrained("myn0908/stable-diffusion-3", subfolder="tokenizer_2")
+
+         if self.global_text_encoder is None:
+             self.global_text_encoder = CLIPTextModel.from_pretrained(self.backbone_diffusion_path,
+                                                                      subfolder="text_encoder").to(device='cuda')
+
+         if self.global_scheduler is None:
+             self.global_scheduler = self.one_step_scheduler()
+
+         if self.global_vae is None:
+             self.global_vae = self._load_model(self.backbone_diffusion_path, AutoencoderKL)
+             self.global_vae = self.skip_connections(self.global_vae)
+
+         if self.global_unet is None:
+             self.global_unet = self._load_model(self.backbone_diffusion_path, UNet2DConditionModel, unet_mode=True)
+             p_ckpt_path = download_models()
+             p_ckpt = get_model_path(model_name=model_name, model_paths=p_ckpt_path)
+             sd = torch.load(p_ckpt, map_location="cpu")
+             conv_in_pretrained = copy.deepcopy(self.global_unet.conv_in)
+             self.global_unet.conv_in = RelationShipConvolution(conv_in_pretrained, self.global_unet.conv_in, r)
+             unet_lora_config = LoraConfig(r=sd["rank_unet"], init_lora_weights="gaussian",
+                                           target_modules=sd["unet_lora_target_modules"])
+             vae_lora_config = LoraConfig(r=sd["rank_vae"], init_lora_weights="gaussian",
+                                          target_modules=sd["vae_lora_target_modules"])
+             self.global_vae.add_adapter(vae_lora_config, adapter_name="vae_skip")
+             _sd_vae = self.global_vae.state_dict()
+             for k in sd["state_dict_vae"]:
+                 _sd_vae[k] = sd["state_dict_vae"][k]
+             self.global_vae.load_state_dict(_sd_vae)
+             self.global_unet.add_adapter(unet_lora_config)
+             _sd_unet = self.global_unet.state_dict()
+             for k in sd["state_dict_unet"]:
+                 _sd_unet[k] = sd["state_dict_unet"][k]
+             self.global_unet.load_state_dict(_sd_unet, strict=False)
S2I/modules/sketch2image.py ADDED
@@ -0,0 +1,79 @@
+ from diffusers.utils.peft_utils import set_weights_and_activate_adapters
+ from S2I.modules.models import PrimaryModel
+ import gc
+ import torch
+ import warnings
+
+ warnings.filterwarnings("ignore")
+
+
+ class Sketch2ImagePipeline(PrimaryModel):
+     def __init__(self):
+         super().__init__()
+         self.timestep = torch.tensor([999], device="cuda").long()
+
+     def generate(self, c_t, prompt=None, prompt_tokens=None, r=1.0, noise_map=None, half_model=None, model_name=None):
+         self.from_pretrained(model_name=model_name, r=r)
+         assert (prompt is None) != (prompt_tokens is None), "Either prompt or prompt_tokens should be provided"
+
+         if half_model == 'float16':
+             output_image = self._generate_fp16(c_t, prompt, prompt_tokens, r, noise_map)
+         else:
+             output_image = self._generate_full_precision(c_t, prompt, prompt_tokens, r, noise_map)
+
+         return output_image
+
+     def _generate_fp16(self, c_t, prompt, prompt_tokens, r, noise_map):
+         with torch.autocast(device_type='cuda', dtype=torch.float16):
+             caption_enc = self._get_caption_enc(prompt, prompt_tokens)
+
+             self._set_weights_and_activate_adapters(r)
+             encoded_control = self.global_vae.encode(c_t).latent_dist.sample() * self.global_vae.config.scaling_factor
+
+             unet_input = encoded_control * r + noise_map * (1 - r)
+             unet_output = self.global_unet(unet_input, self.timestep, encoder_hidden_states=caption_enc).sample
+             x_denoise = self.global_scheduler.step(unet_output, self.timestep, unet_input, return_dict=True).prev_sample
+
+             self.global_vae.decoder.incoming_skip_acts = self.global_vae.encoder.current_down_blocks
+             self.global_vae.decoder.gamma = r
+
+             output_image = self.global_vae.decode(x_denoise / self.global_vae.config.scaling_factor).sample.clamp(-1, 1)
+
+         return output_image
+
+     def _generate_full_precision(self, c_t, prompt, prompt_tokens, r, noise_map):
+         caption_enc = self._get_caption_enc(prompt, prompt_tokens)
+
+         self._set_weights_and_activate_adapters(r)
+         encoded_control = self.global_vae.encode(c_t).latent_dist.sample() * self.global_vae.config.scaling_factor
+
+         unet_input = encoded_control * r + noise_map * (1 - r)
+         unet_output = self.global_unet(unet_input, self.timestep, encoder_hidden_states=caption_enc).sample
+         x_denoise = self.global_scheduler.step(unet_output, self.timestep, unet_input, return_dict=True).prev_sample
+
+         self.global_vae.decoder.incoming_skip_acts = self.global_vae.encoder.current_down_blocks
+         self.global_vae.decoder.gamma = r
+
+         output_image = self.global_vae.decode(x_denoise / self.global_vae.config.scaling_factor).sample.clamp(-1, 1)
+
+         return output_image
+
+     def _get_caption_enc(self, prompt, prompt_tokens):
+         if prompt is not None:
+             caption_tokens = self.global_tokenizer(prompt, max_length=self.global_tokenizer.model_max_length,
+                                                    padding="max_length", truncation=True,
+                                                    return_tensors="pt").input_ids.cuda()
+         else:
+             caption_tokens = prompt_tokens.cuda()
+
+         return self.global_text_encoder(caption_tokens)[0]
+
+     def _set_weights_and_activate_adapters(self, r):
+         self.global_unet.set_adapters(["default"], weights=[r])
+         set_weights_and_activate_adapters(self.global_vae, ["vae_skip"], [r])
+
+     def _move_to_cpu(self, module):
+         module.to("cpu")
+
+     def _move_to_gpu(self, module):
+         module.to("cuda")
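
The pipeline can also be driven without the Gradio controller by supplying the conditioning tensor directly. A minimal sketch (illustrative, not part of this commit; assumes a CUDA device, the '350k' checkpoint, and a 512x512 black-on-white sketch saved as sketch.png), mirroring the preprocessing done in Sketch2ImageController.artwork:

import torch
import torchvision.transforms.functional as F
from PIL import Image
from S2I import Sketch2ImagePipeline

pipe = Sketch2ImagePipeline()

# Binarize the sketch the same way the controller does: values > 0.5 become 1.0.
img = Image.open("sketch.png").convert("RGB").resize((512, 512))
c_t = (F.to_tensor(img) > 0.5).unsqueeze(0).cuda().float()

# r blends the pretrained and fine-tuned conv_in outputs and weights the noise map;
# the latent noise is 1/8 of the image resolution, matching the SD VAE downscaling.
noise = torch.randn((1, 4, 64, 64), device=c_t.device)
with torch.no_grad():
    out = pipe.generate(c_t, prompt="a cozy cabin in the woods", r=0.4,
                        noise_map=noise, half_model='float16', model_name='350k')

F.to_pil_image(out[0].cpu() * 0.5 + 0.5).save("result.png")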
S2I/modules/utils.py ADDED
@@ -0,0 +1,78 @@
+ import os
+ import requests
+ from tqdm import tqdm
+ from S2I.logger import logger
+
+ def sc_vae_encoder_fwd(self, sample):
+     sample = self.conv_in(sample)
+     self.current_down_blocks = []
+
+     for down_block in self.down_blocks:
+         self.current_down_blocks.append(sample)
+         sample = down_block(sample)
+
+     sample = self.mid_block(sample)
+     sample = self.conv_norm_out(sample)
+     sample = self.conv_act(sample)
+     sample = self.conv_out(sample)
+     return sample
+
+ def sc_vae_decoder_fwd(self, sample, latent_embeds=None):
+     sample = self.conv_in(sample)
+     upscale_dtype = next(self.up_blocks.parameters()).dtype
+     sample = self.mid_block(sample, latent_embeds)
+     sample = sample.to(upscale_dtype)
+
+     if not self.ignore_skip:
+         skip_convs = [self.skip_conv_1, self.skip_conv_2, self.skip_conv_3, self.skip_conv_4]
+         reversed_skip_acts = self.incoming_skip_acts[::-1]
+         for idx, (up_block, skip_conv) in enumerate(zip(self.up_blocks, skip_convs)):
+             skip_in = skip_conv(reversed_skip_acts[idx] * self.gamma)
+             sample += skip_in
+             sample = up_block(sample, latent_embeds)
+     else:
+         for up_block in self.up_blocks:
+             sample = up_block(sample, latent_embeds)
+
+     sample = self.conv_norm_out(sample, latent_embeds) if latent_embeds else self.conv_norm_out(sample)
+     sample = self.conv_act(sample)
+     sample = self.conv_out(sample)
+     return sample
+
+ def downloading(url, outf):
+     if not os.path.exists(outf):
+         print(f"Downloading checkpoint to {outf}")
+         response = requests.get(url, stream=True)
+         total_size_in_bytes = int(response.headers.get('content-length', 0))
+         block_size = 1024  # 1 Kibibyte
+         progress_bar = tqdm(total=total_size_in_bytes, unit='iB', unit_scale=True)
+         with open(outf, 'wb') as file:
+             for data in response.iter_content(block_size):
+                 progress_bar.update(len(data))
+                 file.write(data)
+         progress_bar.close()
+         if total_size_in_bytes != 0 and progress_bar.n != total_size_in_bytes:
+             print("ERROR, something went wrong")
+         print(f"Downloaded successfully to {outf}")
+
+
+ def download_models():
+     urls = {
+         '350k': 'https://huggingface.co/myn0908/sk2ks/resolve/main/sketch_to_image_mixed_weights_350k_lora.pkl?download=true',
+         '100k': 'https://huggingface.co/myn0908/sk2ks/resolve/main/model_16001.pkl?download=true',
+     }
+     # Get the current working directory
+     ckpt_folder = os.path.join(os.getcwd(), 'checkpoints')
+     os.makedirs(ckpt_folder, exist_ok=True)
+
+     model_paths = {}
+     for model_name, url in urls.items():
+         outf = os.path.join(ckpt_folder, f"sketch2image_lora_{model_name}.pkl")
+         downloading(url, outf)
+         model_paths[model_name] = outf
+
+     return model_paths
+
+
+ def get_model_path(model_name, model_paths):
+     return model_paths.get(model_name, "Model not found")
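
For reference, the two helpers compose as follows (an illustrative snippet, not part of the commit; paths are relative to the current working directory):

from S2I.modules.utils import download_models, get_model_path

paths = download_models()            # fetches both checkpoints into ./checkpoints
ckpt = get_model_path('350k', paths) # -> ./checkpoints/sketch2image_lora_350k.pkl
print(ckpt)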
S2I/samer/__init__.py ADDED
@@ -0,0 +1,7 @@
+ from .model_args import generate_sam_args
+ from .segmentor import *
+ from .seg_anything import *
+ from .segment import *
+ from .transfer_tools import *
+ from .automatic_mask_generator_prob import SamAutomaticMaskAndProbabilityGenerator
+ from .sam_controller import SAMController
S2I/samer/automatic_mask_generator_prob.py ADDED
@@ -0,0 +1,402 @@
1
+ from typing import Any, Dict, List, Optional, Tuple
2
+
3
+ import numpy as np
4
+ import torch
5
+ import torch.nn.functional as F
6
+ from segment_anything import SamAutomaticMaskGenerator
7
+ from segment_anything.modeling import Sam
8
+ from segment_anything.utils.amg import (MaskData, area_from_rle,
9
+ batched_mask_to_box, box_xyxy_to_xywh,
10
+ batch_iterator,
11
+ uncrop_boxes_xyxy, uncrop_points,
12
+ calculate_stability_score,
13
+ coco_encode_rle, generate_crop_boxes,
14
+ is_box_near_crop_edge,
15
+ mask_to_rle_pytorch, rle_to_mask,
16
+ uncrop_masks)
17
+ from torchvision.ops.boxes import batched_nms, box_area # type: ignore
18
+
19
+
20
+ def batched_mask_to_prob(masks: torch.Tensor) -> torch.Tensor:
21
+ """
22
+ For implementation, see the following issue comment:
23
+
24
+ "To get the probability map for a mask,
25
+ we simply do element-wise sigmoid over the logits."
26
+ URL: https://github.com/facebookresearch/segment-anything/issues/226
27
+
28
+ Args:
29
+ masks: Tensor of shape [B, H, W] representing batch of binary masks.
30
+
31
+ Returns:
32
+ Tensor of shape [B, H, W] representing batch of probability maps.
33
+ """
34
+ probs = torch.sigmoid(masks).to(masks.device)
35
+ return probs
36
+
37
+
38
+ def batched_sobel_filter(probs: torch.Tensor, masks: torch.Tensor, bzp: int
39
+ ) -> torch.Tensor:
40
+ """
41
+ For implementation, see section D.2 of the paper:
42
+
43
+ "we apply a Sobel filter to the remaining masks' unthresholded probability
44
+ maps and set values to zero if they do not intersect with the outer
45
+ boundary pixels of a mask."
46
+ URL: https://arxiv.org/abs/2304.02643
47
+
48
+ Args:
49
+ probs: Tensor of shape [B, H, W] representing batch of probability maps.
50
+ masks: Tensor of shape [B, H, W] representing batch of binary masks.
51
+
52
+ Returns:
53
+ Tensor of shape [B, H, W] with filtered probability maps.
54
+ """
55
+ # probs: [B, H, W]
56
+ # Add channel dimension to make it [B, 1, H, W]
57
+ probs = probs.unsqueeze(1)
58
+
59
+ # sobel_filter: [1, 1, 3, 3]
60
+ sobel_filter_x = torch.tensor([[[-1, 0, 1], [-2, 0, 2], [-1, 0, 1]]],
61
+ dtype=torch.float32
62
+ ).to(probs.device).unsqueeze(0)
63
+ sobel_filter_y = torch.tensor([[[-1, -2, -1], [0, 0, 0], [1, 2, 1]]],
64
+ dtype=torch.float32
65
+ ).to(probs.device).unsqueeze(0)
66
+
67
+ # Apply the Sobel filters
68
+ G_x = F.conv2d(probs, sobel_filter_x, padding=1)
69
+ G_y = F.conv2d(probs, sobel_filter_y, padding=1)
70
+
71
+ # Combine the gradients
72
+ probs = torch.sqrt(G_x ** 2 + G_y ** 2)
73
+
74
+ # Iterate through each image in the batch
75
+ for i in range(probs.shape[0]):
76
+ # Convert binary mask to float
77
+ mask = masks[i].float()
78
+
79
+ G_x = F.conv2d(mask[None, None], sobel_filter_x, padding=1)
80
+ G_y = F.conv2d(mask[None, None], sobel_filter_y, padding=1)
81
+ edge = torch.sqrt(G_x ** 2 + G_y ** 2)
82
+ outer_boundary = (edge > 0).float()
83
+
84
+ # Set to zero values that don't touch the mask's outer boundary.
85
+ probs[i, 0] = probs[i, 0] * outer_boundary
86
+
87
+ # Boundary zero padding (BZP).
88
+ # See "Zero-Shot Edge Detection With SCESAME: Spectral
89
+ # Clustering-Based Ensemble for Segment Anything Model Estimation".
90
+ if bzp > 0:
91
+ probs[i, 0, 0:bzp, :] = 0
92
+ probs[i, 0, -bzp:, :] = 0
93
+ probs[i, 0, :, 0:bzp] = 0
94
+ probs[i, 0, :, -bzp:] = 0
95
+
96
+ # Remove the channel dimension
97
+ probs = probs.squeeze(1)
98
+
99
+ return probs
100
+
101
+
102
+ class SamAutomaticMaskAndProbabilityGenerator(SamAutomaticMaskGenerator):
103
+ def __init__(
104
+ self,
105
+ model: Sam,
106
+ points_per_side: Optional[int] = 16,
107
+ points_per_batch: int = 64,
108
+ pred_iou_thresh: float = 0.88,
109
+ stability_score_thresh: float = 0.95,
110
+ stability_score_offset: float = 1.0,
111
+ box_nms_thresh: float = 0.7,
112
+ crop_n_layers: int = 0,
113
+ crop_nms_thresh: float = 0.7,
114
+ crop_overlap_ratio: float = 512 / 1500,
115
+ crop_n_points_downscale_factor: int = 1,
116
+ point_grids: Optional[List[np.ndarray]] = None,
117
+ min_mask_region_area: int = 0,
118
+ output_mode: str = "binary_mask",
119
+ nms_threshold: float = 0.7,
120
+ bzp: int = 0,
121
+ pred_iou_thresh_filtering=False,
122
+ stability_score_thresh_filtering=False,
123
+ ) -> None:
124
+ """
125
+ Using a SAM model, generates masks for the entire image.
126
+ Generates a grid of point prompts over the image, then filters
127
+ low quality and duplicate masks. The default settings are chosen
128
+ for SAM with a ViT-H backbone.
129
+
130
+ Arguments:
131
+ model (Sam): The SAM model to use for mask prediction.
132
+ points_per_side (int or None): The number of points to be sampled
133
+ along one side of the image. The total number of points is
134
+ points_per_side**2. If None, 'point_grids' must provide explicit
135
+ point sampling.
136
+ points_per_batch (int): Sets the number of points run simultaneously
137
+ by the model. Higher numbers may be faster but use more GPU memory.
138
+ pred_iou_thresh (float): A filtering threshold in [0,1], using the
139
+ model's predicted mask quality.
140
+ stability_score_thresh (float): A filtering threshold in [0,1], using
141
+ the stability of the mask under changes to the cutoff used to binarize
142
+ the model's mask predictions.
143
+ stability_score_offset (float): The amount to shift the cutoff when
144
+ calculated the stability score.
145
+ box_nms_thresh (float): The box IoU cutoff used by non-maximal
146
+ suppression to filter duplicate masks.
147
+ crop_n_layers (int): If >0, mask prediction will be run again on
148
+ crops of the image. Sets the number of layers to run, where each
149
+ layer has 2**i_layer number of image crops.
150
+ crop_nms_thresh (float): The box IoU cutoff used by non-maximal
151
+ suppression to filter duplicate masks between different crops.
152
+ crop_overlap_ratio (float): Sets the degree to which crops overlap.
153
+ In the first crop layer, crops will overlap by this fraction of
154
+ the image length. Later layers with more crops scale down this overlap.
155
+ crop_n_points_downscale_factor (int): The number of points-per-side
156
+ sampled in layer n is scaled down by crop_n_points_downscale_factor**n.
157
+ point_grids (list(np.ndarray) or None): A list over explicit grids
158
+ of points used for sampling, normalized to [0,1]. The nth grid in the
159
+ list is used in the nth crop layer. Exclusive with points_per_side.
160
+ min_mask_region_area (int): If >0, postprocessing will be applied
161
+ to remove disconnected regions and holes in masks with area smaller
162
+ than min_mask_region_area. Requires opencv.
163
+ output_mode (str): The form masks are returned in. Can be 'binary_mask',
164
+ 'uncompressed_rle', or 'coco_rle'. 'coco_rle' requires pycocotools.
165
+ For large resolutions, 'binary_mask' may consume large amounts of
166
+ memory.
167
+ nms_threshold (float): The IoU threshold used for non-maximal suppression
168
+ """
169
+ super().__init__(
170
+ model,
171
+ points_per_side,
172
+ points_per_batch,
173
+ pred_iou_thresh,
174
+ stability_score_thresh,
175
+ stability_score_offset,
176
+ box_nms_thresh,
177
+ crop_n_layers,
178
+ crop_nms_thresh,
179
+ crop_overlap_ratio,
180
+ crop_n_points_downscale_factor,
181
+ point_grids,
182
+ min_mask_region_area,
183
+ output_mode,
184
+ )
185
+ self.nms_threshold = nms_threshold
186
+ self.bzp = bzp
187
+ self.pred_iou_thresh_filtering = pred_iou_thresh_filtering
188
+ self.stability_score_thresh_filtering = \
189
+ stability_score_thresh_filtering
190
+
191
+ @torch.no_grad()
192
+ def generate(self, image: np.ndarray) -> List[Dict[str, Any]]:
193
+ """
194
+ Generates masks for the given image.
195
+
196
+ Arguments:
197
+ image (np.ndarray): The image to generate masks for, in HWC uint8 format.
198
+
199
+ Returns:
200
+ list(dict(str, any)): A list over records for masks. Each record is
201
+ a dict containing the following keys:
202
+ segmentation (dict(str, any) or np.ndarray): The mask. If
203
+ output_mode='binary_mask', is an array of shape HW. Otherwise,
204
+ is a dictionary containing the RLE.
205
+ bbox (list(float)): The box around the mask, in XYWH format.
206
+ area (int): The area in pixels of the mask.
207
+ predicted_iou (float): The model's own prediction of the mask's
208
+ quality. This is filtered by the pred_iou_thresh parameter.
209
+ point_coords (list(list(float))): The point coordinates input
210
+ to the model to generate this mask.
211
+ stability_score (float): A measure of the mask's quality. This
212
+ is filtered on using the stability_score_thresh parameter.
213
+ crop_box (list(float)): The crop of the image used to generate
214
+ the mask, given in XYWH format.
215
+ """
216
+
217
+ # Generate masks
218
+ mask_data = self._generate_masks(image)
219
+
220
+ # Filter small disconnected regions and holes in masks
221
+ if self.min_mask_region_area > 0:
222
+ mask_data = self.postprocess_small_regions(
223
+ mask_data,
224
+ self.min_mask_region_area,
225
+ max(self.box_nms_thresh, self.crop_nms_thresh),
226
+ )
227
+
228
+ # Encode masks
229
+ if self.output_mode == "coco_rle":
230
+ mask_data["segmentations"] = [coco_encode_rle(rle) for rle in mask_data["rles"]]
231
+ elif self.output_mode == "binary_mask":
232
+ mask_data["segmentations"] = [rle_to_mask(rle) for rle in mask_data["rles"]]
233
+ else:
234
+ mask_data["segmentations"] = mask_data["rles"]
235
+
236
+ # Write mask records
237
+ curr_anns = []
238
+ for idx in range(len(mask_data["segmentations"])):
239
+ ann = {
240
+ "segmentation": mask_data["segmentations"][idx],
241
+ "area": area_from_rle(mask_data["rles"][idx]),
242
+ "bbox": box_xyxy_to_xywh(mask_data["boxes"][idx]).tolist(),
243
+ "predicted_iou": mask_data["iou_preds"][idx].item(),
244
+ "point_coords": [mask_data["points"][idx].tolist()],
245
+ "stability_score": mask_data["stability_score"][idx].item(),
246
+ "crop_box": box_xyxy_to_xywh(mask_data["crop_boxes"][idx]).tolist(),
247
+ "prob": mask_data["probs"][idx],
248
+ }
249
+ curr_anns.append(ann)
250
+
251
+ return curr_anns
252
+
253
+ def _process_crop(
254
+ self,
255
+ image: np.ndarray,
256
+ crop_box: List[int],
257
+ crop_layer_idx: int,
258
+ orig_size: Tuple[int, ...],
259
+ ) -> MaskData:
260
+ # Crop the image and calculate embeddings
261
+ x0, y0, x1, y1 = crop_box
262
+ cropped_im = image[y0:y1, x0:x1, :]
263
+ cropped_im_size = cropped_im.shape[:2]
264
+ self.predictor.set_image(cropped_im)
265
+
266
+ # Get points for this crop
267
+ points_scale = np.array(cropped_im_size)[None, ::-1]
268
+ points_for_image = self.point_grids[crop_layer_idx] * points_scale
269
+
270
+ # Generate masks for this crop in batches
271
+ data = MaskData()
272
+ for (points,) in batch_iterator(self.points_per_batch, points_for_image):
273
+ batch_data = self._process_batch(points, cropped_im_size, crop_box, orig_size)
274
+ data.cat(batch_data)
275
+ del batch_data
276
+ self.predictor.reset_image()
277
+
278
+ # Remove duplicates within this crop.
279
+ keep_by_nms = batched_nms(
280
+ data["boxes"].float(),
281
+ data["iou_preds"],
282
+ torch.zeros_like(data["boxes"][:, 0]), # categories
283
+ iou_threshold=self.box_nms_thresh,
284
+ )
285
+ data.filter(keep_by_nms)
286
+
287
+ # Return to the original image frame
288
+ data["boxes"] = uncrop_boxes_xyxy(data["boxes"], crop_box)
289
+ data["points"] = uncrop_points(data["points"], crop_box)
290
+ data["crop_boxes"] = torch.tensor([crop_box for _ in range(len(data["rles"]))])
291
+
292
+ padded_probs = torch.zeros((data["probs"].shape[0], *orig_size),
293
+ dtype=torch.float32,
294
+ device=data["probs"].device)
295
+ padded_probs[:, y0:y1, x0:x1] = data["probs"]
296
+ data["probs"] = padded_probs
297
+
298
+ return data
299
+
300
+ def _generate_masks(self, image: np.ndarray) -> MaskData:
301
+ orig_size = image.shape[:2]
302
+ crop_boxes, layer_idxs = generate_crop_boxes(
303
+ orig_size, self.crop_n_layers, self.crop_overlap_ratio
304
+ )
305
+
306
+ # Iterate over image crops
307
+ data = MaskData()
308
+ for crop_box, layer_idx in zip(crop_boxes, layer_idxs):
309
+ crop_data = self._process_crop(image, crop_box, layer_idx, orig_size)
310
+ data.cat(crop_data)
311
+
312
+ # Remove duplicate masks between crops
313
+ if len(crop_boxes) > 1:
314
+ # Prefer masks from smaller crops
315
+ scores = 1 / box_area(data["crop_boxes"])
316
+ scores = scores.to(data["boxes"].device)
317
+ keep_by_nms = batched_nms(
318
+ data["boxes"].float(),
319
+ scores,
320
+ torch.zeros_like(data["boxes"][:, 0]), # categories
321
+ iou_threshold=self.crop_nms_thresh,
322
+ )
323
+ data.filter(keep_by_nms)
324
+
325
+ data.to_numpy()
326
+ return data
327
+
328
+ def _process_batch(
329
+ self,
330
+ points: np.ndarray,
331
+ im_size: Tuple[int, ...],
332
+ crop_box: List[int],
333
+ orig_size: Tuple[int, ...],
334
+ ) -> MaskData:
335
+ orig_h, orig_w = orig_size
336
+
337
+ # Run model on this batch
338
+ transformed_points = self.predictor.transform.apply_coords(points, im_size)
339
+ in_points = torch.as_tensor(transformed_points, device=self.predictor.device)
340
+ in_labels = torch.ones(in_points.shape[0], dtype=torch.int, device=in_points.device)
341
+ masks, iou_preds, _ = self.predictor.predict_torch(
342
+ in_points[:, None, :],
343
+ in_labels[:, None],
344
+ multimask_output=True,
345
+ return_logits=True,
346
+ )
347
+
348
+ # Serialize predictions and store in MaskData
349
+ data = MaskData(
350
+ masks=masks.flatten(0, 1),
351
+ iou_preds=iou_preds.flatten(0, 1),
352
+ points=torch.as_tensor(points.repeat(masks.shape[1], axis=0)),
353
+ )
354
+ del masks
355
+
356
+ if self.pred_iou_thresh_filtering and self.pred_iou_thresh > 0.0:
357
+ keep_mask = data["iou_preds"] > self.pred_iou_thresh
358
+ data.filter(keep_mask)
359
+
360
+ # Calculate stability score
361
+ data["stability_score"] = calculate_stability_score(
362
+ data["masks"], self.predictor.model.mask_threshold, self.stability_score_offset
363
+ )
364
+
365
+ if self.stability_score_thresh_filtering and \
366
+ self.stability_score_thresh > 0.0:
367
+ keep_mask = data["stability_score"] >= self.stability_score_thresh
368
+ data.filter(keep_mask)
369
+
370
+ # Threshold masks and calculate boxes
371
+ data["probs"] = batched_mask_to_prob(data["masks"])
372
+ data["masks"] = data["masks"] > self.predictor.model.mask_threshold
373
+ data["boxes"] = batched_mask_to_box(data["masks"])
374
+
375
+ # Filter boxes that touch crop boundaries
376
+ keep_mask = ~is_box_near_crop_edge(data["boxes"], crop_box, [0, 0, orig_w, orig_h])
377
+ if not torch.all(keep_mask):
378
+ data.filter(keep_mask)
379
+
380
+ # filter by nms
381
+ if self.nms_threshold > 0.0:
382
+ keep_mask = batched_nms(
383
+ data["boxes"].float(),
384
+ data["iou_preds"],
385
+ torch.zeros_like(data["boxes"][:, 0]), # categories
386
+ iou_threshold=self.nms_threshold,
387
+ )
388
+ data.filter(keep_mask)
389
+
390
+ # apply sobel filter for probability map
391
+ data["probs"] = batched_sobel_filter(data["probs"], data["masks"],
392
+ bzp=self.bzp)
393
+
394
+ # set prob to 0 for pixels outside of crop box
395
+ # data["probs"] = batched_crop_probs(data["probs"], data["boxes"])
396
+
397
+ # Compress to RLE
398
+ data["masks"] = uncrop_masks(data["masks"], crop_box, orig_h, orig_w)
399
+ data["rles"] = mask_to_rle_pytorch(data["masks"])
400
+ del data["masks"]
401
+
402
+ return data
S2I/samer/model_args.py ADDED
@@ -0,0 +1,17 @@
+ def generate_sam_args(sam_checkpoint="ckpt", model_type="vit_b", points_per_side=16,
+                       pred_iou_thresh=0.8, stability_score_thresh=0.9, crop_n_layers=1,
+                       crop_n_points_downscale_factor=2, min_mask_region_area=200, gpu_id=0):
+     sam_args = {
+         'sam_checkpoint': f'{sam_checkpoint}/{model_type}.pth',
+         'model_type': model_type,
+         'generator_args': {
+             'points_per_side': points_per_side,
+             'pred_iou_thresh': pred_iou_thresh,
+             'stability_score_thresh': stability_score_thresh,
+             'crop_n_layers': crop_n_layers,
+             'crop_n_points_downscale_factor': crop_n_points_downscale_factor,
+             'min_mask_region_area': min_mask_region_area,
+         },
+         'gpu_id': gpu_id}
+
+     return sam_args
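
generate_sam_args() only assembles the configuration dictionary; the Segmentor/SegMent classes below consume it. A small illustrative sketch (not part of this commit), assuming the vit_b checkpoint has already been downloaded to ./root_model/sam_models/vit_b.pth:

import numpy as np
from S2I.samer import generate_sam_args, SegMent

sam_args = generate_sam_args(sam_checkpoint='./root_model/sam_models', model_type='vit_b')
segment = SegMent(sam_args)                      # loads SAM plus the automatic mask generator
image = np.zeros((512, 512, 3), dtype=np.uint8)  # placeholder HWC uint8 frame
masks = segment.automatic_generate_mask(image)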
S2I/samer/sam_controller.py ADDED
@@ -0,0 +1,307 @@
1
+ from S2I.samer import SegMent, generate_sam_args
2
+ from S2I.logger import logger
3
+ from tqdm import tqdm
4
+ import gradio as gr
5
+ import numpy as np
6
+ import os
7
+ import shutil
8
+ import cv2
9
+ import requests
10
+
11
+
12
+ class SAMController:
13
+ def __init__(self):
14
+ self.current_model_type = None
15
+ self.refine_mask = None
16
+
17
+ @staticmethod
18
+ def clean():
19
+ return None, None, None, None, None, [[]]
20
+
21
+ @staticmethod
22
+ def save_mask(refined_mask=None, save=False):
23
+
24
+ if refined_mask is not None and save:
25
+ if os.path.exists(os.path.join(os.getcwd(), 'output_render')):
26
+ shutil.rmtree(os.path.join(os.getcwd(), 'output_render'))
27
+ save_path = os.path.join(os.getcwd(), 'output_render')
28
+ os.makedirs(save_path, exist_ok=True)
29
+ cv2.imwrite(os.path.join(save_path, f'refined_mask_result.png'), (refined_mask * 255).astype('uint8'))
30
+ elif refined_mask is None and save:
31
+ return os.path.join(os.path.join(os.getcwd(), 'output_render'), f'refined_mask_result.png')
32
+
33
+ @staticmethod
34
+ def download_models(model_type):
35
+ dir_path = os.path.join(os.getcwd(), 'root_model')
36
+ sam_models_path = os.path.join(dir_path, 'sam_models')
37
+
38
+ # Models URLs
39
+ models_urls = {
40
+ 'sam_models': {
41
+ 'vit_b': 'https://huggingface.co/ybelkada/segment-anything/resolve/main/checkpoints/sam_vit_b_01ec64.pth?download=true',
42
+ 'vit_l': 'https://huggingface.co/segments-arnaud/sam_vit_l/resolve/main/sam_vit_l_0b3195.pth?download=true',
43
+ 'vit_h': 'https://huggingface.co/segments-arnaud/sam_vit_h/resolve/main/sam_vit_h_4b8939.pth?download=true'
44
+ }
45
+ }
46
+
47
+ # Download specified model type
48
+ if model_type in models_urls['sam_models']:
49
+ model_url = models_urls['sam_models'][model_type]
50
+ os.makedirs(sam_models_path, exist_ok=True)
51
+ model_path = os.path.join(sam_models_path, model_type + '.pth')
52
+
53
+ if not os.path.exists(model_path):
54
+ logger.info(f"Downloading {model_type} model...")
55
+ response = requests.get(model_url, stream=True)
56
+ response.raise_for_status() # Raise an exception for non-2xx status codes
57
+
58
+ total_size = int(response.headers.get('content-length', 0)) # Get file size from headers
59
+ with tqdm(total=total_size, unit="B", unit_scale=True, desc=f"Downloading {model_type} model") as pbar:
60
+ with open(model_path, 'wb') as f:
61
+ for chunk in response.iter_content(chunk_size=1024):
62
+ f.write(chunk)
63
+ pbar.update(len(chunk))
64
+ logger.info(f"{model_type} model downloaded.")
65
+ else:
66
+ logger.info(f"{model_type} model already exists.")
67
+ return logger.info(f"{model_type} model download complete.")
68
+ else:
69
+ return logger.info(f"Invalid model type: {model_type}")
70
+
71
+ @staticmethod
72
+ def get_models_path(model_type=None, segment=False):
73
+ sam_models_path = os.path.join(os.getcwd(), 'root_model', 'sam_models')
74
+
75
+ if segment:
76
+ sam_args = generate_sam_args(sam_checkpoint=sam_models_path, model_type=model_type)
77
+ return sam_args, sam_models_path
78
+
79
+ @staticmethod
80
+ def get_click_prompt(click_stack, point):
81
+ click_stack[0].append(point["coord"])
82
+ click_stack[1].append(point["mode"]
83
+ )
84
+
85
+ prompt = {
86
+ "points_coord": click_stack[0],
87
+ "points_mode": click_stack[1],
88
+ "multi_mask": "True",
89
+ }
90
+
91
+ return prompt
92
+
93
+ @staticmethod
94
+ def read_temp_file(temp_file_wrapper):
95
+ name = temp_file_wrapper.name
96
+ with open(temp_file_wrapper.name, 'rb') as f:
97
+ # Read the content of the file
98
+ file_content = f.read()
99
+ return file_content, name
100
+
101
+ def get_meta_from_image(self, input_img):
102
+ file_content, _ = self.read_temp_file(input_img)
103
+ np_arr = np.frombuffer(file_content, np.uint8)
104
+
105
+ img = cv2.imdecode(np_arr, cv2.IMREAD_COLOR)
106
+ first_frame = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
107
+ return first_frame, first_frame
108
+
109
+ def is_sam_model(self, model_type):
110
+ sam_args, sam_models_dir = self.get_models_path(model_type=model_type, segment=True)
111
+ model_path = os.path.join(sam_models_dir, model_type + '.pth')
112
+ if not os.path.exists(model_path):
113
+ self.download_models(model_type=model_type)
114
+ return 'Model is downloaded', sam_args
115
+ else:
116
+ return 'Model is already downloaded', sam_args
117
+
118
+ @staticmethod
119
+ def init_segment(
120
+ points_per_side,
121
+ origin_frame,
122
+ sam_args,
123
+ predict_iou_thresh=0.8,
124
+ stability_score_thresh=0.9,
125
+ crop_n_layers=1,
126
+ crop_n_points_downscale_factor=2,
127
+ min_mask_region_area=200):
128
+ if origin_frame is None:
129
+ return None, origin_frame, [[], []]
130
+ sam_args["generator_args"]["points_per_side"] = points_per_side
131
+ sam_args["generator_args"]["pred_iou_thresh"] = predict_iou_thresh
132
+ sam_args["generator_args"]["stability_score_thresh"] = stability_score_thresh
133
+ sam_args["generator_args"]["crop_n_layers"] = crop_n_layers
134
+ sam_args["generator_args"]["crop_n_points_downscale_factor"] = crop_n_points_downscale_factor
135
+ sam_args["generator_args"]["min_mask_region_area"] = min_mask_region_area
136
+
137
+ segment = SegMent(sam_args)
138
+ logger.info(f"Model Init: {sam_args}")
139
+ return segment, origin_frame, [[], []]
140
+
141
+ @staticmethod
142
+ def seg_acc_click(segment, prompt, origin_frame):
143
+ # seg acc to click
144
+ refined_mask, masked_frame = segment.seg_acc_click(
145
+ origin_frame=origin_frame,
146
+ coords=np.array(prompt["points_coord"]),
147
+ modes=np.array(prompt["points_mode"]),
148
+ multimask=prompt["multi_mask"],
149
+ )
150
+ return refined_mask, masked_frame
151
+
152
+ def undo_click_stack_and_refine_seg(self, segment, origin_frame, click_stack):
153
+ if segment is None:
154
+ return segment, origin_frame, [[], []]
155
+
156
+ logger.info("Undo !")
157
+ if len(click_stack[0]) > 0:
158
+ click_stack[0] = click_stack[0][: -1]
159
+ click_stack[1] = click_stack[1][: -1]
160
+
161
+ if len(click_stack[0]) > 0:
162
+ prompt = {
163
+ "points_coord": click_stack[0],
164
+ "points_mode": click_stack[1],
165
+ "multi_mask": "True",
166
+ }
167
+
168
+ _, masked_frame = self.seg_acc_click(segment, prompt, origin_frame)
169
+ return segment, masked_frame, click_stack
170
+ else:
171
+ return segment, origin_frame, [[], []]
172
+
173
+ def reload_segment(self,
174
+ check_sam,
175
+ segment,
176
+ model_type,
177
+ point_per_sides,
178
+ origin_frame,
179
+ predict_iou_thresh,
180
+ stability_score_thresh,
181
+ crop_n_layers,
182
+ crop_n_points_downscale_factor,
183
+ min_mask_region_area):
184
+ status, sam_args = check_sam(model_type)
185
+ if segment is None or status == 'Model is downloaded':
186
+ segment, _, _ = self.init_segment(point_per_sides,
187
+ origin_frame,
188
+ sam_args,
189
+ predict_iou_thresh,
190
+ stability_score_thresh,
191
+ crop_n_layers,
192
+ crop_n_points_downscale_factor,
193
+ min_mask_region_area)
194
+ self.current_model_type = model_type
195
+ return segment, self.current_model_type, status
196
+
197
+ def sam_click(self,
198
+ evt: gr.SelectData,
199
+ segment,
200
+ origin_frame,
201
+ model_type,
202
+ point_mode,
203
+ click_stack,
204
+ point_per_sides,
205
+ predict_iou_thresh,
206
+ stability_score_thresh,
207
+ crop_n_layers,
208
+ crop_n_points_downscale_factor,
209
+ min_mask_region_area):
210
+ logger.info("Click")
211
+ if point_mode == "Positive":
212
+ point = {"coord": [evt.index[0], evt.index[1]], "mode": 1}
213
+ else:
214
+ point = {"coord": [evt.index[0], evt.index[1]], "mode": 0}
215
+ click_prompt = self.get_click_prompt(click_stack, point)
216
+ segment, self.current_model_type, status = self.reload_segment(
217
+ self.is_sam_model,
218
+ segment,
219
+ model_type,
220
+ point_per_sides,
221
+ origin_frame,
222
+ predict_iou_thresh,
223
+ stability_score_thresh,
224
+ crop_n_layers,
225
+ crop_n_points_downscale_factor,
226
+ min_mask_region_area)
227
+ if segment is not None and model_type != self.current_model_type:
228
+ segment = None
229
+ segment, _, status = self.reload_segment(
230
+ self.is_sam_model,
231
+ segment,
232
+ model_type,
233
+ point_per_sides,
234
+ origin_frame,
235
+ predict_iou_thresh,
236
+ stability_score_thresh,
237
+ crop_n_layers,
238
+ crop_n_points_downscale_factor,
239
+ min_mask_region_area)
240
+ refined_mask, masked_frame = self.seg_acc_click(segment, click_prompt, origin_frame)
241
+ self.save_mask(refined_mask, save=True)
242
+ self.refine_mask = refined_mask
243
+ return segment, masked_frame, click_stack, status
244
+
245
+ @staticmethod
246
+ def normalize_image(image):
247
+ # Normalize the image to the range [0, 1]
248
+ min_val = image.min()
249
+ max_val = image.max()
250
+ image = (image - min_val) / (max_val - min_val)
251
+
252
+ return image
253
+
254
+ @staticmethod
255
+ def compute_probability(masks):
256
+ p_max = None
257
+ for mask in masks:
258
+ p = mask['prob']
259
+ if p_max is None:
260
+ p_max = p
261
+ else:
262
+ p_max = np.maximum(p_max, p)
263
+ return p_max
264
+ @staticmethod
265
+ def download_opencv_model(model_url):
266
+ opencv_model_path = os.path.join(os.getcwd(), 'edges_detection')
267
+ os.makedirs(opencv_model_path, exist_ok=True)
268
+ model_path = os.path.join(opencv_model_path, 'edges_detection' + '.yml.gz')
269
+ response = requests.get(model_url, stream=True)
270
+ response.raise_for_status() # Raise an exception for non-2xx status codes
271
+
272
+ total_size = int(response.headers.get('content-length', 0)) # Get file size from headers
273
+ with tqdm(total=total_size, unit="B", unit_scale=True, desc=f"Downloading opencv model") as pbar:
274
+ with open(model_path, 'wb') as f:
275
+ for chunk in response.iter_content(chunk_size=1024):
276
+ f.write(chunk)
277
+ pbar.update(len(chunk))
278
+ return model_path
279
+
280
+ def automatic_sam2sketch(self,
281
+ segment,
282
+ image,
283
+ origin_frame,
284
+ model_type
285
+ ):
286
+ _, sam_args = self.is_sam_model(model_type)
287
+ if segment is None or model_type != sam_args['model_type']:
288
+ segment, _, _ = self.init_segment(
289
+ points_per_side=16,
290
+ origin_frame=origin_frame,
291
+ sam_args=sam_args,
292
+ predict_iou_thresh=0.8,
293
+ stability_score_thresh=0.9,
294
+ crop_n_layers=1,
295
+ crop_n_points_downscale_factor=2,
296
+ min_mask_region_area=200)
297
+ model_path = self.download_opencv_model(model_url='https://github.com/nipunmanral/Object-Detection-using-OpenCV/raw/master/model.yml.gz')
298
+ masks = segment.automatic_generate_mask(image)
299
+ p_max = self.compute_probability(masks)
300
+ edges = self.normalize_image(p_max)
301
+ edge_detection = cv2.ximgproc.createStructuredEdgeDetection(model_path)
302
+ orimap = edge_detection.computeOrientation(edges)
303
+ edges = edge_detection.edgesNms(edges, orimap)
304
+ edges = (edges * 255).astype('uint8')
305
+ edges = 255 - edges
306
+ edges = np.stack((edges,) * 3, axis=-1)
307
+ return edges
S2I/samer/seg_anything.py ADDED
@@ -0,0 +1,54 @@
1
+ import os
2
+ from PIL import Image
3
+ import numpy as np
4
+ from scipy.ndimage import binary_dilation
5
+
6
+ np.random.seed(200)
7
+ _palette = ((np.random.random((3 * 255)) * 0.7 + 0.3) * 255).astype(np.uint8).tolist()
8
+ _palette = [0, 0, 0] + _palette
9
+
10
+
11
+ def save_prediction(predict_mask, output_dir, file_name):
12
+ save_mask = Image.fromarray(predict_mask.astype(np.uint8))
13
+ save_mask = save_mask.convert(mode='P')
14
+ save_mask.putpalette(_palette)
15
+ save_mask.save(os.path.join(output_dir, file_name))
16
+
17
+
18
+ def colorize_mask(predict_mask):
19
+ save_mask = Image.fromarray(predict_mask.astype(np.uint8))
20
+ save_mask = save_mask.convert(mode='P')
21
+ save_mask.putpalette(_palette)
22
+ save_mask = save_mask.convert(mode='RGB')
23
+ return np.array(save_mask)
24
+
25
+
26
+ def draw_mask(img, mask, alpha=0.5, id_cnt=False):
27
+ img_mask = img
28
+ if id_cnt:
29
+ # very slow ~ 1s per image
30
+ obj_ids = np.unique(mask)
31
+ obj_ids = obj_ids[obj_ids != 0]
32
+
33
+ for ids in obj_ids:
34
+ # Overlay color on binary mask
35
+ if ids <= 255:
36
+ color = _palette[ids * 3:ids * 3 + 3]
37
+ else:
38
+ color = [0, 0, 0]
39
+ foreground = img * (1 - alpha) + np.ones_like(img) * alpha * np.array(color)
40
+ binary_mask = (mask == ids)
41
+
42
+ # Compose image
43
+ img_mask[binary_mask] = foreground[binary_mask]
44
+
45
+ cnt = binary_dilation(binary_mask, iterations=1) ^ binary_mask
46
+ img_mask[cnt, :] = 0
47
+ else:
48
+ binary_mask = (mask != 0)
49
+ cnt = binary_dilation(binary_mask, iterations=1) ^ binary_mask
50
+ foreground = img * (1 - alpha) + colorize_mask(mask) * alpha
51
+ img_mask[binary_mask] = foreground[binary_mask]
52
+ img_mask[cnt, :] = 0
53
+
54
+ return img_mask.astype(img.dtype)
S2I/samer/segment.py ADDED
@@ -0,0 +1,69 @@
1
+ import sys
2
+
3
+ sys.path.append("../../..")
4
+ sys.path.append("")
5
+ import cv2
6
+ import numpy as np
7
+ from S2I.samer.segmentor import Segmentor
8
+ from S2I.samer.transfer_tools import draw_outline, draw_points
9
+ from S2I.samer.seg_anything import draw_mask
10
+
11
+
12
+ class SegMent:
13
+ def __init__(self, sam_args):
14
+ self.sam = Segmentor(sam_args)
15
+ self.reference_objs_list = []
16
+ self.object_idx = 1
17
+ self.curr_idx = 1
18
+ self.origin_merged_mask = None # init by segment-everything or update
19
+ self.first_frame_mask = None
20
+
21
+ # debug
22
+ self.everything_points = []
23
+ self.everything_labels = []
24
+ print("SegTracker has been initialized")
25
+
26
+ def seg_acc_bbox(self, origin_frame: np.ndarray, bbox: np.ndarray, ):
27
+ # get interactive_mask
28
+ interactive_mask = self.sam.segment_with_box(origin_frame, bbox)[0]
29
+ refined_merged_mask = self.add_mask(interactive_mask)
30
+
31
+ # draw mask
32
+ masked_frame = draw_mask(origin_frame.copy(), refined_merged_mask)
33
+
34
+ # draw bbox
35
+ masked_frame = cv2.rectangle(masked_frame, bbox[0], bbox[1], (0, 0, 255))
36
+
37
+ return refined_merged_mask, masked_frame
38
+
39
+ def seg_acc_click(self, origin_frame: np.ndarray, coords: np.ndarray, modes: np.ndarray, multimask=True):
40
+ # get interactive_mask
41
+ interactive_mask = self.sam.segment_with_click(origin_frame, coords, modes, multimask)
42
+
43
+ refined_merged_mask = self.add_mask(interactive_mask)
44
+
45
+ # draw mask
46
+ masked_frame = draw_mask(origin_frame.copy(), refined_merged_mask)
47
+ masked_frame = draw_points(coords, modes, masked_frame)
48
+
49
+ # draw outline
50
+ masked_frame = draw_outline(interactive_mask, masked_frame)
51
+
52
+ return refined_merged_mask, masked_frame
53
+
54
+ def add_mask(self, interactive_mask: np.ndarray):
55
+ if self.origin_merged_mask is None:
56
+ self.origin_merged_mask = np.zeros(interactive_mask.shape, dtype=np.uint8)
57
+
58
+ refined_merged_mask = self.origin_merged_mask.copy()
59
+ refined_merged_mask[interactive_mask > 0] = self.curr_idx
60
+
61
+ return refined_merged_mask
62
+
63
+ def automatic_generate_mask(self, image):
64
+ masks = self.sam.automatic_segment(image)
65
+ return masks
66
+
67
+
68
+ if __name__ == '__main__':
69
+ pass
S2I/samer/segmentor.py ADDED
@@ -0,0 +1,103 @@
1
+ import torch
2
+ import numpy as np
3
+ from segment_anything import sam_model_registry, SamAutomaticMaskGenerator
4
+ from .automatic_mask_generator_prob import SamAutomaticMaskAndProbabilityGenerator
5
+
6
+
7
+ class Segmentor:
8
+ def __init__(self, sam_args):
9
+ """
10
+ sam_args:
11
+ sam_checkpoint: path of SAM checkpoint
12
+ generator_args: args for everything_generator
13
+ gpu_id: device
14
+ """
15
+ self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
16
+ self.sam = sam_model_registry[sam_args["model_type"]](checkpoint=sam_args["sam_checkpoint"])
17
+ self.sam.to(device=self.device)
18
+ # self.everything_generator = SamAutomaticMaskGenerator(model=self.sam, **sam_args['generator_args'])
19
+ self.automatic_generator = SamAutomaticMaskAndProbabilityGenerator(model=self.sam, **sam_args['generator_args'])
20
+ self.interactive_predictor = self.automatic_generator.predictor
21
+ self.have_embedded = False
22
+
23
+ @torch.no_grad()
24
+ def set_image(self, image):
25
+ # calculate the embedding only once per frame.
26
+ if not self.have_embedded:
27
+ self.interactive_predictor.set_image(image)
28
+ self.have_embedded = True
29
+
30
+ @torch.no_grad()
31
+ def interactive_predict(self, prompts, mode, multimask=True):
32
+ assert self.have_embedded, 'image embedding for sam need be set before predict.'
33
+
34
+ if mode == 'point':
35
+ masks, scores, logits = self.interactive_predictor.predict(point_coords=prompts['point_coords'],
36
+ point_labels=prompts['point_modes'],
37
+ multimask_output=multimask)
38
+ elif mode == 'mask':
39
+ masks, scores, logits = self.interactive_predictor.predict(mask_input=prompts['mask_prompt'],
40
+ multimask_output=multimask)
41
+ elif mode == 'point_mask':
42
+ masks, scores, logits = self.interactive_predictor.predict(point_coords=prompts['point_coords'],
43
+ point_labels=prompts['point_modes'],
44
+ mask_input=prompts['mask_prompt'],
45
+ multimask_output=multimask)
46
+
47
+ return masks, scores, logits
48
+
49
+ @torch.no_grad()
50
+ def automatic_segment(self, image):
51
+ masks = self.automatic_generator.generate(image)
52
+ return masks
53
+
54
+ @torch.no_grad()
55
+ def segment_with_click(self, origin_frame, coords, modes, multimask=True):
56
+ '''Segment the object selected by the click prompts.
57
+
58
+ Returns:
59
+ mask: uint8 one-hot mask of the selected object.
60
+ '''
61
+ self.set_image(origin_frame)
62
+
63
+ prompts = {
64
+ 'point_coords': coords,
65
+ 'point_modes': modes,
66
+ }
67
+ masks, scores, logits = self.interactive_predict(prompts, 'point', multimask)
68
+ mask, logit = masks[np.argmax(scores)], logits[np.argmax(scores), :, :]
69
+ prompts = {
70
+ 'point_coords': coords,
71
+ 'point_modes': modes,
72
+ 'mask_prompt': logit[None, :, :]
73
+ }
74
+ masks, scores, logits = self.interactive_predict(prompts, 'point_mask', multimask)
75
+
76
+ mask = masks[np.argmax(scores)]
77
+
78
+ return mask.astype(np.uint8)
79
+
80
+ def segment_with_box(self, origin_frame, bbox, reset_image=False):
81
+ if reset_image:
82
+ self.interactive_predictor.set_image(origin_frame)
83
+ else:
84
+ self.set_image(origin_frame)
85
+
86
+ masks, scores, logits = self.interactive_predictor.predict(
87
+ point_coords=None,
88
+ point_labels=None,
89
+ box=np.array([bbox[0][0], bbox[0][1], bbox[1][0], bbox[1][1]]),
90
+ multimask_output=True
91
+ )
92
+ mask, logit = masks[np.argmax(scores)], logits[np.argmax(scores), :, :]
93
+
94
+ masks, scores, logits = self.interactive_predictor.predict(
95
+ point_coords=None,
96
+ point_labels=None,
97
+ box=np.array([[bbox[0][0], bbox[0][1], bbox[1][0], bbox[1][1]]]),
98
+ mask_input=logit[None, :, :],
99
+ multimask_output=True
100
+ )
101
+ mask = masks[np.argmax(scores)]
102
+
103
+ return [mask]
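`Segmentor` wraps both the automatic generator and the interactive predictor; the click flow above runs two passes, first predicting from the points alone and then feeding the best logit back as a mask prompt. A minimal usage sketch, assuming a local SAM ViT-B checkpoint and that the `S2I.samer` package is importable (the paths and test image below are hypothetical):

```python
import cv2
import numpy as np
from S2I.samer.segmentor import Segmentor  # hypothetical import path into this repo

sam_args = {
    "model_type": "vit_b",                     # key into segment_anything's sam_model_registry
    "sam_checkpoint": "./ckpt/sam_vit_b.pth",  # hypothetical checkpoint location
    "generator_args": {},                      # use the generator defaults
}
segmentor = Segmentor(sam_args)

frame = cv2.cvtColor(cv2.imread("example.jpg"), cv2.COLOR_BGR2RGB)

# One positive click at (x=320, y=240); 1 = positive point, 0 = negative point.
coords = np.array([[320, 240]])
modes = np.array([1])
click_mask = segmentor.segment_with_click(frame, coords, modes)  # uint8 H x W mask

# Box prompt as [[x0, y0], [x1, y1]].
bbox = np.array([[100, 100], [400, 300]])
box_mask = segmentor.segment_with_box(frame, bbox)[0]
```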
S2I/samer/transfer_tools.py ADDED
@@ -0,0 +1,47 @@
1
+ import cv2
2
+ import numpy as np
3
+
4
+
5
+ def mask2bbox(mask):
6
+ if len(np.where(mask > 0)[0]) == 0:
7
+ print('no mask found')
8
+ return np.array([[0, 0], [0, 0]]).astype(np.int64)
9
+
10
+ x_ = np.sum(mask, axis=0)
11
+ y_ = np.sum(mask, axis=1)
12
+
13
+ x0 = np.min(np.nonzero(x_)[0])
14
+ x1 = np.max(np.nonzero(x_)[0])
15
+ y0 = np.min(np.nonzero(y_)[0])
16
+ y1 = np.max(np.nonzero(y_)[0])
17
+
18
+ return np.array([[x0, y0], [x1, y1]]).astype(np.int64)
19
+
20
+
21
+ def draw_outline(mask, frame):
22
+ _, binary_mask = cv2.threshold(mask, 0, 255, cv2.THRESH_BINARY)
23
+
24
+ contours, _ = cv2.findContours(binary_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
25
+
26
+ cv2.drawContours(frame, contours, -1, (0, 0, 255), 2)
27
+
28
+ return frame
29
+
30
+
31
+ def draw_points(points, modes, frame):
32
+ neg_points = points[np.argwhere(modes == 0)[:, 0]]
33
+ pos_points = points[np.argwhere(modes == 1)[:, 0]]
34
+
35
+ for i in range(len(neg_points)):
36
+ point = neg_points[i]
37
+ cv2.circle(frame, (point[0], point[1]), 8, (255, 80, 80), -1)
38
+
39
+ for i in range(len(pos_points)):
40
+ point = pos_points[i]
41
+ cv2.circle(frame, (point[0], point[1]), 8, (0, 153, 255), -1)
42
+
43
+ return frame
44
+
45
+
46
+ if __name__ == '__main__':
47
+ pass
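These helpers are small numpy/OpenCV utilities: `mask2bbox` returns the tight `[[x0, y0], [x1, y1]]` box around the non-zero region, while `draw_outline` and `draw_points` annotate a frame in place. A tiny self-contained check of `mask2bbox` (hypothetical import path, assuming the package is on the Python path):

```python
import numpy as np
from S2I.samer.transfer_tools import mask2bbox  # hypothetical import path

mask = np.zeros((10, 10), dtype=np.uint8)
mask[2:5, 3:8] = 1              # rows 2-4 and columns 3-7 are foreground

print(mask2bbox(mask))
# [[3 2]
#  [7 4]]   -> [[x0, y0], [x1, y1]] of the non-zero region
```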
app.py CHANGED
@@ -1,146 +1,327 @@
1
- import gradio as gr
2
  import numpy as np
 
 
 
 
 
3
  import random
4
- from diffusers import DiffusionPipeline
5
- import torch
6
 
7
- device = "cuda" if torch.cuda.is_available() else "cpu"
8
 
9
- if torch.cuda.is_available():
10
- torch.cuda.max_memory_allocated(device=device)
11
- pipe = DiffusionPipeline.from_pretrained("stabilityai/sdxl-turbo", torch_dtype=torch.float16, variant="fp16", use_safetensors=True)
12
- pipe.enable_xformers_memory_efficient_attention()
13
- pipe = pipe.to(device)
14
- else:
15
- pipe = DiffusionPipeline.from_pretrained("stabilityai/sdxl-turbo", use_safetensors=True)
16
- pipe = pipe.to(device)
17
 
18
- MAX_SEED = np.iinfo(np.int32).max
19
- MAX_IMAGE_SIZE = 1024
 
 
 
20
 
21
- def infer(prompt, negative_prompt, seed, randomize_seed, width, height, guidance_scale, num_inference_steps):
 
 
 
 
 
22
 
23
- if randomize_seed:
24
- seed = random.randint(0, MAX_SEED)
25
-
26
- generator = torch.Generator().manual_seed(seed)
27
 
28
- image = pipe(
29
- prompt = prompt,
30
- negative_prompt = negative_prompt,
31
- guidance_scale = guidance_scale,
32
- num_inference_steps = num_inference_steps,
33
- width = width,
34
- height = height,
35
- generator = generator
36
- ).images[0]
37
 
38
- return image
39
-
40
- examples = [
41
- "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k",
42
- "An astronaut riding a green horse",
43
- "A delicious ceviche cheesecake slice",
44
- ]
45
-
46
- css="""
47
- #col-container {
48
- margin: 0 auto;
49
- max-width: 520px;
50
- }
51
- """
52
 
53
- if torch.cuda.is_available():
54
- power_device = "GPU"
55
- else:
56
- power_device = "CPU"
 
57
 
58
- with gr.Blocks(css=css) as demo:
 
59
 
60
- with gr.Column(elem_id="col-container"):
61
- gr.Markdown(f"""
62
- # Text-to-Image Gradio Template
63
- Currently running on {power_device}.
64
- """)
65
-
66
- with gr.Row():
67
-
68
- prompt = gr.Text(
69
- label="Prompt",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
70
  show_label=False,
71
- max_lines=1,
72
- placeholder="Enter your prompt",
73
- container=False,
 
 
 
74
  )
75
-
76
- run_button = gr.Button("Run", scale=0)
77
-
78
- result = gr.Image(label="Result", show_label=False)
79
 
80
- with gr.Accordion("Advanced Settings", open=False):
81
-
82
- negative_prompt = gr.Text(
83
- label="Negative prompt",
84
- max_lines=1,
85
- placeholder="Enter a negative prompt",
86
- visible=False,
87
  )
88
-
89
- seed = gr.Slider(
90
- label="Seed",
91
- minimum=0,
92
- maximum=MAX_SEED,
93
- step=1,
94
- value=0,
 
 
 
95
  )
96
-
97
- randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
98
-
99
  with gr.Row():
100
-
101
- width = gr.Slider(
102
- label="Width",
103
- minimum=256,
104
- maximum=MAX_IMAGE_SIZE,
105
- step=32,
106
- value=512,
107
- )
108
-
109
- height = gr.Slider(
110
- label="Height",
111
- minimum=256,
112
- maximum=MAX_IMAGE_SIZE,
113
- step=32,
114
- value=512,
115
- )
116
 
117
- with gr.Row():
118
-
119
- guidance_scale = gr.Slider(
120
- label="Guidance scale",
121
- minimum=0.0,
122
- maximum=10.0,
123
- step=0.1,
124
- value=0.0,
125
- )
126
-
127
- num_inference_steps = gr.Slider(
128
- label="Number of inference steps",
129
- minimum=1,
130
- maximum=12,
131
- step=1,
132
- value=2,
133
- )
134
-
135
- gr.Examples(
136
- examples = examples,
137
- inputs = [prompt]
138
- )
139
-
140
- run_button.click(
141
- fn = infer,
142
- inputs = [prompt, negative_prompt, seed, randomize_seed, width, height, guidance_scale, num_inference_steps],
143
- outputs = [result]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
144
  )
 
 
 
145
 
146
- demo.queue().launch()
 
 
 
1
+ import os
2
  import numpy as np
3
+ import io
4
+ os.system("pip install gradio==4.29.0")
5
+ os.system("pip install opencv-python")
6
+ import cv2
7
+ import gradio as gr
8
  import random
9
+ import warnings
10
+ import spaces
11
+ from PIL import Image
12
+ from S2I import Sketch2ImageController, css, scripts
13
+
14
+
15
+ dark_mode_theme = """
16
+ function refresh() {
17
+ const url = new URL(window.location);
18
+
19
+ if (url.searchParams.get('__theme') !== 'dark') {
20
+ url.searchParams.set('__theme', 'dark');
21
+ window.location.href = url.href;
22
+ }
23
+ }
24
+ """
25
+
26
+ os.environ["TOKENIZERS_PARALLELISM"] = "false"
27
+ warnings.filterwarnings("ignore")
28
+ controller = Sketch2ImageController(gr)
29
+
30
+
31
+ def run_gpu(options, img_init, text_init, prompt_template_init, style_name_init, seeds_init, val_r_values_init, faster_init, model_name_init, clear_flag):
32
+ return controller.artwork(options, img_init, text_init, prompt_template_init, style_name_init, seeds_init, val_r_values_init, faster_init, model_name_init, clear_flag)
33
+
34
+ def run_cpu(options, img_init, text_init, prompt_template_init, style_name_init, seeds_init, val_r_values_init, faster_init, model_name_init, clear_flag):
35
+ return controller.artwork(options, img_init, text_init, prompt_template_init, style_name_init, seeds_init, val_r_values_init, faster_init, model_name_init, clear_flag)
36
+
37
+ def get_dark_mode():
38
+ return """
39
+ () => {
40
+ document.body.classList.toggle('dark');
41
+ }
42
+ """
43
+
44
+ def clear_session():
45
+ return gr.update(value=None), gr.update(value=None)
46
 
 
47
 
48
+ def assign_gpu(options, img_init, text_init, prompt_template_init, style_name_init, seeds_init, val_r_values_init, faster_init, model_name_init, clear_flag):
 
 
 
 
 
 
 
49
 
50
+ if options == 'GPU':
51
+ decorated_run = spaces.GPU(run_gpu)
52
+ return decorated_run(options, img_init, text_init, prompt_template_init, style_name_init, seeds_init, val_r_values_init, faster_init, model_name_init, clear_flag)
53
+ else:
54
+ return run_cpu(options, img_init, text_init, prompt_template_init, style_name_init, seeds_init, val_r_values_init, faster_init, model_name_init, clear_flag)
55
 
56
+ def read_temp_file(temp_file_wrapper):
57
+ name = temp_file_wrapper.name
58
+ with open(temp_file_wrapper.name, 'rb') as f:
59
+ # Read the content of the file
60
+ file_content = f.read()
61
+ return file_content, name
62
 
63
+ def convert_to_pencil_sketch(image):
64
+ if image is None:
65
+ raise ValueError(f"Image at path {image} could not be loaded.")
 
66
 
67
+ # Converting it into grayscale
68
+ gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
69
+
70
+ # Inverting the image
71
+ inverted_image = 255 - gray_image
72
+
73
+ # Blurring the image
74
+ blurred = cv2.GaussianBlur(inverted_image, (25, 25), 0)
75
+ inverted_blurred = 255 - blurred
76
+
77
+ # Creating the pencil sketch
78
+ pencil_sketch = cv2.divide(gray_image, inverted_blurred, scale=256.0)
79
+
80
+ return pencil_sketch
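`convert_to_pencil_sketch` is the standard colour-dodge sketch: dividing the grayscale image by the inverse of its blurred negative pushes smooth regions toward white, while edges, where the blur lags behind the original, stay dark. A tiny self-contained numeric check of the same trick (synthetic values, independent of the app):

```python
import cv2
import numpy as np

# A flat bright patch with one dark pixel standing in for an edge.
gray = np.array([[200, 200],
                 [200,  20]], dtype=np.uint8)
inverted_blurred = 255 - cv2.GaussianBlur(255 - gray, (3, 3), 0)

sketch = cv2.divide(gray, inverted_blurred, scale=256.0)
print(sketch)  # flat pixels saturate to ~255 (white paper); the dark pixel stays well below 255
```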
81
+
82
+ def get_meta_from_image(input_img, type_image):
83
+ if input_img is None:
84
+ return gr.update(value=None)
85
+
86
+ file_content, _ = read_temp_file(input_img)
87
 
88
+ # Read the image using Pillow
89
+ img = Image.open(io.BytesIO(file_content)).convert("RGB")
90
+ img_np = np.array(img)
91
 
92
+ if type_image == 'RGB':
93
+ sketch = convert_to_pencil_sketch(img_np)
94
+ processed_img = 255 - sketch
95
+ elif type_image == 'SKETCH':
96
+ processed_img = 255 - img_np
97
 
98
+ # Convert the processed image back to PIL Image
99
+ img_pil = Image.fromarray(processed_img.astype('uint8'))
100
 
101
+ return img_pil
102
+
103
+
104
+
105
+ with gr.Blocks(css=css) as demo:
106
+ gr.HTML(
107
+ """
108
+ <!DOCTYPE html>
109
+ <html lang="en">
110
+ <head>
111
+ <meta charset="UTF-8">
112
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
113
+ <title>S2I-Artwork Animation</title>
114
+ <style>
115
+
116
+ @keyframes blinkCursor {
117
+ from { border-right-color: rgba(255, 255, 255, 0.75); }
118
+ to { border-right-color: transparent; }
119
+ }
120
+
121
+
122
+
123
+ @keyframes fadeIn {
124
+ 0% { opacity: 0; transform: translateY(-10px); }
125
+ 100% { opacity: 1; transform: translateY(0); }
126
+ }
127
+
128
+ @keyframes bounce {
129
+ 0%, 20%, 50%, 80%, 100% {
130
+ transform: translateY(0);
131
+ }
132
+ 40% {
133
+ transform: translateY(-10px);
134
+ }
135
+ 60% {
136
+ transform: translateY(-5px);
137
+ }
138
+ }
139
+ .typewriter h1 {
140
+ overflow: hidden;
141
+ border-right: .15em solid rgba(255, 255, 255, 0.75);
142
+ white-space: nowrap;
143
+ margin: 0 auto;
144
+ letter-spacing: .15em;
145
+ animation:
146
+ zoomInOut 4s infinite;
147
+ }
148
+ .animated-heading {
149
+ animation: fadeIn 2s ease-in-out;
150
+ }
151
+
152
+ .animated-link {
153
+ display: inline-block;
154
+ animation: bounce 3s infinite;
155
+ }
156
+ </style>
157
+ </head>
158
+ <body>
159
+ <div>
160
+ <div class="typewriter">
161
+ <h1 style="display: flex; align-items: center; justify-content: center; margin-bottom: 10px; text-align: center;">
162
+ <img src="https://imgur.com/H2SLps2.png" alt="icon" style="margin-left: 10px; height: 30px;">
163
+ S2I-Artwork
164
+ <img src="https://imgur.com/cNMKSAy.png" alt="icon" style="margin-left: 10px; height: 30px;">:
165
+ Personalized Sketch-to-Art 🧨 Diffusion Models
166
+ <img src="https://imgur.com/yDnDd1p.png" alt="icon" style="margin-left: 10px; height: 30px;">
167
+ </h1>
168
+ </div>
169
+ <h3 class="animated-heading" style="text-align: center; margin-bottom: 10px;">Authors: Vo Nguyen An Tin, Nguyen Thiet Su</h3>
170
+ <h4 class="animated-heading" style="margin-bottom: 10px;">*This project is the fine-tuning task with LorA on large datasets included: COCO-2017, LHQ, Danbooru, LandScape and Mid-Journey V6</h4>
171
+ <h4 class="animated-heading" style="margin-bottom: 10px;">* We public 2 sketch2image-models-lora training on 30K and 60K steps with skip-connection and Transformers Super-Resolution variables</h4>
172
+ <h4 class="animated-heading" style="margin-bottom: 10px;">* The inference and demo time of model is faster, you can slowly in the first runtime, but after that, the time process over 1.5 ~ 2s</h4>
173
+ <h4 class="animated-heading" style="margin-bottom: 10px;">* View the full code project:
174
+ <a class="animated-link" href="https://github.com/aihacker111/S2I-Artwork-Sketch-to-Image/" target="_blank">GitHub Repository</a>
175
+ </h4>
176
+ <h4 class="animated-heading" style="margin-bottom: 10px;">
177
+ <a class="animated-link" href="https://github.com/aihacker111/S2I-Artwork-Sketch-to-Image/" target="_blank">
178
+ <img src="https://cdn.buymeacoffee.com/buttons/default-orange.png" alt="Buy Me A Coffee" height="41" width="100">
179
+ </a>
180
+ </h4>
181
+ </div>
182
+ </body>
183
+ </html>
184
+ """
185
+ )
186
+ with gr.Row(elem_id="main_row"):
187
+ with gr.Column(elem_id="column_input"):
188
+ gr.Markdown("## SKETCH", elem_id="input_header")
189
+ image = gr.Sketchpad(
190
+ type="pil",
191
+ height=512,
192
+ width=512,
193
+ min_width=512,
194
+ image_mode="RGBA",
195
  show_label=False,
196
+ mirror_webcam=False,
197
+ show_download_button=True,
198
+ elem_id='input_image',
199
+ brush=gr.Brush(colors=["#000000"], color_mode="fixed", default_size=4),
200
+ canvas_size=(1024, 1024),
201
+ layers=False
202
  )
203
+ input_image = gr.File(label='Input image')
 
 
 
204
 
205
+ download_sketch = gr.Button(
206
+ "Download sketch", scale=1, elem_id="download_sketch"
 
 
 
 
 
207
  )
208
+
209
+ with gr.Column(elem_id="column_output"):
210
+ gr.Markdown("## IMAGE GENERATE", elem_id="output_header")
211
+ result = gr.Image(
212
+ label="Result",
213
+ height=440,
214
+ width=440,
215
+ elem_id="output_image",
216
+ show_label=False,
217
+ show_download_button=True,
218
  )
 
 
 
219
  with gr.Row():
220
+ run_button = gr.Button("Generate 🪄", min_width=5, variant='primary')
221
+ randomize_seed = gr.Button(value='\U0001F3B2', variant='primary')
222
+ clear_button = gr.Button("Reset Sketch Session", min_width=10, variant='primary')
223
+ prompt = gr.Textbox(label="Personalized Text", value="", show_label=True)
224
+ with gr.Accordion("S2I Advances Option", open=True):
225
+ with gr.Row():
226
+ ui_mode = gr.Radio(
227
+ choices=["Light Mode", "Dark Mode"],
228
+ value="Light Mode",
229
+ label="Switch Light/Dark Mode UI",
230
+ interactive=True)
231
+ type_image = gr.Radio(
232
+ choices=["RGB", "SKETCH"],
233
+ value="SKETCH",
234
+ label="Type of Image (Color Image or Sketch Image)",
235
+ interactive=True)
236
+ input_type = gr.Radio(
237
+ choices=["live-sketch", "upload"],
238
+ value="live-sketch",
239
+ label="Type Sketch2Image models",
240
+ interactive=True)
241
+ style = gr.Dropdown(
242
+ label="Style",
243
+ choices=controller.STYLE_NAMES,
244
+ value=controller.DEFAULT_STYLE_NAME,
245
+ scale=1,
246
+ )
247
+ prompt_temp = gr.Textbox(
248
+ label="Prompt Style Template",
249
+ value=controller.styles[controller.DEFAULT_STYLE_NAME],
250
+ scale=2,
251
+ max_lines=1,
252
+ )
253
+ seed = gr.Textbox(label="Seed", value='42', scale=1, min_width=50)
254
+ zero_gpu_options = gr.Radio(
255
+ choices=["GPU", "CPU"],
256
+ value="GPU",
257
+ label="GPU & CPU Options Spaces",
258
+ interactive=True)
259
+ half_model = gr.Radio(
260
+ choices=["float32", "float16"],
261
+ value="float16",
262
+ label="Demo Speed",
263
+ interactive=True)
264
+ model_options = gr.Radio(
265
+ choices=["100k", "350k"],
266
+ value="350k",
267
+ label="Type Sketch2Image models",
268
+ interactive=True)
269
 
270
+ val_r = gr.Slider(
271
+ label="Sketch guidance: ",
272
+ show_label=True,
273
+ minimum=0,
274
+ maximum=1,
275
+ value=0.4,
276
+ step=0.01,
277
+ scale=3,
278
+ )
279
+
280
+ demo.load(None, None, None, js=scripts)
281
+ ui_mode.change(None, [], [], js=get_dark_mode())
282
+ randomize_seed.click(
283
+ lambda: random.randint(0, controller.MAX_SEED),
284
+ inputs=[],
285
+ outputs=seed,
286
+ queue=False,
287
+ api_name=False,
288
+ )
289
+ inputs = [zero_gpu_options, image, prompt, prompt_temp, style, seed, val_r, half_model, model_options, input_type]
290
+ outputs = [result, download_sketch]
291
+ prompt.submit(fn=assign_gpu, inputs=inputs, outputs=outputs, api_name=False)
292
+
293
+ input_image.change(
294
+ fn=get_meta_from_image,
295
+ inputs=[
296
+ input_image, type_image
297
+ ],
298
+ outputs=[
299
+ image
300
+ ]
301
+ )
302
+
303
+ style.change(
304
+ lambda x: controller.styles[x],
305
+ inputs=[style],
306
+ outputs=[prompt_temp],
307
+ queue=False,
308
+ api_name=False,
309
+ ).then(
310
+ fn=assign_gpu,
311
+ inputs=inputs,
312
+ outputs=outputs,
313
+ api_name=False,
314
+ )
315
+ clear_button.click(fn=clear_session, inputs=[], outputs=[image, result]).then(
316
+ fn=assign_gpu,
317
+ inputs=inputs,
318
+ outputs=outputs,
319
+ api_name=False,
320
  )
321
+ val_r.change(assign_gpu, inputs=inputs, outputs=outputs, queue=False, api_name=False)
322
+ run_button.click(fn=assign_gpu, inputs=inputs, outputs=outputs, api_name=False)
323
+ image.change(assign_gpu, inputs=inputs, outputs=outputs, queue=False, api_name=False)
324
 
325
+ if __name__ == '__main__':
326
+ demo.queue()
327
+ demo.launch(debug=True, share=False)
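The UI wiring above follows one pattern throughout: every control change (`style.change`, `val_r.change`, `image.change`, the clear button) funnels back into `assign_gpu` with the same `inputs`/`outputs` lists, and `.then()` chains the regeneration after the first update. A minimal standalone Gradio sketch of that chaining pattern (hypothetical components, not the real pipeline):

```python
import gradio as gr

def generate(text, style):
    # Stand-in for assign_gpu / the real Sketch2Image pipeline call.
    return f"[{style}] generated for: {text}"

with gr.Blocks() as mini_demo:
    prompt = gr.Textbox(label="Prompt")
    style = gr.Dropdown(choices=["Anime", "Comic"], value="Anime", label="Style")
    output = gr.Textbox(label="Output")

    # First refresh the prompt from the new style, then re-run generation,
    # mirroring style.change(...).then(fn=assign_gpu, ...) above.
    style.change(lambda s: f"{s} artwork of ...", inputs=[style], outputs=[prompt]).then(
        fn=generate, inputs=[prompt, style], outputs=[output]
    )

if __name__ == "__main__":
    mini_demo.launch()
```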
requirements.txt CHANGED
@@ -1,6 +1,87 @@
1
- accelerate
2
- diffusers
3
- invisible_watermark
4
- torch
5
- transformers
6
- xformers
1
+ accelerate==0.30.1
2
+ aiofiles==23.2.1
3
+ altair==5.3.0
4
+ annotated-types==0.7.0
5
+ anyio==4.4.0
6
+ attrs==23.2.0
7
+ certifi==2024.2.2
8
+ charset-normalizer==3.3.2
9
+ click==8.1.7
10
+ contourpy==1.2.1
11
+ cycler==0.12.1
12
+ diffusers==0.25.1
13
+ dnspython==2.6.1
14
+ email_validator==2.1.1
15
+ exceptiongroup==1.2.1
16
+ fastapi==0.111.0
17
+ fastapi-cli==0.0.4
18
+ ffmpy==0.3.2
19
+ filelock==3.14.0
20
+ fonttools==4.52.4
21
+ fsspec==2024.5.0
22
+ gradio==4.29.0
23
+ h11==0.14.0
24
+ httpcore==1.0.5
25
+ httptools==0.6.1
26
+ httpx==0.27.0
27
+ huggingface-hub==0.23.0
28
+ idna==3.7
29
+ importlib_metadata==7.1.0
30
+ importlib_resources==6.4.0
31
+ Jinja2==3.1.4
32
+ jsonschema==4.22.0
33
+ jsonschema-specifications==2023.12.1
34
+ kiwisolver==1.4.5
35
+ markdown-it-py==3.0.0
36
+ MarkupSafe==2.1.5
37
+ matplotlib==3.9.0
38
+ mdurl==0.1.2
39
+ mpmath==1.3.0
40
+ networkx==3.3
41
+ numpy==1.26.4
42
+ orjson==3.10.3
43
+ packaging==24.0
44
+ pandas==2.2.2
45
+ peft==0.11.1
46
+ pillow==10.3.0
47
+ psutil==5.9.8
48
+ pydantic==2.7.2
49
+ pydantic_core==2.18.3
50
+ pydub==0.25.1
51
+ Pygments==2.18.0
52
+ pyparsing==3.1.2
53
+ python-dateutil==2.9.0.post0
54
+ python-dotenv==1.0.1
55
+ python-multipart==0.0.9
56
+ pytz==2024.1
57
+ PyYAML==6.0.1
58
+ referencing==0.35.1
59
+ regex==2024.5.15
60
+ requests==2.32.0
61
+ rich==13.7.1
62
+ rpds-py==0.18.1
63
+ ruff==0.4.6
64
+ safetensors==0.4.3
65
+ semantic-version==2.10.0
66
+ shellingham==1.5.4
67
+ six==1.16.0
68
+ sniffio==1.3.1
69
+ starlette==0.37.2
70
+ sympy==1.12
71
+ tokenizers==0.19.1
72
+ tomlkit==0.12.0
73
+ toolz==0.12.1
74
+ torch==2.3.0
75
+ torchvision==0.18.0
76
+ tqdm==4.66.4
77
+ transformers==4.41.0
78
+ typer==0.12.3
79
+ typing_extensions==4.11.0
80
+ tzdata==2024.1
81
+ ujson==5.10.0
82
+ urllib3==2.2.1
83
+ uvicorn==0.30.0
84
+ uvloop==0.19.0
85
+ watchfiles==0.22.0
86
+ websockets==11.0.3
87
+ zipp==3.18.2