Spaces:

fffiloni
/

RB-Modulation

Sleeping

App Files Community

fffiloni committed on Sep 2

Commit

2c0e7f7

•

1 Parent(s): 241d1e2

Update app.py

Browse files

Files changed (1) hide show

app.py +84 -75

app.py CHANGED Viewed

@@ -126,89 +126,97 @@ models_rbm.generator.eval().requires_grad_(False)
 sam_model = LangSAM()
-def infer(ref_style_file, style_description, caption):
     global models_rbm, models_b, device
     if low_vram:
         models_to(models_rbm, device=device)
     try:
-        caption = f"{caption} in {style_description}"
-        height=1024
-        width=1024
-        batch_size=1
-        output_file='output.png'
-        stage_c_latent_shape, stage_b_latent_shape = calculate_latent_sizes(height, width, batch_size=batch_size)
-        extras.sampling_configs['cfg'] = 4
-        extras.sampling_configs['shift'] = 2
-        extras.sampling_configs['timesteps'] = 20
-        extras.sampling_configs['t_start'] = 1.0
-        extras_b.sampling_configs['cfg'] = 1.1
-        extras_b.sampling_configs['shift'] = 1
-        extras_b.sampling_configs['timesteps'] = 10
-        extras_b.sampling_configs['t_start'] = 1.0
-        ref_style = resize_image(PIL.Image.open(ref_style_file).convert("RGB")).unsqueeze(0).expand(batch_size, -1, -1, -1).to(device)
-        batch = {'captions': [caption] * batch_size}
-        batch['style'] = ref_style
-        x0_style_forward = models_rbm.effnet(extras.effnet_preprocess(ref_style.to(device)))
-        conditions = core.get_conditions(batch, models_rbm, extras, is_eval=True, is_unconditional=False, eval_image_embeds=True, eval_style=True, eval_csd=False)
-        unconditions = core.get_conditions(batch, models_rbm, extras, is_eval=True, is_unconditional=True, eval_image_embeds=False)
-        conditions_b = core_b.get_conditions(batch, models_b, extras_b, is_eval=True, is_unconditional=False)
-        unconditions_b = core_b.get_conditions(batch, models_b, extras_b, is_eval=True, is_unconditional=True)
-        if low_vram:
-            # The sampling process uses more vram, so we offload everything except two modules to the cpu.
-            models_to(models_rbm, device="cpu", excepts=["generator", "previewer"])
-        # Stage C reverse process.
-        sampling_c = extras.gdf.sample(
-            models_rbm.generator, conditions, stage_c_latent_shape,
-            unconditions, device=device,
-            **extras.sampling_configs,
-            x0_style_forward=x0_style_forward,
-            apply_pushforward=False, tau_pushforward=8,
-            num_iter=3, eta=0.1, tau=20, eval_csd=True,
-            extras=extras, models=models_rbm,
-            lam_style=1, lam_txt_alignment=1.0,
-            use_ddim_sampler=True,
-        )
-        for (sampled_c, _, _) in tqdm(sampling_c, total=extras.sampling_configs['timesteps']):
-            sampled_c = sampled_c
-        # Stage B reverse process.
-        with torch.no_grad(), torch.cuda.amp.autocast(dtype=torch.bfloat16):
-            conditions_b['effnet'] = sampled_c
-            unconditions_b['effnet'] = torch.zeros_like(sampled_c)
-            sampling_b = extras_b.gdf.sample(
-                models_b.generator, conditions_b, stage_b_latent_shape,
-                unconditions_b, device=device, **extras_b.sampling_configs,
-            )
-            for (sampled_b, _, _) in tqdm(sampling_b, total=extras_b.sampling_configs['timesteps']):
-                sampled_b = sampled_b
-            sampled = models_b.stage_a.decode(sampled_b).float()
-        sampled = torch.cat([
-            torch.nn.functional.interpolate(ref_style.cpu(), size=(height, width)),
-            sampled.cpu(),
-        ], dim=0)
-        # Remove the batch dimension and keep only the generated image
-        sampled = sampled[1]  # This selects the generated image, discarding the reference style image
-        # Ensure the tensor is in [C, H, W] format
-        if sampled.dim() == 3 and sampled.shape[0] == 3:
-            sampled_image = T.ToPILImage()(sampled)  # Convert tensor to PIL image
-            sampled_image.save(output_file)  # Save the image as a PNG
-        else:
-            raise ValueError(f"Expected tensor of shape [3, H, W] but got {sampled.shape}")
-        return output_file  # Return the path to the saved image
     finally:
         # Clear CUDA cache
@@ -324,10 +332,11 @@ def infer_compo(style_description, ref_style_file, caption, ref_sub_file):
 def run(style_reference_image, style_description, subject_prompt, subject_reference, use_subject_ref):
     result = None
     if use_subject_ref is True:
         result = infer_compo(style_description, style_reference_image, subject_prompt, subject_reference)
     else:
-        result = infer(style_reference_image, style_description, subject_prompt)
     return result
 def show_hide_subject_image_component(use_subject_ref):

 sam_model = LangSAM()
+def infer(ref_style_file, style_description, caption, progress):
     global models_rbm, models_b, device
     if low_vram:
         models_to(models_rbm, device=device)
     try:
+        with progress:
+            caption = f"{caption} in {style_description}"
+            height=1024
+            width=1024
+            batch_size=1
+            output_file='output.png'
+            stage_c_latent_shape, stage_b_latent_shape = calculate_latent_sizes(height, width, batch_size=batch_size)
+            extras.sampling_configs['cfg'] = 4
+            extras.sampling_configs['shift'] = 2
+            extras.sampling_configs['timesteps'] = 20
+            extras.sampling_configs['t_start'] = 1.0
+            extras_b.sampling_configs['cfg'] = 1.1
+            extras_b.sampling_configs['shift'] = 1
+            extras_b.sampling_configs['timesteps'] = 10
+            extras_b.sampling_configs['t_start'] = 1.0
+            progress(0.1, "Loading style reference image")
+            ref_style = resize_image(PIL.Image.open(ref_style_file).convert("RGB")).unsqueeze(0).expand(batch_size, -1, -1, -1).to(device)
+            batch = {'captions': [caption] * batch_size}
+            batch['style'] = ref_style
+            progress(0.2, "Processing style reference image")
+            x0_style_forward = models_rbm.effnet(extras.effnet_preprocess(ref_style.to(device)))
+            progress(0.3, "Generating conditions")
+            conditions = core.get_conditions(batch, models_rbm, extras, is_eval=True, is_unconditional=False, eval_image_embeds=True, eval_style=True, eval_csd=False)
+            unconditions = core.get_conditions(batch, models_rbm, extras, is_eval=True, is_unconditional=True, eval_image_embeds=False)
+            conditions_b = core_b.get_conditions(batch, models_b, extras_b, is_eval=True, is_unconditional=False)
+            unconditions_b = core_b.get_conditions(batch, models_b, extras_b, is_eval=True, is_unconditional=True)
+            if low_vram:
+                # The sampling process uses more vram, so we offload everything except two modules to the cpu.
+                models_to(models_rbm, device="cpu", excepts=["generator", "previewer"])
+            progress(0.4, "Starting Stage C reverse process")
+            # Stage C reverse process.
+            sampling_c = extras.gdf.sample(
+                models_rbm.generator, conditions, stage_c_latent_shape,
+                unconditions, device=device,
+                **extras.sampling_configs,
+                x0_style_forward=x0_style_forward,
+                apply_pushforward=False, tau_pushforward=8,
+                num_iter=3, eta=0.1, tau=20, eval_csd=True,
+                extras=extras, models=models_rbm,
+                lam_style=1, lam_txt_alignment=1.0,
+                use_ddim_sampler=True,
+            )
+            for (sampled_c, _, _) in progress.track(tqdm(sampling_c, total=extras.sampling_configs['timesteps']), description="Stage C reverse process"):
+                sampled_c = sampled_c
+            progress(0.7, "Starting Stage B reverse process")
+            # Stage B reverse process.
+            with torch.no_grad(), torch.cuda.amp.autocast(dtype=torch.bfloat16):
+                conditions_b['effnet'] = sampled_c
+                unconditions_b['effnet'] = torch.zeros_like(sampled_c)
+                sampling_b = extras_b.gdf.sample(
+                    models_b.generator, conditions_b, stage_b_latent_shape,
+                    unconditions_b, device=device, **extras_b.sampling_configs,
+                )
+                for (sampled_b, _, _) in progress.track(tqdm(sampling_b, total=extras_b.sampling_configs['timesteps']), description="Stage B reverse process"):
+                    sampled_b = sampled_b
+                sampled = models_b.stage_a.decode(sampled_b).float()
+            progress(0.9, "Finalizing the output image")
+            sampled = torch.cat([
+                torch.nn.functional.interpolate(ref_style.cpu(), size=(height, width)),
+                sampled.cpu(),
+            ], dim=0)
+            # Remove the batch dimension and keep only the generated image
+            sampled = sampled[1]  # This selects the generated image, discarding the reference style image
+            # Ensure the tensor is in [C, H, W] format
+            if sampled.dim() == 3 and sampled.shape[0] == 3:
+                sampled_image = T.ToPILImage()(sampled)  # Convert tensor to PIL image
+                sampled_image.save(output_file)  # Save the image as a PNG
+            else:
+                raise ValueError(f"Expected tensor of shape [3, H, W] but got {sampled.shape}")
+            progress(1.0, "Inference complete")
+            return output_file  # Return the path to the saved image
     finally:
         # Clear CUDA cache
 def run(style_reference_image, style_description, subject_prompt, subject_reference, use_subject_ref):
     result = None
+    progress = gr.Progress(track_tqdm=True)
     if use_subject_ref is True:
         result = infer_compo(style_description, style_reference_image, subject_prompt, subject_reference)
     else:
+        result = infer(style_reference_image, style_description, subject_prompt, progress)
     return result
 def show_hide_subject_image_component(use_subject_ref):