Spaces:

fffiloni
/

RB-Modulation

Running on A100

App Files Files Community

fffiloni committed on Aug 31, 2024

Commit

8020398

verified ·

1 Parent(s): c1fff88

Update app.py

Browse files

Files changed (1) hide show

app.py +96 -65

app.py CHANGED Viewed

@@ -1,6 +1,7 @@
 import sys
 import os
 from pathlib import Path
 # Add the StableCascade and CSD directories to the Python path
 app_dir = Path(__file__).parent
@@ -130,17 +131,12 @@ models_rbm = core.Models(
 )
 models_rbm.generator.eval().requires_grad_(False)
-def infer(style_description, ref_style_file, caption):
-    height=1024
-    width=1024
-    batch_size=1
-    output_file='output.png'
-    stage_c_latent_shape, stage_b_latent_shape = calculate_latent_sizes(height, width, batch_size=batch_size)
-    extras.sampling_configs['cfg'] = 4
-    extras.sampling_configs['shift'] = 2
     extras.sampling_configs['timesteps'] = 20
     extras.sampling_configs['t_start'] = 1.0
@@ -149,66 +145,101 @@ def infer(style_description, ref_style_file, caption):
     extras_b.sampling_configs['timesteps'] = 10
     extras_b.sampling_configs['t_start'] = 1.0
-    ref_style = resize_image(PIL.Image.open(ref_style_file).convert("RGB")).unsqueeze(0).expand(batch_size, -1, -1, -1).to(device)
-    batch = {'captions': [caption] * batch_size}
-    batch['style'] = ref_style
-    x0_style_forward = models_rbm.effnet(extras.effnet_preprocess(ref_style.to(device)))
-    conditions = core.get_conditions(batch, models_rbm, extras, is_eval=True, is_unconditional=False, eval_image_embeds=True, eval_style=True, eval_csd=False)
-    unconditions = core.get_conditions(batch, models_rbm, extras, is_eval=True, is_unconditional=True, eval_image_embeds=False)
-    conditions_b = core_b.get_conditions(batch, models_b, extras_b, is_eval=True, is_unconditional=False)
-    unconditions_b = core_b.get_conditions(batch, models_b, extras_b, is_eval=True, is_unconditional=True)
     if low_vram:
-        # The sampling process uses more vram, so we offload everything except two modules to the cpu.
         models_to(models_rbm, device="cpu", excepts=["generator", "previewer"])
-    # Stage C reverse process.
-    sampling_c = extras.gdf.sample(
-        models_rbm.generator, conditions, stage_c_latent_shape,
-        unconditions, device=device,
-        **extras.sampling_configs,
-        x0_style_forward=x0_style_forward,
-        apply_pushforward=False, tau_pushforward=8,
-        num_iter=3, eta=0.1, tau=20, eval_csd=True,
-        extras=extras, models=models_rbm,
-        lam_style=1, lam_txt_alignment=1.0,
-        use_ddim_sampler=True,
-    )
-    for (sampled_c, _, _) in tqdm(sampling_c, total=extras.sampling_configs['timesteps']):
-        sampled_c = sampled_c
-    # Stage B reverse process.
-    with torch.no_grad(), torch.cuda.amp.autocast(dtype=torch.bfloat16):
-        conditions_b['effnet'] = sampled_c
-        unconditions_b['effnet'] = torch.zeros_like(sampled_c)
-        sampling_b = extras_b.gdf.sample(
-            models_b.generator, conditions_b, stage_b_latent_shape,
-            unconditions_b, device=device, **extras_b.sampling_configs,
         )
-        for (sampled_b, _, _) in tqdm(sampling_b, total=extras_b.sampling_configs['timesteps']):
-            sampled_b = sampled_b
-        sampled = models_b.stage_a.decode(sampled_b).float()
-    sampled = torch.cat([
-        torch.nn.functional.interpolate(ref_style.cpu(), size=(height, width)),
-        sampled.cpu(),
-    ], dim=0)
-    # Remove the batch dimension and keep only the generated image
-    sampled = sampled[1]  # This selects the generated image, discarding the reference style image
-    # Ensure the tensor is in [C, H, W] format
-    if sampled.dim() == 3 and sampled.shape[0] == 3:
-        sampled_image = T.ToPILImage()(sampled)  # Convert tensor to PIL image
-        sampled_image.save(output_file)  # Save the image as a PNG
-    else:
-        raise ValueError(f"Expected tensor of shape [3, H, W] but got {sampled.shape}")
-    return output_file  # Return the path to the saved image
 import gradio as gr

 import sys
 import os
 from pathlib import Path
+import gc
 # Add the StableCascade and CSD directories to the Python path
 app_dir = Path(__file__).parent
 )
 models_rbm.generator.eval().requires_grad_(False)
def reset_inference_state():
    """Restore sampling settings and model placement, then free memory.

    Rewrites entries of ``extras.sampling_configs`` and
    ``extras_b.sampling_configs`` to fixed values, moves the models according
    to ``low_vram``, and finally releases unused Python and CUDA memory.

    The function only mutates module-level objects in place (it never rebinds
    them), so no ``global`` declaration is needed. It returns nothing.
    """
    # Reset sampling configurations
    extras.sampling_configs['cfg'] = 5
    extras.sampling_configs['shift'] = 1
    extras.sampling_configs['timesteps'] = 20
    extras.sampling_configs['t_start'] = 1.0
    extras_b.sampling_configs['timesteps'] = 10
    extras_b.sampling_configs['t_start'] = 1.0
    # Move models back to initial state
    if low_vram:
        models_to(models_rbm, device="cpu", excepts=["generator", "previewer"])
        models_b.generator.to("cpu")
    else:
        models_to(models_rbm, device="cuda")
        models_b.generator.to("cuda")
    # Collect garbage first: tensors that are only kept alive by reference
    # cycles must be freed before empty_cache() can hand their blocks back
    # to the driver. Doing it the other way round leaves them cached.
    gc.collect()
    torch.cuda.empty_cache()
def infer(style_description, ref_style_file, caption):
    """Generate one 1024x1024 image from a caption and a reference style image.

    Args:
        style_description: Accepted for interface compatibility; not read by
            this function.
        ref_style_file: Reference style image, opened with ``PIL.Image.open``.
        caption: Text prompt placed in the batch as the caption.

    Returns:
        The path of the saved PNG file (``'output.png'``).

    Raises:
        ValueError: If the decoded image is not a ``[3, H, W]`` tensor.

    ``reset_inference_state()`` is always called on exit, whether the run
    succeeded or raised.
    """
    try:
        height = 1024
        width = 1024
        batch_size = 1
        output_file = 'output.png'
        stage_c_latent_shape, stage_b_latent_shape = calculate_latent_sizes(height, width, batch_size=batch_size)

        # Sampling settings for the two stages.
        extras.sampling_configs['cfg'] = 4
        extras.sampling_configs['shift'] = 2
        extras.sampling_configs['timesteps'] = 20
        extras.sampling_configs['t_start'] = 1.0
        extras_b.sampling_configs['cfg'] = 1.1
        extras_b.sampling_configs['shift'] = 1
        extras_b.sampling_configs['timesteps'] = 10
        extras_b.sampling_configs['t_start'] = 1.0

        # Load the reference style image and build the conditioning batch.
        ref_style = resize_image(PIL.Image.open(ref_style_file).convert("RGB")).unsqueeze(0).expand(batch_size, -1, -1, -1).to(device)
        batch = {'captions': [caption] * batch_size}
        batch['style'] = ref_style
        x0_style_forward = models_rbm.effnet(extras.effnet_preprocess(ref_style.to(device)))

        conditions = core.get_conditions(batch, models_rbm, extras, is_eval=True, is_unconditional=False, eval_image_embeds=True, eval_style=True, eval_csd=False)
        unconditions = core.get_conditions(batch, models_rbm, extras, is_eval=True, is_unconditional=True, eval_image_embeds=False)
        conditions_b = core_b.get_conditions(batch, models_b, extras_b, is_eval=True, is_unconditional=False)
        unconditions_b = core_b.get_conditions(batch, models_b, extras_b, is_eval=True, is_unconditional=True)

        if low_vram:
            # The sampling process uses more vram, so we offload everything except two modules to the cpu.
            models_to(models_rbm, device="cpu", excepts=["generator", "previewer"])

        # Stage C reverse process.
        sampling_c = extras.gdf.sample(
            models_rbm.generator, conditions, stage_c_latent_shape,
            unconditions, device=device,
            **extras.sampling_configs,
            x0_style_forward=x0_style_forward,
            apply_pushforward=False, tau_pushforward=8,
            num_iter=3, eta=0.1, tau=20, eval_csd=True,
            extras=extras, models=models_rbm,
            lam_style=1, lam_txt_alignment=1.0,
            use_ddim_sampler=True,
        )
        # Exhaust the sampler; the loop variable keeps the last yielded latent.
        for (sampled_c, _, _) in tqdm(sampling_c, total=extras.sampling_configs['timesteps']):
            pass

        # Stage B reverse process.
        with torch.no_grad(), torch.cuda.amp.autocast(dtype=torch.bfloat16):
            conditions_b['effnet'] = sampled_c
            unconditions_b['effnet'] = torch.zeros_like(sampled_c)
            sampling_b = extras_b.gdf.sample(
                models_b.generator, conditions_b, stage_b_latent_shape,
                unconditions_b, device=device, **extras_b.sampling_configs,
            )
            # Same pattern as Stage C: only the final latent is needed.
            for (sampled_b, _, _) in tqdm(sampling_b, total=extras_b.sampling_configs['timesteps']):
                pass
            decoded = models_b.stage_a.decode(sampled_b).float()

        # batch_size is 1, so the generated image is the first (only) entry.
        # The reference style image is not part of the output, so there is no
        # need to resize and concatenate it just to discard it again.
        generated = decoded.cpu()[0]

        # Ensure the tensor is in [C, H, W] format
        if generated.dim() == 3 and generated.shape[0] == 3:
            # ToPILImage scales float tensors by 255 and casts to uint8, so
            # values outside [0, 1] would wrap around; clamp them first.
            generated_image = T.ToPILImage()(generated.clamp(0, 1))
            generated_image.save(output_file)  # Save the image as a PNG
        else:
            raise ValueError(f"Expected tensor of shape [3, H, W] but got {generated.shape}")

        return output_file  # Return the path to the saved image
    finally:
        # Reset the state after inference, regardless of success or failure
        reset_inference_state()
 import gradio as gr