nigeljw commited on
Commit
5091a12
1 Parent(s): 651e443

Added better latent space navigation and also added example masks and input

Browse files
app.py CHANGED
@@ -23,6 +23,7 @@ modelIndex = 0
23
  outpaintPipeline = None
24
  oldLatentWalk = None
25
  activeLatents = None
 
26
 
27
  def GenerateNewLatentsForInference():
28
  global latents, oldLatents
@@ -35,6 +36,7 @@ def GenerateNewLatentsForInference():
35
  latents = torch.randn(latentsSize, device=device, dtype=torch.float16)
36
  else:
37
  latents = torch.randn(latentsSize, device=device)
 
38
 
39
  def InitializeOutpainting():
40
  print("Initializing Outpainting")
@@ -63,14 +65,14 @@ def Slerp(start, end, alpha):
63
  second = torch.sin(alpha*omega)/sinOmega
64
  return first.unsqueeze(1)*start + second.unsqueeze(1)*end
65
 
66
- def Diffuse(latentWalk, staticLatents, generatorSeed, inputImage, mask, pauseInference, prompt, negativePrompt, guidanceScale, numInferenceSteps):
67
  global lastImage, lastSeed, generator, oldLatentWalk, activeLatents
68
 
69
  if mask is None or pauseInference is True:
70
  return lastImage
71
 
72
- if staticLatents is False:
73
- GenerateNewLatentsForInference()
74
 
75
  if oldLatentWalk != latentWalk:
76
  activeLatents = Slerp(oldLatents, latents, latentWalk)
@@ -106,42 +108,56 @@ print("Initializing Gradio Interface")
106
 
107
  defaultMask = Image.open("assets/masks/diamond.png")
108
  numInfStepsDesc = "A higher value generally increases quality, but reduces the frames per second of the output stream."
109
- staticLatentsDesc = "This setting increases the frame to frame determinism of the generation. If this is disabled, then the inference will take continuous large walks across the latent space between frames."
110
  generatorSeedDesc = "Identical seeds allow for persistent scene generation between runs, and changing the seed will take a static large walk across the latent space to better control and alter the generation of scene content especially when large aberrations exist in the reconstruction."
111
  promptDesc = "This text will condition the generation of the scene to help guide the content creation."
112
  negPromptDesc = "This text will help deter the generation from converging towards reconstructing the elements described in the text."
113
  outputText = "This inferred imagery expands the field of view from the masked area of the input camera feed."
114
  latentWalkDesc = "This allows you to walk short spans across the latent space with relatively continuous gradients."
115
 
 
 
 
 
 
 
 
 
116
  with gradio.Blocks(live=True) as ux:
117
  gradio.Markdown("This generative machine learning demonstration streams stable diffusion outpainting inference live from your camera on your computer or phone to expand your local reality and create an alternate world. High quality frame to frame determinism is a hard problem to solve for latent diffusion models as the generation is inherently relative to input noise distributions for the latents, and many factors such as the inherent Bayer noise from the camera images as well as anything that is altered between camera images (such as focus, white balance, etc) causes non-determinism between frames. Some methods apply spatiotemporal attention, but this demonstration focuses on the control over the input latents to navigate the latent space. **Increase the lighting of your physical scene from your camera's perspective, and avoid self shadows of scene content, to improve the quality and consistency of the scene generation.**")
118
  with gradio.Row():
119
  with gradio.Column():
120
- staticLatents = gradio.Checkbox(label="Static Latents", info=staticLatentsDesc, value=True)
121
  inputImage = gradio.Image(label="Input Feed", source="webcam", shape=[512,512], streaming=True)
122
  mask = gradio.Image(label="Mask", type="pil", value=defaultMask)
123
- prompt = gradio.Textbox(label="Prompt", info=promptDesc, placeholder="A person in a room with colored hair", lines=3)
124
  negativePrompt = gradio.Textbox(label="Negative Prompt", info=negPromptDesc, placeholder="Facial hair", lines=3)
125
  guidanceScale = gradio.Slider(label="Guidance Scale", info="A higher value causes the generation to be more relative to the text prompt conditioning.", maximum=100, minimum=1, value=7.5, step= 0.1)
126
  numInferenceSteps = gradio.Slider(label="Number of Inference Steps", info=numInfStepsDesc, maximum=100, minimum=1, value=20, step=1)
127
  generatorSeed = gradio.Slider(label="Generator Seed", info=generatorSeedDesc, maximum=10000, minimum=1, value=lastSeed, step=1)
128
  #modelIndex = gradio.Dropdown(modelNames, label="Model", value="runwayml/stable-diffusion-inpainting")
129
- inputImage.style(full_width=True)
130
 
131
  with gradio.Column():
132
- #generateLatents = gradio.Button(value="Generate New Latents")
133
- latentWalk = gradio.Slider(label="Latent Walk", info=latentWalkDesc, maximum=1.0, minimum=0.0, value=0.0)
 
134
  outputImage = gradio.Image(label="Extrapolated Field of View")
135
  pauseInference = gradio.Checkbox(label="Pause Inference", value=False)
136
 
137
- inferenceInputs = [latentWalk, staticLatents, generatorSeed, inputImage, mask, pauseInference, prompt, negativePrompt, guidanceScale, numInferenceSteps]
138
- #generateLatents.click(GenerateNewLatentsForInference)
139
  inputImage.change(fn=Diffuse, inputs=inferenceInputs, outputs=outputImage, show_progress=False)
140
 
141
- #inputExamples = gradio.Examples([["assets/masks/diamond.png"],
142
- # ["assets/masks/sphere.png"],
143
- # ["assets/masks/square.png"]],
144
- # inputs=inferenceInputs,)
 
 
 
 
 
145
 
146
  gradio.Markdown("This demonstration should initialize automatically from the default values, and run relatively well, but if the output is not an ideal reconstruction of your physical local space from your camera's perspective, then you should adjust the generator seed to take large walks across the latent space. In addition, the static latents can be disabled to continuously walk the latent space, and then it can be set to static again when a better region of the embedded space is found, but this will increase frame to frame non-determinism. You can also condition the generation using prompts to reinforce or change aspects of the scene. **If you see a black image instead of a generated output image, then you are running into the safety checker.** This can trigger inconsistently even when the generated content is purely PG. If this happens, then increase the lighting of the scene and also increase the number of inference steps to improve the generated prediction to reduce the likelihood of the safety checker triggering a false positive.")
147
 
 
23
  outpaintPipeline = None
24
  oldLatentWalk = None
25
  activeLatents = None
26
+ oldLatents = None
27
 
28
  def GenerateNewLatentsForInference():
29
  global latents, oldLatents
 
36
  latents = torch.randn(latentsSize, device=device, dtype=torch.float16)
37
  else:
38
  latents = torch.randn(latentsSize, device=device)
39
+ return 0
40
 
41
  def InitializeOutpainting():
42
  print("Initializing Outpainting")
 
65
  second = torch.sin(alpha*omega)/sinOmega
66
  return first.unsqueeze(1)*start + second.unsqueeze(1)*end
67
 
68
+ def Diffuse(latentWalk, generatorSeed, inputImage, mask, prompt, negativePrompt, guidanceScale, numInferenceSteps):
69
  global lastImage, lastSeed, generator, oldLatentWalk, activeLatents
70
 
71
  if mask is None or pauseInference is True:
72
  return lastImage
73
 
74
+ #if staticLatents is False:
75
+ # GenerateNewLatentsForInference()
76
 
77
  if oldLatentWalk != latentWalk:
78
  activeLatents = Slerp(oldLatents, latents, latentWalk)
 
108
 
109
  defaultMask = Image.open("assets/masks/diamond.png")
110
  numInfStepsDesc = "A higher value generally increases quality, but reduces the frames per second of the output stream."
111
+ #staticLatentsDesc = "This setting increases the frame to frame determinism of the generation. If this is disabled, then the inference will take continuous large walks across the latent space between frames."
112
  generatorSeedDesc = "Identical seeds allow for persistent scene generation between runs, and changing the seed will take a static large walk across the latent space to better control and alter the generation of scene content especially when large aberrations exist in the reconstruction."
113
  promptDesc = "This text will condition the generation of the scene to help guide the content creation."
114
  negPromptDesc = "This text will help deter the generation from converging towards reconstructing the elements described in the text."
115
  outputText = "This inferred imagery expands the field of view from the masked area of the input camera feed."
116
  latentWalkDesc = "This allows you to walk short spans across the latent space with relatively continuous gradients."
117
 
118
+ examplePrompt1 = "A person in a room with colored hair"
119
+ examplePrompt2 = "People in a room with colored hair"
120
+ examplePrompt3 = "A man on a beach with long hair"
121
+ examplePrompt4 = "A woman on a beach with long hair"
122
+ examplePrompt5 = "A panda eating bamboo"
123
+ examplePrompt6 = "A family together in a room"
124
+ examplePrompt7 = "A family together outside with colored hair"
125
+
126
  with gradio.Blocks(live=True) as ux:
127
  gradio.Markdown("This generative machine learning demonstration streams stable diffusion outpainting inference live from your camera on your computer or phone to expand your local reality and create an alternate world. High quality frame to frame determinism is a hard problem to solve for latent diffusion models as the generation is inherently relative to input noise distributions for the latents, and many factors such as the inherent Bayer noise from the camera images as well as anything that is altered between camera images (such as focus, white balance, etc) causes non-determinism between frames. Some methods apply spatiotemporal attention, but this demonstration focuses on the control over the input latents to navigate the latent space. **Increase the lighting of your physical scene from your camera's perspective, and avoid self shadows of scene content, to improve the quality and consistency of the scene generation.**")
128
  with gradio.Row():
129
  with gradio.Column():
130
+ #staticLatents = gradio.Checkbox(label="Static Latents", info=staticLatentsDesc, value=True, interactive=True)
131
  inputImage = gradio.Image(label="Input Feed", source="webcam", shape=[512,512], streaming=True)
132
  mask = gradio.Image(label="Mask", type="pil", value=defaultMask)
133
+ prompt = gradio.Textbox(label="Prompt", info=promptDesc, placeholder=examplePrompt1, lines=3)
134
  negativePrompt = gradio.Textbox(label="Negative Prompt", info=negPromptDesc, placeholder="Facial hair", lines=3)
135
  guidanceScale = gradio.Slider(label="Guidance Scale", info="A higher value causes the generation to be more relative to the text prompt conditioning.", maximum=100, minimum=1, value=7.5, step= 0.1)
136
  numInferenceSteps = gradio.Slider(label="Number of Inference Steps", info=numInfStepsDesc, maximum=100, minimum=1, value=20, step=1)
137
  generatorSeed = gradio.Slider(label="Generator Seed", info=generatorSeedDesc, maximum=10000, minimum=1, value=lastSeed, step=1)
138
  #modelIndex = gradio.Dropdown(modelNames, label="Model", value="runwayml/stable-diffusion-inpainting")
139
+ #inputImage.style(full_width=True)
140
 
141
  with gradio.Column():
142
+ gradio.Markdown("The navigation will attempt to continuously loiter in its current location in the embedded space if no input variables change. If you click **Generate New Latents**, then it will preserve the current active latents in the walk, create a new set of random latents, and reset the **Latent Walk** value so that you can walk to a new location.")
143
+ generateLatents = gradio.Button(value="Generate New Latents")
144
+ latentWalk = gradio.Slider(label="Latent Walk", info=latentWalkDesc, maximum=1.0, minimum=0.0, value=0.0, interactive=True)
145
  outputImage = gradio.Image(label="Extrapolated Field of View")
146
  pauseInference = gradio.Checkbox(label="Pause Inference", value=False)
147
 
148
+ inferenceInputs = [latentWalk, generatorSeed, inputImage, mask, prompt, negativePrompt, guidanceScale, numInferenceSteps]
149
+ generateLatents.click(GenerateNewLatentsForInference, outputs=latentWalk)
150
  inputImage.change(fn=Diffuse, inputs=inferenceInputs, outputs=outputImage, show_progress=False)
151
 
152
+ examples = [[1.0, 1234, "assets/input/man.png", "assets/masks/diamond.png", examplePrompt1, "", 7.5, 20],
153
+ [0.5, 2048, "assets/input/people.jpg", "assets/masks/star.png", examplePrompt2, "", 7.5, 15],
154
+ [0.3, 8192, "assets/input/man.png", "assets/masks/sphere.png", examplePrompt3, "", 7.5, 25],
155
+ [0.7, 1024, "assets/input/woman.jpg", "assets/masks/spiral.png", examplePrompt4, "", 7.5, 15],
156
+ [1.0, 512, "assets/input/man.png", "assets/masks/square.png", examplePrompt5, "", 7.5, 10],
157
+ [0.1, 256, "assets/input/family.jpg", "assets/masks/wave.png", examplePrompt6, "", 11.5, 30],
158
+ [0.9, 9999, "assets/input/family.jpg", "assets/masks/maze.png", examplePrompt7, "", 17.5, 35],]
159
+
160
+ inputExamples = gradio.Examples(examples, inputs=inferenceInputs, outputs=outputImage, fn=Diffuse)
161
 
162
  gradio.Markdown("This demonstration should initialize automatically from the default values, and run relatively well, but if the output is not an ideal reconstruction of your physical local space from your camera's perspective, then you should adjust the generator seed to take large walks across the latent space. In addition, the static latents can be disabled to continuously walk the latent space, and then it can be set to static again when a better region of the embedded space is found, but this will increase frame to frame non-determinism. You can also condition the generation using prompts to reinforce or change aspects of the scene. **If you see a black image instead of a generated output image, then you are running into the safety checker.** This can trigger inconsistently even when the generated content is purely PG. If this happens, then increase the lighting of the scene and also increase the number of inference steps to improve the generated prediction to reduce the likelihood of the safety checker triggering a false positive.")
163
 
assets/input/family.jpg ADDED
assets/input/man.png ADDED
assets/input/people.jpg ADDED
assets/input/woman.jpg ADDED
assets/masks/maze.png ADDED
assets/masks/spiral.png ADDED
assets/masks/star.png ADDED
assets/masks/wave.png ADDED