Spaces:
Running
on
Zero
Running
on
Zero
update with multiview incontext lora
Browse files
- app.py +82 -115
- awesome.png +0 -0
- bedroom_fluxdev.jpg +0 -0
- dvd_logo.png +0 -0
- huggingface.png +0 -0
- livingroom_fluxdev.jpg +0 -0
- wikipedia_logo.png +0 -0
app.py
CHANGED
@@ -1,68 +1,81 @@
|
|
1 |
-
import gradio as gr
|
2 |
import torch
|
|
|
|
|
|
|
3 |
import spaces
|
|
|
4 |
from diffusers import FluxInpaintPipeline
|
5 |
from PIL import Image, ImageFile
|
6 |
|
7 |
-
#ImageFile.LOAD_TRUNCATED_IMAGES = True
|
8 |
|
9 |
# Initialize the pipeline
|
10 |
pipe = FluxInpaintPipeline.from_pretrained(
|
11 |
-
"black-forest-labs/FLUX.1-dev",
|
12 |
-
torch_dtype=torch.bfloat16
|
13 |
)
|
14 |
pipe.to("cuda")
|
15 |
pipe.load_lora_weights(
|
16 |
-
"
|
17 |
-
weight_name="
|
18 |
)
|
19 |
|
20 |
-
def square_center_crop(img, target_size=768):
|
21 |
-
if img.mode in ('RGBA', 'P'):
|
22 |
-
img = img.convert('RGB')
|
23 |
|
24 |
-
|
25 |
-
|
|
|
26 |
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
|
|
|
|
31 |
|
32 |
-
img_cropped = img.crop((left, top, right, bottom))
|
33 |
-
return img_cropped.resize((target_size, target_size), Image.Resampling.LANCZOS)
|
34 |
|
35 |
def duplicate_horizontally(img):
|
36 |
width, height = img.size
|
37 |
-
if width != height:
|
38 |
-
raise ValueError(f"Input image must be square, got {width}x{height}")
|
39 |
|
40 |
-
new_image = Image.new(
|
41 |
new_image.paste(img, (0, 0))
|
42 |
new_image.paste(img, (width, 0))
|
43 |
-
return new_image
|
44 |
|
45 |
-
|
46 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
47 |
|
48 |
-
@spaces.GPU
|
49 |
-
def generate(image, prompt_description, prompt_user, progress=gr.Progress(track_tqdm=True)):
|
50 |
-
prompt_structure = "The two-panel image showcases the logo on the left and the application on the right, [LEFT] the left panel is showing "+prompt_description+" [RIGHT] this logo is applied to "
|
51 |
-
prompt = prompt_structure + prompt_user
|
52 |
|
53 |
-
|
54 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
55 |
|
56 |
out = pipe(
|
57 |
prompt=prompt,
|
58 |
-
image=
|
59 |
-
mask_image=
|
60 |
guidance_scale=3.5,
|
61 |
-
height=
|
62 |
-
width=
|
63 |
num_inference_steps=28,
|
64 |
max_sequence_length=256,
|
65 |
-
strength=1
|
66 |
).images[0]
|
67 |
|
68 |
width, height = out.size
|
@@ -70,117 +83,71 @@ def generate(image, prompt_description, prompt_user, progress=gr.Progress(track_
|
|
70 |
image_2 = out.crop((half_width, 0, width, height))
|
71 |
return image_2, out
|
72 |
|
|
|
73 |
with gr.Blocks() as demo:
|
74 |
-
gr.Markdown("#
|
75 |
-
gr.Markdown(
|
|
|
|
|
|
|
|
|
|
|
76 |
|
77 |
with gr.Tab("Demo"):
|
78 |
with gr.Row():
|
79 |
with gr.Column():
|
80 |
input_image = gr.Image(
|
81 |
-
label="Upload
|
82 |
-
type="pil",
|
83 |
-
height=384
|
84 |
)
|
85 |
prompt_description = gr.Textbox(
|
86 |
-
label="Describe
|
87 |
-
placeholder="
|
88 |
)
|
89 |
prompt_input = gr.Textbox(
|
90 |
-
label="
|
91 |
-
placeholder="
|
92 |
)
|
93 |
generate_btn = gr.Button("Generate Application", variant="primary")
|
94 |
-
|
95 |
with gr.Column():
|
96 |
output_image = gr.Image(label="Generated Application")
|
97 |
output_side = gr.Image(label="Side by side")
|
98 |
-
|
99 |
gr.Examples(
|
100 |
examples=[
|
101 |
-
[
|
102 |
-
|
103 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
104 |
],
|
105 |
inputs=[input_image, prompt_description, prompt_input],
|
106 |
outputs=[output_image, output_side],
|
107 |
fn=generate,
|
108 |
-
cache_examples="lazy"
|
109 |
)
|
110 |
-
|
111 |
with gr.Row():
|
112 |
-
gr.Markdown(
|
|
|
113 |
### Instructions:
|
114 |
-
1. Upload a
|
115 |
-
2. Describe
|
116 |
3. Click 'Generate Application' and wait for the result
|
117 |
|
118 |
Note: The generation process might take a few moments.
|
119 |
-
"""
|
120 |
-
|
121 |
-
with gr.Tab("🧨 diffusers implementation"):
|
122 |
-
gr.Markdown("The way this works is combining the [IC LoRA](https://github.com/ali-vilab/In-Context-LoRA) with image-to-image + inpainting. Where the image on the left (the logo) is uploaded by the user, and the image on the right is masked and applied on the product by the LoRA. Based on the [ComfyUI workflow by WizardWhitebeard/klinter](https://civitai.com/articles/8779). Below is a diffusers implementation of the idea")
|
123 |
-
|
124 |
-
gr.Code(language="python", value="""# Support functions
|
125 |
-
def square_center_crop(img, target_size=768):
|
126 |
-
if img.mode in ('RGBA', 'P'):
|
127 |
-
img = img.convert('RGB')
|
128 |
-
|
129 |
-
width, height = img.size
|
130 |
-
crop_size = min(width, height)
|
131 |
-
|
132 |
-
left = (width - crop_size) // 2
|
133 |
-
top = (height - crop_size) // 2
|
134 |
-
right = left + crop_size
|
135 |
-
bottom = top + crop_size
|
136 |
-
|
137 |
-
img_cropped = img.crop((left, top, right, bottom))
|
138 |
-
return img_cropped.resize((target_size, target_size), Image.Resampling.LANCZOS)
|
139 |
-
|
140 |
-
def duplicate_horizontally(img):
|
141 |
-
width, height = img.size
|
142 |
-
if width != height:
|
143 |
-
raise ValueError(f"Input image must be square, got {width}x{height}")
|
144 |
-
|
145 |
-
new_image = Image.new('RGB', (width * 2, height))
|
146 |
-
new_image.paste(img, (0, 0))
|
147 |
-
new_image.paste(img, (width, 0))
|
148 |
-
return new_image"""
|
149 |
-
)
|
150 |
-
|
151 |
-
gr.Code(language="python", value="""import torch
|
152 |
-
from diffusers import FluxInpaintPipeline
|
153 |
-
from PIL import Image
|
154 |
-
|
155 |
-
pipe = FluxInpaintPipeline.from_pretrained("black-forest-labs/FLUX.1-dev", torch_dtype=torch.bfloat16)
|
156 |
-
pipe.to("cuda")
|
157 |
-
pipe.load_lora_weights("ali-vilab/In-Context-LoRA", weight_name="visual-identity-design.safetensors")
|
158 |
-
|
159 |
-
mask = load_image("mask_square.png")
|
160 |
-
image = load_image("the_logo.png")
|
161 |
-
cropped_image = square_center_crop(image) #crop the image you upload to square
|
162 |
-
logo_dupli = duplicate_horizontally(cropped_image) #duplicate it so the right side can be masked
|
163 |
-
|
164 |
-
prompt_structure = "The two-panel image showcases the logo of a brand, [LEFT] the left panel is showing the logo [RIGHT] the right panel has this logo applied to "
|
165 |
-
prompt = prompt_structure + "an coconut, engraved logo on a green coconut"
|
166 |
-
out = pipe(
|
167 |
-
prompt=prompt,
|
168 |
-
image=logo_dupli,
|
169 |
-
mask_image=mask,
|
170 |
-
guidance_scale=6,
|
171 |
-
height=768,
|
172 |
-
width=1536,
|
173 |
-
num_inference_steps=28,
|
174 |
-
max_sequence_length=256,
|
175 |
-
strength=1
|
176 |
-
).images[0]"""
|
177 |
-
)
|
178 |
-
|
179 |
# Set up the click event
|
180 |
generate_btn.click(
|
181 |
fn=generate,
|
182 |
inputs=[input_image, prompt_description, prompt_input],
|
183 |
-
outputs=[output_image, output_side]
|
184 |
)
|
185 |
|
186 |
-
demo.launch()
|
|
|
|
|
1 |
import torch
|
2 |
+
|
3 |
+
torch.jit.script = lambda f: f
|
4 |
+
|
5 |
import spaces
|
6 |
+
import gradio as gr
|
7 |
from diffusers import FluxInpaintPipeline
|
8 |
from PIL import Image, ImageFile
|
9 |
|
10 |
+
# ImageFile.LOAD_TRUNCATED_IMAGES = True
|
11 |
|
12 |
# Initialize the pipeline
|
13 |
# Build the FLUX.1-dev inpainting pipeline with bfloat16 weights and move it
# onto the CUDA device.
pipe = FluxInpaintPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-dev",
    torch_dtype=torch.bfloat16,
)
pipe.to("cuda")

# Load the two-view in-context LoRA weights into the pipeline.
pipe.load_lora_weights(
    "ysmao/multiview-incontext",
    weight_name="twoview-incontext-b01.safetensors",
)
|
21 |
|
|
|
|
|
|
|
22 |
|
23 |
+
def fractional_resize_image(img, target_size=864, multiple=16):
    """Resize ``img`` so its longer side is about ``target_size`` pixels.

    The aspect ratio is preserved as closely as possible while each side is
    snapped to the nearest multiple of ``multiple``.

    Args:
        img: PIL image to resize. ``RGBA`` and ``P`` images are converted to
            ``RGB`` first.
        target_size: Desired length, in pixels, of the longer side.
        multiple: Granularity each output side is snapped to. Defaults to 16.
            NOTE(review): 16 is chosen because the FLUX pipelines expect
            height/width divisible by 16 and otherwise warn and resize (or
            reject the input in older diffusers releases) - confirm against
            the installed diffusers version.

    Returns:
        The resized PIL image (LANCZOS resampling).

    Raises:
        ValueError: If ``multiple`` is smaller than 1.
    """
    if multiple < 1:
        raise ValueError(f"multiple must be >= 1, got {multiple}")

    if img.mode in ("RGBA", "P"):
        img = img.convert("RGB")

    width, height = img.size
    scale_factor = target_size / max(width, height)

    def _snap(side):
        # Round to the nearest multiple instead of truncating, so float error
        # (e.g. 863.999...) cannot drop the longer side a whole step, and
        # clamp to one multiple so a very thin image never collapses to a
        # zero-sized side, which Image.resize rejects.
        return max(multiple, round(side * scale_factor / multiple) * multiple)

    return img.resize((_snap(width), _snap(height)), Image.Resampling.LANCZOS)
|
33 |
|
|
|
|
|
34 |
|
35 |
def duplicate_horizontally(img):
    """Tile ``img`` twice side by side and build a matching two-tone mask.

    Args:
        img: PIL image used for both halves of the canvas.

    Returns:
        A ``(canvas, mask)`` tuple of RGB images, each twice as wide as
        ``img`` and equally tall. ``canvas`` holds ``img`` in both halves;
        ``mask`` is black over the left half and white over the right half.
        NOTE(review): presumably white marks the region the inpainting
        pipeline regenerates - confirm against the pipeline documentation.
    """
    single_w, single_h = img.size
    canvas_size = (single_w * 2, single_h)

    # Start from an all-white mask, then black out the left half.
    mask = Image.new("RGB", canvas_size, (255, 255, 255))
    mask.paste(Image.new("RGB", (single_w, single_h), (0, 0, 0)), (0, 0))

    # Paste the source image into the left and right halves of the canvas.
    canvas = Image.new("RGB", canvas_size)
    for x_offset in (0, single_w):
        canvas.paste(img, (x_offset, 0))

    return canvas, mask
|
51 |
|
|
|
|
|
|
|
|
|
52 |
|
53 |
+
@spaces.GPU(duration=120)
|
54 |
+
def generate(
|
55 |
+
image, prompt_description, prompt_user, progress=gr.Progress(track_tqdm=True)
|
56 |
+
):
|
57 |
+
prompt_structure = (
|
58 |
+
"[TWO-VIEWS] This set of two images presents a scene from two different viewpoints. [IMAGE1] The first image shows "
|
59 |
+
+ prompt_description
|
60 |
+
+ " [IMAGE2] The second image shows the same room but in another viewpoint "
|
61 |
+
)
|
62 |
+
prompt = prompt_structure + prompt_user + "."
|
63 |
+
|
64 |
+
resized_image = fractional_resize_image(image)
|
65 |
+
image_twoview, mask_image = duplicate_horizontally(resized_image)
|
66 |
+
|
67 |
+
image_width, image_height = image_twoview.size
|
68 |
|
69 |
out = pipe(
|
70 |
prompt=prompt,
|
71 |
+
image=image_twoview,
|
72 |
+
mask_image=mask_image,
|
73 |
guidance_scale=3.5,
|
74 |
+
height=image_height,
|
75 |
+
width=image_width,
|
76 |
num_inference_steps=28,
|
77 |
max_sequence_length=256,
|
78 |
+
strength=1,
|
79 |
).images[0]
|
80 |
|
81 |
width, height = out.size
|
|
|
83 |
image_2 = out.crop((half_width, 0, width, height))
|
84 |
return image_2, out
|
85 |
|
86 |
+
|
87 |
with gr.Blocks() as demo:
|
88 |
+
gr.Markdown("# MultiView in Context")
|
89 |
+
gr.Markdown(
|
90 |
+
"### [In-Context LoRA](https://huggingface.co/ali-vilab/In-Context-LoRA) + Image-to-Image + Inpainting. Diffusers implementation based on the [workflow by WizardWhitebeard/klinter](https://civitai.com/articles/8779)"
|
91 |
+
)
|
92 |
+
gr.Markdown(
|
93 |
+
"### Using [MultiView In-Context LoRA](https://huggingface.co/ysmao/multiview-incontext)"
|
94 |
+
)
|
95 |
|
96 |
with gr.Tab("Demo"):
|
97 |
with gr.Row():
|
98 |
with gr.Column():
|
99 |
input_image = gr.Image(
|
100 |
+
label="Upload Source Image", type="pil", height=384
|
|
|
|
|
101 |
)
|
102 |
prompt_description = gr.Textbox(
|
103 |
+
label="Describe the source image",
|
104 |
+
placeholder="a living room with a sofa set with cushions, side tables with table lamps, a flat screen television on a table, houseplants, wall hangings, electric lights, and a carpet on the floor",
|
105 |
)
|
106 |
prompt_input = gr.Textbox(
|
107 |
+
label="Any additional description to the new viewpoint?",
|
108 |
+
placeholder="",
|
109 |
)
|
110 |
generate_btn = gr.Button("Generate Application", variant="primary")
|
111 |
+
|
112 |
with gr.Column():
|
113 |
output_image = gr.Image(label="Generated Application")
|
114 |
output_side = gr.Image(label="Side by side")
|
115 |
+
|
116 |
gr.Examples(
|
117 |
examples=[
|
118 |
+
[
|
119 |
+
"livingroom_fluxdev.jpg",
|
120 |
+
"a living room with a sofa set with cushions, side tables with table lamps, a flat screen television on a table, houseplants, wall hangings, electric lights, and a carpet on the floor",
|
121 |
+
"",
|
122 |
+
],
|
123 |
+
[
|
124 |
+
"bedroom_fluxdev.jpg",
|
125 |
+
"a bedroom with a bed, dresser, and window. The bed is covered with a blanket and pillows, and there is a carpet on the floor. The walls are adorned with photo frames, and the windows have curtains. Through the window, we can see trees outside.",
|
126 |
+
"",
|
127 |
+
],
|
128 |
],
|
129 |
inputs=[input_image, prompt_description, prompt_input],
|
130 |
outputs=[output_image, output_side],
|
131 |
fn=generate,
|
132 |
+
cache_examples="lazy",
|
133 |
)
|
134 |
+
|
135 |
with gr.Row():
|
136 |
+
gr.Markdown(
|
137 |
+
"""
|
138 |
### Instructions:
|
139 |
+
1. Upload a source image
|
140 |
+
2. Describe the source image
|
141 |
3. Click 'Generate Application' and wait for the result
|
142 |
|
143 |
Note: The generation process might take a few moments.
|
144 |
+
"""
|
145 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
146 |
# Set up the click event
|
147 |
generate_btn.click(
|
148 |
fn=generate,
|
149 |
inputs=[input_image, prompt_description, prompt_input],
|
150 |
+
outputs=[output_image, output_side],
|
151 |
)
|
152 |
|
153 |
+
demo.launch()
|
awesome.png
DELETED
Binary file (89.4 kB)
|
|
bedroom_fluxdev.jpg
ADDED
dvd_logo.png
DELETED
Binary file (37.5 kB)
|
|
huggingface.png
DELETED
Binary file (81.7 kB)
|
|
livingroom_fluxdev.jpg
ADDED
wikipedia_logo.png
DELETED
Binary file (379 kB)
|
|