ysmao committed on
Commit
6bcf05c
1 Parent(s): 3609460

update with multiview incontext lora

Browse files
app.py CHANGED
@@ -1,68 +1,81 @@
1
- import gradio as gr
2
  import torch
 
 
 
3
  import spaces
 
4
  from diffusers import FluxInpaintPipeline
5
  from PIL import Image, ImageFile
6
 
7
- #ImageFile.LOAD_TRUNCATED_IMAGES = True
8
 
9
  # Initialize the pipeline
10
  pipe = FluxInpaintPipeline.from_pretrained(
11
- "black-forest-labs/FLUX.1-dev",
12
- torch_dtype=torch.bfloat16
13
  )
14
  pipe.to("cuda")
15
  pipe.load_lora_weights(
16
- "ali-vilab/In-Context-LoRA",
17
- weight_name="visual-identity-design.safetensors"
18
  )
19
 
20
- def square_center_crop(img, target_size=768):
21
- if img.mode in ('RGBA', 'P'):
22
- img = img.convert('RGB')
23
 
24
- width, height = img.size
25
- crop_size = min(width, height)
 
26
 
27
- left = (width - crop_size) // 2
28
- top = (height - crop_size) // 2
29
- right = left + crop_size
30
- bottom = top + crop_size
 
 
31
 
32
- img_cropped = img.crop((left, top, right, bottom))
33
- return img_cropped.resize((target_size, target_size), Image.Resampling.LANCZOS)
34
 
35
  def duplicate_horizontally(img):
36
  width, height = img.size
37
- if width != height:
38
- raise ValueError(f"Input image must be square, got {width}x{height}")
39
 
40
- new_image = Image.new('RGB', (width * 2, height))
41
  new_image.paste(img, (0, 0))
42
  new_image.paste(img, (width, 0))
43
- return new_image
44
 
45
- # Load the mask image
46
- mask = Image.open("mask_square.png")
 
 
 
 
 
 
 
47
 
48
- @spaces.GPU
49
- def generate(image, prompt_description, prompt_user, progress=gr.Progress(track_tqdm=True)):
50
- prompt_structure = "The two-panel image showcases the logo on the left and the application on the right, [LEFT] the left panel is showing "+prompt_description+" [RIGHT] this logo is applied to "
51
- prompt = prompt_structure + prompt_user
52
 
53
- cropped_image = square_center_crop(image)
54
- logo_dupli = duplicate_horizontally(cropped_image)
 
 
 
 
 
 
 
 
 
 
 
 
 
55
 
56
  out = pipe(
57
  prompt=prompt,
58
- image=logo_dupli,
59
- mask_image=mask,
60
  guidance_scale=3.5,
61
- height=768,
62
- width=1536,
63
  num_inference_steps=28,
64
  max_sequence_length=256,
65
- strength=1
66
  ).images[0]
67
 
68
  width, height = out.size
@@ -70,117 +83,71 @@ def generate(image, prompt_description, prompt_user, progress=gr.Progress(track_
70
  image_2 = out.crop((half_width, 0, width, height))
71
  return image_2, out
72
 
 
73
  with gr.Blocks() as demo:
74
- gr.Markdown("# Logo in Context")
75
- gr.Markdown("### [In-Context LoRA](https://huggingface.co/ali-vilab/In-Context-LoRA) + Image-to-Image + Inpainting, apply your logo to anything. diffusers implementation based on the [workflow by WizardWhitebeard/klinter](https://civitai.com/articles/8779)")
 
 
 
 
 
76
 
77
  with gr.Tab("Demo"):
78
  with gr.Row():
79
  with gr.Column():
80
  input_image = gr.Image(
81
- label="Upload Logo Image",
82
- type="pil",
83
- height=384
84
  )
85
  prompt_description = gr.Textbox(
86
- label="Describe your logo",
87
- placeholder="A Hugging Face emoji logo",
88
  )
89
  prompt_input = gr.Textbox(
90
- label="Where should the logo be applied?",
91
- placeholder="e.g., a coffee cup on a wooden table"
92
  )
93
  generate_btn = gr.Button("Generate Application", variant="primary")
94
-
95
  with gr.Column():
96
  output_image = gr.Image(label="Generated Application")
97
  output_side = gr.Image(label="Side by side")
98
-
99
  gr.Examples(
100
  examples=[
101
- ["huggingface.png", "A Hugging Face emoji logo", "An embroidered hat"],
102
- ["awesome.png", "An awesome face logo", "A tattoo on a leg"],
103
- ["dvd_logo.png", "A DVD logo", "a coconut, engraved logo on a green coconut"]
 
 
 
 
 
 
 
104
  ],
105
  inputs=[input_image, prompt_description, prompt_input],
106
  outputs=[output_image, output_side],
107
  fn=generate,
108
- cache_examples="lazy"
109
  )
110
-
111
  with gr.Row():
112
- gr.Markdown("""
 
113
  ### Instructions:
114
- 1. Upload a logo image (preferably square)
115
- 2. Describe where you'd like to see the logo applied
116
  3. Click 'Generate Application' and wait for the result
117
 
118
  Note: The generation process might take a few moments.
119
- """)
120
-
121
- with gr.Tab("🧨 diffusers implementation"):
122
- gr.Markdown("The way this works is combining the [IC LoRA](https://github.com/ali-vilab/In-Context-LoRA) with image-to-image + inpainting. Where the image on the left (the logo) is uploaded by the user, and the image on the right is masked and applied on the product by the LoRA. Based on the [ComfyUI workflow by WizardWhitebeard/klinter](https://civitai.com/articles/8779). Below is a diffusers implementation of the idea")
123
-
124
- gr.Code(language="python", value="""# Support functions
125
- def square_center_crop(img, target_size=768):
126
- if img.mode in ('RGBA', 'P'):
127
- img = img.convert('RGB')
128
-
129
- width, height = img.size
130
- crop_size = min(width, height)
131
-
132
- left = (width - crop_size) // 2
133
- top = (height - crop_size) // 2
134
- right = left + crop_size
135
- bottom = top + crop_size
136
-
137
- img_cropped = img.crop((left, top, right, bottom))
138
- return img_cropped.resize((target_size, target_size), Image.Resampling.LANCZOS)
139
-
140
- def duplicate_horizontally(img):
141
- width, height = img.size
142
- if width != height:
143
- raise ValueError(f"Input image must be square, got {width}x{height}")
144
-
145
- new_image = Image.new('RGB', (width * 2, height))
146
- new_image.paste(img, (0, 0))
147
- new_image.paste(img, (width, 0))
148
- return new_image"""
149
- )
150
-
151
- gr.Code(language="python", value="""import torch
152
- from diffusers import FluxInpaintPipeline
153
- from PIL import Image
154
-
155
- pipe = FluxInpaintPipeline.from_pretrained("black-forest-labs/FLUX.1-dev", torch_dtype=torch.bfloat16)
156
- pipe.to("cuda")
157
- pipe.load_lora_weights("ali-vilab/In-Context-LoRA", weight_name="visual-identity-design.safetensors")
158
-
159
- mask = load_image("mask_square.png")
160
- image = load_image("the_logo.png")
161
- cropped_image = square_center_crop(image) #crop the image you upload to square
162
- logo_dupli = duplicate_horizontally(cropped_image) #duplicate it so the right side can be masked
163
-
164
- prompt_structure = "The two-panel image showcases the logo of a brand, [LEFT] the left panel is showing the logo [RIGHT] the right panel has this logo applied to "
165
- prompt = prompt_structure + "an coconut, engraved logo on a green coconut"
166
- out = pipe(
167
- prompt=prompt,
168
- image=logo_dupli,
169
- mask_image=mask,
170
- guidance_scale=6,
171
- height=768,
172
- width=1536,
173
- num_inference_steps=28,
174
- max_sequence_length=256,
175
- strength=1
176
- ).images[0]"""
177
- )
178
-
179
  # Set up the click event
180
  generate_btn.click(
181
  fn=generate,
182
  inputs=[input_image, prompt_description, prompt_input],
183
- outputs=[output_image, output_side]
184
  )
185
 
186
- demo.launch()
 
 
1
  import torch
2
+
3
+ torch.jit.script = lambda f: f
4
+
5
  import spaces
6
+ import gradio as gr
7
  from diffusers import FluxInpaintPipeline
8
  from PIL import Image, ImageFile
9
 
10
+ # ImageFile.LOAD_TRUNCATED_IMAGES = True
11
 
12
  # Initialize the pipeline
13
  pipe = FluxInpaintPipeline.from_pretrained(
14
+ "black-forest-labs/FLUX.1-dev", torch_dtype=torch.bfloat16
 
15
  )
16
  pipe.to("cuda")
17
  pipe.load_lora_weights(
18
+ "ysmao/multiview-incontext",
19
+ weight_name="twoview-incontext-b01.safetensors",
20
  )
21
 
 
 
 
22
 
23
def fractional_resize_image(img, target_size=864, multiple_of=16):
    """Resize ``img`` so its longer side is about ``target_size`` pixels.

    The aspect ratio is preserved as closely as the size grid allows. Both
    output dimensions are snapped to a positive multiple of ``multiple_of``
    so that the result (and the side-by-side canvas built from it) can be
    handed to the inpainting pipeline as ``height``/``width`` without being
    rejected or silently resized.

    Args:
        img: PIL image to resize. "RGBA" and "P" images are converted to
            "RGB" first; other modes are passed through unchanged.
        target_size: Desired length, in pixels, of the longer side.
        multiple_of: Grid both output dimensions are rounded to. Pass 1 to
            disable snapping (dimensions are then rounded to the nearest
            pixel).

    Returns:
        A new PIL image resampled with LANCZOS.

    Raises:
        ValueError: If ``target_size`` or ``multiple_of`` is not positive.
    """
    if target_size <= 0 or multiple_of <= 0:
        raise ValueError(
            "target_size and multiple_of must be positive, "
            f"got {target_size} and {multiple_of}"
        )

    if img.mode in ("RGBA", "P"):
        img = img.convert("RGB")

    width, height = img.size
    scale_factor = target_size / max(width, height)

    def snap(length):
        # Round to the nearest grid step instead of truncating, and never go
        # below one step: very elongated inputs would otherwise collapse to a
        # 0-pixel side.
        # NOTE(review): 16 is the spatial granularity FLUX pipelines are
        # understood to need (8x VAE downscale times 2x2 latent packing) --
        # confirm against the installed diffusers version.
        steps = round(length * scale_factor / multiple_of)
        return max(multiple_of, steps * multiple_of)

    return img.resize((snap(width), snap(height)), Image.Resampling.LANCZOS)
33
 
 
 
34
 
def duplicate_horizontally(img):
    """Build the two-panel inpainting input and its mask from one image.

    Args:
        img: PIL image used for both panels.

    Returns:
        A ``(canvas, mask)`` tuple of "RGB" images, each twice as wide as
        ``img`` and equally tall. ``canvas`` holds ``img`` in the left and
        right halves. ``mask`` is black over the left half and white over
        the right half.
    """
    panel_w, panel_h = img.size
    canvas_size = (panel_w * 2, panel_h)

    # Same picture in both panels.
    canvas = Image.new("RGB", canvas_size)
    for x_offset in (0, panel_w):
        canvas.paste(img, (x_offset, 0))

    # Start from an all-white mask and black out the left panel.
    mask = Image.new("RGB", canvas_size, (255, 255, 255))
    mask.paste((0, 0, 0), (0, 0, panel_w, panel_h))

    return canvas, mask
51
 
 
 
 
 
52
 
53
+ @spaces.GPU(duration=120)
54
+ def generate(
55
+ image, prompt_description, prompt_user, progress=gr.Progress(track_tqdm=True)
56
+ ):
57
+ prompt_structure = (
58
+ "[TWO-VIEWS] This set of two images presents a scene from two different viewpoints. [IMAGE1] The first image shows "
59
+ + prompt_description
60
+ + " [IMAGE2] The second image shows the same room but in another viewpoint "
61
+ )
62
+ prompt = prompt_structure + prompt_user + "."
63
+
64
+ resized_image = fractional_resize_image(image)
65
+ image_twoview, mask_image = duplicate_horizontally(resized_image)
66
+
67
+ image_width, image_height = image_twoview.size
68
 
69
  out = pipe(
70
  prompt=prompt,
71
+ image=image_twoview,
72
+ mask_image=mask_image,
73
  guidance_scale=3.5,
74
+ height=image_height,
75
+ width=image_width,
76
  num_inference_steps=28,
77
  max_sequence_length=256,
78
+ strength=1,
79
  ).images[0]
80
 
81
  width, height = out.size
 
83
  image_2 = out.crop((half_width, 0, width, height))
84
  return image_2, out
85
 
86
+
87
  with gr.Blocks() as demo:
88
+ gr.Markdown("# MultiView in Context")
89
+ gr.Markdown(
90
+ "### [In-Context LoRA](https://huggingface.co/ali-vilab/In-Context-LoRA) + Image-to-Image + Inpainting. Diffusers implementation based on the [workflow by WizardWhitebeard/klinter](https://civitai.com/articles/8779)"
91
+ )
92
+ gr.Markdown(
93
+ "### Using [MultiView In-Context LoRA](https://huggingface.co/ysmao/multiview-incontext)"
94
+ )
95
 
96
  with gr.Tab("Demo"):
97
  with gr.Row():
98
  with gr.Column():
99
  input_image = gr.Image(
100
+ label="Upload Source Image", type="pil", height=384
 
 
101
  )
102
  prompt_description = gr.Textbox(
103
+ label="Describe the source image",
104
+ placeholder="a living room with a sofa set with cushions, side tables with table lamps, a flat screen television on a table, houseplants, wall hangings, electric lights, and a carpet on the floor",
105
  )
106
  prompt_input = gr.Textbox(
107
+ label="Any additional description to the new viewpoint?",
108
+ placeholder="",
109
  )
110
  generate_btn = gr.Button("Generate Application", variant="primary")
111
+
112
  with gr.Column():
113
  output_image = gr.Image(label="Generated Application")
114
  output_side = gr.Image(label="Side by side")
115
+
116
  gr.Examples(
117
  examples=[
118
+ [
119
+ "livingroom_fluxdev.jpg",
120
+ "a living room with a sofa set with cushions, side tables with table lamps, a flat screen television on a table, houseplants, wall hangings, electric lights, and a carpet on the floor",
121
+ "",
122
+ ],
123
+ [
124
+ "bedroom_fluxdev.jpg",
125
+ "a bedroom with a bed, dresser, and window. The bed is covered with a blanket and pillows, and there is a carpet on the floor. The walls are adorned with photo frames, and the windows have curtains. Through the window, we can see trees outside.",
126
+ "",
127
+ ],
128
  ],
129
  inputs=[input_image, prompt_description, prompt_input],
130
  outputs=[output_image, output_side],
131
  fn=generate,
132
+ cache_examples="lazy",
133
  )
134
+
135
  with gr.Row():
136
+ gr.Markdown(
137
+ """
138
  ### Instructions:
139
+ 1. Upload a source image
140
+ 2. Describe the source image
141
  3. Click 'Generate Application' and wait for the result
142
 
143
  Note: The generation process might take a few moments.
144
+ """
145
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
146
  # Set up the click event
147
  generate_btn.click(
148
  fn=generate,
149
  inputs=[input_image, prompt_description, prompt_input],
150
+ outputs=[output_image, output_side],
151
  )
152
 
153
+ demo.launch()
awesome.png DELETED
Binary file (89.4 kB)
 
bedroom_fluxdev.jpg ADDED
dvd_logo.png DELETED
Binary file (37.5 kB)
 
huggingface.png DELETED
Binary file (81.7 kB)
 
livingroom_fluxdev.jpg ADDED
wikipedia_logo.png DELETED
Binary file (379 kB)