Files changed (2) hide show
  1. app.py +153 -150
  2. requirements.txt +36 -7
app.py CHANGED
@@ -1,150 +1,153 @@
1
- import os
2
-
3
- from PIL import Image
4
- import gradio as gr
5
-
6
- from utils.gradio_utils import *
7
- from utils.direction_utils import *
8
- from utils.generate_synthetic import *
9
-
10
-
11
# Dropdown entry that switches a side (source or target) into custom-direction mode.
_CUSTOM_DIRECTION = "make your own!"
# Choices offered by the two "Sentence type" radios.
_SENTENCE_TYPES = ("GPT3", "flan-t5-xl (free)!", "BLOOMZ-7B (free)!", "fixed-template", "custom sentences")


def custom_control_visibility(src, dest, rad_src, rad_dest):
    """Decide which custom-direction widgets should be shown.

    Args:
        src: current value of the "source" dropdown.
        dest: current value of the "target" dropdown.
        rad_src: current value of the source "Sentence type" radio.
        rad_dest: current value of the target "Sentence type" radio.

    Returns:
        A 5-tuple of booleans:
        (source side is custom, target side is custom,
         show source custom-sentences box, show target custom-sentences box,
         show the OpenAI key/organization boxes).
    """
    src_is_custom = src == _CUSTOM_DIRECTION
    dest_is_custom = dest == _CUSTOM_DIRECTION
    return (
        src_is_custom,
        dest_is_custom,
        src_is_custom and rad_src == "custom sentences",
        dest_is_custom and rad_dest == "custom sentences",
        # The key boxes are shared by both sides, so they stay visible as long
        # as EITHER side is a custom direction generated with GPT3.
        (src_is_custom and rad_src == "GPT3") or (dest_is_custom and rad_dest == "GPT3"),
    )


# NOTE(review): the nesting below is reconstructed; the diff view this was taken
# from had all indentation stripped. Confirm the layout against the running app.
if __name__ == "__main__":

    # populate the list of editing directions
    d_name2desc = get_all_directions_names()
    d_name2desc[_CUSTOM_DIRECTION] = _CUSTOM_DIRECTION

    with gr.Blocks(css=CSS_main) as demo:
        # Make the header of the demo website
        gr.HTML(HTML_header)

        gr.HTML("""
        <p>For faster inference without waiting in queue, you may duplicate the space and upgrade to GPU in settings.
        <br/>
        <a href="https://huggingface.co/spaces/pix2pix-zero-library/pix2pix-zero-demo?duplicate=true">
        <img style="margin-top: 0em; margin-bottom: 0em" src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>
        <p/>""")

        with gr.Row():
            # col A: the input image or synthetic image prompt
            with gr.Column(scale=2) as gc_left:
                gr.HTML(" <center> <p style='font-size:150%;'> input </p> </center>")
                img_in_real = gr.Image(type="pil", label="Start by uploading an image", elem_id="input_image")
                img_in_synth = gr.Image(type="pil", label="Synthesized image", elem_id="input_image_synth", visible=False)
                gr.Examples( examples="assets/test_images/", inputs=[img_in_real])
                prompt = gr.Textbox(value="a high resolution painting of a cat in the style of van gogh", label="Or use a synthetic image. Prompt:", interactive=True)
                with gr.Row():
                    seed = gr.Number(value=42, label="random seed:", interactive=True)
                    negative_guidance = gr.Number(value=5, label="negative guidance:", interactive=True)
                btn_generate = gr.Button("Generate", label="")
                # Hidden field; it is an output of the Generate click and an input to the Run click.
                fpath_z_gen = gr.Textbox(value="placeholder", visible=False)

            # col B: the output image
            with gr.Column(scale=2) as gc_right:
                gr.HTML(" <center> <p style='font-size:150%;'> output </p> </center>")
                img_out = gr.Image(type="pil", label="Output Image", visible=True)
                with gr.Row():
                    with gr.Column():
                        src = gr.Dropdown(list(d_name2desc.values()), label="source", interactive=True, value="cat")
                        src_custom = gr.Textbox(placeholder="enter new task here!", interactive=True, visible=False, label="custom source direction:")
                        rad_src = gr.Radio(list(_SENTENCE_TYPES), label="Sentence type:", value="GPT3", interactive=True, visible=False)
                        custom_sentences_src = gr.Textbox(placeholder="paste list of sentences here", interactive=True, visible=False, label="custom sentences:", lines=5, max_lines=20)

                    with gr.Column():
                        dest = gr.Dropdown(list(d_name2desc.values()), label="target", interactive=True, value="dog")
                        dest_custom = gr.Textbox(placeholder="enter new task here!", interactive=True, visible=False, label="custom target direction:")
                        rad_dest = gr.Radio(list(_SENTENCE_TYPES), label="Sentence type:", value="GPT3", interactive=True, visible=False)
                        custom_sentences_dest = gr.Textbox(placeholder="paste list of sentences here", interactive=True, visible=False, label="custom sentences:", lines=5, max_lines=20)

                with gr.Row():
                    api_key = gr.Textbox(placeholder="enter you OpenAI API key here", interactive=True, visible=False, label="OpenAI API key:", type="password")
                    org_key = gr.Textbox(placeholder="enter you OpenAI organization key here", interactive=True, visible=False, label="OpenAI Organization:", type="password")
                with gr.Row():
                    btn_edit = gr.Button("Run", label="")
                    # btn_clear = gr.Button("Clear")

                with gr.Accordion("Change editing settings?", open=True):
                    num_ddim = gr.Slider(0, 200, 100, label="Number of DDIM steps", interactive=True, elem_id="slider_ddim", step=10)
                    xa_guidance = gr.Slider(0, 0.25, 0.1, label="Cross Attention guidance", interactive=True, elem_id="slider_xa", step=0.01)
                    edit_mul = gr.Slider(0, 2, 1.0, label="Edit multiplier", interactive=True, elem_id="slider_edit_mul", step=0.05)

        with gr.Accordion("Generating your own directions", open=False):
            gr.Textbox("We provide 5 different ways of computing new custom directions:", show_label=False)
            gr.Textbox("We use GPT3 to generate a list of sentences that describe the desired edit. For this options, the users need to make an OpenAI account and enter the API and organizations keys. This option typically results is the best directions and costs roughly $0.14 for one concept.", label="1. GPT3", show_label=True)
            gr.Textbox("Alternatively flan-t5-xl model can also be used to to generate a list of sentences that describe the desired edit. This option is free and does not require creating any new accounts.", label="2. flan-t5-xl (free)", show_label=True)
            gr.Textbox("Similarly BLOOMZ-7B model can also be used to to generate the sentences for free.", label="3. BLOOMZ-7B (free)", show_label=True)
            gr.Textbox("Next, we provide a fixed template based sentence generation. This option does not require any language model and is therefore free and much faster. However the edit directions with this method are often entangled.", label="4. Fixed template", show_label=True)
            gr.Textbox("Finally, the user can also generate their own sentences.", label="5. Custom sentences", show_label=True)

        with gr.Accordion("Tips for getting better results", open=True):
            gr.Textbox("The 'Cross Attention guidance' controls the amount of structure guidance to be applied when performing the edit. If the output edited image does not retain the structure from the input, increasing the value will typically address the issue. We recommend changing the value in increments of 0.05.", label="1. Controlling the image structure", show_label=True)
            gr.Textbox("If the output image quality is low or has some artifacts, using more steps would be helpful. This can be controlled with the 'Number of DDIM steps' slider.", label="2. Improving Image Quality", show_label=True)
            gr.Textbox("There can be two reasons why the output image does not have the desired edit applied. Either the cross attention guidance is too strong, or the edit is insufficient. These can be addressed by reducing the 'Cross Attention guidance' slider or increasing the 'Edit multiplier' respectively.", label="3. Amount of edit applied", show_label=True)

        # Generating a synthetic input swaps the visible input image widget.
        btn_generate.click(launch_generate_sample, [prompt, seed, negative_guidance, num_ddim], [img_in_synth, fpath_z_gen])
        btn_generate.click(set_visible_true, [], img_in_synth)
        btn_generate.click(set_visible_false, [], img_in_real)

        def fn_clear_all():
            """Reset the value of the output image and both input images."""
            return gr.update(value=None), gr.update(value=None), gr.update(value=None)
        # btn_clear.click(fn_clear_all, [], [img_out, img_in_real, img_in_synth])
        # btn_clear.click(set_visible_true, [], img_in_real)
        # btn_clear.click(set_visible_false, [], img_in_synth)
        # Clearing any one of the three images clears all of them.
        img_in_real.clear(fn_clear_all, [], [img_out, img_in_real, img_in_synth])
        img_in_synth.clear(fn_clear_all, [], [img_out, img_in_real, img_in_synth])
        img_out.clear(fn_clear_all, [], [img_out, img_in_real, img_in_synth])

        # handling custom directions
        custom_state = [src, dest, rad_src, rad_dest]

        def on_direction_change(src_choice, dest_choice, src_kind, dest_kind):
            """Refresh every custom-direction widget after a dropdown change."""
            src_on, dest_on, src_sent, dest_sent, openai_on = custom_control_visibility(
                src_choice, dest_choice, src_kind, dest_kind
            )
            return (
                gr.update(visible=src_on), gr.update(visible=src_on), gr.update(visible=src_sent),
                gr.update(visible=dest_on), gr.update(visible=dest_on), gr.update(visible=dest_sent),
                gr.update(visible=openai_on), gr.update(visible=openai_on),
            )

        def on_sentence_type_change(src_choice, dest_choice, src_kind, dest_kind):
            """Refresh the sentence boxes and OpenAI key boxes after a radio change."""
            _, _, src_sent, dest_sent, openai_on = custom_control_visibility(
                src_choice, dest_choice, src_kind, dest_kind
            )
            return (
                gr.update(visible=src_sent), gr.update(visible=dest_sent),
                gr.update(visible=openai_on), gr.update(visible=openai_on),
            )

        direction_outputs = [
            src_custom, rad_src, custom_sentences_src,
            dest_custom, rad_dest, custom_sentences_dest,
            api_key, org_key,
        ]
        for dropdown in (src, dest):
            dropdown.change(on_direction_change, custom_state, direction_outputs)

        # The radios are deliberately not outputs of their own change events.
        sentence_outputs = [custom_sentences_src, custom_sentences_dest, api_key, org_key]
        for radio in (rad_src, rad_dest):
            radio.change(on_sentence_type_change, custom_state, sentence_outputs)

        btn_edit.click(launch_main,
                       [
                           img_in_real, img_in_synth,
                           src, src_custom, dest,
                           dest_custom, num_ddim,
                           xa_guidance, edit_mul,
                           fpath_z_gen, prompt,
                           rad_src, rad_dest,
                           api_key, org_key,
                           custom_sentences_src, custom_sentences_dest
                       ],
                       [img_out])

        gr.HTML("<hr>")

    gr.close_all()
    # `concurrency_count` is not accepted by newer Gradio releases; 1 is believed
    # to be the 3.x default, so the bare call should behave the same there.
    # NOTE(review): confirm against the Gradio version pinned for this Space.
    demo.queue()
    demo.launch(debug=True)

    # gr.close_all()
    # demo.launch(server_port=8089, server_name="0.0.0.0", debug=True)
149
-
150
-
 
 
 
 
1
+ import os
2
+ import subprocess
3
+
4
import sys

# Install LAVIS at start-up with --no-deps so pip does not pull in its own
# dependency pins. Running pip as `sys.executable -m pip` with an argument list
# (no shell) guarantees the package lands in the interpreter running this app.
_lavis_install = subprocess.run(
    [sys.executable, "-m", "pip", "install", "salesforce-lavis", "--no-deps"],
    check=False,
)
if _lavis_install.returncode != 0:
    # Keep start-up best-effort as before, but do not hide the failure.
    print(
        f"warning: 'pip install salesforce-lavis --no-deps' exited with code {_lavis_install.returncode}",
        file=sys.stderr,
    )
5
+
6
+ from PIL import Image
7
+ import gradio as gr
8
+
9
+ from utils.gradio_utils import *
10
+ from utils.direction_utils import *
11
+ from utils.generate_synthetic import *
12
+
13
+
14
# Dropdown entry that switches a side (source or target) into custom-direction mode.
_CUSTOM_DIRECTION = "make your own!"
# Choices offered by the two "Sentence type" radios.
_SENTENCE_TYPES = ("GPT3", "flan-t5-xl (free)!", "BLOOMZ-7B (free)!", "fixed-template", "custom sentences")


def custom_control_visibility(src, dest, rad_src, rad_dest):
    """Decide which custom-direction widgets should be shown.

    Args:
        src: current value of the "source" dropdown.
        dest: current value of the "target" dropdown.
        rad_src: current value of the source "Sentence type" radio.
        rad_dest: current value of the target "Sentence type" radio.

    Returns:
        A 5-tuple of booleans:
        (source side is custom, target side is custom,
         show source custom-sentences box, show target custom-sentences box,
         show the OpenAI key/organization boxes).
    """
    src_is_custom = src == _CUSTOM_DIRECTION
    dest_is_custom = dest == _CUSTOM_DIRECTION
    return (
        src_is_custom,
        dest_is_custom,
        src_is_custom and rad_src == "custom sentences",
        dest_is_custom and rad_dest == "custom sentences",
        # The key boxes are shared by both sides, so they stay visible as long
        # as EITHER side is a custom direction generated with GPT3.
        (src_is_custom and rad_src == "GPT3") or (dest_is_custom and rad_dest == "GPT3"),
    )


# NOTE(review): the nesting below is reconstructed; the diff view this was taken
# from had all indentation stripped. Confirm the layout against the running app.
if __name__ == "__main__":

    # populate the list of editing directions
    d_name2desc = get_all_directions_names()
    d_name2desc[_CUSTOM_DIRECTION] = _CUSTOM_DIRECTION

    with gr.Blocks(css=CSS_main) as demo:
        # Make the header of the demo website
        gr.HTML(HTML_header)

        gr.HTML("""
        <p>For faster inference without waiting in queue, you may duplicate the space and upgrade to GPU in settings.
        <br/>
        <a href="https://huggingface.co/spaces/pix2pix-zero-library/pix2pix-zero-demo?duplicate=true">
        <img style="margin-top: 0em; margin-bottom: 0em" src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>
        <p/>""")

        with gr.Row():
            # col A: the input image or synthetic image prompt
            with gr.Column(scale=2) as gc_left:
                gr.HTML(" <center> <p style='font-size:150%;'> input </p> </center>")
                img_in_real = gr.Image(type="pil", label="Start by uploading an image", elem_id="input_image")
                img_in_synth = gr.Image(type="pil", label="Synthesized image", elem_id="input_image_synth", visible=False)
                gr.Examples( examples="assets/test_images/", inputs=[img_in_real])
                prompt = gr.Textbox(value="a high resolution painting of a cat in the style of van gogh", label="Or use a synthetic image. Prompt:", interactive=True)
                with gr.Row():
                    seed = gr.Number(value=42, label="random seed:", interactive=True)
                    negative_guidance = gr.Number(value=5, label="negative guidance:", interactive=True)
                btn_generate = gr.Button("Generate", label="")
                # Hidden field; it is an output of the Generate click and an input to the Run click.
                fpath_z_gen = gr.Textbox(value="placeholder", visible=False)

            # col B: the output image
            with gr.Column(scale=2) as gc_right:
                gr.HTML(" <center> <p style='font-size:150%;'> output </p> </center>")
                img_out = gr.Image(type="pil", label="Output Image", visible=True)
                with gr.Row():
                    with gr.Column():
                        src = gr.Dropdown(list(d_name2desc.values()), label="source", interactive=True, value="cat")
                        src_custom = gr.Textbox(placeholder="enter new task here!", interactive=True, visible=False, label="custom source direction:")
                        rad_src = gr.Radio(list(_SENTENCE_TYPES), label="Sentence type:", value="GPT3", interactive=True, visible=False)
                        custom_sentences_src = gr.Textbox(placeholder="paste list of sentences here", interactive=True, visible=False, label="custom sentences:", lines=5, max_lines=20)

                    with gr.Column():
                        dest = gr.Dropdown(list(d_name2desc.values()), label="target", interactive=True, value="dog")
                        dest_custom = gr.Textbox(placeholder="enter new task here!", interactive=True, visible=False, label="custom target direction:")
                        rad_dest = gr.Radio(list(_SENTENCE_TYPES), label="Sentence type:", value="GPT3", interactive=True, visible=False)
                        custom_sentences_dest = gr.Textbox(placeholder="paste list of sentences here", interactive=True, visible=False, label="custom sentences:", lines=5, max_lines=20)

                with gr.Row():
                    api_key = gr.Textbox(placeholder="enter you OpenAI API key here", interactive=True, visible=False, label="OpenAI API key:", type="password")
                    org_key = gr.Textbox(placeholder="enter you OpenAI organization key here", interactive=True, visible=False, label="OpenAI Organization:", type="password")
                with gr.Row():
                    btn_edit = gr.Button("Run", label="")
                    # btn_clear = gr.Button("Clear")

                with gr.Accordion("Change editing settings?", open=True):
                    num_ddim = gr.Slider(0, 200, 100, label="Number of DDIM steps", interactive=True, elem_id="slider_ddim", step=10)
                    xa_guidance = gr.Slider(0, 0.25, 0.1, label="Cross Attention guidance", interactive=True, elem_id="slider_xa", step=0.01)
                    edit_mul = gr.Slider(0, 2, 1.0, label="Edit multiplier", interactive=True, elem_id="slider_edit_mul", step=0.05)

        with gr.Accordion("Generating your own directions", open=False):
            gr.Textbox("We provide 5 different ways of computing new custom directions:", show_label=False)
            gr.Textbox("We use GPT3 to generate a list of sentences that describe the desired edit. For this options, the users need to make an OpenAI account and enter the API and organizations keys. This option typically results is the best directions and costs roughly $0.14 for one concept.", label="1. GPT3", show_label=True)
            gr.Textbox("Alternatively flan-t5-xl model can also be used to to generate a list of sentences that describe the desired edit. This option is free and does not require creating any new accounts.", label="2. flan-t5-xl (free)", show_label=True)
            gr.Textbox("Similarly BLOOMZ-7B model can also be used to to generate the sentences for free.", label="3. BLOOMZ-7B (free)", show_label=True)
            gr.Textbox("Next, we provide a fixed template based sentence generation. This option does not require any language model and is therefore free and much faster. However the edit directions with this method are often entangled.", label="4. Fixed template", show_label=True)
            gr.Textbox("Finally, the user can also generate their own sentences.", label="5. Custom sentences", show_label=True)

        with gr.Accordion("Tips for getting better results", open=True):
            gr.Textbox("The 'Cross Attention guidance' controls the amount of structure guidance to be applied when performing the edit. If the output edited image does not retain the structure from the input, increasing the value will typically address the issue. We recommend changing the value in increments of 0.05.", label="1. Controlling the image structure", show_label=True)
            gr.Textbox("If the output image quality is low or has some artifacts, using more steps would be helpful. This can be controlled with the 'Number of DDIM steps' slider.", label="2. Improving Image Quality", show_label=True)
            gr.Textbox("There can be two reasons why the output image does not have the desired edit applied. Either the cross attention guidance is too strong, or the edit is insufficient. These can be addressed by reducing the 'Cross Attention guidance' slider or increasing the 'Edit multiplier' respectively.", label="3. Amount of edit applied", show_label=True)

        # Generating a synthetic input swaps the visible input image widget.
        btn_generate.click(launch_generate_sample, [prompt, seed, negative_guidance, num_ddim], [img_in_synth, fpath_z_gen])
        btn_generate.click(set_visible_true, [], img_in_synth)
        btn_generate.click(set_visible_false, [], img_in_real)

        def fn_clear_all():
            """Reset the value of the output image and both input images."""
            return gr.update(value=None), gr.update(value=None), gr.update(value=None)
        # btn_clear.click(fn_clear_all, [], [img_out, img_in_real, img_in_synth])
        # btn_clear.click(set_visible_true, [], img_in_real)
        # btn_clear.click(set_visible_false, [], img_in_synth)
        # Clearing any one of the three images clears all of them.
        img_in_real.clear(fn_clear_all, [], [img_out, img_in_real, img_in_synth])
        img_in_synth.clear(fn_clear_all, [], [img_out, img_in_real, img_in_synth])
        img_out.clear(fn_clear_all, [], [img_out, img_in_real, img_in_synth])

        # handling custom directions
        custom_state = [src, dest, rad_src, rad_dest]

        def on_direction_change(src_choice, dest_choice, src_kind, dest_kind):
            """Refresh every custom-direction widget after a dropdown change."""
            src_on, dest_on, src_sent, dest_sent, openai_on = custom_control_visibility(
                src_choice, dest_choice, src_kind, dest_kind
            )
            return (
                gr.update(visible=src_on), gr.update(visible=src_on), gr.update(visible=src_sent),
                gr.update(visible=dest_on), gr.update(visible=dest_on), gr.update(visible=dest_sent),
                gr.update(visible=openai_on), gr.update(visible=openai_on),
            )

        def on_sentence_type_change(src_choice, dest_choice, src_kind, dest_kind):
            """Refresh the sentence boxes and OpenAI key boxes after a radio change."""
            _, _, src_sent, dest_sent, openai_on = custom_control_visibility(
                src_choice, dest_choice, src_kind, dest_kind
            )
            return (
                gr.update(visible=src_sent), gr.update(visible=dest_sent),
                gr.update(visible=openai_on), gr.update(visible=openai_on),
            )

        direction_outputs = [
            src_custom, rad_src, custom_sentences_src,
            dest_custom, rad_dest, custom_sentences_dest,
            api_key, org_key,
        ]
        for dropdown in (src, dest):
            dropdown.change(on_direction_change, custom_state, direction_outputs)

        # The radios are deliberately not outputs of their own change events.
        sentence_outputs = [custom_sentences_src, custom_sentences_dest, api_key, org_key]
        for radio in (rad_src, rad_dest):
            radio.change(on_sentence_type_change, custom_state, sentence_outputs)

        btn_edit.click(launch_main,
                       [
                           img_in_real, img_in_synth,
                           src, src_custom, dest,
                           dest_custom, num_ddim,
                           xa_guidance, edit_mul,
                           fpath_z_gen, prompt,
                           rad_src, rad_dest,
                           api_key, org_key,
                           custom_sentences_src, custom_sentences_dest
                       ],
                       [img_out])

        gr.HTML("<hr>")

    #gr.close_all()
    demo.queue()
    demo.launch(debug=True)

    # gr.close_all()
    # demo.launch(server_port=8089, server_name="0.0.0.0", debug=True)
152
+
153
+
requirements.txt CHANGED
@@ -1,7 +1,36 @@
1
- transformers
2
- joblib
3
- accelerate
4
- diffusers==0.12.1
5
- salesforce-lavis
6
- openai
7
- #git+https://github.com/pix2pixzero/pix2pix-zero.git
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ transformers
2
+ joblib
3
+ accelerate
4
+ diffusers
5
+ #salesforce-lavis
6
+ numpy<2
7
+ openai
8
+ contexttimer
9
+ decord
10
+ einops
11
+ fairscale
12
+ ftfy
13
+ iopath
14
+ ipython
15
+ omegaconf
16
+ opencv-python-headless
17
+ opendatasets
18
+ packaging
19
+ pandas
20
+ plotly
21
+ pre-commit
22
+ pycocoevalcap
23
+ pycocotools
24
+ python-magic
25
+ scikit-image
26
+ sentencepiece
27
+ spacy
28
+ timm
29
+ torch<=2.4.0
30
+ torchvision
31
+ tqdm
32
+ git+https://github.com/webdataset/webdataset
33
+ wheel
34
+ git+https://github.com/imageio/imageio
35
+ git+https://github.com/matplotlib/matplotlib
36
+ #git+https://github.com/pix2pixzero/pix2pix-zero.git