File size: 6,001 Bytes
120a3c2
45d0452
 
 
202fdfa
45d0452
 
 
120a3c2
45d0452
 
 
 
 
 
 
 
 
 
 
 
 
 
 
120a3c2
 
 
45d0452
120a3c2
 
 
 
 
f7f5be8
45d0452
 
 
8f68280
45d0452
 
 
81cf2fa
 
45d0452
 
81cf2fa
45d0452
202fdfa
 
81cf2fa
 
45d0452
 
 
 
81cf2fa
 
45d0452
 
81cf2fa
 
45d0452
81cf2fa
45d0452
81cf2fa
45d0452
 
 
8f68280
f7f5be8
45d0452
 
 
8f68280
 
45d0452
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f6fc05a
45d0452
 
 
f6fc05a
 
45d0452
 
8f68280
45d0452
f6fc05a
45d0452
4ecd25d
45d0452
 
 
4ecd25d
45d0452
 
 
 
4ecd25d
45d0452
4ecd25d
45d0452
4ecd25d
45d0452
8f68280
45d0452
8f68280
45d0452
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
import gradio as gr
import numpy as np
# from edict_functions import EDICT_editing
from PIL import Image
from utils import Endpoint, get_token
from io import BytesIO
import requests


# Module-wide Endpoint instance (class imported from utils); edict() reads
# its `url` attribute and appends "/api/edit" to build the request URL.
endpoint = Endpoint()

def local_edict(x, source_text, edit_text,
                edit_strength, guidance_scale,
                steps=50, mix_weight=0.93):
    """Run an EDICT edit in-process rather than through the remote endpoint.

    Args:
        x: Image data in a form accepted by ``PIL.Image.fromarray``.
        source_text: Passed to ``EDICT_editing`` as its second positional
            argument (the description of the original image).
        edit_text: Passed to ``EDICT_editing`` as its third positional
            argument (the description of the desired edit).
        edit_strength: Forwarded as ``init_image_strength``.
        guidance_scale: Forwarded as ``guidance_scale``.
        steps: Forwarded as ``steps``.
        mix_weight: Forwarded as ``mix_weight``.

    Returns:
        A ``numpy.ndarray`` built from the first element of the value
        returned by ``EDICT_editing``.

    Raises:
        ImportError: If the ``edict_functions`` module is not available.
    """
    # The module-level import of EDICT_editing is commented out, so the name
    # would otherwise be undefined here (NameError). Importing lazily keeps
    # the module importable without edict_functions while making this
    # function usable when it is installed.
    from edict_functions import EDICT_editing

    source_image = Image.fromarray(x)
    edited = EDICT_editing(
        source_image,
        source_text,
        edit_text,
        steps=steps,
        mix_weight=mix_weight,
        init_image_strength=edit_strength,
        guidance_scale=guidance_scale,
    )
    # Only the first element of the result is used.
    return np.array(edited[0])

def encode_image(image):
    """Serialize ``image`` as JPEG into an in-memory stream.

    The image's ``save`` method is called with ``format="JPEG"`` and
    ``quality=95``. The returned ``BytesIO`` is rewound to position 0 so it
    can be read (or uploaded) from the start.
    """
    jpeg_options = {"format": "JPEG", "quality": 95}
    stream = BytesIO()
    image.save(stream, **jpeg_options)
    # Rewind: after writing, the cursor sits at the end of the data.
    stream.seek(0)
    return stream



def decode_image(img_obj):
    """Open ``img_obj`` with ``PIL.Image.open`` and return it converted to RGB."""
    return Image.open(img_obj).convert("RGB")

def edict(x, source_text, edit_text,
          edit_strength, guidance_scale,
          steps=50, mix_weight=0.93):
    """Request an EDICT edit from the remote ``/api/edit`` endpoint.

    The image is JPEG-encoded and uploaded as the ``image`` file field; the
    prompts and the two numeric settings are sent as form fields.

    Args:
        x: Image data in a form accepted by ``PIL.Image.fromarray``.
        source_text: Sent as the ``source_text`` form field.
        edit_text: Sent as the ``edit_text`` form field.
        edit_strength: Sent as the ``edit_strength`` form field.
        guidance_scale: Sent as the ``guidance_scale`` form field.
        steps: Accepted for signature parity with ``local_edict``; not sent
            to the server.
        mix_weight: Accepted for signature parity with ``local_edict``; not
            sent to the server.

    Returns:
        A ``numpy.ndarray`` of the response body decoded as an RGB image.

    Raises:
        gr.Error: If the request fails at the network level or the server
            answers with a status other than 200. The message starts with
            ``"Error: "``.
    """
    url = endpoint.url + "/api/edit"
    headers = {
        "User-Agent": "EDICT HuggingFace Space",
        "Auth-Token": get_token(),
    }
    data = {
        "source_text": source_text,
        "edit_text": edit_text,
        "edit_strength": edit_strength,
        "guidance_scale": guidance_scale,
    }
    files = {"image": encode_image(Image.fromarray(x))}

    try:
        # (connect, read) timeouts: without them a stalled server would
        # block this worker indefinitely.
        response = requests.post(
            url, data=data, files=files, headers=headers, timeout=(10, 300)
        )
    except requests.RequestException as err:
        raise gr.Error(f"Error: {err}") from err

    if response.status_code != 200:
        # The interface output is an image, so returning a string here would
        # not be shown as a message; raise so Gradio reports the failure.
        raise gr.Error("Error: " + response.text)

    return np.array(decode_image(BytesIO(response.content)))

# Example rows for the demo. Each row is
# [image path, original description, edit description, edit strength, guidance scale].
examples = [
    ['square_ims/american_gothic.jpg', 'A painting of two people frowning', 'A painting of two people smiling', 0.5, 3],
    ['square_ims/colloseum.jpg', 'An old ruined building', 'A new modern office building', 0.8, 3],
    ['square_ims/scream.jpg', 'A painting of someone screaming', 'A painting of an alien', 0.5, 3],
    ['square_ims/yosemite.jpg', 'Granite forest valley', 'Granite desert valley', 0.8, 3],
    ['square_ims/einstein.jpg', 'Mouth open', 'Mouth closed', 0.8, 3],
    ['square_ims/einstein.jpg', 'A man', 'A man in K.I.S.S. facepaint', 0.8, 3],
]

# Disabled cupcake examples, kept for reference:
# examples.extend([
#         ['square_ims/imagenet_cake_2.jpg', 'A cupcake', 'A Chinese New Year cupcake', 0.8, 3],
#         ['square_ims/imagenet_cake_2.jpg', 'A cupcake', 'A Union Jack cupcake', 0.8, 3],
#         ['square_ims/imagenet_cake_2.jpg', 'A cupcake', 'A Nigerian flag cupcake', 0.8, 3],
#         ['square_ims/imagenet_cake_2.jpg', 'A cupcake', 'A Santa Claus cupcake', 0.8, 3],
#         ['square_ims/imagenet_cake_2.jpg', 'A cupcake', 'An Easter cupcake', 0.8, 3],
#         ['square_ims/imagenet_cake_2.jpg', 'A cupcake', 'A hedgehog cupcake', 0.8, 3],
#         ['square_ims/imagenet_cake_2.jpg', 'A cupcake', 'A rose cupcake', 0.8, 3],
#     ])

# Two dog photos, each paired with three target breeds (photo-major order).
for dog_i in (1, 2):
    for breed in ('Golden Retriever', 'Chihuahua', 'Dalmatian'):
        dog_row = [
            'square_ims/imagenet_dog_{}.jpg'.format(dog_i),
            'A dog',
            'A {}'.format(breed),
            0.8,
            3,
        ]
        examples.append(dog_row)


# Markdown text passed to gr.Interface as `description`.
description = (
    '**For safety and ethical considerations, we have disabled image uploading from March 21. 2023.\n'
    'Please try examples provided below.**\n'
    'A gradio demo for [EDICT](https://arxiv.org/abs/2211.12446) (CVPR23)'
)
# description = gr.Markdown(description)

# Markdown usage notes (prompting style, parameters, misc.) passed to
# gr.Interface as `article`.
article = """

### Prompting Style

As with many text-to-image methods, the prompting style of EDICT can make a big difference. When in doubt, experiment! Some guidance:
* Parallel *Original Description* and *Edit Description* construction as much as possible. Inserting/editing single words often is enough to affect a change while maintaining a lot of the original structure
* Words that will affect the entire setting (e.g. "A photo of " vs. "A painting of") can make a big difference. Playing around with them can help a lot

### Parameters
Both `edit_strength` and `guidance_scale` have similar properties qualitatively: the higher the value the more the image will change. We suggest
* Increasing/decreasing `edit_strength` first, particularly to alter/preserve more of the original structure/content
* Then changing `guidance_scale` to make the change in the edited region more or less pronounced.

Usually we find changing `edit_strength` to be enough, but feel free to play around (and report any interesting results)!

### Misc.

Having difficulty coming up with a caption? Try [BLIP](https://huggingface.co/spaces/Salesforce/BLIP2) to automatically generate one!

As with most StableDiffusion approaches, faces/text are often problematic to render, especially if they're small. Having these in the foreground will help keep them cleaner.

A returned black image means that the [Safety Checker](https://huggingface.co/CompVis/stable-diffusion-safety-checker) triggered on the photo. This happens in odd cases sometimes (it often rejects
the huggingface logo or variations), but we need to keep it in for obvious reasons.
"""
# article = gr.Markdown(description)

# Input widgets, in the positional order of edict's first five parameters.
# `steps` and `mix_weight` are not exposed, so edict uses its defaults for them.
_input_components = [
    gr.Image(interactive=False),
    gr.Textbox(label="Original Description"),
    gr.Textbox(label="Edit Description"),
    # 50, # gr.Slider(5, 50, value=20, step=1),
    # 0.93, # gr.Slider(0.5, 1, value=0.7, step=0.05),
    gr.Slider(0.0, 1, value=0.8, step=0.05),
    gr.Slider(0, 10, value=3, step=0.5),
]

iface = gr.Interface(
    fn=edict,
    inputs=_input_components,
    outputs="image",
    examples=examples,
    cache_examples=True,
    description=description,
    article=article,
)
iface.launch()