# suitify_v1 / app.py
import base64
import os
import random
import tempfile
from io import BytesIO

import banana_dev as banana
import cv2
import face_recognition
import gradio as gr
import numpy as np
import pillow_heif
import PIL
import torch
from PIL import Image

def inpaint(p, init_image, mask_image=None, strength=0.75, guidance_scale=7.5, generator=None, num_samples=1, n_iter=1):
    # Encode the input image and mask as base64 JPEGs for the remote API.
    buffered_init_img = BytesIO()
    buffered_inverted_img = BytesIO()
    init_image.save(buffered_init_img, format="JPEG")
    mask_image.save(buffered_inverted_img, format="JPEG")
    encoded_init_image = base64.b64encode(buffered_init_img.getvalue()).decode("utf-8")
    encoded_inverted_image = base64.b64encode(buffered_inverted_img.getvalue()).decode("utf-8")
    # NOTE: the prompt, strength, guidance scale, and step count below are
    # hard-coded, so the corresponding function arguments are currently ignored.
    model_inputs = {
        "prompt": "4K UHD professional profile picture of a person wearing a suit for work and posing for a picture, fine details, realistic shaded.",
        "init_image": encoded_init_image,
        "mask_image": encoded_inverted_image,
        "strength": 0.65,
        "guidance_scale": 10,
        "num_inference_steps": 100,
    }
    # Run inference on the hosted Banana model and decode the returned image.
    out = banana.run(os.environ.get("API_KEY"), os.environ.get("MODEL_KEY"), model_inputs)
    image_byte_string = out["modelOutputs"][0]["output_image_base64"]
    image_bytes = BytesIO(base64.b64decode(image_byte_string.encode("utf-8")))
    return Image.open(image_bytes)

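# A minimal sketch of calling inpaint() directly, assuming API_KEY and
# MODEL_KEY are set in the environment; "me.jpg" and "mask.png" are
# hypothetical local files:
#
#   result = inpaint("a person in a suit", Image.open("me.jpg"), Image.open("mask.png"))
#   result.save("suited.png")
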
def identify_face(user_image):
    # Load the (already resized) upload and locate any faces in it.
    img = face_recognition.load_image_file(user_image.name)
    face_locations = face_recognition.face_locations(img)
    if not face_locations:
        return None  # no face found; the caller falls back to the original image
    # Fill every detected face rectangle with white on a black mask, then
    # invert it so faces end up black and the background white.
    mask = np.zeros(img.shape[:2], dtype="uint8")
    for top, right, bottom, left in face_locations:
        cv2.rectangle(mask, (left, top), (right, bottom), 255, -1)
    return cv2.bitwise_not(mask)

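# Following the repaint-white / keep-black convention used by preprocess_mask
# below, the inverted mask keeps the detected face(s) and lets the model
# repaint everything else with a suit.
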
def sample_images(init_image, mask_image):
    p = "4K UHD professional profile picture of a person wearing a suit for work"
    strength = 0.65
    guidance_scale = 10
    num_samples = 1
    n_iter = 1
    # Change the seed to get different results. The generator only matters for
    # local inference; the remote call in inpaint() does not use it.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    generator = torch.Generator(device=device).manual_seed(random.randint(0, 1000000))
    return inpaint(p, init_image, mask_image, strength=strength, guidance_scale=guidance_scale, generator=generator, num_samples=num_samples, n_iter=n_iter)

def preprocess_image(image):
    # NOTE: not called anywhere in this app; presumably kept from a local
    # diffusers pipeline.
    w, h = image.size
    w, h = map(lambda x: x - x % 32, (w, h))  # round dims down to a multiple of 32
    image = image.resize((w, h), resample=PIL.Image.LANCZOS)
    image = np.array(image).astype(np.float32) / 255.0
    image = image[None].transpose(0, 3, 1, 2)  # HWC -> NCHW with a batch dim
    image = torch.from_numpy(image)
    return 2.0 * image - 1.0  # scale from [0, 1] to [-1, 1]

def preprocess_mask(mask):
    # NOTE: also unused in this app; kept alongside preprocess_image.
    mask = mask.convert("L")
    w, h = mask.size
    w, h = map(lambda x: x - x % 32, (w, h))  # round dims down to a multiple of 32
    mask = mask.resize((w // 8, h // 8), resample=PIL.Image.NEAREST)  # latent resolution
    mask = np.array(mask).astype(np.float32) / 255.0
    mask = np.tile(mask, (4, 1, 1))  # replicate across the 4 latent channels
    mask = mask[None]  # add a batch dim; the original transpose(0, 1, 2, 3) was a no-op
    mask = 1 - mask  # repaint white, keep black
    return torch.from_numpy(mask)

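# Shape sketch for the two helpers above, assuming a 512x512 RGB input:
#   preprocess_image -> torch.Size([1, 3, 512, 512]), values scaled to [-1, 1]
#   preprocess_mask  -> torch.Size([1, 4, 64, 64]), where 1.0 = keep, 0.0 = repaint
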
# Pipeline: accept an image, identify the face and build the inverted face
# mask, sample inpainted images against that mask, and return the result.
def main(user_image):
    # Accept the uploaded image; register the HEIF opener so iPhone
    # HEIC uploads open like any other format.
    pillow_heif.register_heif_opener()
    init_image = PIL.Image.open(user_image).convert("RGB")
    # Resize to 512x512, the resolution the inpainting model expects.
    init_image = init_image.resize((512, 512))
    init_image.save(user_image.name)  # overwrite the upload with the resized image
    # Identify the face and build the inverted mask.
    inverted_mask = identify_face(user_image)
    if inverted_mask is None:
        return init_image  # no face detected; return the resized input unchanged
    # Round-trip the mask through a temporary PNG to get a PIL image.
    fp = tempfile.NamedTemporaryFile(mode="wb", suffix=".png")
    cv2.imwrite(fp.name, inverted_mask)
    pil_inverted_mask = PIL.Image.open(fp.name).convert("RGB")
    # Sample the inpainted image(s).
    return sample_images(init_image, pil_inverted_mask)

demo = gr.Interface(main, gr.Image(type="file"), "image")
demo.launch(debug=True)
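# To try this locally (API_KEY and MODEL_KEY are the Banana credentials read
# in inpaint(); the values are your own):
#   API_KEY=... MODEL_KEY=... python app.py
# Gradio prints a local URL; upload a portrait there to get the suited result.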