import subprocess

from PIL import Image


def download_file(url, output_filename):
    # Quietly download a file with wget to the given local path.
    command = ['wget', '-O', output_filename, '-q', url]
    subprocess.run(command, check=True)


url1 = 'https://storage.googleapis.com/mediapipe-models/image_segmenter/selfie_multiclass_256x256/float32/latest/selfie_multiclass_256x256.tflite'
url2 = 'https://storage.googleapis.com/mediapipe-models/image_segmenter/selfie_segmenter/float16/latest/selfie_segmenter.tflite'

filename1 = 'selfie_multiclass_256x256.tflite'
filename2 = 'selfie_segmenter.tflite'

download_file(url1, filename1)
download_file(url2, filename2)
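# Note: only the multiclass model is used by the segmentation helpers below;
# selfie_segmenter.tflite is downloaded but never loaded.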

import cv2
import mediapipe as mp
import numpy as np
from mediapipe.tasks import python
from mediapipe.tasks.python import vision
import random
import gradio as gr
import spaces
import torch
from diffusers import FluxInpaintPipeline
from diffusers import FlowMatchEulerDiscreteScheduler, AutoencoderKL
from diffusers.models.transformers.transformer_flux import FluxTransformer2DModel
from transformers import CLIPTextModel, CLIPTokenizer, T5EncoderModel, T5TokenizerFast

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

bfl_repo = "black-forest-labs/FLUX.1-dev"

BG_COLOR = (255, 255, 255)
MASK_COLOR = (0, 0, 0)

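# Segment the photo with the multiclass selfie model. confidence_masks[0] corresponds
# to the background category (the model's category order is background, hair, body-skin,
# face-skin, clothes, others), so the returned image is black (MASK_COLOR) over the
# background and white (BG_COLOR) over the subject.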
def maskPerson(input):
    base_options = python.BaseOptions(model_asset_path='selfie_multiclass_256x256.tflite')
    options = vision.ImageSegmenterOptions(base_options=base_options,
                                           output_category_mask=True)

    with vision.ImageSegmenter.create_from_options(options) as segmenter:
        image = mp.Image.create_from_file(input)
        segmentation_result = segmenter.segment(image)
        person_mask = segmentation_result.confidence_masks[0]

        image_data = image.numpy_view()
        fg_image = np.zeros(image_data.shape, dtype=np.uint8)
        fg_image[:] = MASK_COLOR
        bg_image = np.zeros(image_data.shape, dtype=np.uint8)
        bg_image[:] = BG_COLOR

        condition = np.stack((person_mask.numpy_view(),) * 3, axis=-1) > 0.2
        output_image = np.where(condition, fg_image, bg_image)

        return output_image


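# Build a head mask by combining the hair (index 1) and face-skin (index 3) confidence
# masks; the head region comes out black (MASK_COLOR) on a white background.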
def maskHead(input):
    base_options = python.BaseOptions(model_asset_path='selfie_multiclass_256x256.tflite')
    options = vision.ImageSegmenterOptions(base_options=base_options,
                                           output_category_mask=True)

    with vision.ImageSegmenter.create_from_options(options) as segmenter:
        image = mp.Image.create_from_file(input)
        segmentation_result = segmenter.segment(image)

        hairmask = segmentation_result.confidence_masks[1]
        facemask = segmentation_result.confidence_masks[3]

        image_data = image.numpy_view()
        fg_image = np.zeros(image_data.shape, dtype=np.uint8)
        fg_image[:] = MASK_COLOR
        bg_image = np.zeros(image_data.shape, dtype=np.uint8)
        bg_image[:] = BG_COLOR

        combined_mask = np.maximum(hairmask.numpy_view(), facemask.numpy_view())

        condition = np.stack((combined_mask,) * 3, axis=-1) > 0.2
        output_image = np.where(condition, fg_image, bg_image)

        return output_image


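# Paste the input at a random scale and offset onto a 1024x1024 backdrop loaded from
# 'default.jpeg', which is expected to sit next to this script.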
def random_positioning(input, output_size=(1024, 1024)):
    background = cv2.imread("default.jpeg")
    if background is None:
        raise ValueError("Unable to load background image")

    background = cv2.resize(background, output_size, interpolation=cv2.INTER_AREA)

    if input is None:
        raise ValueError("Unable to load input image")

    scale_factor = random.uniform(0.5, 1.0)
    new_size = (int(input.shape[1] * scale_factor), int(input.shape[0] * scale_factor))

    resized_image = cv2.resize(input, new_size, interpolation=cv2.INTER_AREA)

    if background.shape[2] != resized_image.shape[2]:
        raise ValueError("Input image and background image must have the same number of channels")

    x_offset = random.randint(0, output_size[0] - new_size[0])
    y_offset = random.randint(0, output_size[1] - new_size[1])
    background[y_offset:y_offset+new_size[1], x_offset:x_offset+new_size[0]] = resized_image

    return background


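# Keep only the pixels where the inverted mask is white and black out the rest.
# Defined as a helper; execute() below does not call it.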
def remove_background(image_path, mask):
    image = cv2.imread(image_path)
    inverted_mask = cv2.bitwise_not(mask)

    _, binary_mask = cv2.threshold(inverted_mask, 127, 255, cv2.THRESH_BINARY)

    result = np.zeros_like(image, dtype=np.uint8)
    result[binary_mask == 255] = image[binary_mask == 255]

    return result


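# Load the FLUX.1-dev inpainting pipeline once at start-up; on ZeroGPU Spaces the
# @spaces.GPU decorator below requests a GPU only for the duration of each call.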
pipe = FluxInpaintPipeline.from_pretrained(bfl_repo, torch_dtype=torch.bfloat16).to(DEVICE)
MAX_SEED = np.iinfo(np.int32).max
TRIGGER = "a photo of TOK"


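# Two-pass inpainting: the first pass repaints the subject's body while keeping the
# original head and background; the second pass repaints everything except the head
# detected in the intermediate result.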
@spaces.GPU(duration=75)
def execute(image, prompt, debug=False):
    if not prompt:
        gr.Info("Please enter a text prompt.")
        return None

    if not image:
        gr.Info("Please upload an image.")
        return None

    img = cv2.imread(image)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    imgs = [random_positioning(img)]

    pipe.load_lora_weights("XLabs-AI/flux-RealismLora", weight_name='lora.safetensors')
    response = []

    seed_slicer = random.randint(0, MAX_SEED)
    generator = torch.Generator().manual_seed(seed_slicer)

    for idx in range(len(imgs)):
        current_img = imgs[idx]
        cv2.imwrite('base_image.jpg', current_img)
        cv2.imwrite("mask_person.jpg", maskPerson('base_image.jpg'))
        cv2.imwrite("mask_face.jpg", maskHead('base_image.jpg'))

        im = Image.open('base_image.jpg')
        np_arr = np.array(im)
        rgb_image = cv2.cvtColor(np_arr, cv2.COLOR_BGR2RGB)

        im = Image.fromarray(rgb_image)

        person = np.array(Image.open('mask_person.jpg'))
        face = np.array(Image.open('mask_face.jpg'))

        person_gray = cv2.cvtColor(person, cv2.COLOR_BGR2GRAY)
        face_gray = cv2.cvtColor(face, cv2.COLOR_BGR2GRAY)

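        # Merge the two masks: the head (black in face_gray) stays protected while the
        # rest of the subject (white in person_gray) is exposed, so only the body is
        # repainted in the first inpainting pass.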
        _, mask = cv2.threshold(face_gray, 1, 255, cv2.THRESH_BINARY_INV)
        mask_inv = cv2.bitwise_not(mask)
        person_masked = cv2.bitwise_and(person_gray, person_gray, mask=mask_inv)
        face_masked = cv2.bitwise_and(face_gray, face_gray, mask=mask)
        result = cv2.add(person_masked, face_masked)
        cv2.imwrite('join.jpg', result)

        fund_mask = Image.open('join.jpg')

        result0 = pipe(
            prompt=f"{prompt} {TRIGGER}",
            image=im,
            mask_image=fund_mask,
            width=1024,
            height=1024,
            strength=0.85,
            generator=generator,
            num_inference_steps=28,
            max_sequence_length=256,
            joint_attention_kwargs={"scale": 0.9},
        ).images[0]

        # Second pass: re-detect the head in the intermediate result and inpaint
        # everything except it.
        arr = np.array(result0)
        rgb_image = cv2.cvtColor(arr, cv2.COLOR_BGR2RGB)
        cv2.imwrite('person.jpg', rgb_image)
        cv2.imwrite("mask.jpg", maskHead('person.jpg'))
        mask = Image.open('mask.jpg')

        result = pipe(
            prompt=f"{prompt} {TRIGGER}",
            image=result0,
            mask_image=mask,
            width=1024,
            height=1024,
            strength=0.85,
            generator=generator,
            num_inference_steps=28,
            max_sequence_length=256,
            joint_attention_kwargs={"scale": 0.9},
        ).images[0]

        if debug:
            response.append(im)
            response.append(person)
            response.append(face)
            response.append(fund_mask)
            response.append(result0)
            response.append(mask)

        response.append(result)

    return response


description = "This is an unofficial implementation of an IP face adapter for FLUX DEV. It does not explicitly follow the IP face model; instead, I created a wrapper around inpainting and MediaPipe. I like to call it a Fake IP Adapter."
title = "Flux IP Face Adapter"

iface = gr.Interface(
    fn=execute,
    description=description,
    title=title,
    inputs=[
        gr.Image(type="filepath"),
        gr.Textbox(label="Prompt"),
        gr.Checkbox(label="Debug Mode")
    ],
    outputs="gallery"
)

iface.launch(share=True, debug=True)