metadata
license: other
license_name: flux-1-dev-non-commercial-license
license_link: https://huggingface.co/black-forest-labs/FLUX.1-Depth-dev/blob/main/LICENSE.md
base_model:
- black-forest-labs/FLUX.1-dev
- black-forest-labs/FLUX.1-Depth-dev
pipeline_tag: image-to-image
The Flux pipeline for image inpainting using Flux-dev-Depth/Canny
import torch
from diffusers import DiffusionPipeline, FluxTransformer2DModel
from transformers import T5EncoderModel
from diffusers.utils import load_image, make_image_grid
from image_gen_aux import DepthPreprocessor # https://github.com/huggingface/image_gen_aux
from PIL import Image
import numpy as np
pipe = DiffusionPipeline.from_pretrained(
"black-forest-labs/FLUX.1-Depth-dev",
torch_dtype=torch.bfloat16,
custom_pipeline="afromero/pipeline_flux_control_inpaint",
)
transformer = FluxTransformer2DModel.from_pretrained(
"sayakpaul/FLUX.1-Depth-dev-nf4", subfolder="transformer", torch_dtype=torch.bfloat16
)
text_encoder_2 = T5EncoderModel.from_pretrained(
"sayakpaul/FLUX.1-Depth-dev-nf4", subfolder="text_encoder_2", torch_dtype=torch.bfloat16
)
pipe.transformer = transformer
pipe.text_encoder_2 = text_encoder_2
pipe.to("cuda")
prompt = "The head of a human in a robot body giving a heated speech"
image = load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/robot.png")
head_mask = np.ones_like(image)*255
head_mask[65:380,300:642] = 0
mask_image = Image.fromarray(head_mask)
processor = DepthPreprocessor.from_pretrained("LiheYoung/depth-anything-large-hf")
control_image = processor(image)[0].convert("RGB")
output = pipe(
prompt=prompt,
image=image,
control_image=control_image,
mask_image=mask_image,
height=1024,
width=1024,
num_inference_steps=30,
strength=0.9,
guidance_scale=10.0,
generator=torch.Generator().manual_seed(42),
).images[0]
make_image_grid([image, control_image, mask_image, output], rows=1, cols=4).save("output.png")