from diffusers import FluxPipeline, FluxTransformer2DModel | |
import torch | |
import os | |
# Configuration
# Directory passed to FluxTransformer2DModel.from_pretrained for the merged transformer weights.
MODEL_DIR = "./merged_models/2.5_1"
# Output images are written to IMAGE_OUTPUT_DIR as "<IMAGE_PREFIX>_<index>.png".
IMAGE_OUTPUT_DIR = "./"
IMAGE_PREFIX = "flowers_2.5_1"
DEVICE = torch.device("cpu")
# If True, uses pipeline.enable_sequential_cpu_offload(). Make sure DEVICE is CPU.
USE_CPU_OFFLOAD = True
SEED = 0
# At least 880x656 fits on a 24GB GPU with sequential offload.
IMAGE_WIDTH = 1280
IMAGE_HEIGHT = 1024
# Try ~4-8 steps for 10:1 and ~8-16+ for 4:1 and 2.5:1 ("default" 6, 10, 16).
NUM_STEPS = 10
NUM_IMAGES = 4
# Passed to the pipeline as guidance_scale.
CFG = 3.5
# PROMPT is passed as `prompt`; PROMPT2 is passed as `prompt_2`.
PROMPT = (
    "Impressionistic tableau medium shot painting with soft, blended brushstrokes and muted colors complemented "
    "by sporadic vibrant highlights."
)
PROMPT2 = (
    "Impressionistic tableau painting with soft brushstrokes and muted colors, accented by vibrant highlights, "
    "of a tranquil courtyard surrounded by wildflowers. Madison, a 19-year-old woman with light dirty blond "
    "hair and bubblegum-pink highlights in a ponytail, brown eyes, and soft facial features, stands beside "
    "Amelia, a tall mid-20s woman with deep auburn hair in a messy bun, summer sky-blue eyes, and pronounced "
    "cheekbones. Together, they exude harmony and intrigue, their contrasting features complementing each "
    "other."
)
# Load the merged transformer from MODEL_DIR in bfloat16.
print("Loading model...")
transformer = FluxTransformer2DModel.from_pretrained(
    MODEL_DIR, torch_dtype=torch.bfloat16, use_safetensors=True
)

# Build the FLUX.1-dev pipeline, substituting the merged transformer for the stock one.
print("Creating pipeline...")
# NOTE(review): local_dir, local_dir_use_symlinks and ignore_patterns look like Hub
# download options rather than pipeline-loading options -- confirm that
# FluxPipeline.from_pretrained actually honours them instead of ignoring them.
pipeline = FluxPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-dev",
    transformer=transformer,
    torch_dtype=torch.bfloat16,
    use_safetensors=True,
    local_dir="./models/dev/",
    local_dir_use_symlinks=False,
    ignore_patterns=["flux1-dev.sft", "flux1-dev.safetensors"],
)

# Honour USE_CPU_OFFLOAD instead of always enabling offload: with offload the
# pipeline manages device placement itself, otherwise move everything to DEVICE.
if USE_CPU_OFFLOAD:
    pipeline.enable_sequential_cpu_offload()
else:
    pipeline.to(DEVICE)
print("Generating image...")
# Pipeline call parameters:
#   prompt — The prompt or prompts to guide the image generation. If not defined, one has to pass
#       prompt_embeds instead.
#   prompt_2 — The prompt or prompts to be sent to tokenizer_2 and text_encoder_2. If not defined,
#       prompt will be used instead.
#   height — The height in pixels of the generated image. This is set to 1024 by default for the
#       best results.
#   width — The width in pixels of the generated image. This is set to 1024 by default for the
#       best results.
#   num_inference_steps — The number of denoising steps. More denoising steps usually lead to a
#       higher quality image at the expense of slower inference.
#   timesteps — Custom timesteps to use for the denoising process with schedulers which support a
#       timesteps argument in their set_timesteps method. If not defined, the default behavior when
#       num_inference_steps is passed will be used. Must be in descending order.
#   guidance_scale — Guidance scale as defined in
#       [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598). guidance_scale is
#       defined as w of equation 2 of the [Imagen Paper](https://arxiv.org/pdf/2205.11487.pdf).
#       Guidance scale is enabled by setting guidance_scale > 1. A higher guidance scale encourages
#       generating images that are closely linked to the text prompt, usually at the expense of
#       lower image quality.
#   num_images_per_prompt — The number of images to generate per prompt.
#   generator — One or a list of
#       [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
#       generation deterministic.
#   latents — Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as
#       inputs for image generation. Can be used to tweak the same generation with different
#       prompts. If not provided, a latents tensor will be generated by sampling using the supplied
#       random generator.
#   prompt_embeds — Pre-generated text embeddings. Can be used to easily tweak text inputs, e.g.
#       prompt weighting. If not provided, text embeddings will be generated from the prompt input
#       argument.
#   pooled_prompt_embeds — Pre-generated pooled text embeddings. Can be used to easily tweak text
#       inputs, e.g. prompt weighting. If not provided, pooled text embeddings will be generated
#       from the prompt input argument.
#   output_type — The output format of the generated image. Choose between
#       [PIL](https://pillow.readthedocs.io/en/stable/): PIL.Image.Image or np.array.
#   return_dict — Whether or not to return a [~pipelines.flux.FluxPipelineOutput] instead of a
#       plain tuple.
#   joint_attention_kwargs — A kwargs dictionary that, if specified, is passed along to the
#       AttentionProcessor as defined under self.processor in
#       [diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
#   callback_on_step_end — A function that is called at the end of each denoising step during
#       inference. The function is called with the following arguments:
#       callback_on_step_end(self: DiffusionPipeline, step: int, timestep: int, callback_kwargs: Dict).
#       callback_kwargs will include a list of all tensors as specified by
#       callback_on_step_end_tensor_inputs.
#   callback_on_step_end_tensor_inputs — The list of tensor inputs for the callback_on_step_end
#       function. The tensors specified in the list will be passed as the callback_kwargs argument.
#       You will only be able to include variables listed in the ._callback_tensor_inputs attribute
#       of your pipeline class.
#   max_sequence_length — Maximum sequence length to use with the prompt.
# Returns:
#   [~pipelines.flux.FluxPipelineOutput] if return_dict is True, otherwise a tuple. When returning
#   a tuple, the first element is a list with the generated images.
images = pipeline(
    prompt=PROMPT,
    prompt_2=PROMPT2,
    guidance_scale=CFG,
    num_inference_steps=NUM_STEPS,
    height=IMAGE_HEIGHT,
    width=IMAGE_WIDTH,
    max_sequence_length=512,
    # Seed from the SEED setting so the configured value actually controls the output
    # (previously a hard-coded 42 was used and SEED was ignored).
    generator=torch.manual_seed(SEED),
    num_images_per_prompt=NUM_IMAGES,
).images
# Write each generated image to IMAGE_OUTPUT_DIR as "<IMAGE_PREFIX>_<index>.png".
# Create the output directory first so a not-yet-existing path does not fail on save;
# an empty IMAGE_OUTPUT_DIR means "current directory" and needs no creation.
if IMAGE_OUTPUT_DIR:
    os.makedirs(IMAGE_OUTPUT_DIR, exist_ok=True)
for i, image in enumerate(images):
    print("Saving image...")
    path = os.path.join(IMAGE_OUTPUT_DIR, f"{IMAGE_PREFIX}_{i}.png")
    image.save(path)
print("Done.")