from diffusers import FluxPipeline, FluxTransformer2DModel
import torch
import os

# Configuration
MODEL_DIR = "./merged_models/10_1"
IMAGE_OUTPUT_DIR = "./flowers"
IMAGE_PREFIX = "flowers_10_1"
DEVICE = torch.device("cpu")
# If True, uses pipeline.enable_sequential_cpu_offload(); make sure DEVICE stays on CPU so offload can manage GPU placement itself.
USE_CPU_OFFLOAD = True
SEED = 922733
# Fits on 24GB GPU w/ sequential offload:
# 6x 1024x768? (etc.)
# 4x 1280x1024 (etc.)
# 3x 1856x920 (or 1680x1016, 1704x1000, 1456x1168, etc.)
# 2x 1920x1080 (or 1864x1128, etc.)
# 1x 1920x1440 (or 2560x1080, etc.), or even 2560x1352 (or 2384x1448, etc.); yes, huge resolutions work, to varying degrees
IMAGE_WIDTH = 1680
IMAGE_HEIGHT = 1016
# Try ~4-8 steps for the 10:1 merge and ~8-16+ for the 4:1 and 2.5:1 merges (the "default" values being 6, 10, 12)
NUM_STEPS = 8
NUM_IMAGES = 3
CFG = 3.5
PROMPT = ("Impressionistic tableau medium shot painting with soft, blended brushstrokes and muted colors complemented "
          "by sporadic vibrant highlights.")
PROMPT2 = ("Impressionistic tableau painting with soft brushstrokes and muted colors, accented by vibrant highlights, "
           "of a tranquil courtyard surrounded by wildflowers. Madison, a 19-year-old woman with light dirty blond "
           "hair and bubblegum-pink highlights in a ponytail, brown eyes, and soft facial features, stands beside "
           "Amelia, a tall mid-20s woman with deep auburn hair in a messy bun, summer sky-blue eyes, and pronounced "
           "cheekbones. Together, they exude harmony and intrigue, their contrasting features complementing each "
           "other.")

os.makedirs(IMAGE_OUTPUT_DIR, exist_ok=True)

print("Loading model...")
transformer = FluxTransformer2DModel.from_pretrained(MODEL_DIR, torch_dtype=torch.bfloat16, use_safetensors=True)
print("Creating pipeline...")
pipeline = FluxPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-dev",
    transformer=transformer,
    torch_dtype=torch.bfloat16,
    use_safetensors=True,
).to(DEVICE)
if USE_CPU_OFFLOAD:
    pipeline.enable_sequential_cpu_offload()
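# Note (an aside, not part of the original run): diffusers also provides
# pipeline.enable_model_cpu_offload(), which offloads whole submodels instead of
# individual layers; it is faster than sequential offload but needs enough VRAM
# to hold one full submodel (e.g. the transformer) at a time.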
print("Generating image...")
# Params:
# prompt – The prompt or prompts to guide the image generation. If not defined, prompt_embeds must be passed instead.
# prompt_2 – The prompt or prompts to be sent to tokenizer_2 and text_encoder_2. If not defined, prompt is used instead.
# height – The height in pixels of the generated image. Defaults to 1024 for the best results.
# width – The width in pixels of the generated image. Defaults to 1024 for the best results.
# num_inference_steps – The number of denoising steps. More denoising steps usually lead to a higher quality image at the expense of slower inference.
# timesteps – Custom timesteps to use for the denoising process with schedulers that support a timesteps argument in their set_timesteps method. If not defined, the default behavior when num_inference_steps is passed is used. Must be in descending order.
# guidance_scale – Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598). guidance_scale is defined as w of equation 2 of the [Imagen Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting guidance_scale > 1. A higher guidance scale encourages images that are closely linked to the text prompt, usually at the expense of lower image quality.
# num_images_per_prompt – The number of images to generate per prompt.
# generator – One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make generation deterministic.
# latents – Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image generation. Can be used to tweak the same generation with different prompts. If not provided, a latents tensor will be generated by sampling using the supplied random generator.
# prompt_embeds – Pre-generated text embeddings. Can be used to easily tweak text inputs, e.g. prompt weighting. If not provided, text embeddings will be generated from the prompt input argument.
# pooled_prompt_embeds – Pre-generated pooled text embeddings. Can be used to easily tweak text inputs, e.g. prompt weighting. If not provided, pooled text embeddings will be generated from the prompt input argument.
# output_type – The output format of the generated image. Choose between [PIL](https://pillow.readthedocs.io/en/stable/): PIL.Image.Image or np.array.
# return_dict – Whether or not to return a [~pipelines.flux.FluxPipelineOutput] instead of a plain tuple.
# joint_attention_kwargs – A kwargs dictionary that, if specified, is passed along to the AttentionProcessor as defined under self.processor in [diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
# callback_on_step_end – A function that is called at the end of each denoising step during inference. The function is called with the following arguments: callback_on_step_end(self: DiffusionPipeline, step: int, timestep: int, callback_kwargs: Dict). callback_kwargs will include a list of all tensors as specified by callback_on_step_end_tensor_inputs.
# callback_on_step_end_tensor_inputs – The list of tensor inputs for the callback_on_step_end function. The tensors specified in the list will be passed as the callback_kwargs argument. You can only include variables listed in the ._callback_tensor_inputs attribute of your pipeline class.
# max_sequence_length – Maximum sequence length to use with the prompt.
# Returns:
# [~pipelines.flux.FluxPipelineOutput] if return_dict is True, otherwise a tuple. When returning a tuple, the first element is a list with the generated images.
images = pipeline(
    prompt=PROMPT,
    prompt_2=PROMPT2,
    guidance_scale=CFG,
    num_inference_steps=NUM_STEPS,
    height=IMAGE_HEIGHT,
    width=IMAGE_WIDTH,
    max_sequence_length=512,
    generator=torch.manual_seed(SEED),
    num_images_per_prompt=NUM_IMAGES,
).images
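# Sketch (assumption, not exercised above): per the generator docs, a list of
# generators can be passed instead of a single one, giving each of the
# NUM_IMAGES its own reproducible seed:
#   generators = [torch.Generator("cpu").manual_seed(SEED + i) for i in range(NUM_IMAGES)]
#   images = pipeline(..., generator=generators, num_images_per_prompt=NUM_IMAGES).images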
for i, image in enumerate(images):
    path = os.path.join(IMAGE_OUTPUT_DIR, f"{IMAGE_PREFIX}_{i}.png")
    print(f"Saving {path}...")
    image.save(path)
print("Done.")