HaileyStorm
committed on
Commit
•
42f204e
1 Parent(s): 727fd82
Update infer.py
infer.py
CHANGED
@@ -1,73 +1,73 @@
 from diffusers import FluxPipeline, FluxTransformer2DModel
 import torch
 import os
 
 # Configuration
 MODEL_DIR = "./merged_models/2.5_1"
 IMAGE_OUTPUT_DIR = "./"
 IMAGE_PREFIX = "flowers_2.5_1"
 DEVICE = torch.device("cpu")
 # If True, uses pipeline.enable_sequential_cpu_offload(). Make sure device is CPU.
 USE_CPU_OFFLOAD = True
 SEED = 0
 # At least 880x656 fits on a 24GB GPU w/ sequential offload
 IMAGE_WIDTH = 1280
 IMAGE_HEIGHT = 1024
 NUM_STEPS = 10  # Try ~4-8 for 10:1 and ~8-16+ for 4:1 and 2.5:1 ("Default" 6, 10, 16)
 NUM_IMAGES = 4
 CFG = 3.5
 PROMPT = ("Impressionistic tableau medium shot painting with soft, blended brushstrokes and muted colors complemented "
           "by sporadic vibrant highlights.")
 PROMPT2 = ("Impressionistic tableau painting with soft brushstrokes and muted colors, accented by vibrant highlights, "
            "of a tranquil courtyard surrounded by wildflowers. Madison, a 19-year-old woman with light dirty blond "
            "hair and bubblegum-pink highlights in a ponytail, brown eyes, and soft facial features, stands beside "
            "Amelia, a tall mid-20s woman with deep auburn hair in a messy bun, summer sky-blue eyes, and pronounced "
            "cheekbones. Together, they exude harmony and intrigue, their contrasting features complementing each "
            "other.")
 
 print("Loading model...")
 transformer = FluxTransformer2DModel.from_pretrained(MODEL_DIR, torch_dtype=torch.bfloat16, use_safetensors=True)
 print("Creating pipeline...")
 pipeline = FluxPipeline.from_pretrained(
     "black-forest-labs/FLUX.1-dev", transformer=transformer, torch_dtype=torch.bfloat16,
     use_safetensors=True, local_dir="./models/dev/", local_dir_use_symlinks=False,
     ignore_patterns=["flux1-dev.sft", "flux1-dev.safetensors"]).to(DEVICE)
 if USE_CPU_OFFLOAD: pipeline.enable_sequential_cpu_offload()
 print("Generating image...")
 # Params:
 # prompt — The prompt or prompts to guide the image generation. If not defined, prompt_embeds must be passed instead.
 # prompt_2 — The prompt or prompts to be sent to tokenizer_2 and text_encoder_2. If not defined, prompt is used instead.
 # height — The height in pixels of the generated image. This is set to 1024 by default for the best results.
 # width — The width in pixels of the generated image. This is set to 1024 by default for the best results.
 # num_inference_steps — The number of denoising steps. More denoising steps usually lead to a higher-quality image at the expense of slower inference.
 # timesteps — Custom timesteps to use for the denoising process, for schedulers that support a timesteps argument in their set_timesteps method. If not defined, the default behavior when num_inference_steps is passed is used. Must be in descending order.
 # guidance_scale — Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598). guidance_scale is defined as w of equation 2 of the [Imagen paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance is enabled by setting guidance_scale > 1; a higher guidance scale encourages images that are closely linked to the text prompt, usually at the expense of lower image quality.
 # num_images_per_prompt — The number of images to generate per prompt.
 # generator — One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make generation deterministic.
 # latents — Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image generation. Can be used to tweak the same generation with different prompts. If not provided, a latents tensor will be generated by sampling using the supplied random generator.
 # prompt_embeds — Pre-generated text embeddings. Can be used to easily tweak text inputs, e.g. prompt weighting. If not provided, text embeddings will be generated from the prompt input argument.
 # pooled_prompt_embeds — Pre-generated pooled text embeddings. Can be used to easily tweak text inputs, e.g. prompt weighting. If not provided, pooled text embeddings will be generated from the prompt input argument.
 # output_type — The output format of the generated image. Choose between [PIL](https://pillow.readthedocs.io/en/stable/) (PIL.Image.Image) or np.array.
 # return_dict — Whether or not to return a [~pipelines.flux.FluxPipelineOutput] instead of a plain tuple.
 # joint_attention_kwargs — A kwargs dictionary that, if specified, is passed along to the AttentionProcessor as defined under self.processor in [diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
 # callback_on_step_end — A function called at the end of each denoising step during inference, with the arguments callback_on_step_end(self: DiffusionPipeline, step: int, timestep: int, callback_kwargs: Dict). callback_kwargs will include a list of all tensors as specified by callback_on_step_end_tensor_inputs.
 # callback_on_step_end_tensor_inputs — The list of tensor inputs for the callback_on_step_end function. The tensors specified in the list will be passed as the callback_kwargs argument. You can only include variables listed in the ._callback_tensor_inputs attribute of your pipeline class.
 # max_sequence_length — Maximum sequence length to use with the prompt.
 # Returns:
 # [~pipelines.flux.FluxPipelineOutput] if return_dict is True, otherwise a tuple whose first element is a list with the generated images.
 images = pipeline(
     prompt=PROMPT,
     prompt_2=PROMPT2,
     guidance_scale=CFG,
     num_inference_steps=NUM_STEPS,
     height=IMAGE_HEIGHT,
     width=IMAGE_WIDTH,
     max_sequence_length=512,
-    generator=torch.manual_seed(
+    generator=torch.manual_seed(SEED),
     num_images_per_prompt=NUM_IMAGES,
 ).images
 for i, image in enumerate(images):
     print("Saving image...")
     path = os.path.join(IMAGE_OUTPUT_DIR, f"{IMAGE_PREFIX}_{i}.png")
     image.save(path)
 print("Done.")
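
The fix is the SEED argument: the old line opened torch.manual_seed( without closing it, so num_images_per_prompt was swallowed by the wrong call and the script could not run. The new line passes the seeded global CPU generator, making the batch reproducible. Since the param notes above say generator may also be a list, here is a minimal sketch (not part of the commit; SEED and NUM_IMAGES reused from the script's configuration) that seeds one torch.Generator per image so each output can be regenerated individually:

import torch

SEED = 0        # from the script's configuration
NUM_IMAGES = 4  # from the script's configuration

# torch.manual_seed(SEED), as used in the commit, seeds and returns the single
# global CPU generator, so all images share one RNG stream. A list of per-image
# generators instead makes image i reproducible on its own via SEED + i.
generators = [torch.Generator("cpu").manual_seed(SEED + i) for i in range(NUM_IMAGES)]

# Hypothetical call mirroring the script:
# images = pipeline(..., generator=generators, num_images_per_prompt=NUM_IMAGES).images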
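
Separately, the config block pairs USE_CPU_OFFLOAD with DEVICE = torch.device("cpu"), echoing the comment that sequential offload expects the pipeline to stay on the CPU. A short sketch of the device selection that comment implies (assumed logic, not part of the commit):

import torch

USE_CPU_OFFLOAD = True  # from the script's configuration

# enable_sequential_cpu_offload() streams submodules to the GPU one at a time
# during the forward pass, so the pipeline object itself should remain on the
# CPU; only without offload does moving the whole pipeline to CUDA make sense.
if USE_CPU_OFFLOAD:
    device = torch.device("cpu")
else:
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# pipeline = FluxPipeline.from_pretrained(...).to(device)
# if USE_CPU_OFFLOAD:
#     pipeline.enable_sequential_cpu_offload()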