HaileyStorm committed on
Commit 42f204e • 1 Parent(s): 727fd82

Update infer.py

Files changed (1)
  1. infer.py +73 -73
infer.py CHANGED
@@ -1,73 +1,73 @@
- from diffusers import FluxPipeline, FluxTransformer2DModel
- import torch
- import os
-
- # Configuration
- MODEL_DIR = "./merged_models/2.5_1"
- IMAGE_OUTPUT_DIR = "./"
- IMAGE_PREFIX = "flowers_2.5_1"
- DEVICE = torch.device("cpu")
- # If True, uses pipeline.enable_sequential_cpu_offload(). Make sure device is CPU.
- USE_CPU_OFFLOAD = True
- SEED = 0
- # At least 880x656 fits on a 24GB GPU w/ sequential offload
- IMAGE_WIDTH = 1280
- IMAGE_HEIGHT = 1024
- NUM_STEPS = 10  # Try ~4-8 for 10:1 and ~8-16+ for 4:1 and 2.5:1 ("Default" 6, 10, 16)
- NUM_IMAGES = 4
- CFG = 3.5
- PROMPT = ("Impressionistic tableau medium shot painting with soft, blended brushstrokes and muted colors complemented "
-           "by sporadic vibrant highlights.")
- PROMPT2 = ("Impressionistic tableau painting with soft brushstrokes and muted colors, accented by vibrant highlights, "
-            "of a tranquil courtyard surrounded by wildflowers. Madison, a 19-year-old woman with light dirty blond "
-            "hair and bubblegum-pink highlights in a ponytail, brown eyes, and soft facial features, stands beside "
-            "Amelia, a tall mid-20s woman with deep auburn hair in a messy bun, summer sky-blue eyes, and pronounced "
-            "cheekbones. Together, they exude harmony and intrigue, their contrasting features complementing each "
-            "other.")
-
- print("Loading model...")
- transformer = FluxTransformer2DModel.from_pretrained(MODEL_DIR, torch_dtype=torch.bfloat16, use_safetensors=True)
- print("Creating pipeline...")
- pipeline = FluxPipeline.from_pretrained(
-     "black-forest-labs/FLUX.1-dev", transformer=transformer, torch_dtype=torch.bfloat16,
-     use_safetensors=True, local_dir="./models/dev/", local_dir_use_symlinks=False,
-     ignore_patterns=["flux1-dev.sft", "flux1-dev.safetensors"]).to(DEVICE)
- if USE_CPU_OFFLOAD: pipeline.enable_sequential_cpu_offload()
- print("Generating images...")
- # Params:
- # prompt – The prompt or prompts to guide the image generation. If not defined, one has to pass prompt_embeds instead.
- # prompt_2 – The prompt or prompts to be sent to tokenizer_2 and text_encoder_2. If not defined, prompt will be used instead.
- # height – The height in pixels of the generated image. This is set to 1024 by default for the best results.
- # width – The width in pixels of the generated image. This is set to 1024 by default for the best results.
- # num_inference_steps – The number of denoising steps. More denoising steps usually lead to a higher-quality image at the expense of slower inference.
- # timesteps – Custom timesteps to use for the denoising process with schedulers which support a timesteps argument in their set_timesteps method. If not defined, the default behavior when num_inference_steps is passed will be used. Must be in descending order.
- # guidance_scale – Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598). guidance_scale is defined as w of equation 2 of the [Imagen Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting guidance_scale > 1. A higher guidance scale encourages the model to generate images that are closely linked to the text prompt, usually at the expense of lower image quality.
- # num_images_per_prompt – The number of images to generate per prompt.
- # generator – One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make generation deterministic.
- # latents – Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image generation. Can be used to tweak the same generation with different prompts. If not provided, a latents tensor will be generated by sampling using the supplied random generator.
- # prompt_embeds – Pre-generated text embeddings. Can be used to easily tweak text inputs, e.g. prompt weighting. If not provided, text embeddings will be generated from the prompt input argument.
- # pooled_prompt_embeds – Pre-generated pooled text embeddings. Can be used to easily tweak text inputs, e.g. prompt weighting. If not provided, pooled text embeddings will be generated from the prompt input argument.
- # output_type – The output format of the generated image. Choose between [PIL](https://pillow.readthedocs.io/en/stable/): PIL.Image.Image or np.array.
- # return_dict – Whether or not to return a [~pipelines.flux.FluxPipelineOutput] instead of a plain tuple.
- # joint_attention_kwargs – A kwargs dictionary that, if specified, is passed along to the AttentionProcessor as defined under self.processor in [diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
- # callback_on_step_end – A function that is called at the end of each denoising step during inference. The function is called with the following arguments: callback_on_step_end(self: DiffusionPipeline, step: int, timestep: int, callback_kwargs: Dict). callback_kwargs will include a list of all tensors as specified by callback_on_step_end_tensor_inputs.
- # callback_on_step_end_tensor_inputs – The list of tensor inputs for the callback_on_step_end function. The tensors specified in the list will be passed as the callback_kwargs argument. You will only be able to include variables listed in the ._callback_tensor_inputs attribute of your pipeline class.
- # max_sequence_length – Maximum sequence length to use with the prompt.
- # Returns:
- # [~pipelines.flux.FluxPipelineOutput] if return_dict is True, otherwise a tuple. When returning a tuple, the first element is a list with the generated images.
- images = pipeline(
-     prompt=PROMPT,
-     prompt_2=PROMPT2,
-     guidance_scale=CFG,
-     num_inference_steps=NUM_STEPS,
-     height=IMAGE_HEIGHT,
-     width=IMAGE_WIDTH,
-     max_sequence_length=512,
-     generator=torch.manual_seed(42),
-     num_images_per_prompt=NUM_IMAGES,
- ).images
- for i, image in enumerate(images):
-     print("Saving image...")
-     path = os.path.join(IMAGE_OUTPUT_DIR, f"{IMAGE_PREFIX}_{i}.png")
-     image.save(path)
- print("Done.")
 
+ from diffusers import FluxPipeline, FluxTransformer2DModel
+ import torch
+ import os
+
+ # Configuration
+ MODEL_DIR = "./merged_models/2.5_1"
+ IMAGE_OUTPUT_DIR = "./"
+ IMAGE_PREFIX = "flowers_2.5_1"
+ DEVICE = torch.device("cpu")
+ # If True, uses pipeline.enable_sequential_cpu_offload(). Make sure device is CPU.
+ USE_CPU_OFFLOAD = True
+ SEED = 0
+ # At least 880x656 fits on a 24GB GPU w/ sequential offload
+ IMAGE_WIDTH = 1280
+ IMAGE_HEIGHT = 1024
+ NUM_STEPS = 10  # Try ~4-8 for 10:1 and ~8-16+ for 4:1 and 2.5:1 ("Default" 6, 10, 16)
+ NUM_IMAGES = 4
+ CFG = 3.5
+ PROMPT = ("Impressionistic tableau medium shot painting with soft, blended brushstrokes and muted colors complemented "
+           "by sporadic vibrant highlights.")
+ PROMPT2 = ("Impressionistic tableau painting with soft brushstrokes and muted colors, accented by vibrant highlights, "
+            "of a tranquil courtyard surrounded by wildflowers. Madison, a 19-year-old woman with light dirty blond "
+            "hair and bubblegum-pink highlights in a ponytail, brown eyes, and soft facial features, stands beside "
+            "Amelia, a tall mid-20s woman with deep auburn hair in a messy bun, summer sky-blue eyes, and pronounced "
+            "cheekbones. Together, they exude harmony and intrigue, their contrasting features complementing each "
+            "other.")
+
+ print("Loading model...")
+ transformer = FluxTransformer2DModel.from_pretrained(MODEL_DIR, torch_dtype=torch.bfloat16, use_safetensors=True)
+ print("Creating pipeline...")
+ pipeline = FluxPipeline.from_pretrained(
+     "black-forest-labs/FLUX.1-dev", transformer=transformer, torch_dtype=torch.bfloat16,
+     use_safetensors=True, local_dir="./models/dev/", local_dir_use_symlinks=False,
+     ignore_patterns=["flux1-dev.sft", "flux1-dev.safetensors"]).to(DEVICE)
+ if USE_CPU_OFFLOAD: pipeline.enable_sequential_cpu_offload()
+ print("Generating images...")
+ # Params:
+ # prompt – The prompt or prompts to guide the image generation. If not defined, one has to pass prompt_embeds instead.
+ # prompt_2 – The prompt or prompts to be sent to tokenizer_2 and text_encoder_2. If not defined, prompt will be used instead.
+ # height – The height in pixels of the generated image. This is set to 1024 by default for the best results.
+ # width – The width in pixels of the generated image. This is set to 1024 by default for the best results.
+ # num_inference_steps – The number of denoising steps. More denoising steps usually lead to a higher-quality image at the expense of slower inference.
+ # timesteps – Custom timesteps to use for the denoising process with schedulers which support a timesteps argument in their set_timesteps method. If not defined, the default behavior when num_inference_steps is passed will be used. Must be in descending order.
+ # guidance_scale – Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598). guidance_scale is defined as w of equation 2 of the [Imagen Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting guidance_scale > 1. A higher guidance scale encourages the model to generate images that are closely linked to the text prompt, usually at the expense of lower image quality.
+ # num_images_per_prompt – The number of images to generate per prompt.
+ # generator – One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make generation deterministic.
+ # latents – Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image generation. Can be used to tweak the same generation with different prompts. If not provided, a latents tensor will be generated by sampling using the supplied random generator.
+ # prompt_embeds – Pre-generated text embeddings. Can be used to easily tweak text inputs, e.g. prompt weighting. If not provided, text embeddings will be generated from the prompt input argument.
+ # pooled_prompt_embeds – Pre-generated pooled text embeddings. Can be used to easily tweak text inputs, e.g. prompt weighting. If not provided, pooled text embeddings will be generated from the prompt input argument.
+ # output_type – The output format of the generated image. Choose between [PIL](https://pillow.readthedocs.io/en/stable/): PIL.Image.Image or np.array.
+ # return_dict – Whether or not to return a [~pipelines.flux.FluxPipelineOutput] instead of a plain tuple.
+ # joint_attention_kwargs – A kwargs dictionary that, if specified, is passed along to the AttentionProcessor as defined under self.processor in [diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
+ # callback_on_step_end – A function that is called at the end of each denoising step during inference. The function is called with the following arguments: callback_on_step_end(self: DiffusionPipeline, step: int, timestep: int, callback_kwargs: Dict). callback_kwargs will include a list of all tensors as specified by callback_on_step_end_tensor_inputs.
+ # callback_on_step_end_tensor_inputs – The list of tensor inputs for the callback_on_step_end function. The tensors specified in the list will be passed as the callback_kwargs argument. You will only be able to include variables listed in the ._callback_tensor_inputs attribute of your pipeline class.
+ # max_sequence_length – Maximum sequence length to use with the prompt.
+ # Returns:
+ # [~pipelines.flux.FluxPipelineOutput] if return_dict is True, otherwise a tuple. When returning a tuple, the first element is a list with the generated images.
+ images = pipeline(
+     prompt=PROMPT,
+     prompt_2=PROMPT2,
+     guidance_scale=CFG,
+     num_inference_steps=NUM_STEPS,
+     height=IMAGE_HEIGHT,
+     width=IMAGE_WIDTH,
+     max_sequence_length=512,
+     generator=torch.manual_seed(SEED),
+     num_images_per_prompt=NUM_IMAGES,
+ ).images
+ for i, image in enumerate(images):
+     print("Saving image...")
+     path = os.path.join(IMAGE_OUTPUT_DIR, f"{IMAGE_PREFIX}_{i}.png")
+     image.save(path)
+ print("Done.")
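The single functional change in this commit is in the pipeline call: the hardcoded literal in generator=torch.manual_seed(42) becomes generator=torch.manual_seed(SEED), so the SEED constant defined in the configuration block actually controls generation. A minimal sketch of the behavior, assuming only standard torch semantics (the dedicated-Generator variant at the end is an illustrative alternative, not something this commit adds):

import torch

SEED = 0  # mirrors the script's configuration block

# Before this commit: the literal 42 was passed, so editing SEED had no
# effect on the generated images.
generator = torch.manual_seed(42)

# After this commit: the configured SEED drives generation. torch.manual_seed
# seeds torch's global CPU RNG and returns that global Generator object.
generator = torch.manual_seed(SEED)

# Illustrative alternative (not in this commit): a dedicated Generator keeps
# the pipeline's sampling independent of other global RNG use in the process.
generator = torch.Generator("cpu").manual_seed(SEED)

# Either form is then passed to the pipeline call, e.g.
# images = pipeline(prompt=PROMPT, generator=generator, ...).images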