HaileyStorm
committed on
Commit
•
42f204e
1 Parent(s): 727fd82
Update infer.py
infer.py
CHANGED
@@ -1,73 +1,73 @@
 from diffusers import FluxPipeline, FluxTransformer2DModel
 import torch
 import os
 
 # Configuration
 MODEL_DIR = "./merged_models/2.5_1"
 IMAGE_OUTPUT_DIR = "./"
 IMAGE_PREFIX = "flowers_2.5_1"
 DEVICE = torch.device("cpu")
 # If True, uses pipeline.enable_sequential_cpu_offload(). Make sure device is CPU.
 USE_CPU_OFFLOAD = True
 SEED = 0
 # At least 880x656 fits on a 24GB GPU w/ sequential offload
 IMAGE_WIDTH = 1280
 IMAGE_HEIGHT = 1024
 NUM_STEPS = 10  # Try ~4-8 for 10:1 and ~8-16+ for 4:1 and 2.5:1 ("Default" 6, 10, 16)
 NUM_IMAGES = 4
 CFG = 3.5
 PROMPT = ("Impressionistic tableau medium shot painting with soft, blended brushstrokes and muted colors complemented "
           "by sporadic vibrant highlights.")
 PROMPT2 = ("Impressionistic tableau painting with soft brushstrokes and muted colors, accented by vibrant highlights, "
            "of a tranquil courtyard surrounded by wildflowers. Madison, a 19-year-old woman with light dirty blond "
            "hair and bubblegum-pink highlights in a ponytail, brown eyes, and soft facial features, stands beside "
            "Amelia, a tall mid-20s woman with deep auburn hair in a messy bun, summer sky-blue eyes, and pronounced "
            "cheekbones. Together, they exude harmony and intrigue, their contrasting features complementing each "
            "other.")
 
 print("Loading model...")
 transformer = FluxTransformer2DModel.from_pretrained(MODEL_DIR, torch_dtype=torch.bfloat16, use_safetensors=True)
 print("Creating pipeline...")
 pipeline = FluxPipeline.from_pretrained(
     "black-forest-labs/FLUX.1-dev", transformer=transformer, torch_dtype=torch.bfloat16,
     use_safetensors=True, local_dir="./models/dev/", local_dir_use_symlinks=False,
     ignore_patterns=["flux1-dev.sft", "flux1-dev.safetensors"]).to(DEVICE)
 if USE_CPU_OFFLOAD: pipeline.enable_sequential_cpu_offload()
 print("Generating image...")
 # Params:
 # prompt — The prompt or prompts to guide the image generation. If not defined, prompt_embeds must be passed instead.
 # prompt_2 — The prompt or prompts to be sent to tokenizer_2 and text_encoder_2. If not defined, prompt is used instead.
 # height — The height in pixels of the generated image. This is set to 1024 by default for the best results.
 # width — The width in pixels of the generated image. This is set to 1024 by default for the best results.
 # num_inference_steps — The number of denoising steps. More denoising steps usually lead to a higher-quality image at the expense of slower inference.
 # timesteps — Custom timesteps to use for the denoising process, for schedulers that support a timesteps argument in their set_timesteps method. If not defined, the default behavior when num_inference_steps is passed is used. Must be in descending order.
 # guidance_scale — Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598). guidance_scale is defined as w of equation 2 of the [Imagen paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance is enabled by setting guidance_scale > 1; a higher guidance scale encourages images that are closely linked to the text prompt, usually at the expense of lower image quality.
 # num_images_per_prompt — The number of images to generate per prompt.
 # generator — One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make generation deterministic.
 # latents — Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image generation. Can be used to tweak the same generation with different prompts. If not provided, a latents tensor will be generated by sampling using the supplied random generator.
 # prompt_embeds — Pre-generated text embeddings. Can be used to easily tweak text inputs, e.g. prompt weighting. If not provided, text embeddings will be generated from the prompt input argument.
 # pooled_prompt_embeds — Pre-generated pooled text embeddings. Can be used to easily tweak text inputs, e.g. prompt weighting. If not provided, pooled text embeddings will be generated from the prompt input argument.
 # output_type — The output format of the generated image. Choose between [PIL](https://pillow.readthedocs.io/en/stable/) (PIL.Image.Image) or np.array.
 # return_dict — Whether or not to return a [~pipelines.flux.FluxPipelineOutput] instead of a plain tuple.
 # joint_attention_kwargs — A kwargs dictionary that, if specified, is passed along to the AttentionProcessor as defined under self.processor in [diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
 # callback_on_step_end — A function called at the end of each denoising step during inference, with the arguments callback_on_step_end(self: DiffusionPipeline, step: int, timestep: int, callback_kwargs: Dict). callback_kwargs will include a list of all tensors as specified by callback_on_step_end_tensor_inputs.
 # callback_on_step_end_tensor_inputs — The list of tensor inputs for the callback_on_step_end function. The tensors specified in the list will be passed as the callback_kwargs argument. You can only include variables listed in the ._callback_tensor_inputs attribute of your pipeline class.
 # max_sequence_length — Maximum sequence length to use with the prompt.
 # Returns:
 # [~pipelines.flux.FluxPipelineOutput] if return_dict is True, otherwise a tuple whose first element is a list with the generated images.
 images = pipeline(
     prompt=PROMPT,
     prompt_2=PROMPT2,
     guidance_scale=CFG,
     num_inference_steps=NUM_STEPS,
     height=IMAGE_HEIGHT,
     width=IMAGE_WIDTH,
     max_sequence_length=512,
-    generator=torch.manual_seed(
+    generator=torch.manual_seed(SEED),
     num_images_per_prompt=NUM_IMAGES,
 ).images
 for i, image in enumerate(images):
     print("Saving image...")
     path = os.path.join(IMAGE_OUTPUT_DIR, f"{IMAGE_PREFIX}_{i}.png")
     image.save(path)
 print("Done.")
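
The fix is the SEED argument: the old line opened torch.manual_seed( without closing it, so num_images_per_prompt was swallowed by the wrong call and the script could not run. The new line passes the seeded global CPU generator, making the batch reproducible. Since the param notes above say generator may also be a list, here is a minimal sketch (not part of the commit; SEED and NUM_IMAGES reused from the script's configuration) that seeds one torch.Generator per image so each output can be regenerated individually:

import torch

SEED = 0        # from the script's configuration
NUM_IMAGES = 4  # from the script's configuration

# torch.manual_seed(SEED), as used in the commit, seeds and returns the single
# global CPU generator, so all images share one RNG stream. A list of per-image
# generators instead makes image i reproducible on its own via SEED + i.
generators = [torch.Generator("cpu").manual_seed(SEED + i) for i in range(NUM_IMAGES)]

# Hypothetical call mirroring the script:
# images = pipeline(..., generator=generators, num_images_per_prompt=NUM_IMAGES).images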
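
Separately, the config block pairs USE_CPU_OFFLOAD with DEVICE = torch.device("cpu"), echoing the comment that sequential offload expects the pipeline to stay on the CPU. A short sketch of the device selection that comment implies (assumed logic, not part of the commit):

import torch

USE_CPU_OFFLOAD = True  # from the script's configuration

# enable_sequential_cpu_offload() streams submodules to the GPU one at a time
# during the forward pass, so the pipeline object itself should remain on the
# CPU; only without offload does moving the whole pipeline to CUDA make sense.
if USE_CPU_OFFLOAD:
    device = torch.device("cpu")
else:
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# pipeline = FluxPipeline.from_pretrained(...).to(device)
# if USE_CPU_OFFLOAD:
#     pipeline.enable_sequential_cpu_offload()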