---
library_name: diffusers
license: other
license_name: flux-1-dev-non-commercial-license
license_link: LICENSE.md
---

> [!NOTE]
> Contains the NF4 checkpoints (`transformer` and `text_encoder_2`) of [`black-forest-labs/FLUX.1-Depth-dev`](https://huggingface.co/black-forest-labs/FLUX.1-Depth-dev). Please adhere to the original model licensing!

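For reference, checkpoints like these are typically exported by loading the original bf16 weights with a bitsandbytes NF4 quantization config and serializing the quantized modules. The snippet below is only a minimal sketch of that process, assuming bitsandbytes-backed 4-bit (NF4) quantization through `diffusers` and `transformers`; the output directory name and the `bfloat16` compute dtype are illustrative, and this is not necessarily the exact script used to produce this repository.

```py
# Sketch only: assumes bitsandbytes NF4 quantization; paths/dtypes are illustrative.
import torch
from diffusers import FluxTransformer2DModel, BitsAndBytesConfig as DiffusersBitsAndBytesConfig
from transformers import T5EncoderModel, BitsAndBytesConfig as TransformersBitsAndBytesConfig

# Quantize the Flux transformer to NF4 (diffusers-side bitsandbytes config).
transformer_4bit = FluxTransformer2DModel.from_pretrained(
    "black-forest-labs/FLUX.1-Depth-dev",
    subfolder="transformer",
    quantization_config=DiffusersBitsAndBytesConfig(
        load_in_4bit=True, bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype=torch.bfloat16
    ),
    torch_dtype=torch.bfloat16,
)
transformer_4bit.save_pretrained("FLUX.1-Depth-dev-nf4/transformer")

# Same idea for the T5 text encoder (transformers-side bitsandbytes config).
text_encoder_2_4bit = T5EncoderModel.from_pretrained(
    "black-forest-labs/FLUX.1-Depth-dev",
    subfolder="text_encoder_2",
    quantization_config=TransformersBitsAndBytesConfig(
        load_in_4bit=True, bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype=torch.bfloat16
    ),
    torch_dtype=torch.bfloat16,
)
text_encoder_2_4bit.save_pretrained("FLUX.1-Depth-dev-nf4/text_encoder_2")
```
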
<details>
<summary>Code</summary>

```py
# !pip install git+https://github.com/asomoza/image_gen_aux.git
from diffusers import DiffusionPipeline, FluxControlPipeline, FluxTransformer2DModel
import torch
from transformers import T5EncoderModel
from image_gen_aux import DepthPreprocessor
from diffusers.utils import load_image
import fire


def load_pipeline(four_bit=False):
    # Start from the base FLUX.1-dev pipeline, then swap in the Depth transformer
    # (and, for the NF4 variant, the quantized T5 text encoder as well).
    orig_pipeline = DiffusionPipeline.from_pretrained("black-forest-labs/FLUX.1-dev", torch_dtype=torch.bfloat16)
    if four_bit:
        print("Using four bit.")
        transformer = FluxTransformer2DModel.from_pretrained(
            "sayakpaul/FLUX.1-Depth-dev-nf4", subfolder="transformer", torch_dtype=torch.bfloat16
        )
        text_encoder_2 = T5EncoderModel.from_pretrained(
            "sayakpaul/FLUX.1-Depth-dev-nf4", subfolder="text_encoder_2", torch_dtype=torch.bfloat16
        )
        pipeline = FluxControlPipeline.from_pipe(
            orig_pipeline, transformer=transformer, text_encoder_2=text_encoder_2, torch_dtype=torch.bfloat16
        )
    else:
        transformer = FluxTransformer2DModel.from_pretrained(
            "black-forest-labs/FLUX.1-Depth-dev",
            subfolder="transformer",
            revision="refs/pr/1",
            torch_dtype=torch.bfloat16,
        )
        pipeline = FluxControlPipeline.from_pipe(orig_pipeline, transformer=transformer, torch_dtype=torch.bfloat16)

    # Offload idle submodules to CPU to keep peak VRAM usage low.
    pipeline.enable_model_cpu_offload()
    return pipeline


@torch.no_grad()
def get_depth(control_image):
    # Depth Anything produces the depth map used as the control image.
    processor = DepthPreprocessor.from_pretrained("LiheYoung/depth-anything-large-hf")
    control_image = processor(control_image)[0].convert("RGB")
    return control_image


def load_conditions():
    prompt = "A robot made of exotic candies and chocolates of different kinds. The background is filled with confetti and celebratory gifts."
    control_image = load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/robot.png")
    control_image = get_depth(control_image)
    return prompt, control_image


def main(four_bit: bool = False):
    ckpt_id = "sayakpaul/FLUX.1-Depth-dev-nf4"
    pipe = load_pipeline(four_bit=four_bit)
    prompt, control_image = load_conditions()
    image = pipe(
        prompt=prompt,
        control_image=control_image,
        height=1024,
        width=1024,
        num_inference_steps=30,
        guidance_scale=10.0,
        max_sequence_length=512,
        generator=torch.Generator("cpu").manual_seed(0),
    ).images[0]
    # Name the output after the checkpoint id; append "_4bit" for the NF4 run.
    filename = "output_" + ckpt_id.split("/")[-1].replace(".", "_")
    filename += "_4bit" if four_bit else ""
    image.save(f"{filename}.png")


if __name__ == "__main__":
    fire.Fire(main)
```
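If the script above is saved as, say, `generate_depth.py` (any filename works), the NF4 variant can be run with `python generate_depth.py --four_bit`, since `fire.Fire(main)` exposes `main`'s keyword arguments as CLI flags.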
</details>

## Outputs

<table>
<thead>
<tr>
<th>Original</th>
<th>NF4</th>
</tr>
</thead>
<tbody>
<tr>
<td>
<img src="./assets/output_FLUX_1-Depth-dev.png" alt="Original">
</td>
<td>
<img src="./assets/output_FLUX_1-Depth-dev_4bit.png" alt="NF4">
</td>
</tr>
</tbody>
</table>