Spaces (Runtime error)
cocktailpeanut committed
Commit f38676b
1 Parent(s): 8de121e
update
Files changed:
- app.py +77 -11
- requirements.txt +3 -1
app.py CHANGED
@@ -6,24 +6,90 @@ import torch
 import devicetorch
 from diffusers import DiffusionPipeline
 
-device = devicetorch.get(torch)
-if device == "cuda":
-    dtype = torch.bfloat16
-elif device == "mps":
-    dtype = torch.float16
-else:
-    dtype = torch.float32
-#dtype = torch.bfloat16
-#device = "cuda" if torch.cuda.is_available() else "cpu"
 
-#
-pipe = DiffusionPipeline.from_pretrained("cocktailpeanut/xulf-s", torch_dtype=dtype).to(device)
+# Quant
+from optimum.quanto import freeze, qfloat8, quantize
+from diffusers import FlowMatchEulerDiscreteScheduler, AutoencoderKL
+from diffusers.models.transformers.transformer_flux import FluxTransformer2DModel
+from diffusers.pipelines.flux.pipeline_flux import FluxPipeline
+from transformers import CLIPTextModel, CLIPTokenizer, T5EncoderModel, T5TokenizerFast
+
+
+#device = devicetorch.get(torch)
+#if device == "cuda":
+#    dtype = torch.bfloat16
+#elif device == "mps":
+#    dtype = torch.float16
+#else:
+#    dtype = torch.float32
+##dtype = torch.bfloat16
+##device = "cuda" if torch.cuda.is_available() else "cpu"
+#
+##pipe = DiffusionPipeline.from_pretrained("black-forest-labs/FLUX.1-schnell", torch_dtype=torch.bfloat16, revision="refs/pr/1").to(device)
+#pipe = DiffusionPipeline.from_pretrained("cocktailpeanut/xulf-s", torch_dtype=dtype).to(device)
 
 MAX_SEED = np.iinfo(np.int32).max
 MAX_IMAGE_SIZE = 2048
 
+
+def init():
+    global pipe
+
+    dtype = torch.bfloat16
+
+    # schnell is the distilled turbo model. For the CFG distilled model, use:
+    # bfl_repo = "black-forest-labs/FLUX.1-dev"
+    # revision = "refs/pr/3"
+    #
+    # The undistilled model that uses CFG ("pro") which can use negative prompts
+    # was not released.
+    bfl_repo = "cocktailpeanut/xulf-s"
+
+    scheduler = FlowMatchEulerDiscreteScheduler.from_pretrained(bfl_repo, subfolder="scheduler")
+    text_encoder = CLIPTextModel.from_pretrained("openai/clip-vit-large-patch14", torch_dtype=dtype)
+    tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14", torch_dtype=dtype)
+    text_encoder_2 = T5EncoderModel.from_pretrained(bfl_repo, subfolder="text_encoder_2", torch_dtype=dtype, revision=revision)
+    tokenizer_2 = T5TokenizerFast.from_pretrained(bfl_repo, subfolder="tokenizer_2", torch_dtype=dtype, revision=revision)
+    vae = AutoencoderKL.from_pretrained(bfl_repo, subfolder="vae", torch_dtype=dtype, revision=revision)
+    transformer = FluxTransformer2DModel.from_pretrained(bfl_repo, subfolder="transformer", torch_dtype=dtype, revision=revision)
+
+    # Experimental: Try this to load in 4-bit for <16GB cards.
+    #
+    # from optimum.quanto import qint4
+    # quantize(transformer, weights=qint4, exclude=["proj_out", "x_embedder", "norm_out", "context_embedder"])
+    # freeze(transformer)
+    quantize(transformer, weights=qfloat8)
+    freeze(transformer)
+
+    quantize(text_encoder_2, weights=qfloat8)
+    freeze(text_encoder_2)
+
+    pipe = FluxPipeline(
+        scheduler=scheduler,
+        text_encoder=text_encoder,
+        tokenizer=tokenizer,
+        text_encoder_2=None,
+        tokenizer_2=tokenizer_2,
+        vae=vae,
+        transformer=None,
+    )
+    pipe.text_encoder_2 = text_encoder_2
+    pipe.transformer = transformer
+    pipe.enable_model_cpu_offload()
+
+    # generator = torch.Generator().manual_seed(12345)
+    # image = pipe(
+    #     prompt='nekomusume cat girl, digital painting',
+    #     width=1024,
+    #     height=1024,
+    #     num_inference_steps=4,
+    #     generator=generator,
+    #     guidance_scale=3.5,
+    # ).images[0]
+
 #@spaces.GPU()
 def infer(prompt, seed=42, randomize_seed=False, width=1024, height=1024, num_inference_steps=4, progress=gr.Progress(track_tqdm=True)):
+    global pipe
     if randomize_seed:
         seed = random.randint(0, MAX_SEED)
     generator = torch.Generator().manual_seed(seed)
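Outside this hunk, app.py still has to call init() once and route infer() through the Gradio UI; neither call site appears in the diff. The sketch below is illustrative only and not part of the commit: the prompt string and the image variable name are made up, and it supplies a module-level revision value because the loaders in init() pass revision=revision while the revision assignment above stays commented out.

# Sketch (not in the commit): exercising the updated entry points directly.
# Assumes the names defined in the hunk above (init, infer, MAX_SEED, pipe)
# and supplies `revision`, which init() reads but never assigns.
revision = None  # default branch of the cocktailpeanut/xulf-s components

init()  # build the qfloat8-quantized FluxPipeline once; stores it in the global `pipe`

image = infer(
    "a watercolor lighthouse at dusk",  # illustrative prompt
    seed=42,
    randomize_seed=False,
    width=1024,
    height=1024,
    num_inference_steps=4,
)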
requirements.txt CHANGED
@@ -1,9 +1,11 @@
 accelerate
 #git+https://github.com/huggingface/diffusers.git@flux-pipeline
-git+https://github.com/peanutcocktail/diffusers.git
+#git+https://github.com/peanutcocktail/diffusers.git
+git+https://github.com/huggingface/diffusers.git
 invisible_watermark
 #torch
 transformers==4.42.4
 protobuf
 #xformers
 sentencepiece
+optimum-quanto
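On the dependency side, the peanutcocktail/diffusers fork is swapped for upstream diffusers from GitHub, and optimum-quanto is added; it provides the quantize, freeze, and qfloat8 names imported in app.py. Below is a minimal, self-contained sketch of that quantization pattern on a small stand-in torch module (not the FLUX transformer), just to show what the new dependency does.

# Sketch: the optimum-quanto weight-quantization pattern app.py relies on,
# applied to a tiny stand-in model so it runs anywhere.
import torch
from optimum.quanto import freeze, qfloat8, quantize

model = torch.nn.Sequential(
    torch.nn.Linear(64, 64),
    torch.nn.GELU(),
    torch.nn.Linear(64, 8),
)

quantize(model, weights=qfloat8)  # replace Linear weights with qfloat8 quantized tensors
freeze(model)                     # materialize quantized weights, dropping the fp32 copies

with torch.no_grad():
    print(model(torch.randn(1, 64)).shape)  # torch.Size([1, 8])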