cocktailpeanut committed
Commit f38676b
1 Parent(s): 8de121e
Files changed (2):
  1. app.py +77 -11
  2. requirements.txt +3 -1
app.py CHANGED
@@ -6,24 +6,90 @@ import torch
 import devicetorch
 from diffusers import DiffusionPipeline
 
-device = devicetorch.get(torch)
-if device == "cuda":
-    dtype = torch.bfloat16
-elif device == "mps":
-    dtype = torch.float16
-else:
-    dtype = torch.float32
-#dtype = torch.bfloat16
-#device = "cuda" if torch.cuda.is_available() else "cpu"
 
-#pipe = DiffusionPipeline.from_pretrained("black-forest-labs/FLUX.1-schnell", torch_dtype=torch.bfloat16, revision="refs/pr/1").to(device)
-pipe = DiffusionPipeline.from_pretrained("cocktailpeanut/xulf-s", torch_dtype=dtype).to(device)
+# Quant
+from optimum.quanto import freeze, qfloat8, quantize
+from diffusers import FlowMatchEulerDiscreteScheduler, AutoencoderKL
+from diffusers.models.transformers.transformer_flux import FluxTransformer2DModel
+from diffusers.pipelines.flux.pipeline_flux import FluxPipeline
+from transformers import CLIPTextModel, CLIPTokenizer, T5EncoderModel, T5TokenizerFast
+
+
+#device = devicetorch.get(torch)
+#if device == "cuda":
+#    dtype = torch.bfloat16
+#elif device == "mps":
+#    dtype = torch.float16
+#else:
+#    dtype = torch.float32
+##dtype = torch.bfloat16
+##device = "cuda" if torch.cuda.is_available() else "cpu"
+#
+##pipe = DiffusionPipeline.from_pretrained("black-forest-labs/FLUX.1-schnell", torch_dtype=torch.bfloat16, revision="refs/pr/1").to(device)
+#pipe = DiffusionPipeline.from_pretrained("cocktailpeanut/xulf-s", torch_dtype=dtype).to(device)
 
 MAX_SEED = np.iinfo(np.int32).max
 MAX_IMAGE_SIZE = 2048
 
+
+def init():
+    global pipe
+
+    dtype = torch.bfloat16
+
+    # schnell is the distilled turbo model. For the CFG-distilled model, use:
+    # bfl_repo = "black-forest-labs/FLUX.1-dev"
+    # revision = "refs/pr/3"
+    #
+    # The undistilled model that uses CFG ("pro"), which can take negative
+    # prompts, was not released.
+    bfl_repo = "cocktailpeanut/xulf-s"
+    revision = None  # needed by the loads below; set to "refs/pr/3" for the dev repo
+
+    scheduler = FlowMatchEulerDiscreteScheduler.from_pretrained(bfl_repo, subfolder="scheduler")
+    text_encoder = CLIPTextModel.from_pretrained("openai/clip-vit-large-patch14", torch_dtype=dtype)
+    tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14", torch_dtype=dtype)
+    text_encoder_2 = T5EncoderModel.from_pretrained(bfl_repo, subfolder="text_encoder_2", torch_dtype=dtype, revision=revision)
+    tokenizer_2 = T5TokenizerFast.from_pretrained(bfl_repo, subfolder="tokenizer_2", torch_dtype=dtype, revision=revision)
+    vae = AutoencoderKL.from_pretrained(bfl_repo, subfolder="vae", torch_dtype=dtype, revision=revision)
+    transformer = FluxTransformer2DModel.from_pretrained(bfl_repo, subfolder="transformer", torch_dtype=dtype, revision=revision)
+
+    # Experimental: Try this to load in 4-bit for <16GB cards.
+    #
+    # from optimum.quanto import qint4
+    # quantize(transformer, weights=qint4, exclude=["proj_out", "x_embedder", "norm_out", "context_embedder"])
+    # freeze(transformer)
+    quantize(transformer, weights=qfloat8)
+    freeze(transformer)
+
+    quantize(text_encoder_2, weights=qfloat8)
+    freeze(text_encoder_2)
+
+    pipe = FluxPipeline(
+        scheduler=scheduler,
+        text_encoder=text_encoder,
+        tokenizer=tokenizer,
+        text_encoder_2=None,
+        tokenizer_2=tokenizer_2,
+        vae=vae,
+        transformer=None,
+    )
+    pipe.text_encoder_2 = text_encoder_2
+    pipe.transformer = transformer
+    pipe.enable_model_cpu_offload()
+
+    # generator = torch.Generator().manual_seed(12345)
+    # image = pipe(
+    #     prompt='nekomusume cat girl, digital painting',
+    #     width=1024,
+    #     height=1024,
+    #     num_inference_steps=4,
+    #     generator=generator,
+    #     guidance_scale=3.5,
+    # ).images[0]
+
 #@spaces.GPU()
 def infer(prompt, seed=42, randomize_seed=False, width=1024, height=1024, num_inference_steps=4, progress=gr.Progress(track_tqdm=True)):
+    global pipe
     if randomize_seed:
         seed = random.randint(0, MAX_SEED)
     generator = torch.Generator().manual_seed(seed)
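
The heavy lifting in init() is optimum-quanto's quantize()/freeze() pair, which works on any torch.nn.Module: quantize() swaps supported layers for quantized stand-ins, and freeze() materializes the qfloat8 weights and discards the full-precision copies, roughly halving weight memory versus bfloat16. A minimal sketch of the same pattern on a toy module (the module and shapes are illustrative, not from this commit):

import torch
from optimum.quanto import freeze, qfloat8, quantize

toy = torch.nn.Sequential(
    torch.nn.Linear(64, 256),
    torch.nn.GELU(),
    torch.nn.Linear(256, 64),
)

quantize(toy, weights=qfloat8)  # replace the Linear layers with quantized equivalents
freeze(toy)                     # bake in the qfloat8 weights; fp copies are discarded

with torch.inference_mode():
    print(toy(torch.randn(1, 64)).shape)  # torch.Size([1, 64])

Note also that FluxPipeline is first constructed with text_encoder_2=None and transformer=None and the quantized modules are attached afterward, presumably so the constructor never re-casts the quantized weights; enable_model_cpu_offload() then moves each module to the GPU only while it runs.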
requirements.txt CHANGED
@@ -1,9 +1,11 @@
 accelerate
 #git+https://github.com/huggingface/diffusers.git@flux-pipeline
-git+https://github.com/peanutcocktail/diffusers.git
+#git+https://github.com/peanutcocktail/diffusers.git
+git+https://github.com/huggingface/diffusers.git
 invisible_watermark
 #torch
 transformers==4.42.4
 protobuf
 #xformers
 sentencepiece
+optimum-quanto
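
The dependency swap moves off the personal diffusers fork and back onto upstream main, which suggests the Flux classes used above had landed upstream but were not yet in a PyPI release; optimum-quanto is new here for the qfloat8 quantization. A hypothetical smoke test (not part of the commit) to confirm the pinned environment exposes everything app.py now imports:

# Run after `pip install -r requirements.txt`.
from diffusers import FlowMatchEulerDiscreteScheduler, AutoencoderKL
from diffusers.models.transformers.transformer_flux import FluxTransformer2DModel
from diffusers.pipelines.flux.pipeline_flux import FluxPipeline
from optimum.quanto import freeze, qfloat8, quantize
from transformers import CLIPTextModel, CLIPTokenizer, T5EncoderModel, T5TokenizerFast

print("Flux + quanto imports resolved")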