multimodalart HF staff committed on
Commit
af079bb
1 Parent(s): f881d28

attempt compilation

Browse files
Files changed (1) hide show
  1. app.py +13 -0
app.py CHANGED
@@ -7,11 +7,24 @@ import spaces
7
 
8
  device = "cuda" if torch.cuda.is_available() else "cpu"
9
 
 
 
 
 
 
 
 
10
  pipe = AuraFlowPipeline.from_pretrained(
11
  "AuraDiffusion/AuraFlow",
12
  torch_dtype=torch.float16
13
  ).to("cuda")
14
 
 
 
 
 
 
 
15
  MAX_SEED = np.iinfo(np.int32).max
16
  MAX_IMAGE_SIZE = 1024
17
 
 
7
 
8
  device = "cuda" if torch.cuda.is_available() else "cpu"
9
 
10
+ torch.set_float32_matmul_precision("high")
11
+
12
+ torch._inductor.config.conv_1x1_as_mm = True
13
+ torch._inductor.config.coordinate_descent_tuning = True
14
+ torch._inductor.config.epilogue_fusion = False
15
+ torch._inductor.config.coordinate_descent_check_all_directions = True
16
+
17
  pipe = AuraFlowPipeline.from_pretrained(
18
  "AuraDiffusion/AuraFlow",
19
  torch_dtype=torch.float16
20
  ).to("cuda")
21
 
22
+ pipe.transformer.to(memory_format=torch.channels_last)
23
+ pipe.vae.to(memory_format=torch.channels_last)
24
+
25
+ pipe.transformer = torch.compile(pipe.transformer, mode="max-autotune", fullgraph=True)
26
+ pipe.vae.decode = torch.compile(pipe.vae.decode, mode="max-autotune", fullgraph=True)
27
+
28
  MAX_SEED = np.iinfo(np.int32).max
29
  MAX_IMAGE_SIZE = 1024
30