Update src/pipeline.py

src/pipeline.py (+3 -2)

@@ -26,7 +26,7 @@ import numpy as np
 import torch.nn as nn
 import torch.nn.functional as F
 from torchao.quantization import quantize_, float8_weight_only, int4_weight_only
-from para_attn.first_block_cache.diffusers_adapters import apply_cache_on_pipe
+from para_attn.first_block_cache.diffusers_adapters import apply_cache_on_pipe as cacher
 import os
 os.environ['PYTORCH_CUDA_ALLOC_CONF']="expandable_segments:True"
 os.environ["TOKENIZERS_PARALLELISM"] = "True"
@@ -34,6 +34,7 @@ torch._dynamo.config.suppress_errors = True
 torch.backends.cuda.matmul.allow_tf32 = True
 torch.backends.cudnn.enabled = True
 torch.backends.cudnn.benchmark = True
+torch.cuda.set_per_process_memory_fraction(0.99)

 # globals
 Pipeline = None
@@ -67,7 +68,7 @@ def load_pipeline() -> Pipeline:
 )
 # pipeline.vae = torch.compile(vae)
 pipeline.to("cuda")
-pipeline =
+pipeline = cacher(pipeline,residual_diff_threshold=0.56)
 quantize_(pipeline.vae, int4_weight_only())

 warmup_ = "controllable varied focus thai warriors entertainment claude still goat gang gang yeah"