PommesPeter committed • Commit 2aded43
1 Parent(s): 49f6f11
Update app.py

app.py CHANGED
@@ -80,7 +80,7 @@ def encode_prompt(
     return prompt_embeds, prompt_masks
 
 
-def load_model(args, master_port, rank, barrier):
+def load_model(args, master_port, rank):
     # import here to avoid huggingface Tokenizer parallelism warnings
     from diffusers.models import AutoencoderKL
     from transformers import AutoModelForCausalLM, AutoTokenizer
@@ -161,20 +161,19 @@ def load_model(args, master_port, rank, barrier):
     )
     model.load_state_dict(ckpt, strict=True)
 
-    barrier.wait()
+    # barrier.wait()
     return text_encoder, tokenizer, vae, model
 
 
 @torch.no_grad()
-def model_main(args, master_port, rank, request_queue, response_queue, barrier, text_encoder, tokenizer, vae, model):
+def model_main(args, master_port, rank, request_queue, response_queue, text_encoder, tokenizer, vae, model):
     dtype = {"bf16": torch.bfloat16, "fp16": torch.float16, "fp32": torch.float32}[
         args.precision
     ]
     train_args = torch.load(os.path.join(args.ckpt, "model_args.pth"))
 
     with torch.autocast("cuda", dtype):
-        barrier.wait()
-
+        # barrier.wait()
         while True:
             (
                 cap,
@@ -437,24 +436,24 @@ def main():
     request_queues = []
     response_queue = Queue()
     # mp_barrier = mp.Barrier(args.num_gpus + 1)
-    barrier = Barrier(args.num_gpus + 1)
+    # barrier = Barrier(args.num_gpus + 1)
     for i in range(args.num_gpus):
-        text_encoder, tokenizer, vae, model = load_model(args, master_port, i, barrier)
-        request_queues.append(Queue())
+        text_encoder, tokenizer, vae, model = load_model(args, master_port, i)
+        # request_queues.append(Queue())
         generation_kwargs = dict(
             args=args,
             master_port=master_port,
             rank=i,
             request_queue=request_queues[i],
             response_queue=response_queue if i == 0 else None,
-            barrier=barrier,
             text_encoder=text_encoder,
             tokenizer=tokenizer,
             vae=vae,
             model=model
         )
-        thread = Thread(target=model_main, kwargs=generation_kwargs)
-        thread.start()
+        model_main(**generation_kwargs)
+        # thread = Thread(target=model_main, kwargs=generation_kwargs)
+        # thread.start()
 
     with gr.Blocks() as demo:
         with gr.Row():
@@ -606,7 +605,7 @@ def main():
         [output_img],
     )
 
-    barrier.wait()
+    # barrier.wait()
     demo.queue(max_size=20).launch()
 
 