Spaces:
Runtime error
Runtime error
Update min_dalle/min_dalle.py
Browse files
- min_dalle/min_dalle.py +22 -24
min_dalle/min_dalle.py
CHANGED
@@ -39,10 +39,9 @@ class MinDalle:
|
|
39 |
self.dtype = dtype
|
40 |
self.is_verbose = is_verbose
|
41 |
self.text_token_count = 64
|
42 |
-
|
43 |
-
self.
|
44 |
-
self.
|
45 |
-
self.embed_count = 2048 if is_mega else 512
|
46 |
self.glu_embed_count = 4096 if is_mega else 2730
|
47 |
self.text_vocab_count = 50272 if is_mega else 50264
|
48 |
self.image_vocab_count = 16415 if is_mega else 16384
|
@@ -238,29 +237,27 @@ class MinDalle:
|
|
238 |
for i in range(IMAGE_TOKEN_COUNT):
|
239 |
if(st.session_state.page != 0):
|
240 |
break
|
241 |
-
|
242 |
st.session_state.bar.progress(i/IMAGE_TOKEN_COUNT)
|
243 |
-
|
244 |
-
#torch.cuda.empty_cache()
|
245 |
-
#torch.cpu.empty_cache()
|
246 |
-
#with torch.cuda.amp.autocast(dtype=self.dtype):
|
247 |
-
image_tokens[i + 1], attention_state = self.decoder.forward(
|
248 |
-
settings=settings,
|
249 |
-
attention_mask=attention_mask,
|
250 |
-
encoder_state=encoder_state,
|
251 |
-
attention_state=attention_state,
|
252 |
-
prev_tokens=image_tokens[i],
|
253 |
-
token_index=token_indices[[i]]
|
254 |
-
)
|
255 |
|
256 |
-
|
257 |
-
|
258 |
-
|
259 |
-
|
260 |
-
|
261 |
-
|
|
|
|
|
|
|
|
|
262 |
)
|
263 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
264 |
|
265 |
def generate_image_stream(self, *args, **kwargs) -> Iterator[Image.Image]:
|
266 |
image_stream = self.generate_raw_image_stream(*args, **kwargs)
|
@@ -278,6 +275,7 @@ class MinDalle:
|
|
278 |
image = image.reshape([grid_size ** 2, 2 ** 8, 2 ** 8, 3])
|
279 |
yield image
|
280 |
|
|
|
281 |
def generate_image(self, *args, **kwargs) -> Image.Image:
|
282 |
image_stream = self.generate_image_stream(
|
283 |
*args, **kwargs,
|
|
|
39 |
self.dtype = dtype
|
40 |
self.is_verbose = is_verbose
|
41 |
self.text_token_count = 64
|
42 |
+
self.layer_count = 24 if is_mega else 12
|
43 |
+
self.attention_head_count = 32 if is_mega else 16
|
44 |
+
self.embed_count = 2048 if is_mega else 1024
|
|
|
45 |
self.glu_embed_count = 4096 if is_mega else 2730
|
46 |
self.text_vocab_count = 50272 if is_mega else 50264
|
47 |
self.image_vocab_count = 16415 if is_mega else 16384
|
|
|
237 |
for i in range(IMAGE_TOKEN_COUNT):
|
238 |
if(st.session_state.page != 0):
|
239 |
break
|
|
|
240 |
st.session_state.bar.progress(i/IMAGE_TOKEN_COUNT)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
241 |
|
242 |
+
torch.cuda.empty_cache()
|
243 |
+
#torch.cpu.empty_cache()
|
244 |
+
with torch.cuda.amp.autocast(dtype=self.dtype):
|
245 |
+
image_tokens[i + 1], attention_state = self.decoder.forward(
|
246 |
+
settings=settings,
|
247 |
+
attention_mask=attention_mask,
|
248 |
+
encoder_state=encoder_state,
|
249 |
+
attention_state=attention_state,
|
250 |
+
prev_tokens=image_tokens[i],
|
251 |
+
token_index=token_indices[[i]]
|
252 |
)
|
253 |
+
|
254 |
+
with torch.cuda.amp.autocast(dtype=torch.float32):
|
255 |
+
if ((i + 1) % 32 == 0 and progressive_outputs) or i + 1 == 256:
|
256 |
+
yield self.image_grid_from_tokens(
|
257 |
+
image_tokens=image_tokens[1:].T,
|
258 |
+
is_seamless=is_seamless,
|
259 |
+
is_verbose=is_verbose
|
260 |
+
)
|
261 |
|
262 |
def generate_image_stream(self, *args, **kwargs) -> Iterator[Image.Image]:
|
263 |
image_stream = self.generate_raw_image_stream(*args, **kwargs)
|
|
|
275 |
image = image.reshape([grid_size ** 2, 2 ** 8, 2 ** 8, 3])
|
276 |
yield image
|
277 |
|
278 |
+
|
279 |
def generate_image(self, *args, **kwargs) -> Image.Image:
|
280 |
image_stream = self.generate_image_stream(
|
281 |
*args, **kwargs,
|