tttoaster committed (verified)
Commit 070a748 · Parent(s): 4086dbb

Update app.py

Files changed (1): app.py (+13 -2)
app.py CHANGED

@@ -26,7 +26,7 @@ from flask import Flask
 import json
 from typing import Optional
 import cv2
-from diffusers import AutoencoderKL, UNet2DConditionModel, EulerDiscreteScheduler
+from diffusers import AutoencoderKL, UNet2DConditionModel, EulerDiscreteScheduler, StableDiffusionImg2ImgPipeline
 
 pyrootutils.setup_root(__file__, indicator=".project-root", pythonpath=True)
 
@@ -185,6 +185,10 @@ class LLMService:
 
         self.visual_encoder.to(self.vit_sd_device, dtype=self.dtype)
 
+        model_id_or_path = "stablediffusionapi/realistic-vision-v51"
+        self.vae_pipe = StableDiffusionImg2ImgPipeline.from_pretrained(model_id_or_path, torch_dtype=torch.float16)
+        self.vae_pipe = self.vae_pipe.to(self.vit_sd_device)
+
         self.boi_token_id = self.tokenizer.encode(BOI_TOKEN, add_special_tokens=False)[0]
         self.eoi_token_id = self.tokenizer.encode(EOI_TOKEN, add_special_tokens=False)[0]
 
@@ -355,6 +359,13 @@ def generate(text_list, image_list, max_new_tokens, force_boi, force_bbox):
         for img_idx in range(output['num_gen_imgs']):
             img_feat = img_gen_feat[img_idx:img_idx + 1]
             generated_image = service.sd_adapter.generate(image_embeds=img_feat, num_inference_steps=50)[0]
+
+            init_image = generated_image.resize((1024, 1024))
+            prompt = ""
+            images = service.vae_pipe(prompt=prompt, image=init_image,
+                                      num_inference_steps=50, guidance_scale=8.0, strength=0.38).images
+            generated_image = images[0]
+
             image_base64 = encode_image(generated_image)
             gen_imgs_base64_list.append(image_base64)
 
@@ -628,7 +639,7 @@ SEED-X-I can follow multimodal instruction (including images with **dynamic reso
 
 SEED-X-I **does not support image manipulation**. If you want to experience **SEED-X-Edit** for high-precision image editing, please refer to [[Inference Code]](https://github.com/AILab-CVC/SEED-X).
 
-Due to insufficient GPU memory, when generating images, we need to offload the LLM to the CPU and move the de-tokenizer to the CPU, which will **result in a long processing time**. If you want to experience the normal model inference speed, you can run [[Inference Code]](https://github.com/AILab-CVC/SEED-X) locally.
+If you want to experience the normal model inference speed, you can run [[Inference Code]](https://github.com/AILab-CVC/SEED-X) locally.
 
 ## Tips:
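For reference, the refinement pass this commit adds can be exercised on its own. The sketch below is a minimal standalone version under the same assumptions as the diff (the `stablediffusionapi/realistic-vision-v51` checkpoint, fp16 weights, and a CUDA device); the `refine` helper name is introduced here for illustration and is not part of app.py.

```python
import torch
from PIL import Image
from diffusers import StableDiffusionImg2ImgPipeline

# Load the img2img refiner once at startup, as the commit does in LLMService.
pipe = StableDiffusionImg2ImgPipeline.from_pretrained(
    "stablediffusionapi/realistic-vision-v51",  # checkpoint used in the diff
    torch_dtype=torch.float16,
)
pipe = pipe.to("cuda")  # assumption: a CUDA device is available

def refine(generated_image: Image.Image) -> Image.Image:
    """Lightly re-denoise a SEED-X output to sharpen local detail."""
    init_image = generated_image.resize((1024, 1024))
    result = pipe(
        prompt="",               # empty prompt: no text target, pure refinement
        image=init_image,
        num_inference_steps=50,
        guidance_scale=8.0,
        strength=0.38,           # only the last ~38% of the schedule is re-run
    )
    return result.images[0]
```

A low `strength` like 0.38 re-noises only the tail of the diffusion schedule, so the img2img pass sharpens texture while preserving the composition produced by the SEED-X de-tokenizer, and the empty prompt keeps it acting as a generic detail refiner rather than steering content.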