ihsanvp committed on
Commit
95cc45b
1 Parent(s): c5c043a

update: space hardware

Browse files
Files changed (3)
  1. .gitignore +3 -1
  2. app.py +52 -14
  3. app_local.py +73 -0
.gitignore CHANGED
@@ -1,2 +1,4 @@
 .env
-__pycache__/
+__pycache__/
+*.mp4
+*.jpg
app.py CHANGED
@@ -1,21 +1,57 @@
 import gradio as gr
 import torch
 import torchvision
-from diffusers import I2VGenXLPipeline
-from diffusers.utils.loading_utils import load_image
+from diffusers import I2VGenXLPipeline, DiffusionPipeline
+from torchvision.transforms.functional import to_tensor
 from PIL import Image
 
-device = torch.device("cpu")
-negative_prompt = "Distorted, discontinuous, Ugly, blurry, low resolution, motionless, static, disfigured, disconnected limbs, Ugly faces, incomplete arms"
-generator = torch.manual_seed(8888)
-pipeline = I2VGenXLPipeline.from_pretrained("ali-vilab/i2vgen-xl", torch_dtype=torch.float16, variant="fp16")
-pipeline.to(device)
-pipeline.enable_model_cpu_offload()
-pipeline.unet.enable_forward_chunking()
+if gr.NO_RELOAD:
+    n_steps = 40
+    high_noise_frac = 0.8
+    negative_prompt = "Distorted, discontinuous, Ugly, blurry, low resolution, motionless, static, disfigured, disconnected limbs, Ugly faces, incomplete arms"
+    generator = torch.manual_seed(8888)
 
+    base = DiffusionPipeline.from_pretrained(
+        "stabilityai/stable-diffusion-xl-base-1.0",
+        torch_dtype=torch.float16,
+        variant="fp16",
+        use_safetensors=True,
+    )
+    # refiner = DiffusionPipeline.from_pretrained(
+    #     "stabilityai/stable-diffusion-xl-refiner-1.0",
+    #     text_encoder_2=base.text_encoder_2,
+    #     vae=base.vae,
+    #     torch_dtype=torch.float16,
+    #     use_safetensors=True,
+    #     variant="fp16",
+    # )
+    # refiner.to("cuda")
+    # base.to("cuda")
+    # refiner.enable_model_cpu_offload()
+    base.enable_model_cpu_offload()
+    pipeline = I2VGenXLPipeline.from_pretrained("ali-vilab/i2vgen-xl", torch_dtype=torch.float16, variant="fp16")
+    pipeline.enable_model_cpu_offload()
+    pipeline.unet.enable_forward_chunking()
+
-def generate(image: Image.Image, prompt: str):
-    image = image.convert("RGB")
-    frames = pipeline(
+def generate(prompt: str):
+    image = base(
+        prompt=prompt,
+        num_inference_steps=n_steps,
+        # denoising_end=high_noise_frac,
+        # output_type="latent",
+    ).images[0]
+    # image = refiner(
+    #     prompt=prompt,
+    #     num_inference_steps=n_steps,
+    #     denoising_start=high_noise_frac,
+    #     image=image,
+    # ).images[0]
+    # print(image)
+    # print(type(image))
+    # print(image.size())
+    image.save("frame.jpg")
+    image = to_tensor(image)
+    frames: list[Image.Image] = pipeline(
         prompt=prompt,
         image=image,
         num_inference_steps=50,
@@ -24,12 +60,14 @@ def generate(image: Image.Image, prompt: str):
         generator=generator,
         decode_chunk_size=6,
     ).frames[0]
-    torchvision.io.write_video("video.mp4", frames, fps=16)
+    frames = [to_tensor(frame.convert("RGB")).mul(255).byte().permute(1, 2, 0) for frame in frames]
+    frames = torch.stack(frames)
+    torchvision.io.write_video("video.mp4", frames, fps=4)
     return "video.mp4"
 
 app = gr.Interface(
     fn=generate,
-    inputs=[gr.Image(type="pil"), "text"],
+    inputs=["text"],
     outputs=gr.Video()
 )
 
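The frame post-processing added in generate() is the crux of this hunk: torchvision.io.write_video expects a uint8 tensor of shape (T, H, W, C), which the bare PIL frames the I2VGen-XL pipeline returns do not satisfy. A minimal sketch of the same conversion on dummy frames (the file name and frame contents below are illustrative, not from the commit):

import torch
import torchvision
from torchvision.transforms.functional import to_tensor
from PIL import Image

# Stand-ins for the PIL frames the pipeline returns.
frames = [Image.new("RGB", (64, 64), color) for color in ("red", "green", "blue")]

# to_tensor yields float32 (C, H, W) in [0, 1]; write_video wants
# uint8 (T, H, W, C), hence the scale, cast, and permute.
video = torch.stack([to_tensor(f).mul(255).byte().permute(1, 2, 0) for f in frames])
torchvision.io.write_video("demo.mp4", video, fps=4)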
app_local.py ADDED
@@ -0,0 +1,73 @@
+import gradio as gr
+import torch
+import torchvision
+from diffusers import I2VGenXLPipeline, DiffusionPipeline
+from torchvision.transforms.functional import to_tensor
+from PIL import Image
+
+if gr.NO_RELOAD:
+    n_steps = 50
+    high_noise_frac = 0.8
+    negative_prompt = "Distorted, discontinuous, Ugly, blurry, low resolution, motionless, static, disfigured, disconnected limbs, Ugly faces, incomplete arms"
+    generator = torch.manual_seed(8888)
+
+    base = DiffusionPipeline.from_pretrained(
+        "stabilityai/stable-diffusion-xl-base-1.0",
+        torch_dtype=torch.float16,
+        variant="fp16",
+        use_safetensors=True,
+    )
+    refiner = DiffusionPipeline.from_pretrained(
+        "stabilityai/stable-diffusion-xl-refiner-1.0",
+        text_encoder_2=base.text_encoder_2,
+        vae=base.vae,
+        torch_dtype=torch.float16,
+        use_safetensors=True,
+        variant="fp16",
+    )
+    pipeline = I2VGenXLPipeline.from_pretrained("ali-vilab/i2vgen-xl", torch_dtype=torch.float16, variant="fp16")
+
+    base.to("cuda")
+    refiner.to("cuda")
+    pipeline.to("cuda")
+
+    base.unet = torch.compile(base.unet, mode="reduce-overhead", fullgraph=True)
+    refiner.unet = torch.compile(refiner.unet, mode="reduce-overhead", fullgraph=True)
+    pipeline.unet = torch.compile(pipeline.unet, mode="reduce-overhead", fullgraph=True)
+
+def generate(prompt: str):
+    image = base(
+        prompt=prompt,
+        num_inference_steps=n_steps,
+        denoising_end=high_noise_frac,
+        output_type="latent",
+    ).images[0]
+    image = refiner(
+        prompt=prompt,
+        num_inference_steps=n_steps,
+        denoising_start=high_noise_frac,
+        image=image,
+    ).images[0]
+    image = to_tensor(image)
+    frames: list[Image.Image] = pipeline(
+        prompt=prompt,
+        image=image,
+        num_inference_steps=50,
+        negative_prompt=negative_prompt,
+        guidance_scale=9.0,
+        generator=generator,
+    ).frames[0]
+    frames = [to_tensor(frame.convert("RGB")).mul(255).byte().permute(1, 2, 0) for frame in frames]
+    frames = torch.stack(frames)
+    torchvision.io.write_video("video.mp4", frames, fps=8)
+    return "video.mp4"
+
+app = gr.Interface(
+    fn=generate,
+    inputs=["text"],
+    outputs=gr.Video()
+)
+
+if __name__ == "__main__":
+    app.launch()
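Two details of app_local.py are worth noting: the gr.NO_RELOAD guard keeps the heavy model loading from re-running on each source change when the script is launched in Gradio's reload mode, and all three UNets are wrapped in torch.compile(mode="reduce-overhead", fullgraph=True), so the first generate() call absorbs the compilation latency. A hypothetical warm-up (not part of the commit) could shift that cost to startup:

if __name__ == "__main__":
    # Hypothetical warm-up: one throwaway prompt forces torch.compile
    # to trace and compile the UNets before the app serves requests.
    generate("a warm-up prompt")
    app.launch()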