BUG

#9
by windkkk - opened
# Reporter's reproduction script: loads the Allegro pipeline in bfloat16,
# enables sequential CPU offload, and runs a single-step generation.
import torch
from diffusers import AutoencoderKLAllegro, AllegroPipeline
from diffusers.utils import export_to_video


# The VAE is loaded on its own (bfloat16 here) and handed to the pipeline.
vae = AutoencoderKLAllegro.from_pretrained("rhymes-ai/Allegro", subfolder="vae", torch_dtype=torch.bfloat16)
pipe = AllegroPipeline.from_pretrained(
    "rhymes-ai/Allegro", vae=vae, torch_dtype=torch.bfloat16
)


# NOTE(review): unlike the maintainer's example later in this thread, this
# script never calls pipe.vae.enable_tiling().
pipe.enable_sequential_cpu_offload() #♥♥♥

prompt = "a dog"

# NOTE(review): positive_prompt is defined but never used below, and it
# contains no "{}" placeholder.
positive_prompt = """
(masterpiece)
"""

negative_prompt = """
nsfw
"""


# NOTE(review): .format() is called on `prompt` itself ("a dog"), which has no
# "{}" placeholder, so this line leaves `prompt` unchanged. The maintainer's
# example calls positive_prompt.format(...) instead.
prompt = prompt.format(prompt.lower().strip())
# Single inference step with max_sequence_length=20; the seeded generator is
# created on "cuda:0". The first entry of `.frames` is taken as the video.
video = pipe(prompt, negative_prompt=negative_prompt, guidance_scale=7.5, max_sequence_length=20, num_inference_steps=1, generator=torch.Generator(device="cuda:0").manual_seed(42)).frames[0]


export_to_video(video, "output.mp4", fps=2)

torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 560.82 GiB. GPU 0 has a total capacity of 16.00 GiB of which 10.19 GiB is free. Of the allocated memory 4.78 GiB is allocated by PyTorch, and 37.32 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

♥♥♥?

Rhymes.AI org

Hi Windkkk,

Do you mind telling us your runtime specs (Hardware & Software)? I did not encounter this issue running the code.

br,
Maazel

> Hi Windkkk,
>
> Do you mind telling us your runtime specs (Hardware & Software)? I did not encounter this issue running the code.
>
> br,
> Maazel

Can you share your code that works properly? (GPU less than 16G)

Rhymes.AI org

OK, the following process is verified to use less than 10 GB of VRAM, but inference takes significantly longer (more than 1 hour):

1. Install the designated diffusers version:
!pip install git+https://github.com/huggingface/diffusers.git@9214f4a3782a74e510eff7e09b59457fe8b63511

2. Test that diffusers is installed successfully and can load the model properly:
# Sanity check: confirm which diffusers build is installed and that the
# Allegro pipeline class loads and exposes sequential CPU offloading.
import diffusers
# The package version lives in the dunder attribute `__version__`
# (the double underscores were lost when the post was rendered as markdown).
print(diffusers.__version__)

from diffusers import AllegroPipeline

pipe = AllegroPipeline.from_pretrained("rhymes-ai/Allegro")
# Expected to print True when the installed build supports CPU offloading.
print(hasattr(pipe, "enable_sequential_cpu_offload"))

3. Load the model and run inference with CPU offloading:
# Low-VRAM Allegro text-to-video example: the pipeline is kept on the CPU and
# its sub-modules are streamed to the GPU on demand (sequential CPU offload),
# with tiled VAE processing enabled.
import torch
from diffusers import AutoencoderKLAllegro, AllegroPipeline
from diffusers.utils import export_to_video

# The VAE is loaded separately in float32; the rest of the pipeline is loaded
# in bfloat16.
vae = AutoencoderKLAllegro.from_pretrained(
    "rhymes-ai/Allegro",
    subfolder="vae",
    torch_dtype=torch.float32,
)

pipe = AllegroPipeline.from_pretrained(
    "rhymes-ai/Allegro",
    vae=vae,
    torch_dtype=torch.bfloat16,
)

# Do NOT call pipe.to("cuda") before enabling sequential CPU offload: moving
# the whole pipeline onto the GPU first defeats the purpose of offloading and
# can itself run out of memory on small cards. The offload hooks place each
# sub-module on the GPU only while it is executing.
pipe.enable_sequential_cpu_offload()

# Process the VAE in tiles to bound its peak memory use.
pipe.vae.enable_tiling()

prompt = "A seaside harbor with bright sunlight and sparkling seawater, with many boats in the water. From an aerial view, the boats vary in size and color, some moving and some stationary. Fishing boats in the water suggest that this location might be a popular spot for docking fishing boats."

# Template that wraps the user prompt; "{}" is filled in below.
positive_prompt = """
(masterpiece), (best quality), (ultra-detailed), (unwatermarked),
{}
emotional, harmonious, vignette, 4k epic detailed, shot on kodak, 35mm photo,
sharp focus, high budget, cinemascope, moody, epic, gorgeous
"""

negative_prompt = """
nsfw, lowres, bad anatomy, bad hands, text, error, missing fingers, extra digit, fewer digits, cropped, worst quality,
low quality, normal quality, jpeg artifacts, signature, watermark, username, blurry.
"""

# Insert the lower-cased, stripped user prompt into the template.
prompt = positive_prompt.format(prompt.lower().strip())

output = pipe(
    prompt=prompt,
    negative_prompt=negative_prompt,
    guidance_scale=7.5,
    max_sequence_length=512,
    num_inference_steps=100,
    # Fixed seed so repeated runs use the same random stream.
    generator=torch.Generator(device="cuda").manual_seed(42),
)

# Take the first entry of the returned frames as the video to export.
video = output.frames[0]

export_to_video(video, "output.mp4", fps=15)

Sign up or log in to comment