# demo.py — uploaded by jbilcke-hf (HF staff), revision e3dcab5 (verified), 3.23 kB
# (the lines above were Hugging Face web-page metadata, kept here as a comment
#  so the file remains valid Python)
from huggingface_hub import InferenceClient
import base64
import os
from pathlib import Path
import time
def save_video(base64_video: str, output_path: str) -> None:
    """Decode a base64-encoded video and write it to disk.

    Args:
        base64_video: Base64 string containing the raw video bytes.
        output_path: Destination file path for the decoded video.
    """
    # Path.write_bytes opens, writes, and closes in one call (the file-level
    # `Path` import was previously unused).
    Path(output_path).write_bytes(base64.b64decode(base64_video))
def generate_video(
    prompt: str,
    endpoint_url: str,
    token: "str | None" = None,
    resolution: str = "1280x720",
    video_length: int = 129,
    num_inference_steps: int = 50,
    seed: int = -1,
    guidance_scale: float = 1.0,
    flow_shift: float = 7.0,
    embedded_guidance_scale: float = 6.0
) -> str:
    """Generate a video using the custom inference endpoint.

    Args:
        prompt: Text prompt describing the video
        endpoint_url: Full URL to the inference endpoint
        token: HuggingFace API token for authentication (None sends no token)
        resolution: Video resolution (default: "1280x720")
        video_length: Number of frames (default: 129 for 5s)
        num_inference_steps: Number of inference steps (default: 50)
        seed: Random seed, -1 for random (default: -1)
        guidance_scale: Guidance scale value (default: 1.0)
        flow_shift: Flow shift value (default: 7.0)
        embedded_guidance_scale: Embedded guidance scale (default: 6.0)

    Returns:
        Path to the saved video file (timestamped .mp4 in the current directory)
    """
    # Initialize client pointed directly at the endpoint URL (not a model id).
    client = InferenceClient(model=endpoint_url, token=token)
    # Prepare payload — keys mirror the endpoint handler's expected fields.
    payload = {
        "inputs": prompt,
        "resolution": resolution,
        "video_length": video_length,
        "num_inference_steps": num_inference_steps,
        "seed": seed,
        "guidance_scale": guidance_scale,
        "flow_shift": flow_shift,
        "embedded_guidance_scale": embedded_guidance_scale
    }
    # Make request.
    # NOTE(review): InferenceClient.post is deprecated/removed in recent
    # huggingface_hub releases — confirm the version this script is pinned to.
    response = client.post(json=payload)
    result = response.json()
    # Save video under a timestamped filename; the endpoint is expected to
    # return the encoded video in "video_base64" and the seed it used in "seed".
    timestamp = int(time.time())
    output_path = f"generated_video_{timestamp}.mp4"
    save_video(result["video_base64"], output_path)
    print(f"Video generated with seed {result['seed']}")
    return output_path
if __name__ == "__main__":
    # Credentials/endpoint come from the environment; empty string if unset.
    hf_api_token = os.environ.get('HF_API_TOKEN', '')
    endpoint_url = os.environ.get('ENDPOINT_URL', '')

    # BUGFIX: the original used annotation syntax (`num_inference_steps: int = 15`)
    # inside the call — a SyntaxError — and was missing a comma after
    # embedded_guidance_scale. All arguments are now plain keyword arguments.
    video_path = generate_video(
        endpoint_url=endpoint_url,
        token=hf_api_token,
        prompt="A cat walks on the grass, realistic style.",
        # min resolution is 64x64, max is 4096x4096 (increment steps are by 16px)
        # however the model is designed for 1280x720
        resolution="1280x720",
        # number of frames plus one (max 1024?), increments by 4 frames
        video_length=49,  # 129,
        # number of denoising/sampling steps (default: 30)
        num_inference_steps=15,  # 50,
        seed=-1,  # -1 to keep it random
        # not sure why we have two guidance scales
        guidance_scale=1.0,  # 3
        # strength of prompt guidance (default: 6.0)
        embedded_guidance_scale=6.0,
        # larger values result in shorter videos (default: 9.0, max: 30)
        flow_shift=9.0,
    )
    print(f"Video saved to: {video_path}")