pseudo-flex-v2 / app.py
bghira's picture
Update app.py
3311456 verified
raw
history blame
2.35 kB
import spaces
import torch
from diffusers import DiffusionPipeline
import gradio as gr
# Fetch the pre-trained pseudo-flex-v2 pipeline with bfloat16 weights,
# then move it onto the CUDA device so generation runs on the GPU.
pipe = DiffusionPipeline.from_pretrained(
    'ptx0/pseudo-flex-v2',
    torch_dtype=torch.bfloat16,
)
pipe.to('cuda')
import re
def extract_resolution(resolution_str):
    """Parse a ``"<width>x<height>"`` string into a pair of integers.

    The pattern is matched at the start of the string, so trailing text
    after the height (e.g. ``"896x1152 (portrait)"``) is ignored.

    Args:
        resolution_str: Text such as ``"1152x960"``. May be ``None`` or any
            other non-string value, in which case nothing is parsed.

    Returns:
        A ``(width, height)`` tuple of ints, or ``None`` when the input is
        not a string or does not start with ``<digits>x<digits>``.
    """
    # re.match raises TypeError on None / non-string input. Returning None
    # instead lets callers apply their own default resolution.
    if not isinstance(resolution_str, str):
        return None

    match = re.match(r'(\d+)x(\d+)', resolution_str)
    if match is None:
        return None
    return int(match.group(1)), int(match.group(2))
# Image generation entry point wired to the Gradio interface.
# NOTE(review): @spaces.GPU presumably requests a GPU for the duration of the
# call on Hugging Face ZeroGPU Spaces - confirm against the `spaces` docs.
@spaces.GPU
def generate(prompt, guidance_scale, guidance_rescale, num_inference_steps, resolution, negative_prompt):
    """Run the diffusion pipeline once and return the generated images.

    Args:
        prompt: Text prompt passed to the pipeline as its positional argument.
        guidance_scale: Forwarded to the pipeline as ``guidance_scale``.
        guidance_rescale: Forwarded to the pipeline as ``guidance_rescale``.
        num_inference_steps: Forwarded as ``num_inference_steps``.
        resolution: ``"<width>x<height>"`` string; when it cannot be parsed
            the output size falls back to 1024x1024.
        negative_prompt: Forwarded to the pipeline as ``negative_prompt``.

    Returns:
        The ``images`` attribute of the pipeline's output.
    """
    size = extract_resolution(resolution)
    if not size:
        # Unparseable resolution: use the square default.
        size = (1024, 1024)
    width, height = size

    output = pipe(
        prompt,
        negative_prompt=negative_prompt,
        guidance_scale=guidance_scale,
        guidance_rescale=guidance_rescale,
        num_inference_steps=num_inference_steps,
        width=width,
        height=height,
    )
    return output.images
# Example rows to demonstrate the model's capabilities.
# Gradio assigns example values to the input components positionally, so each
# row must supply one value per input, in this order:
#   prompt, guidance scale, guidance rescale, inference steps, resolution,
#   negative prompt
# The rows previously had only four values, which put the step count on the
# 0-1 rescale slider and the negative prompt on the inference-steps slider.
example_prompts = [
    ["A futuristic cityscape at night under a starry sky", 7.5, 0.7, 25, "1152x960", "blurry, overexposed"],
    ["A serene landscape with a flowing river and autumn trees", 8.0, 0.7, 20, "1152x960", "crowded, noisy"],
    ["An abstract painting of joy and energy in bright colors", 9.0, 0.7, 30, "1152x960", "dark, dull"],
]
# Build the Gradio demo UI and start serving it.
# Selectable resolutions: 1024x1024, 1152x960, 896x1152.

# Input widgets, listed in the same order as generate()'s parameters:
# prompt, guidance_scale, guidance_rescale, num_inference_steps, resolution,
# negative_prompt.
input_components = [
    gr.Text(label="Enter your prompt"),
    gr.Slider(1, 20, step=0.1, label="Guidance Scale", value=9.5),
    gr.Slider(0, 1, step=0.1, label="Rescale classifier-free guidance", value=0.7),
    gr.Slider(1, 50, step=1, label="Number of Inference Steps", value=25),
    gr.Radio(["1024x1024", "1152x960", "896x1152"], label="Resolution", value="1152x960"),
    gr.Text(value="underexposed, blurry, ugly, washed-out", label="Negative Prompt"),
]

# Gallery that displays the images returned by generate().
output_gallery = gr.Gallery(height=1024, min_width=1024, columns=2)

# Note: `iface` is bound to whatever launch() returns, not to the Interface
# object itself, because launch() is chained onto the constructor call.
iface = gr.Interface(
    fn=generate,
    inputs=input_components,
    outputs=output_gallery,
    examples=example_prompts,
    title="Flex v2 (SD 2.1-v) Demonstration",
    description="Flex v2 is a multi-aspect finetune of SD 2.1-v (768px) that is up-sized to a base resolution of 1 megapixel (1024px). This model utilises a zero-terminal SNR noise schedule, formulated to allow for very dark and very bright images.",
).launch()