"""Gradio demo: expand a drawing request with an LLM, then render it.

The user's request is sent to a chat LLM that returns a JSON object whose
``expanded_description`` field is used as the prompt for a diffusion pipeline.
"""

import json
import random
import uuid

import gradio as gr
import spaces
import torch
from diffusers import DiffusionPipeline
from transformers import AutoModelForCausalLM, AutoTokenizer

device = torch.device("cuda:0")

llm = AutoModelForCausalLM.from_pretrained(
    "Azure99/blossom-v5-14b", torch_dtype=torch.float16, device_map="auto"
)
tokenizer = AutoTokenizer.from_pretrained("Azure99/blossom-v5-14b")

diffusion_pipe = DiffusionPipeline.from_pretrained(
    "playgroundai/playground-v2.5-1024px-aesthetic",
    torch_dtype=torch.float16,
    use_safetensors=True,
    add_watermarker=False,
    variant="fp16",
)
diffusion_pipe.to(device)


def get_input_ids(inst, bot_prefix):
    """Tokenize one chat turn, leaving the bot's reply open after a prefix.

    Args:
        inst: The human instruction placed after ``|Human|: ``.
        bot_prefix: Text the bot's answer is forced to start with; generation
            continues directly after it.

    Returns:
        The token ids produced by ``tokenizer.encode`` for the full prompt.
    """
    return tokenizer.encode(
        "A chat between a human and an artificial intelligence bot. "
        "The bot gives helpful, detailed, and polite answers to the human's questions.\n"
        f"|Human|: {inst}\n|Bot|: {bot_prefix}",
        add_special_tokens=True,
    )


def save_image(img):
    """Save ``img`` as a PNG with a random UUID name and return that name.

    The file name is relative, so the image is written to the current working
    directory.
    """
    unique_name = str(uuid.uuid4()) + ".png"
    img.save(unique_name)
    return unique_name


# Few-shot prompt (written in Chinese). It asks the model to extract the
# picture description from the drawing request, translate it into English,
# expand it with more detail, and answer with a JSON object holding the
# string fields "description", "en_description" and "expanded_description".
# "$USER_PROMPT" is replaced with the JSON-quoted user request in generate().
LLM_PROMPT = '''你的任务是从输入的[作画要求]中抽取画面描述(description),然后description翻译为英文(en_description),最后对en_description进行扩写(expanded_description),增加足够多的细节,且符合人类的第一直觉。 [输出]是一个json,包含description、en_description、expanded_description三个字符串字段,请直接输出json,不要输出任何 无关内容。 下面是一些示例: [作画要求]->"画一幅画:落霞与孤鹜齐飞,秋水共长天一色。" [输出]->{"description": "落霞与孤鹜齐飞,秋水共长天一色", "en_description": "The setting sun and the solitary duck fly together, the autumn water shares a single hue with the vast sky", "expanded_description": "A single duck is flying in the vast sky that is shared with the setting sun, mirroring the serenity of the autumn waters beneath. The tranquil water is reflecting the sublime orange hues of the twilight sky, blending all into a picturesque harmony. 
It's an awe-inspiring end to a quiet autumn day, the scene dominated by immense tranquility and beauty."} [作画要求]->"原神中的可莉" [输出]->{"description": "原神中的可莉", "en_description": "Klee in Genshin Impact", "expanded_description": "A small animated girl with flaming red hair in pigtails, amber eyes, and a star-shaped hairpin. She is dressed in a crimson and white outfit with a distinctive gold trim. On her back, she carries a large russet backpack that looks like a treasure chest. In her hand, she holds a supernatural-looking bomb with sparkling particles evolving around it. This setting is in a colorful and magical world with ethereal floating islands and a dabbling stream nearby."} [作画要求]->"create an image for me. a close up of a woman wearing a transparent, prismatic, elaborate nemeses headdress, over the should pose, brown skin-tone" [输出]->{"description": "a close up of a woman wearing a transparent, prismatic, elaborate nemeses headdress, over the should pose, brown skin-tone", "en_description": "a close up of a woman wearing a transparent, prismatic, elaborate nemeses headdress, over the should pose, brown skin-tone", "expanded_description": "An intricate close-up detailing a woman with a brown skin-tone. She is seen from an over-the-shoulder perspective and is adorned with a transparent, prismatic, and elaborate headdress similar to a nemes, showcasing a myriad of refracted colors across its unique design."} [作画要求]->"一只高贵的柯基犬,素描画风格\n根据上面的描述生成一张图片吧!" [输出]->{"description": "一只高贵的柯基犬,素描画风格", "en_description": "A noble corgi dog, sketch style", "expanded_description": "A noble corgi dog, standing with a majestic aura, in the style of an old-fashioned sketch. The corgi displays a regal posture, with its head raised high and ears perked up. Its fur is short and dense, embodying various shades of brown and white. 
The sketch emphasizes the contrast and depth of the corgi's coat textures, his expressive eyes full of intelligence and alertness, and his little stubby tail wagging with joy. The background is composed of light strokes, providing a minimalistic setup that further highlights the corgi's nobility."} [作画要求]->$USER_PROMPT [输出]->'''

# The bot's reply is forced to begin with this text so the model continues a
# JSON object; the same text is prepended again before parsing the reply.
BOT_PREFIX = '{"description": "'


def _extract_expanded_description(llm_json):
    """Return the ``expanded_description`` string from the LLM's JSON reply.

    Args:
        llm_json: ``BOT_PREFIX`` followed by the text the model generated.

    Returns:
        The value of the ``expanded_description`` field.

    Raises:
        ValueError: If the text does not start with a valid JSON object or
            the field is missing, empty, or not a string.
    """
    # raw_decode parses the leading JSON value and ignores anything after it,
    # so trailing chatter after the closing brace does not break parsing.
    payload, _ = json.JSONDecoder().raw_decode(llm_json)
    expanded = payload.get("expanded_description") if isinstance(payload, dict) else None
    if not isinstance(expanded, str) or not expanded.strip():
        raise ValueError("LLM reply has no usable 'expanded_description' field")
    return expanded


@spaces.GPU(enable_queue=True)
def generate(
    prompt: str,
    progress=gr.Progress(track_tqdm=True),
) -> list:
    """Turn a free-form drawing request into generated image files.

    Args:
        prompt: The user's drawing request.
        progress: Gradio progress tracker (tracks tqdm bars).

    Returns:
        A list with the file names of the saved images.

    Raises:
        gr.Error: If the LLM reply cannot be parsed into a description.
    """
    # json.dumps quotes and escapes the request the same way the few-shot
    # examples in LLM_PROMPT are written.
    input_ids = get_input_ids(
        LLM_PROMPT.replace("$USER_PROMPT", json.dumps(prompt, ensure_ascii=False)),
        BOT_PREFIX,
    )
    generation_kwargs = dict(
        input_ids=torch.tensor([input_ids]).to(llm.device),
        do_sample=True,
        max_new_tokens=512,
        temperature=0.5,
        top_p=0.85,
        top_k=50,
        repetition_penalty=1.05,
    )
    output_ids = llm.generate(**generation_kwargs)

    # For a causal LM, generate() returns the prompt tokens followed by the
    # new tokens. Keep only the continuation; otherwise the decoded text
    # would contain the whole prompt and could never parse as JSON.
    new_token_ids = output_ids.cpu()[0][len(input_ids):]
    llm_result = BOT_PREFIX + tokenizer.decode(new_token_ids, skip_special_tokens=True)
    print(llm_result)

    try:
        expanded_description = _extract_expanded_description(llm_result)
    except ValueError as err:  # json.JSONDecodeError is a ValueError subclass
        raise gr.Error(
            "The language model did not return a usable description. Please try again."
        ) from err
    print(expanded_description)

    seed = random.randint(0, 2147483647)
    diffusion_pipe.to(device)
    generator = torch.Generator().manual_seed(seed)
    images = diffusion_pipe(
        prompt=expanded_description,
        negative_prompt=None,
        width=1024,
        height=1024,
        guidance_scale=3,
        num_inference_steps=25,
        generator=generator,
        num_images_per_prompt=1,
        use_resolution_binning=True,
        output_type="pil",
    ).images

    image_paths = [save_image(img) for img in images]
    return image_paths


css = ''' .gradio-container{max-width: 560px !important} h1{text-align:center} '''

# NOTE(review): the source had lost its indentation; the nesting below
# (gallery inside the group, event wiring at Blocks level) is reconstructed
# from statement order — confirm against the intended layout.
with gr.Blocks(css=css) as demo:
    gr.Markdown("# Blossom Playground v2.5")
    with gr.Group():
        with gr.Row():
            prompt = gr.Text(
                label="Prompt",
                show_label=False,
                max_lines=1,
                placeholder="Enter your prompt",
                container=False,
            )
            run_button = gr.Button("Run", scale=0)
        result = gr.Gallery(label="Result", columns=1, show_label=False)

    # Both pressing Enter in the textbox and clicking "Run" start generation.
    gr.on(
        triggers=[
            prompt.submit,
            run_button.click,
        ],
        fn=generate,
        inputs=[
            prompt,
        ],
        outputs=[result],
        api_name="run",
    )

if __name__ == "__main__":
    demo.queue(max_size=20).launch()