from modules.utils import * class Text2Image: def __init__(self, device, pretrained_model_dir): print("Initializing Text2Image to %s" % device) self.device = device self.torch_dtype = torch.float16 if 'cuda' in device else torch.float32 self.pipe = StableDiffusionPipeline.from_pretrained(f"{pretrained_model_dir}/stable-diffusion-v1-5", torch_dtype=self.torch_dtype) self.pipe.to(device) self.a_prompt = 'best quality, extremely detailed' self.n_prompt = 'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, ' \ 'fewer digits, cropped, worst quality, low quality' @prompts(name="Generate Image From User Input Text", description="useful when you want to generate an image from a user input text and save it to a file. " "like: generate an image of an object or something, or generate an image that includes some objects. " "The input to this tool should be a string, representing the text used to generate image. ") def inference(self, text): image_filename = os.path.join('image', str(uuid.uuid4())[0:8] + ".png") prompt = text + ', ' + self.a_prompt image = self.pipe(prompt, negative_prompt=self.n_prompt).images[0] image.save(image_filename) print( f"\nProcessed Text2Image, Input Text: {text}, Output Image: {image_filename}") return image_filename