visual-chatgpt-zh-vits / modules /controlnet_depth.py
FrankZxShen's picture
Upload 55 files
aa69275
raw
history blame contribute delete
No virus
3.42 kB
from modules.utils import *
class Image2Depth:
    """Tool that predicts a depth map for an image and saves it as a new image."""

    def __init__(self, device, pretrained_model_dir):
        """Build the depth estimator.

        NOTE(review): ``device`` and ``pretrained_model_dir`` are accepted but
        not used here; the estimator is created with the pipeline's defaults.
        """
        print("Initializing Image2Depth")
        self.depth_estimator = pipeline('depth-estimation')

    @prompts(name="Predict Depth On Image",
             description="useful when you want to detect depth of the image. like: generate the depth from this image, "
                         "or detect the depth map on this image, or predict the depth for this image. "
                         "The input to this tool should be a string, representing the image_path")
    def inference(self, inputs):
        """Estimate depth for the image at ``inputs`` and write it to disk.

        Args:
            inputs: Path of the image to process.

        Returns:
            The path the depth image was saved to, as produced by
            ``get_new_image_name(inputs, func_name="depth")``.
        """
        source_image = Image.open(inputs)
        prediction = self.depth_estimator(source_image)
        # assumes the 'depth' entry converts to a 2-D (H, W) array — TODO confirm
        depth_map = np.array(prediction['depth'])
        # Append a trailing axis and replicate it three times so the result
        # has three identical channels.
        depth_channels = np.repeat(depth_map[:, :, None], 3, axis=2)
        depth_image = Image.fromarray(depth_channels)
        output_path = get_new_image_name(inputs, func_name="depth")
        depth_image.save(output_path)
        print(f"\nProcessed Image2Depth, Input Image: {inputs}, Output Depth: {output_path}")
        return output_path
class DepthText2Image:
    """Tool that generates an image from a depth image plus a text description.

    Loads a depth ControlNet and a Stable Diffusion v1.5 ControlNet pipeline
    from ``pretrained_model_dir`` and runs them on ``device``.
    """

    def __init__(self, device, pretrained_model_dir):
        """Load the models and move the pipeline to ``device``.

        Args:
            device: Device string passed to ``pipe.to``. Half precision is
                selected when it contains ``'cuda'``, full precision otherwise.
            pretrained_model_dir: Directory expected to contain the
                ``sd-controlnet-depth`` and ``stable-diffusion-v1-5`` folders.
        """
        print("Initializing DepthText2Image to %s" % device)
        self.torch_dtype = torch.float16 if 'cuda' in device else torch.float32
        self.controlnet = ControlNetModel.from_pretrained(
            f"{pretrained_model_dir}/sd-controlnet-depth", torch_dtype=self.torch_dtype)
        self.pipe = StableDiffusionControlNetPipeline.from_pretrained(
            f"{pretrained_model_dir}/stable-diffusion-v1-5", controlnet=self.controlnet, safety_checker=None,
            torch_dtype=self.torch_dtype)
        # Swap in a UniPC scheduler built from the pipeline's own scheduler config.
        self.pipe.scheduler = UniPCMultistepScheduler.from_config(self.pipe.scheduler.config)
        self.pipe.to(device)
        # Placeholder until the first inference call draws a real seed.
        self.seed = -1
        # Text appended to every user prompt / used as the negative prompt.
        self.a_prompt = 'best quality, extremely detailed'
        self.n_prompt = 'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit,' \
                        ' fewer digits, cropped, worst quality, low quality'

    @prompts(name="Generate Image Condition On Depth",
             description="useful when you want to generate a new real image from both the user desciption and depth image. "
                         "like: generate a real image of a object or something from this depth image, "
                         "or generate a new real image of a object or something from the depth map. "
                         "The input to this tool should be a comma seperated string of two, "
                         "representing the image_path and the user description")
    def inference(self, inputs):
        """Generate an image conditioned on a depth image and a description.

        Args:
            inputs: ``"<image_path>,<description>"``. Only the first comma is
                treated as the separator, so the description may itself
                contain commas. Whitespace around either part is ignored.

        Returns:
            The path the generated image was saved to, as produced by
            ``get_new_image_name(image_path, func_name="depth2image")``.
        """
        # One pass, first comma only; stripping keeps a stray blank before or
        # after the path from turning into a file-not-found error.
        image_path, _, instruct_text = inputs.partition(",")
        image_path = image_path.strip()
        instruct_text = instruct_text.strip()

        # Release the file handle as soon as the pipeline has consumed the image.
        with Image.open(image_path) as image:
            # A fresh seed per call, recorded on the instance.
            self.seed = random.randint(0, 65535)
            seed_everything(self.seed)
            # Without a description, avoid a prompt that starts with ", ".
            prompt = f"{instruct_text}, {self.a_prompt}" if instruct_text else self.a_prompt
            generated = self.pipe(prompt, image, num_inference_steps=20, eta=0.0,
                                  negative_prompt=self.n_prompt,
                                  guidance_scale=9.0).images[0]

        updated_image_path = get_new_image_name(image_path, func_name="depth2image")
        generated.save(updated_image_path)
        print(f"\nProcessed DepthText2Image, Input Depth: {image_path}, Input Text: {instruct_text}, "
              f"Output Image: {updated_image_path}")
        return updated_image_path