import os

import requests
import torch
from diffusers import DDIMScheduler, StableDiffusionPix2PixZeroPipeline
from diffusers.schedulers.scheduling_ddim_inverse import DDIMInverseScheduler
from huggingface_hub import HfApi
from PIL import Image
from transformers import BlipForConditionalGeneration, BlipProcessor

# Hub client; used at the end of the script to upload the edited image.
api = HfApi()

# Input image for the edit, taken from the pix2pix-zero repository's test assets.
img_url = "https://github.com/pix2pixzero/pix2pix-zero/raw/main/assets/test_images/cats/cat_6.png"

# Bound the download with a timeout and fail on HTTP errors, so a stalled
# connection or an error page surfaces here instead of as an indefinite hang
# or a confusing PIL decoding error.
response = requests.get(img_url, stream=True, timeout=30)
response.raise_for_status()
# The raw stream is not content-decoded by default; ask urllib3 to undo any
# gzip/deflate encoding before PIL reads the bytes.
response.raw.decode_content = True
raw_image = Image.open(response.raw).convert("RGB").resize((512, 512))

# BLIP captioning model and its processor, both loaded from the same
# checkpoint; they are handed to the pipeline below as its caption generator.
captioner_id = "Salesforce/blip-image-captioning-base"
processor = BlipProcessor.from_pretrained(captioner_id)
model = BlipForConditionalGeneration.from_pretrained(
    captioner_id,
    torch_dtype=torch.float16,
    low_cpu_mem_usage=True,
)

# Half-precision pix2pix-zero pipeline with the captioner attached and the
# safety checker disabled.
model_ckpt = "CompVis/stable-diffusion-v1-4"
pipeline = StableDiffusionPix2PixZeroPipeline.from_pretrained(
    model_ckpt,
    caption_generator=model,
    caption_processor=processor,
    torch_dtype=torch.float16,
    safety_checker=None,
)
pipeline.enable_model_cpu_offload()

# Describe the input image in text; the caption is the prompt for the
# inversion below.
caption = pipeline.generate_caption(raw_image)

# Install the DDIM scheduler, then build the inverse scheduler from the config
# of the scheduler that was just installed. The second assignment reads the
# result of the first, so the order of these two lines matters.
pipeline.scheduler = DDIMScheduler.from_config(pipeline.scheduler.config)
pipeline.inverse_scheduler = DDIMInverseScheduler.from_config(pipeline.scheduler.config)

print(caption)

# Fixed seed for reproducibility; torch.manual_seed returns the generator it
# seeded, which is passed explicitly to the pipeline calls.
generator = torch.manual_seed(0)

# Invert the input image and keep the resulting latents.
inversion_output = pipeline.invert(caption, image=raw_image, generator=generator)
inv_latents = inversion_output.latents

# Sentences describing the source concept (cat) and the target concept (dog).
# Each three-sentence set is repeated four times, giving twelve prompts apiece.
cat_sentences = ["a cat sitting on the street", "a cat playing in the field", "a face of a cat"]
dog_sentences = ["a dog sitting on the street", "a dog playing in the field", "a face of a dog"]
source_prompts = cat_sentences * 4
target_prompts = dog_sentences * 4

# Embed both prompt sets with the pipeline, two prompts per batch.
source_embeds = pipeline.get_embeds(source_prompts, batch_size=2)
target_embeds = pipeline.get_embeds(target_prompts, batch_size=2)

# Run the edit, starting from the inverted latents and passing the source and
# target embeddings computed above. The caption serves as both the prompt and
# the negative prompt.
edit_output = pipeline(
    caption,
    source_embeds=source_embeds,
    target_embeds=target_embeds,
    num_inference_steps=50,
    cross_attention_guidance_amount=0.15,
    generator=generator,
    latents=inv_latents,
    negative_prompt=caption,
)

# Only the first generated image is kept.
image = edit_output.images[0]

# Local destination for the edited image.
path = "/home/patrick_huggingface_co/images/aa.png"

# Create the output directory if needed, so the result of the long pipeline
# run is not lost to a FileNotFoundError at save time.
os.makedirs(os.path.dirname(path), exist_ok=True)
image.save(path)

# Name the target repo and file once, so the link printed below always matches
# what was actually uploaded.
repo_id = "patrickvonplaten/images"
filename = os.path.basename(path)

api.upload_file(
    path_or_fileobj=path,
    path_in_repo=filename,
    repo_id=repo_id,
    repo_type="dataset",
)

print(f"https://huggingface.co/datasets/{repo_id}/blob/main/{filename}")