Spaces:
Build error
Build error
File size: 3,633 Bytes
fcc479d 31a600f fcc479d a5da834 fcc479d 31a600f fcc479d ded69d7 fcc479d 47dd336 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 |
# --------------------------------------------------------
# X-Decoder -- Generalized Decoding for Pixel, Image, and Language
# Copyright (c) 2022 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Jianwei Yang (jianwyan@microsoft.com), Xueyan Zou (xueyan@cs.wisc.edu)
# --------------------------------------------------------
import os
os.system("python -m pip install git+https://github.com/MaureenZOU/detectron2-xyz.git")
import gradio as gr
import torch
import argparse
from xdecoder.BaseModel import BaseModel
from xdecoder import build_model
from utils.distributed import init_distributed
from utils.arguments import load_opt_from_config_files
from tasks import *
def parse_option():
    """Parse the demo's command-line options.

    Only ``--conf_files`` is recognised. Arguments the parser does not know
    are ignored instead of aborting the process, so the app still starts when
    the hosting launcher (or a test runner) leaves its own flags in
    ``sys.argv``.

    Returns:
        argparse.Namespace: namespace whose ``conf_files`` attribute holds the
        config file path (default ``configs/xdecoder/svlp_focalt_lang.yaml``).
    """
    parser = argparse.ArgumentParser('X-Decoder All-in-One Demo', add_help=False)
    parser.add_argument(
        '--conf_files',
        default="configs/xdecoder/svlp_focalt_lang.yaml",
        metavar="FILE",
        help='path to config file',
    )
    # parse_known_args() returns (namespace, leftover_argv); the leftovers are
    # deliberately discarded.
    args, _unknown = parser.parse_known_args()
    return args
'''
build args
'''
# Resolve the config path from the command line, load the options stored in
# that file, and pass them through init_distributed; its return value is the
# `opt` object used by the rest of the script.
args = parse_option()
opt = init_distributed(load_opt_from_config_files(args.conf_files))
# META DATA
# Checkpoint files are looked up in, and downloaded into, the current working
# directory.
pretrained_pth_last = "xdecoder_focalt_last.pt"
pretrained_pth_novg = "xdecoder_focalt_last_novg.pt"

_CHECKPOINT_BASE_URL = "https://projects4jw.blob.core.windows.net/x-decoder/release/"

for _ckpt_name in (pretrained_pth_last, pretrained_pth_novg):
    if not os.path.exists(_ckpt_name):
        # torch.hub.download_url_to_file writes to a temporary file and moves
        # it to the destination only after the transfer completed, and a
        # failed request surfaces as an exception. The previous
        # os.system("wget ...") ignored the exit status and could leave a
        # truncated checkpoint behind that passed the exists() check on the
        # next start.
        torch.hub.download_url_to_file(
            _CHECKPOINT_BASE_URL + _ckpt_name,
            os.path.abspath(_ckpt_name),
        )
'''
build model
'''
# One model instance per checkpoint, each built from the same options, switched
# to eval mode and moved to the GPU.
model_last = BaseModel(opt, build_model(opt))
model_last = model_last.from_pretrained(pretrained_pth_last).eval().cuda()

model_cap = BaseModel(opt, build_model(opt))
model_cap = model_cap.from_pretrained(pretrained_pth_novg).eval().cuda()

# Call get_text_embeddings once per model with a placeholder vocabulary, with
# gradients disabled.
# NOTE(review): presumably this pre-populates text-embedding state that the
# inference path relies on -- confirm against the lang_encoder implementation.
with torch.no_grad():
    for _demo_model in (model_last, model_cap):
        _lang_encoder = _demo_model.model.sem_seg_head.predictor.lang_encoder
        _lang_encoder.get_text_embeddings(["background", "background"], is_eval=True)
'''
inference model
'''
@torch.no_grad()
def inference(image, instruction, *args, **kwargs):
    """Apply a text editing instruction to an image using ``model_last``.

    Args:
        image: Input picture exposing ``convert("RGB")`` (e.g. a PIL image).
            May be ``None``, for instance when nothing was uploaded.
        instruction: The editing request text, forwarded unchanged.
        *args: Extra positional arguments forwarded to
            ``referring_inpainting_gpt3``.
        **kwargs: Extra keyword arguments forwarded to
            ``referring_inpainting_gpt3``.

    Returns:
        Whatever ``referring_inpainting_gpt3`` returns.

    Raises:
        ValueError: If ``image`` is ``None``.
    """
    if image is None:
        # Fail with an actionable message instead of the opaque
        # "'NoneType' object has no attribute 'convert'".
        raise ValueError("No input image was provided; please upload an image first.")

    rgb_image = image.convert("RGB")
    # Gradients are already disabled by the decorator; the call itself runs
    # under float16 autocast on CUDA.
    with torch.autocast(device_type='cuda', dtype=torch.float16):
        return referring_inpainting_gpt3(model_last, rgb_image, instruction, *args, **kwargs)
'''
launch app
'''
title = "Instructional Image Editing"
description = "<p style='text-align: center'> <a href='https://x-decoder-vl.github.io/' target='_blank'>Project Page</a> | <a href='https://arxiv.org/pdf/2212.11270.pdf' target='_blank'>Paper</a> | <a href='https://github.com/microsoft/X-Decoder' target='_blank'>Github Repo</a> | <a href='https://youtu.be/wYp6vmyolqE' target='_blank'>Video</a> </p>"
help_text = """
This demo is leveraging X-Decoder's fine-grained understanding for instruct-based image editing. You can use it to:
1. Remove object, e.g., remove the dog in the image
2. Change object, e.g., change the sky with a mountain
"""

# Input widgets: the picture is delivered to `inference` as a PIL image, the
# instruction as plain text. gr.Image replaces the legacy gr.inputs.Image /
# gr.outputs.Image namespaces, matching the gr.Textbox component used here.
inputs = [gr.Image(type='pil'), gr.Textbox(label="instruction")]

gr.Interface(
    fn=inference,
    inputs=inputs,
    outputs=[
        gr.Image(type="pil", label="edit result"),
    ],
    examples=[
        ["./images/apples.jpg", "change green apple to a red apple"],
        ["./images/girl_and_two_boys.png", "remove the boy with blue backbag"],
        ["./images/dog.png", "remove the dog"],
        ["./images/horse.png", "change the sky to mountain"],
    ],
    title=title,
    description=description,
    # The help text used to be wrapped in a gr.Markdown created outside any
    # Blocks context, which is never rendered; passing it as the Interface
    # `article` actually displays it on the page.
    article=help_text,
    allow_flagging='never',
    cache_examples=True,
).launch()