Spaces:
Sleeping
Sleeping
File size: 9,680 Bytes
f8ea2c9 848ce1e f8ea2c9 8bef29c f8ea2c9 8bef29c f8ea2c9 8bef29c c3164df 8bef29c bf92928 8bef29c f8ea2c9 8bef29c b898ea7 bf92928 8bef29c f8ea2c9 8bef29c b898ea7 bf92928 8132ec4 8bef29c 8132ec4 f8ea2c9 bdb5c5d f8ea2c9 f8692f6 8132ec4 f8692f6 8bef29c f8692f6 29fd41c f8692f6 29fd41c f8692f6 29fd41c 848ce1e 43ba66f 838b9d1 43ba66f 848ce1e 8bef29c b898ea7 d506e15 bf92928 f8ea2c9 1920c80 f8ea2c9 bdb5c5d f8ea2c9 8bef29c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 |
import os, yaml
import gradio as gr
import requests
import argparse
from PIL import Image
import numpy as np
import torch
from transformers import AutoModelForCausalLM
from huggingface_hub import hf_hub_download
## InstructIR Plugin ##
from insir_models import instructir
from insir_text.models import LanguageModel, LMHead
# File names used by the InstructIR plugin: the evaluation config and the two
# checkpoints (language-model head and image model).
CONFIG = "eval5d.yml"
LM_MODEL = "lm_instructir-7d.pt"
MODEL_NAME = "im_instructir-7d.pt"

# Fetch both checkpoints from the Hub into the working directory so the
# relative file names above can be opened directly (image model first).
for _checkpoint in (MODEL_NAME, LM_MODEL):
    hf_hub_download(repo_id="marcosv/InstructIR", filename=_checkpoint, local_dir="./")
def dict2namespace(config):
    """Recursively turn a (possibly nested) dict into an ``argparse.Namespace``.

    Each key of ``config`` becomes an attribute of the returned namespace.
    Values that are dicts are converted the same way; every other value is
    stored unchanged.
    """
    result = argparse.Namespace()
    for name, entry in config.items():
        setattr(result, name, dict2namespace(entry) if isinstance(entry, dict) else entry)
    return result
# Parse the YAML config file and expose it with attribute access
# (cfg.model.width, cfg.llm.model, ...).
with open(CONFIG, "r", encoding="utf-8") as f:
    config = yaml.safe_load(f)
cfg = dict2namespace(config)

# Run the InstructIR models on the GPU when one is available, else on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Image restoration network, built from the `model` section of the config.
ir_model = instructir.create_model(
    input_channels=cfg.model.in_ch,
    width=cfg.model.width,
    enc_blks=cfg.model.enc_blks,
    middle_blk_num=cfg.model.middle_blk_num,
    dec_blks=cfg.model.dec_blks,
    txtdim=cfg.model.textdim,
)
ir_model = ir_model.to(device)
print ("IMAGE MODEL CKPT:", MODEL_NAME)
ir_model.load_state_dict(torch.load(MODEL_NAME, map_location="cpu"), strict=True)
# The model is only used for inference here, so put it in evaluation mode.
ir_model.eval()

os.environ["TOKENIZERS_PARALLELISM"] = "false"

# Text side: a sentence encoder plus a small head that maps its embedding to
# the text embedding consumed by `ir_model`.
LMODEL = cfg.llm.model
language_model = LanguageModel(model=LMODEL)
lm_head = LMHead(embedding_dim=cfg.llm.model_dim, hidden_dim=cfg.llm.embd_dim, num_classes=cfg.llm.nclasses)
lm_head = lm_head.to(device)
print("LMHEAD MODEL CKPT:", LM_MODEL)
lm_head.load_state_dict(torch.load(LM_MODEL, map_location="cpu"), strict=True)
lm_head.eval()
def process_img(image, prompt=None):
    """Restore ``image`` with InstructIR, guided by a text instruction.

    Args:
        image: The picture to restore (a PIL image from the Gradio widget, or
            an H x W x 3 array). ``None`` means nothing was uploaded.
        prompt: Instruction for the restoration model. When ``None``, the
            Co-Instruct chat model is asked how to improve the image and its
            answer (plus a short request suffix) is used as the instruction.

    Returns:
        The restored picture as a PIL image, or ``None`` when ``image`` is
        ``None``.
    """
    if image is None:
        # Submitting without an upload used to crash; return an empty result.
        return None

    if prompt is None:
        prompt = chat("How to improve the quality of the image?", [], image, None, None, None)
        prompt += "Please help me improve its quality!"
    print(prompt)

    # The tensor layout below needs exactly three channels, so grayscale and
    # RGBA uploads are converted first.
    if isinstance(image, Image.Image):
        image = image.convert("RGB")
    img = (np.array(image) / 255.).astype(np.float32)
    # H x W x C -> 1 x C x H x W
    y = torch.tensor(img).permute(2, 0, 1).unsqueeze(0).to(device)

    lm_embd = language_model(prompt).to(device)

    with torch.no_grad():
        # The second output of the head is not needed for restoration.
        text_embd, _ = lm_head(lm_embd)
        x_hat = ir_model(y, text_embd)
        # Drop only the batch axis so a size-1 height or width survives.
        restored_img = x_hat.squeeze(0).permute(1, 2, 0).clamp(0, 1).cpu().numpy()

    restored_img = (restored_img * 255.0).round().astype(np.uint8)  # float32 to uint8
    return Image.fromarray(restored_img)
## InstructIR Plugin ##
# Co-Instruct chat model used by `chat` below. It is loaded once at import
# time in float16 with the eager attention implementation.
# trust_remote_code=True is passed, so loading relies on code shipped with
# the model repository.
# NOTE(review): device_map pins the whole model to "cuda:0" even though the
# InstructIR models above fall back to the CPU -- presumably this Space always
# runs with a GPU; confirm before running elsewhere.
model = AutoModelForCausalLM.from_pretrained("q-future/co-instruct-preview",
trust_remote_code=True,
torch_dtype=torch.float16,
attn_implementation="eager",
device_map={"":"cuda:0"})
def _leading_images(*slots):
    """Return the uploaded images up to (not including) the first empty slot."""
    images = []
    for image in slots:
        if image is None:
            break
        images.append(image)
    return images


def _image_preamble(count):
    """Build the opening ``USER:`` text declaring ``count`` image placeholders."""
    if count == 1:
        return "USER: The input image: <|image|>"
    # The missing space after "third image:" / "fourth image:" is kept exactly
    # as in the original prompts.
    labels = (
        "The first image: <|image|>",
        "The second image: <|image|>",
        "The third image:<|image|>",
        "The fourth image:<|image|>",
    )
    return "USER: " + "\n".join(labels[:count])


def chat(message, history, image_1, image_2, image_3, image_4):
    """Answer ``message`` about the uploaded image(s) with the Co-Instruct model.

    Args:
        message: The user's new question.
        history: Earlier turns as a sequence of ``(user_text, assistant_text)``
            pairs; empty for a fresh conversation.
        image_1, image_2, image_3, image_4: Image slots. Only the leading run
            of filled slots is used (e.g. slots 1 and 3 filled -> one image).

    Returns:
        The text generated after the last ``ASSISTANT:`` marker, or a short
        notice asking for an upload when ``image_1`` is empty.
    """
    print(history)

    images = _leading_images(image_1, image_2, image_3, image_4)
    if not images:
        # Previously this fell through with `images` undefined and crashed.
        return "Please upload at least one image (starting with Image 1) before asking a question."

    # The first user turn directly follows the image preamble; later turns get
    # their own "USER:" marker. Every past answer is closed with "</s>".
    # (The old loop replayed history[0] again and skipped the latest turn, and
    # the single-image case left the first answer without "</s>".)
    pieces = [_image_preamble(len(images))]
    for turn_index, turn in enumerate(history or []):
        if turn_index:
            pieces.append("USER:")
        pieces.append(turn[0] + " ASSISTANT:" + turn[1] + "</s>")
    if history:
        pieces.append("USER:")
    pieces.append(message + " ASSISTANT:")
    message = "".join(pieces)
    print(message)

    # NOTE(review): the clamp presumably keeps generated ids inside a range
    # the tokenizer can decode -- confirm against the model code.
    output_ids = model.chat(message, images, max_new_tokens=600).clamp(0, 100000)
    return model.tokenizer.batch_decode(output_ids)[0].split("ASSISTANT:")[-1]
#### Image, prompt examples
# Each row holds a question followed by two image entries (a file path under
# examples/ or None). The list is passed to gr.ChatInterface as
# `examples=examples`; presumably the image entries fill the first two image
# inputs -- verify against the Gradio version in use.
examples = [
["Which part of the image is relatively clearer, the upper part or the lower part? Please analyze in details.", "examples/sausage.jpg", None],
["Which image is noisy, and which one is with motion blur? Please analyze in details.", "examples/211.jpg", "examples/frog.png"],
["What is the problem in this image, and how to fix it? Please answer my questions one by one.", "examples/lol_748.png", None],
]
#<h1 align="center"><a href="https://github.com/Q-Future/Q-Instruct"><img src="https://github.com/Q-Future/Q-Instruct/blob/main/q_instruct_logo.png?raw=true", alt="Q-Instruct (mPLUG-Owl-2)" border="0" style="margin: 0 auto; height: 85px;" /></a> </h1>
# Page title, shared by the browser tab and the Blocks app.
title = "Co-Instruct-Plus🧑🏫🖌️"

# Layout: a header, a row with the four chat image slots, then a row holding
# the chat panel (left) and the InstructIR auto-restoration panel (right).
with gr.Blocks(title=title) as demo:
    # The HTML lines stay at column 0 inside the string so Markdown does not
    # treat them as an indented code block.
    title_markdown = ("""
<h1 align="center"><a href="https://github.com/Q-Future/Co-Instruct"><img src="https://raw.githubusercontent.com/Q-Future/Co-Instruct/main/co-instruct.png" alt="Co-Instruct" border="0" style="margin: 0 auto; height: 85px;" /></a> </h1>
<div align="center">Built upon <strong>Q-Instruct: Improving Low-level Visual Abilities for Multi-modality Foundation Models</strong></div>
<div align="center">Built upon Co-Instruct: <strong>Towards Open-ended Visual Quality Comparison</strong></div>
<div align="center">Co-Instruct is the Upgraded Version of Q-Instruct with Multi-image (up to 4, same as GPT-4V) Support! We also support <a href='https://huggingface.co/marcosv/InstructIR'>InstructIR</a> as PLUGIN!</div>
<h5 align="center"> Please find our more accurate visual scoring demo on <a href='https://huggingface.co/spaces/teowu/OneScorer'>[OneScorer]</a> (Q-Align)!</h5>
<div align="center">
<div style="display:flex; gap: 0.25rem;" align="center">
<a href='https://github.com/Q-Future/Q-Instruct'><img src='https://img.shields.io/badge/Github-Code-blue'></a>
<a href="https://Q-Instruct.github.io/Q-Instruct/fig/Q_Instruct_v0_1_preview.pdf"><img src="https://img.shields.io/badge/Technical-Report-red"></a>
<a href='https://github.com/Q-Future/Q-Instruct/stargazers'><img src='https://img.shields.io/github/stars/Q-Future/Q-Instruct.svg?style=social'></a>
</div>
</div>
""")
    gr.Markdown(title_markdown)

    # Image slots forwarded to `chat` as its four image arguments.
    with gr.Row():
        input_img_1 = gr.Image(type='pil', label="Image 1 (First image)")
        input_img_2 = gr.Image(type='pil', label="Image 2 (Second image)")
        input_img_3 = gr.Image(type='pil', label="Image 3 (Third image)")
        input_img_4 = gr.Image(type='pil', label="Image 4 (Fourth image)")

    with gr.Row():
        with gr.Column(scale=2):
            gr.ChatInterface(
                fn=chat,
                additional_inputs=[input_img_1, input_img_2, input_img_3, input_img_4],
                theme="Soft",
                examples=examples,
            )
        with gr.Column(scale=1):
            # InstructIR plugin: only the image is passed, so `process_img`
            # derives the instruction from the chat model itself.
            input_image_ir = gr.Image(type="pil", label="Image for Auto Restoration")
            output_image_ir = gr.Image(type="pil", label="Output of Auto Restoration")
            gr.Interface(
                fn=process_img,
                inputs=[input_image_ir],
                outputs=[output_image_ir],
                examples=["examples/gopro.png", "examples/noise50.png", "examples/lol_748.png"],
            )

demo.launch(share=True)