|
import torch |
|
import gradio as gr |
|
from PIL import Image |
|
from torchvision import transforms |
|
from statistics import mean |
|
# Disable Pillow's decompression-bomb safeguard so arbitrarily large images
# can be opened without raising DecompressionBombError. Only safe because the
# app crops a small patch rather than processing the full image.
Image.MAX_IMAGE_PIXELS = None
|
|
|
def read_img_file(f):
    """Open *f* with PIL and return it as an RGB image.

    Non-RGB inputs (e.g. RGBA, palette, grayscale) are converted so that
    downstream transforms always see three channels.
    """
    image = Image.open(f)
    return image if image.mode == 'RGB' else image.convert('RGB')
|
|
|
# Evaluation transform for the CNN detectors: random 256x256 crop, tensor
# conversion, then normalization with the standard ImageNet mean/std.
_transform_test_random = transforms.Compose(
    [
        transforms.RandomCrop((256, 256)),
        transforms.ToTensor(),
        transforms.Normalize(
            (0.485, 0.456, 0.406),
            (0.229, 0.224, 0.225),
        ),
    ]
)
|
|
|
# Same pipeline as _transform_test_random but with a 252x252 crop — the size
# the EVA-02 ViT checkpoint expects (multiple of its 14-pixel patch size).
_transform_test_random_vit = transforms.Compose(
    [
        transforms.RandomCrop((252, 252)),
        transforms.ToTensor(),
        transforms.Normalize(
            (0.485, 0.456, 0.406),
            (0.229, 0.224, 0.225),
        ),
    ]
)
|
|
|
def detect(img, model_choices):
    """Score *img* with the selected detector and return a report string.

    Parameters
    ----------
    img : PIL.Image.Image
        Input image (RGB; provided by the Gradio image widget).
    model_choices : str
        UI label of the detector to use; must be one of the keys below.

    Returns
    -------
    str
        Human-readable text with the 10 per-crop sigmoid scores, their mean,
        and a single-crop score for comparison.

    Raises
    ------
    ValueError
        If *model_choices* is not a known detector label. (The original
        if/elif chain crashed with NameError on unknown labels because
        ``model`` and ``_transform`` were never assigned.)
    """
    # UI label -> (checkpoint path, crop transform). One table instead of a
    # repeated if/elif chain keeps label handling in a single place.
    registry = {
        "EVA-02 ViT L/14": ("./model_eva.pth", _transform_test_random_vit),
        "ConvNext Large": ("./model_convnext.pth", _transform_test_random),
        "EfficientNet-V2 B0": ("./model_effnet.pth", _transform_test_random),
    }
    try:
        model_path, _transform = registry[model_choices]
    except KeyError:
        raise ValueError(f"Unknown model choice: {model_choices!r}") from None

    # SECURITY NOTE(review): torch.load unpickles the file and can execute
    # arbitrary code — only ever load trusted checkpoints. weights_only=True
    # is not applicable here because whole models (not state dicts) are saved.
    model = torch.load(model_path, map_location="cpu").cpu().eval()

    output = ""
    with torch.inference_mode():
        # 10 random crops, averaged: reduces the variance introduced by the
        # random crop location.
        scores = []
        for _ in range(10):
            crop = _transform(img)
            # model(...) rather than model.forward(...) so nn.Module hooks run.
            logits = model(crop.unsqueeze(0))
            probs = torch.sigmoid(logits).cpu().numpy()
            scores.append(probs[0][0])
        output += f"{str(scores)}\n"
        output += f"10 try method: {mean(scores)}\n"

        # Single-crop score for comparison with the averaged estimate.
        crop = _transform(img)
        probs = torch.sigmoid(model(crop.unsqueeze(0))).cpu().numpy()
        output += f"1 try method: {probs}\n"

    return output
|
|
|
# UI labels for the available detector checkpoints; these strings must match
# the labels handled inside detect().
model_choices = ["ConvNext Large", "EVA-02 ViT L/14", "EfficientNet-V2 B0"]

# Interface description shown above the demo. Plain string: the original
# f-prefix was pointless since there are no placeholders (ruff F541).
descr = """
Detecting AutoEncoder is Enough to Catch LDM Generated Images (https://arxiv.org/abs/2411.06441)
Code at https://github.com/qwertyforce/Detect_LDM_By_Detecting_VAE
Models at https://huggingface.co/qwertyforce/Detect_LDM_By_Detecting_VAE
"""
|
# Build the Gradio UI: an image input plus a radio selector for the detector,
# wired to detect(), with a plain-text output.
demo = gr.Interface(
    fn=detect,
    inputs=[
        gr.Image(type="pil", label="Input Image"),
        gr.Radio(
            model_choices,
            type="value",
            value="EVA-02 ViT L/14",
            label="Choose Detector Model",
        ),
    ],
    outputs="text",
    title="Detecting AutoEncoder is Enough to Catch LDM Generated Images",
    description=descr,
)

# Launch only when executed as a script, so importing this module (e.g. from
# tests or a deployment wrapper) does not start a server as a side effect.
if __name__ == "__main__":
    demo.launch()