import numpy as np
import torch
import torch.nn as nn
from transformers import CLIPConfig, CLIPVisionModelWithProjection, PreTrainedModel

from ...utils import logging


logger = logging.get_logger(__name__)


class IFSafetyChecker(PreTrainedModel):
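    """
    Safety checker that scores the pooled CLIP image embedding with two linear heads to flag potentially NSFW and
    watermarked images. Flagged images are replaced with black images of the same shape.
    """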
    config_class = CLIPConfig

    _no_split_modules = ["CLIPEncoderLayer"]

    def __init__(self, config: CLIPConfig):
        super().__init__(config)

        self.vision_model = CLIPVisionModelWithProjection(config.vision_config)
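        # Two scalar heads score the projected image embedding: `p_head` for NSFW content, `w_head` for watermarks.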
        self.p_head = nn.Linear(config.vision_config.projection_dim, 1)
        self.w_head = nn.Linear(config.vision_config.projection_dim, 1)

    @torch.no_grad()
    def forward(self, clip_input, images, p_threshold=0.5, w_threshold=0.5):
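        # `clip_input` is the CLIP-preprocessed batch; index 0 of the output holds the projected image embeddings.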
        image_embeds = self.vision_model(clip_input)[0]
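        # Score each embedding with the NSFW head and flag images whose score exceeds `p_threshold`.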
        nsfw_detected = self.p_head(image_embeds)
        nsfw_detected = nsfw_detected.flatten()
        nsfw_detected = nsfw_detected > p_threshold
        nsfw_detected = nsfw_detected.tolist()

        if any(nsfw_detected):
            logger.warning(
                "Potential NSFW content was detected in one or more images. A black image will be returned instead."
                " Try again with a different prompt and/or seed."
            )
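        # Replace every flagged image with an all-black image of the same shape.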
        for idx, nsfw_detected_ in enumerate(nsfw_detected):
            if nsfw_detected_:
                images[idx] = np.zeros(images[idx].shape)
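        # Repeat the same scoring and masking for the watermark head, using `w_threshold`.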
        watermark_detected = self.w_head(image_embeds)
        watermark_detected = watermark_detected.flatten()
        watermark_detected = watermark_detected > w_threshold
        watermark_detected = watermark_detected.tolist()

        if any(watermark_detected):
            logger.warning(
                "Potential watermarked content was detected in one or more images. A black image will be returned instead."
                " Try again with a different prompt and/or seed."
            )

        for idx, watermark_detected_ in enumerate(watermark_detected):
            if watermark_detected_:
                images[idx] = np.zeros(images[idx].shape)

        return images, nsfw_detected, watermark_detected