rogerxavier committed on
Commit 239857b
1 Parent(s): 98e49c6

Update 0filterImage.py

Files changed (1)
  1. 0filterImage.py +188 -25
0filterImage.py CHANGED
@@ -1,28 +1,182 @@
- # Delete problematic images via the modelscope interface to make sure the content passes review
-
-
- import base64
- import json
- import os
- from io import BytesIO
  import pandas as pd
  from PIL import Image
- from dotenv import load_dotenv
- import requests
- from transformers import pipeline


- def get_nsfw_score(image_path: str, model) -> float:
-     # Given an image path and a loaded model, return the NSFW score
-     img = Image.open(image_path)
-     result = model(images=img)
-     nsfw_score = next((item['score'] for item in result if item['label'] == 'nsfw'), None)
-     return nsfw_score
-

  if __name__ == '__main__':
-     load_dotenv()
-     model = pipeline("image-classification", model="Falconsai/nsfw_image_detection")  # load the model
      # Path to the 'manga' subdirectory under the current working directory
      img_path = 'manga'
      subdir_path = os.path.join(os.getcwd(), img_path)
@@ -34,11 +188,20 @@ if __name__ == '__main__':
              if file.endswith(".jpg") or file.endswith(".png"):
                  image_files.append(os.path.relpath(os.path.join(root, file)))
      for image_path in image_files:
-         result = get_nsfw_score(image_path, model)  # returns the NSFW score as a float
-         if result > 0.5:
-             print("Problematic image found, deleting it to pass review:", image_path)
              os.remove(image_path)
          else:
-             print(image_path, "image is fine")
-
-
+ import numpy as np
+ import os, re, cv2
+ from typing import *
  import pandas as pd
  from PIL import Image
+ from huggingface_hub import hf_hub_download
+ from onnxruntime import InferenceSession
+
+
+
+ # noinspection PyUnresolvedReferences
+ def make_square(img, target_size):
+     old_size = img.shape[:2]
+     desired_size = max(old_size)
+     desired_size = max(desired_size, target_size)
+
+     delta_w = desired_size - old_size[1]
+     delta_h = desired_size - old_size[0]
+     top, bottom = delta_h // 2, delta_h - (delta_h // 2)
+     left, right = delta_w // 2, delta_w - (delta_w // 2)
+
+     color = [255, 255, 255]
+     return cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)
+
+
+ # noinspection PyUnresolvedReferences
+ def smart_resize(img, size):
+     # Assumes the image has already gone through make_square
+     if img.shape[0] > size:
+         img = cv2.resize(img, (size, size), interpolation=cv2.INTER_AREA)
+     elif img.shape[0] < size:
+         img = cv2.resize(img, (size, size), interpolation=cv2.INTER_CUBIC)
+     else:  # already the right size, do nothing
+         pass
+
+     return img
+
+
+ class WaifuDiffusionInterrogator:
+     def __init__(
+             self,
+             repo='SmilingWolf/wd-v1-4-vit-tagger',
+             model_path='model.onnx',
+             tags_path='selected_tags.csv',
+             mode: str = "auto"
+     ) -> None:
+         self.__repo = repo
+         self.__model_path = model_path
+         self.__tags_path = tags_path
+         self._provider_mode = mode
+
+         self.__initialized = False
+         self._model, self._tags = None, None
+
+     def _init(self) -> None:
+         if self.__initialized:
+             return
+
+         model_path = hf_hub_download(self.__repo, filename=self.__model_path)
+         tags_path = hf_hub_download(self.__repo, filename=self.__tags_path)
+
+         self._model = InferenceSession(str(model_path))
+         self._tags = pd.read_csv(tags_path)
+
+         self.__initialized = True
+
+     def _calculation(self, image: Image.Image) -> pd.DataFrame:
+         # print(image) todo: figure out what to do if URL
+         self._init()
+
+         # code for converting the image and running the model is taken from the link below
+         # thanks, SmilingWolf!
+         # https://huggingface.co/spaces/SmilingWolf/wd-v1-4-tags/blob/main/app.py
+
+         # convert the image to fit the model input
+         _, height, _, _ = self._model.get_inputs()[0].shape
+
+         # flatten alpha onto a white background
+         print(image)
+         image = image.convert('RGBA')
+         new_image = Image.new('RGBA', image.size, 'WHITE')
+         new_image.paste(image, mask=image)
+         image = new_image.convert('RGB')
+         image = np.asarray(image)
+
+         # PIL RGB to OpenCV BGR
+         image = image[:, :, ::-1]
+
+         image = make_square(image, height)
+         image = smart_resize(image, height)
+         image = image.astype(np.float32)
+         image = np.expand_dims(image, 0)
+
+         # evaluate the model
+         input_name = self._model.get_inputs()[0].name
+         label_name = self._model.get_outputs()[0].name
+         confidence = self._model.run([label_name], {input_name: image})[0]
+
+         full_tags = self._tags[['name', 'category']].copy()
+         full_tags['confidence'] = confidence[0]
+
+         return full_tags
+
+     def interrogate(self, image: Image.Image) -> Tuple[Dict[str, float], Dict[str, float]]:
+         full_tags = self._calculation(image)
+
+         # the first 4 rows are the rating tags (general, sensitive, questionable, explicit); their category is 9
+         ratings = dict(full_tags[full_tags['category'] == 9][['name', 'confidence']].values)
+
+         # the rest are regular tags
+         tags = dict(full_tags[full_tags['category'] != 9][['name', 'confidence']].values)
+
+         return ratings, tags
+
+
+ WAIFU_MODELS: Mapping[str, WaifuDiffusionInterrogator] = {
+     'chen-vit': WaifuDiffusionInterrogator(),
+     'chen-convnext': WaifuDiffusionInterrogator(
+         repo='SmilingWolf/wd-v1-4-convnext-tagger'
+     ),
+     'chen-convnext2': WaifuDiffusionInterrogator(
+         repo="SmilingWolf/wd-v1-4-convnextv2-tagger-v2"
+     ),
+     'chen-swinv2': WaifuDiffusionInterrogator(
+         repo='SmilingWolf/wd-v1-4-swinv2-tagger-v2'
+     ),
+     'chen-moat2': WaifuDiffusionInterrogator(
+         repo='SmilingWolf/wd-v1-4-moat-tagger-v2'
+     ),
+     'chen-convnext3': WaifuDiffusionInterrogator(
+         repo='SmilingWolf/wd-convnext-tagger-v3'
+     ),
+     'chen-vit3': WaifuDiffusionInterrogator(
+         repo='SmilingWolf/wd-vit-tagger-v3'
+     ),
+     'chen-swinv3': WaifuDiffusionInterrogator(
+         repo='SmilingWolf/wd-swinv2-tagger-v3'
+     ),
+ }
+ RE_SPECIAL = re.compile(r'([\\()])')
+
+
+ def image_to_wd14_tags(image: Image.Image, model_name: str, threshold: float,
+                        use_spaces: bool, use_escape: bool, include_ranks=False, score_descend=True) \
+         -> Tuple[Mapping[str, float], str, Mapping[str, float]]:
+     model = WAIFU_MODELS[model_name]
+     ratings, tags = model.interrogate(image)
+
+     filtered_tags = {
+         tag: score for tag, score in tags.items()
+         if score >= threshold
+     }
+
+     text_items = []
+     tags_pairs = filtered_tags.items()
+     if score_descend:
+         tags_pairs = sorted(tags_pairs, key=lambda x: (-x[1], x[0]))
+     for tag, score in tags_pairs:
+         tag_outformat = tag
+         if use_spaces:
+             tag_outformat = tag_outformat.replace('_', '-')
+         else:
+             tag_outformat = tag_outformat.replace(' ', ', ')
+             tag_outformat = tag_outformat.replace('_', ' ')
+         if use_escape:
+             tag_outformat = re.sub(RE_SPECIAL, r'\\\1', tag_outformat)
+         if include_ranks:
+             tag_outformat = f"({tag_outformat}:{score:.3f})"
+         text_items.append(tag_outformat)
+     if use_spaces:
+         output_text = ' '.join(text_items)
+     else:
+         output_text = ', '.join(text_items)
+
+     return ratings, output_text, filtered_tags
+


  if __name__ == '__main__':
      # Path to the 'manga' subdirectory under the current working directory
      img_path = 'manga'
      subdir_path = os.path.join(os.getcwd(), img_path)

              if file.endswith(".jpg") or file.endswith(".png"):
                  image_files.append(os.path.relpath(os.path.join(root, file)))
      for image_path in image_files:
+         # Open and read the image file
+         image_data = Image.open(image_path)
+         result = image_to_wd14_tags(image_data, 'chen-moat2', 0.5, True, True)  # run tagging; only the rating tags matter here, i.e. element [0]
+         # Extract the first element (the ratings dict) from result
+         rating_dict = result[0]
+         # Find the rating with the highest score
+         max_proportion_key = max(rating_dict, key=rating_dict.get)
+         max_proportion_value = rating_dict[max_proportion_key]
+
+         # Report the top rating
+         print(f"Top rating: {max_proportion_key}, score: {max_proportion_value}")
+         if max_proportion_key == "questionable" or max_proportion_key == "explicit":
+             print("Image failed the check, deleting it")
              os.remove(image_path)
+             print("Successfully deleted the failing image")
          else:
+             print("Image passed the check")
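For context, a minimal usage sketch (not part of the commit) of how the new tagger-based check can be exercised on a single image. It assumes the file above is saved as 0filterImage.py in the working directory and that a test image exists at manga/sample.jpg (a placeholder path); the module is loaded via importlib because a module name starting with a digit cannot be imported with a plain import statement, and the first call downloads the chosen SmilingWolf ONNX model from the Hugging Face Hub.

# Usage sketch (not part of the commit): check a single image with the new tagger.
# Assumes this commit's file is saved as 0filterImage.py in the current directory
# and that manga/sample.jpg exists; both names are placeholders.
import importlib

from PIL import Image

wd14 = importlib.import_module("0filterImage")  # file name starts with a digit, so import it by string

image = Image.open("manga/sample.jpg")
ratings, tag_text, tags = wd14.image_to_wd14_tags(image, 'chen-moat2', 0.5, True, True)

top_rating = max(ratings, key=ratings.get)
print("ratings:", ratings)        # scores for general / sensitive / questionable / explicit
print("top rating:", top_rating)  # the script deletes the image if this is "questionable" or "explicit"
print("tags:", tag_text)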