Harisreedhar committed • Commit db275a2 • 1 Parent(s): 638204f
update nsfw-checker
Browse files:
- app.py +127 -81
- assets/pretrained_models/open-nsfw.onnx +3 -0
- face_analyser.py +0 -1
- face_enhancer.py +15 -3
- face_parsing/__init__.py +2 -2
- face_parsing/parse_mask.py +66 -9
- face_swapper.py +55 -70
- nsfw_checker/LICENSE.md +11 -0
- nsfw_checker/__init__.py +1 -0
- nsfw_checker/opennsfw.py +37 -0
- requirements.txt +0 -1
- utils.py +43 -33
app.py CHANGED
@@ -12,16 +12,18 @@ import insightface
 import onnxruntime
 import numpy as np
 import gradio as gr
+import threading
+import queue
 from tqdm import tqdm
 import concurrent.futures
 from moviepy.editor import VideoFileClip
 
-from …
-from face_swapper import Inswapper, paste_to_whole
+from nsfw_checker import NSFWChecker
+from face_swapper import Inswapper, paste_to_whole
 from face_analyser import detect_conditions, get_analysed_data, swap_options_list
-from …
-from …
-from utils import trim_video, StreamerThread, ProcessBar, open_directory, split_list_by_lengths, merge_img_sequence_from_ref
+from face_parsing import init_parsing_model, get_parsed_mask, mask_regions, mask_regions_to_list
+from face_enhancer import get_available_enhancer_names, load_face_enhancer_model, cv2_interpolations
+from utils import trim_video, StreamerThread, ProcessBar, open_directory, split_list_by_lengths, merge_img_sequence_from_ref, create_image_grid
 
 ## ------------------------------ USER ARGS ------------------------------
 
@@ -39,7 +41,7 @@ user_args = parser.parse_args()
 USE_COLAB = user_args.colab
 USE_CUDA = user_args.cuda
 DEF_OUTPUT_PATH = user_args.out_dir
-BATCH_SIZE = user_args.batch_size
+BATCH_SIZE = int(user_args.batch_size)
 WORKSPACE = None
 OUTPUT_FILE = None
 CURRENT_FRAME = None
@@ -60,8 +62,9 @@ MASK_INCLUDE = [
     "U-Lip"
 ]
 MASK_SOFT_KERNEL = 17
-MASK_SOFT_ITERATIONS = …
-MASK_BLUR_AMOUNT = …
+MASK_SOFT_ITERATIONS = 10
+MASK_BLUR_AMOUNT = 0.1
+MASK_ERODE_AMOUNT = 0.15
 
 FACE_SWAPPER = None
 FACE_ANALYSER = None
@@ -70,7 +73,7 @@ FACE_PARSER = None
 NSFW_DETECTOR = None
 FACE_ENHANCER_LIST = ["NONE"]
 FACE_ENHANCER_LIST.extend(get_available_enhancer_names())
-
+FACE_ENHANCER_LIST.extend(cv2_interpolations)
 
 ## ------------------------------ SET EXECUTION PROVIDER ------------------------------
 # Note: Non CUDA users may change settings here
@@ -113,12 +116,12 @@ def load_face_swapper_model(path="./assets/pretrained_models/inswapper_128.onnx"
 def load_face_parser_model(path="./assets/pretrained_models/79999_iter.pth"):
     global FACE_PARSER
     if FACE_PARSER is None:
-        FACE_PARSER = …
+        FACE_PARSER = init_parsing_model(path, device=device)
 
-def load_nsfw_detector_model(path="./assets/pretrained_models/…
+def load_nsfw_detector_model(path="./assets/pretrained_models/open-nsfw.onnx"):
     global NSFW_DETECTOR
     if NSFW_DETECTOR is None:
-        NSFW_DETECTOR = …
+        NSFW_DETECTOR = NSFWChecker(model_path=path, providers=PROVIDER)
 
 
 load_face_analyser_model()
@@ -145,6 +148,7 @@ def process(
     mask_soft_kernel,
     mask_soft_iterations,
     blur_amount,
+    erode_amount,
     face_scale,
     enable_laplacian_blend,
     crop_top,
@@ -189,6 +193,7 @@ def process(
     get_finsh_text = lambda start_time: f"✔️ Completed in {int(total_exec_time(start_time)[0])} min {int(total_exec_time(start_time)[1])} sec."
 
     ## ------------------------------ PREPARE INPUTS & LOAD MODELS ------------------------------
+
     yield "### \n ⌛ Loading NSFW detector model...", *ui_before()
     load_nsfw_detector_model()
 
@@ -199,7 +204,8 @@ def process(
     load_face_swapper_model()
 
     if face_enhancer_name != "NONE":
-        …
+        if face_enhancer_name not in cv2_interpolations:
+            yield f"### \n ⌛ Loading {face_enhancer_name} model...", *ui_before()
         FACE_ENHANCER = load_face_enhancer_model(name=face_enhancer_name, device=device)
     else:
         FACE_ENHANCER = None
@@ -209,15 +215,19 @@ def process(
     load_face_parser_model()
 
     includes = mask_regions_to_list(mask_includes)
-    smooth_mask = SoftErosion(kernel_size=17, threshold=0.9, iterations=int(mask_soft_iterations)).to(device) if mask_soft_iterations > 0 else None
     specifics = list(specifics)
     half = len(specifics) // 2
     sources = specifics[:half]
     specifics = specifics[half:]
-    …
+    if crop_top > crop_bott:
+        crop_top, crop_bott = crop_bott, crop_top
+    if crop_left > crop_right:
+        crop_left, crop_right = crop_right, crop_left
+    crop_mask = (crop_top, 511-crop_bott, crop_left, 511-crop_right)
 
     def swap_process(image_sequence):
+        ## ------------------------------ CONTENT CHECK ------------------------------
+
         yield "### \n ⌛ Checking contents...", *ui_before()
         nsfw = NSFW_DETECTOR.is_nsfw(image_sequence)
         if nsfw:
@@ -227,6 +237,8 @@ def process(
             return False
         EMPTY_CACHE()
 
+        ## ------------------------------ ANALYSE FACE ------------------------------
+
         yield "### \n ⌛ Analysing face data...", *ui_before()
         if condition != "Specific Face":
             source_data = source_path, age
@@ -241,81 +253,99 @@ def process(
             scale=face_scale
         )
 
-        …
-        preds, aimgs, matrs = FACE_SWAPPER.batch_forward(whole_frame_list, analysed_targets, analysed_sources)
-        EMPTY_CACHE()
+        ## ------------------------------ SWAP FUNC ------------------------------
 
-        …
+        yield "### \n ⌛ Generating faces...", *ui_before()
+        preds = []
+        matrs = []
+        count = 0
+        global PREVIEW
+        for batch_pred, batch_matr in FACE_SWAPPER.batch_forward(whole_frame_list, analysed_targets, analysed_sources):
+            preds.extend(batch_pred)
+            matrs.extend(batch_matr)
+            EMPTY_CACHE()
+            count += 1
+
+            if USE_CUDA:
+                image_grid = create_image_grid(batch_pred, size=128)
+                PREVIEW = image_grid[:, :, ::-1]
+                yield f"### \n ⌛ Generating face Batch {count}", *ui_before()
+
+        ## ------------------------------ FACE ENHANCEMENT ------------------------------
 
+        generated_len = len(preds)
         if face_enhancer_name != "NONE":
-            yield f"### \n ⌛ …
-            for idx, pred in tqdm(enumerate(preds), total=…
+            yield f"### \n ⌛ Upscaling faces with {face_enhancer_name}...", *ui_before()
+            for idx, pred in tqdm(enumerate(preds), total=generated_len, desc=f"Upscaling with {face_enhancer_name}"):
                 enhancer_model, enhancer_model_runner = FACE_ENHANCER
                 pred = enhancer_model_runner(pred, enhancer_model)
                 preds[idx] = cv2.resize(pred, (512,512))
-                aimgs[idx] = cv2.resize(aimgs[idx], (512,512))
-                matrs[idx] /= 0.25
-        …
         EMPTY_CACHE()
 
+        ## ------------------------------ FACE PARSING ------------------------------
+
+        if enable_face_parser:
+            yield "### \n ⌛ Face-parsing mask...", *ui_before()
+            masks = []
+            count = 0
+            for batch_mask in get_parsed_mask(FACE_PARSER, preds, classes=includes, device=device, batch_size=BATCH_SIZE, softness=int(mask_soft_iterations)):
+                masks.append(batch_mask)
+                EMPTY_CACHE()
+                count += 1
+
+                if len(batch_mask) > 1:
+                    image_grid = create_image_grid(batch_mask, size=128)
+                    PREVIEW = image_grid[:, :, ::-1]
+                    yield f"### \n ⌛ Face parsing Batch {count}", *ui_before()
+            masks = np.concatenate(masks, axis=0) if len(masks) >= 1 else masks
+        else:
+            masks = [None] * generated_len
+
+        ## ------------------------------ SPLIT LIST ------------------------------
+
         split_preds = split_list_by_lengths(preds, num_faces_per_frame)
         del preds
-        split_aimgs = split_list_by_lengths(aimgs, num_faces_per_frame)
-        del aimgs
         split_matrs = split_list_by_lengths(matrs, num_faces_per_frame)
         del matrs
+        split_masks = split_list_by_lengths(masks, num_faces_per_frame)
+        del masks
 
-        …
+        ## ------------------------------ PASTE-BACK ------------------------------
+
+        yield "### \n ⌛ Pasting back...", *ui_before()
+        def post_process(frame_idx, frame_img, split_preds, split_matrs, split_masks, enable_laplacian_blend, crop_mask, blur_amount, erode_amount):
             whole_img_path = frame_img
             whole_img = cv2.imread(whole_img_path)
-            …
+            blend_method = 'laplacian' if enable_laplacian_blend else 'linear'
+            for p, m, mask in zip(split_preds[frame_idx], split_matrs[frame_idx], split_masks[frame_idx]):
+                p = cv2.resize(p, (512,512))
+                mask = cv2.resize(mask, (512,512)) if mask is not None else None
+                m /= 0.25
+                whole_img = paste_to_whole(p, whole_img, m, mask=mask, crop_mask=crop_mask, blend_method=blend_method, blur_amount=blur_amount, erode_amount=erode_amount)
             cv2.imwrite(whole_img_path, whole_img)
 
-        def concurrent_post_process(image_sequence, …
+        def concurrent_post_process(image_sequence, *args):
             with concurrent.futures.ThreadPoolExecutor() as executor:
                 futures = []
                 for idx, frame_img in enumerate(image_sequence):
-                    future = executor.submit(
-                        post_process,
-                        idx,
-                        frame_img,
-                        split_preds,
-                        split_aimgs,
-                        split_matrs,
-                        enable_laplacian_blend,
-                        crop_top,
-                        crop_bott,
-                        crop_left,
-                        crop_right
-                    )
+                    future = executor.submit(post_process, idx, frame_img, *args)
                     futures.append(future)
 
-                for future in tqdm(concurrent.futures.as_completed(futures), total=len(futures), desc="…
-                    try:
-                        result = future.result()
-                    except Exception as e:
-                        print(f"An error occurred: {e}")
+                for future in tqdm(concurrent.futures.as_completed(futures), total=len(futures), desc="Pasting back"):
+                    result = future.result()
 
         concurrent_post_process(
             image_sequence,
             split_preds,
-            split_aimgs,
             split_matrs,
+            split_masks,
             enable_laplacian_blend,
-            …
-            crop_right
+            crop_mask,
+            blur_amount,
+            erode_amount
         )
 
-        …
         ## ------------------------------ IMAGE ------------------------------
 
         if input_type == "Image":
@@ -496,7 +526,7 @@ def stop_running():
     if hasattr(STREAMER, "stop"):
         STREAMER.stop()
     STREAMER = None
-    …
+    return "Cancelled"
 
 
 def slider_changed(show_frame, video_path, frame_index):
@@ -581,6 +611,18 @@ with gr.Blocks(css=css) as interface:
         )
 
         with gr.Tab("🪄 Other Settings"):
+            face_scale = gr.Slider(
+                label="Face Scale",
+                minimum=0,
+                maximum=2,
+                value=1,
+                interactive=True,
+            )
+
+            face_enhancer_name = gr.Dropdown(
+                FACE_ENHANCER_LIST, label="Face Enhancer", value="NONE", multiselect=False, interactive=True
+            )
+
             with gr.Accordion("Advanced Mask", open=False):
                 enable_face_parser_mask = gr.Checkbox(
                     label="Enable Face Parsing",
@@ -609,26 +651,32 @@ with gr.Blocks(css=css) as interface:
                 interactive=True,
 
             )
-            …
+
+
+            with gr.Accordion("Crop Mask", open=False):
+                crop_top = gr.Slider(label="Top", minimum=0, maximum=511, value=0, step=1, interactive=True)
+                crop_bott = gr.Slider(label="Bottom", minimum=0, maximum=511, value=511, step=1, interactive=True)
+                crop_left = gr.Slider(label="Left", minimum=0, maximum=511, value=0, step=1, interactive=True)
+                crop_right = gr.Slider(label="Right", minimum=0, maximum=511, value=511, step=1, interactive=True)
+
+
+            erode_amount = gr.Slider(
+                label="Mask Erode",
                 minimum=0,
+                maximum=1,
+                value=MASK_ERODE_AMOUNT,
+                step=0.05,
                 interactive=True,
             )
 
-            …
-            with gr.Accordion("Crop Mask", open=False):
-                crop_top = gr.Number(label="Top", value=0, minimum=0, interactive=True)
-                crop_bott = gr.Number(label="Bottom", value=0, minimum=0, interactive=True)
-                crop_left = gr.Number(label="Left", value=0, minimum=0, interactive=True)
-                crop_right = gr.Number(label="Right", value=0, minimum=0, interactive=True)
+            blur_amount = gr.Slider(
+                label="Mask Blur",
+                minimum=0,
+                maximum=1,
+                value=MASK_BLUR_AMOUNT,
+                step=0.05,
+                interactive=True,
+            )
 
             enable_laplacian_blend = gr.Checkbox(
                 label="Laplacian Blending",
@@ -636,9 +684,6 @@ with gr.Blocks(css=css) as interface:
                 interactive=True,
             )
 
-            face_enhancer_name = gr.Dropdown(
-                FACE_ENHANCER_LIST, label="Face Enhancer", value="NONE", multiselect=False, interactive=True
-            )
 
             source_image_input = gr.Image(
                 label="Source face", type="filepath", interactive=True
@@ -830,6 +875,7 @@ with gr.Blocks(css=css) as interface:
         mask_soft_kernel,
         mask_soft_iterations,
         blur_amount,
+        erode_amount,
         face_scale,
         enable_laplacian_blend,
         crop_top,
@@ -848,7 +894,7 @@ with gr.Blocks(css=css) as interface:
     ]
 
     swap_event = swap_button.click(
-        fn=process, inputs=swap_inputs, outputs=swap_outputs, show_progress=True
+        fn=process, inputs=swap_inputs, outputs=swap_outputs, show_progress=True
    )
 
    cancel_button.click(
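A minimal, self-contained sketch (not part of the commit) of how the new crop sliders in app.py are turned into the crop_mask tuple that is later handed to paste_to_whole(); the helper name build_crop_mask is hypothetical, but the swap-and-subtract logic mirrors the added lines above, where 511 comes from the 512x512 face crop.

def build_crop_mask(crop_top, crop_bott, crop_left, crop_right):
    # Sliders may be dragged past each other, so order each pair first.
    if crop_top > crop_bott:
        crop_top, crop_bott = crop_bott, crop_top
    if crop_left > crop_right:
        crop_left, crop_right = crop_right, crop_left
    # Bottom/right sliders default to 511, i.e. "no crop" maps to 0 here.
    return (crop_top, 511 - crop_bott, crop_left, 511 - crop_right)

print(build_crop_mask(10, 500, 0, 511))  # -> (10, 11, 0, 0)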
assets/pretrained_models/open-nsfw.onnx ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:864bb37bf8863564b87eb330ab8c785a79a773f4e7c43cb96db52ed8611305fa
+size 23590724
face_analyser.py CHANGED
@@ -192,4 +192,3 @@ def get_analysed_data(face_analyser, image_sequence, source_data, swap_condition
         num_faces_per_frame.append(n_faces)
 
     return analysed_target_list, analysed_source_list, whole_frame_eql_list, num_faces_per_frame
-
face_enhancer.py CHANGED
@@ -1,4 +1,5 @@
 import os
+import cv2
 import torch
 import gfpgan
 from PIL import Image
@@ -22,6 +23,7 @@ supported_enhancers = {
     "REAL-ESRGAN 8x": ("./assets/pretrained_models/RealESRGAN_x8.pth", realesrgan_runner)
 }
 
+cv2_interpolations = ["LANCZOS4", "CUBIC", "NEAREST"]
 
 def get_available_enhancer_names():
     available = []
@@ -33,9 +35,10 @@ def get_available_enhancer_names():
 
 
 def load_face_enhancer_model(name='GFPGAN', device="cpu"):
-    assert name in get_available_enhancer_names(), f"Face enhancer {name} unavailable."
-    …
+    assert name in get_available_enhancer_names() + cv2_interpolations, f"Face enhancer {name} unavailable."
+    if name in supported_enhancers.keys():
+        model_path, model_runner = supported_enhancers.get(name)
+        model_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), model_path)
     if name == 'GFPGAN':
         model = gfpgan.GFPGANer(model_path=model_path, upscale=1, device=device)
     elif name == 'REAL-ESRGAN 2x':
@@ -47,6 +50,15 @@ def load_face_enhancer_model(name='GFPGAN', device="cpu"):
     elif name == 'REAL-ESRGAN 8x':
         model = RealESRGAN(device, scale=8)
         model.load_weights(model_path, download=False)
+    elif name == 'LANCZOS4':
+        model = None
+        model_runner = lambda img, _: cv2.resize(img, (512,512), interpolation=cv2.INTER_LANCZOS4)
+    elif name == 'CUBIC':
+        model = None
+        model_runner = lambda img, _: cv2.resize(img, (512,512), interpolation=cv2.INTER_CUBIC)
+    elif name == 'NEAREST':
+        model = None
+        model_runner = lambda img, _: cv2.resize(img, (512,512), interpolation=cv2.INTER_NEAREST)
     else:
         model = None
     return (model, model_runner)
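A minimal, self-contained sketch (not part of the commit): the new OpenCV "enhancer" entries skip model loading entirely and register a runner with the same (image, model) calling convention as the neural enhancers, which just resizes the swapped face to 512x512. The dummy input below is a placeholder.

import cv2
import numpy as np

# Same shape of runner the commit registers for "LANCZOS4" / "CUBIC" / "NEAREST".
runner = lambda img, _: cv2.resize(img, (512, 512), interpolation=cv2.INTER_LANCZOS4)

face_128 = np.zeros((128, 128, 3), dtype=np.uint8)  # placeholder swapped face
face_512 = runner(face_128, None)                   # model argument is unused for cv2 modes
print(face_512.shape)                               # (512, 512, 3)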
face_parsing/__init__.py CHANGED
@@ -1,3 +1,3 @@
-from .swap import init_parser, swap_regions, mask_regions, mask_regions_to_list
+from .swap import init_parser, swap_regions, mask_regions, mask_regions_to_list
 from .model import BiSeNet
-from .parse_mask import init_parsing_model, get_parsed_mask
+from .parse_mask import init_parsing_model, get_parsed_mask, SoftErosion
face_parsing/parse_mask.py CHANGED
@@ -10,12 +10,55 @@ import torchvision.transforms as transforms
 
 from . model import BiSeNet
 
+class SoftErosion(nn.Module):
+    def __init__(self, kernel_size=15, threshold=0.6, iterations=1):
+        super(SoftErosion, self).__init__()
+        r = kernel_size // 2
+        self.padding = r
+        self.iterations = iterations
+        self.threshold = threshold
+
+        # Create kernel
+        y_indices, x_indices = torch.meshgrid(torch.arange(0., kernel_size), torch.arange(0., kernel_size))
+        dist = torch.sqrt((x_indices - r) ** 2 + (y_indices - r) ** 2)
+        kernel = dist.max() - dist
+        kernel /= kernel.sum()
+        kernel = kernel.view(1, 1, *kernel.shape)
+        self.register_buffer('weight', kernel)
+
+    def forward(self, x):
+        batch_size = x.size(0)  # Get the batch size
+        output = []
+
+        for i in tqdm(range(batch_size), desc="Soft-Erosion", leave=False):
+            input_tensor = x[i:i+1]  # Take one input tensor from the batch
+            input_tensor = input_tensor.float()  # Convert input to float tensor
+            input_tensor = input_tensor.unsqueeze(1)  # Add a channel dimension
+
+            for _ in range(self.iterations - 1):
+                input_tensor = torch.min(input_tensor, F.conv2d(input_tensor, weight=self.weight,
+                                                                groups=input_tensor.shape[1],
+                                                                padding=self.padding))
+            input_tensor = F.conv2d(input_tensor, weight=self.weight, groups=input_tensor.shape[1],
+                                    padding=self.padding)
+
+            mask = input_tensor >= self.threshold
+            input_tensor[mask] = 1.0
+            input_tensor[~mask] /= input_tensor[~mask].max()
+
+            input_tensor = input_tensor.squeeze(1)  # Remove the extra channel dimension
+            output.append(input_tensor.detach().cpu().numpy())
+
+        return np.array(output)
+
 transform = transforms.Compose([
     transforms.Resize((512, 512)),
     transforms.ToTensor(),
     transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
 ])
 
+
+
 def init_parsing_model(model_path, device="cpu"):
     net = BiSeNet(19)
     net.to(device)
@@ -27,7 +70,10 @@ def transform_images(imgs):
     tensor_images = torch.stack([transform(Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))) for img in imgs], dim=0)
     return tensor_images
 
-def get_parsed_mask(net, imgs, classes=[1, 2, 3, 4, 5, 10, 11, 12, 13], device="cpu", batch_size=8):
+def get_parsed_mask(net, imgs, classes=[1, 2, 3, 4, 5, 10, 11, 12, 13], device="cpu", batch_size=8, softness=20):
+    if softness > 0:
+        smooth_mask = SoftErosion(kernel_size=17, threshold=0.9, iterations=softness).to(device)
+
     masks = []
     for i in tqdm(range(0, len(imgs), batch_size), total=len(imgs) // batch_size, desc="Face-parsing"):
         batch_imgs = imgs[i:i + batch_size]
@@ -35,16 +81,27 @@ def get_parsed_mask(net, imgs, classes=[1, 2, 3, 4, 5, 10, 11, 12, 13], device="
         tensor_images = transform_images(batch_imgs).to(device)
         with torch.no_grad():
             out = net(tensor_images)[0]
-        parsing = out.argmax(dim=1)
-        …
+        # parsing = out.argmax(dim=1)
+        # arget_classes = torch.tensor(classes).to(device)
+        # batch_masks = torch.isin(parsing, target_classes).to(device)
+        ## torch.isin was slightly slower in my test, so using np.isin
+        parsing = out.argmax(dim=1).detach().cpu().numpy()
+        batch_masks = np.isin(parsing, classes).astype('float32')
 
-        …
+        if softness > 0:
+            # batch_masks = smooth_mask(batch_masks).transpose(1,0,2,3)[0]
+            mask_tensor = torch.from_numpy(batch_masks.copy()).float().to(device)
+            batch_masks = smooth_mask(mask_tensor).transpose(1,0,2,3)[0]
 
-        …
+        yield batch_masks
 
-        …
+        #masks.append(batch_masks)
 
-        …
+    #if len(masks) >= 1:
+    #    masks = np.concatenate(masks, axis=0)
+    #    masks = np.repeat(np.expand_dims(masks, axis=1), 3, axis=1)
 
+    #    for i, mask in enumerate(masks):
+    #        cv2.imwrite(f"mask/{i}.jpg", (mask * 255).astype("uint8"))
 
+    #return masks
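A minimal sketch (not part of the commit) of what the new softness path does: SoftErosion feathers a hard 0/1 parsing mask before it reaches paste_to_whole(). The mask contents and sizes below are placeholders; the reshape mirrors the transpose used in get_parsed_mask above.

import torch
import numpy as np
from face_parsing import SoftErosion  # re-exported by this commit

hard_mask = np.zeros((1, 512, 512), dtype="float32")
hard_mask[:, 100:400, 100:400] = 1.0                  # fake face region

smooth = SoftErosion(kernel_size=17, threshold=0.9, iterations=10)
soft = smooth(torch.from_numpy(hard_mask))            # numpy array of shape (1, 1, 512, 512)
soft = soft.transpose(1, 0, 2, 3)[0]                  # same reshaping the generator applies
print(soft.shape, soft.min(), soft.max())             # (1, 512, 512), values in [0, 1]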
face_swapper.py CHANGED
@@ -5,10 +5,13 @@ import cv2
 import onnxruntime
 import numpy as np
 from tqdm import tqdm
+import torch.nn as nn
 from onnx import numpy_helper
 from skimage import transform as trans
 import torchvision.transforms.functional as F
-…
+import torch.nn.functional as F
+from utils import mask_crop, laplacian_blending
+
 
 arcface_dst = np.array(
     [[38.2946, 51.6963], [73.5318, 51.5014], [56.0252, 71.7366],
@@ -46,72 +49,53 @@ class Inswapper():
         model = onnx.load(self.model_file)
         graph = model.graph
         self.emap = numpy_helper.to_array(graph.initializer[-1])
-        self.input_mean = 0.0
-        self.input_std = 255.0
 
         self.session_options = onnxruntime.SessionOptions()
         self.session = onnxruntime.InferenceSession(self.model_file, sess_options=self.session_options, providers=providers)
 
-        inputs = self.session.get_inputs()
-        self.input_names = [inp.name for inp in inputs]
-        outputs = self.session.get_outputs()
-        self.output_names = [out.name for out in outputs]
-        assert len(self.output_names) == 1
-        self.output_shape = outputs[0].shape
-        input_cfg = inputs[0]
-        input_shape = input_cfg.shape
-        self.input_shape = input_shape
-        self.input_size = tuple(input_shape[2:4][::-1])
-
     def forward(self, imgs, latents):
         preds = []
         for img, latent in zip(imgs, latents):
-            img = …
-            pred = self.session.run(…
+            img = img / 255
+            pred = self.session.run(['output'], {'target': img, 'source': latent})[0]
             preds.append(pred)
 
     def get(self, imgs, target_faces, source_faces):
         imgs = list(imgs)
 
         preds = [None] * len(imgs)
-        aimgs = [None] * len(imgs)
         matrs = [None] * len(imgs)
 
         for idx, (img, target_face, source_face) in enumerate(zip(imgs, target_faces, source_faces)):
-            …
-            matrs[idx] = M
-            pred = self.session.run(self.output_names, {self.input_names[0]: blob, self.input_names[1]: latent})[0]
+            matrix, blob, latent = self.prepare_data(img, target_face, source_face)
+            pred = self.session.run(['output'], {'target': blob, 'source': latent})[0]
             pred = pred.transpose((0, 2, 3, 1))[0]
             pred = np.clip(255 * pred, 0, 255).astype(np.uint8)[:, :, ::-1]
+
             preds[idx] = pred
+            matrs[idx] = matrix
 
-        return (preds, …
+        return (preds, matrs)
 
     def prepare_data(self, img, target_face, source_face):
         if isinstance(img, str):
             img = cv2.imread(img)
 
-        …
+        aligned_img, matrix = norm_crop2(img, target_face.kps, 128)
 
-        blob = cv2.dnn.blobFromImage(…
-            (self.input_mean, self.input_mean, self.input_mean), swapRB=True)
+        blob = cv2.dnn.blobFromImage(aligned_img, 1.0 / 255, (128, 128), (0., 0., 0.), swapRB=True)
 
         latent = source_face.normed_embedding.reshape((1, -1))
         latent = np.dot(latent, self.emap)
         latent /= np.linalg.norm(latent)
 
-        return (…
+        return (matrix, blob, latent)
 
     def batch_forward(self, img_list, target_f_list, source_f_list):
         num_samples = len(img_list)
         num_batches = (num_samples + self.batch_size - 1) // self.batch_size
 
-        preds = []
-        aimgs = []
-        matrs = []
-
-        for i in tqdm(range(num_batches), desc="Swapping face"):
+        for i in tqdm(range(num_batches), desc="Generating face"):
             start_idx = i * self.batch_size
             end_idx = min((i + 1) * self.batch_size, num_samples)
 
@@ -119,47 +103,48 @@ class Inswapper():
             batch_target_f = target_f_list[start_idx:end_idx]
             batch_source_f = source_f_list[start_idx:end_idx]
 
-            batch_pred, … = self.get(batch_img, batch_target_f, batch_source_f)
-            preds.extend(batch_pred)
-            aimgs.extend(batch_aimg)
-            matrs.extend(batch_matr)
-
-        return (preds, aimgs, matrs)
-
+            batch_pred, batch_matr = self.get(batch_img, batch_target_f, batch_source_f)
 
-…
-    IM = cv2.invertAffineTransform(M)
+            yield batch_pred, batch_matr
 
-    img_white = make_white_image(aimg.shape[:2], crop=crop_mask, white_value=255)
 
-    …
+def paste_to_whole(foreground, background, matrix, mask=None, crop_mask=(0,0,0,0), blur_amount=0.1, erode_amount = 0.15, blend_method='linear'):
+    inv_matrix = cv2.invertAffineTransform(matrix)
+    fg_shape = foreground.shape[:2]
+    bg_shape = (background.shape[1], background.shape[0])
+    foreground = cv2.warpAffine(foreground, inv_matrix, bg_shape, borderValue=0.0)
 
-    matrix = cv2.invertAffineTransform(matrix)
-    mask = np.ones(foreground.shape, dtype="float32")
-    foreground = cv2.warpAffine(foreground, matrix, (background.shape[1], background.shape[0]), borderValue=0.0)
-    mask = cv2.warpAffine(mask, matrix, (background.shape[1], background.shape[0]), borderValue=0.0)
-    composite_image = mask * foreground + (1 - mask) * background
-    return composite_image
+    if mask is None:
+        mask = np.full(fg_shape, 1., dtype=np.float32)
+        mask = mask_crop(mask, crop_mask)
+        mask = cv2.warpAffine(mask, inv_matrix, bg_shape, borderValue=0.0)
+    else:
+        assert fg_shape == mask.shape[:2], "foreground & mask shape mismatch!"
+        mask = mask_crop(mask, crop_mask).astype('float32')
+        mask = cv2.warpAffine(mask, inv_matrix, (background.shape[1], background.shape[0]), borderValue=0.0)
+
+    _mask = mask.copy()
+    _mask[_mask > 0.05] = 1.
+    non_zero_points = cv2.findNonZero(_mask)
+    _, _, w, h = cv2.boundingRect(non_zero_points)
+    mask_size = int(np.sqrt(w * h))
+
+    if erode_amount > 0:
+        kernel_size = max(int(mask_size * erode_amount), 1)
+        structuring_element = cv2.getStructuringElement(cv2.MORPH_RECT, (kernel_size, kernel_size))
+        mask = cv2.erode(mask, structuring_element)
+
+    if blur_amount > 0:
+        kernel_size = max(int(mask_size * blur_amount), 3)
+        if kernel_size % 2 == 0:
+            kernel_size += 1
+        mask = cv2.GaussianBlur(mask, (kernel_size, kernel_size), 0)
+
+    mask = np.tile(np.expand_dims(mask, axis=-1), (1, 1, 3))
+
+    if blend_method == 'laplacian':
+        composite_image = laplacian_blending(foreground, background, mask.clip(0,1), num_levels=4)
+    else:
+        composite_image = mask * foreground + (1 - mask) * background
 
+    return composite_image.astype("uint8").clip(0, 255)
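A minimal sketch (not part of the commit) of the reworked paste-back call: a 512x512 swapped face is warped back into the original frame with an optional parsing mask, crop mask, erode/blur softening and a choice of blend method. All arrays and the affine matrix below are placeholders; in app.py the matrix comes from norm_crop2 and is rescaled with m /= 0.25 first.

import numpy as np
from face_swapper import paste_to_whole

face_512 = np.zeros((512, 512, 3), dtype=np.uint8)        # enhanced swap result
frame = np.zeros((720, 1280, 3), dtype=np.uint8)          # original frame
matrix = np.array([[0.25, 0.0, 100.0],
                   [0.0, 0.25, 50.0]], dtype=np.float32)  # placeholder frame->crop affine

out = paste_to_whole(
    face_512, frame, matrix,
    mask=None,                  # falls back to a full white mask
    crop_mask=(0, 0, 0, 0),     # (top, bottom, left, right), 0 = no crop
    blur_amount=0.1, erode_amount=0.15,
    blend_method="linear",      # or "laplacian"
)
print(out.shape, out.dtype)     # (720, 1280, 3) uint8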
nsfw_checker/LICENSE.md ADDED
@@ -0,0 +1,11 @@
+
+Copyright 2016, Yahoo Inc.
+
+Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
nsfw_checker/__init__.py ADDED
@@ -0,0 +1 @@
+from . opennsfw import NSFWChecker
nsfw_checker/opennsfw.py ADDED
@@ -0,0 +1,37 @@
+import cv2
+import torch
+import onnx
+import onnxruntime
+import numpy as np
+from tqdm import tqdm
+
+# https://github.com/yahoo/open_nsfw
+
+class NSFWChecker:
+    def __init__(self, model_path=None, providers=["CPUExecutionProvider"]):
+        model = onnx.load(model_path)
+        self.input_name = model.graph.input[0].name
+        session_options = onnxruntime.SessionOptions()
+        self.session = onnxruntime.InferenceSession(model_path, sess_options=session_options, providers=providers)
+
+    def is_nsfw(self, img_paths, threshold = 0.85):
+        skip_step = 1
+        total_len = len(img_paths)
+        if total_len < 100: skip_step = 1
+        if total_len > 100 and total_len < 500: skip_step = 10
+        if total_len > 500 and total_len < 1000: skip_step = 20
+        if total_len > 1000 and total_len < 10000: skip_step = 50
+        if total_len > 10000: skip_step = 100
+
+        for idx in tqdm(range(0, total_len, skip_step), total=int(total_len // skip_step), desc="Checking for NSFW contents"):
+            img = cv2.imread(img_paths[idx])
+            img = cv2.resize(img, (224,224)).astype('float32')
+            img -= np.array([104, 117, 123], dtype=np.float32)
+            img = np.expand_dims(img, axis=0)
+
+            score = self.session.run(None, {self.input_name:img})[0][0][1]
+
+            if score > threshold:
+                print(f"Detected nsfw score:{score}")
+                return True
+        return False
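A minimal sketch (not part of the commit) of how app.py drives the new checker: it is constructed once with the bundled open-nsfw.onnx model and then handed the frame paths of the current job, sampling every skip_step-th frame. The frame paths below are hypothetical.

from nsfw_checker import NSFWChecker

checker = NSFWChecker(
    model_path="./assets/pretrained_models/open-nsfw.onnx",
    providers=["CPUExecutionProvider"],
)
frame_paths = ["frames/0001.jpg", "frames/0002.jpg"]  # placeholder image sequence
if checker.is_nsfw(frame_paths, threshold=0.85):
    print("NSFW content detected - aborting swap")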
requirements.txt CHANGED
@@ -9,5 +9,4 @@ onnxruntime==1.15.0
 opencv-python>=4.7.0.72
 opencv-python-headless>=4.7.0.72
 gfpgan==1.3.8
-timm==0.9.2
 
utils.py CHANGED
@@ -2,7 +2,6 @@ import os
 import cv2
 import time
 import glob
-import torch
 import shutil
 import platform
 import datetime
@@ -11,7 +10,6 @@ import numpy as np
 from threading import Thread
 from moviepy.editor import VideoFileClip, ImageSequenceClip
 from moviepy.video.io.ffmpeg_tools import ffmpeg_extract_subclip
-from face_parsing import init_parser, swap_regions, mask_regions, mask_regions_to_list, SoftErosion
 
 
 logo_image = cv2.imread("./assets/images/logo.png", cv2.IMREAD_UNCHANGED)
@@ -69,7 +67,7 @@ def trim_video(video_path, output_path, start_frame, stop_frame):
     os.makedirs(temp_path, exist_ok=True)
     trimmed_video_file_path = os.path.join(temp_path, trimmed_video_filename)
 
-    video = VideoFileClip(video_path)
+    video = VideoFileClip(video_path, fps_source="fps")
     fps = video.fps
     start_time = start_frame / fps
     duration = (stop_frame - start_frame) / fps
@@ -174,7 +172,7 @@ def split_list_by_lengths(data, length_list):
 
 
 def merge_img_sequence_from_ref(ref_video_path, image_sequence, output_file_name):
-    video_clip = VideoFileClip(ref_video_path)
+    video_clip = VideoFileClip(ref_video_path, fps_source="fps")
     fps = video_clip.fps
     duration = video_clip.duration
     total_frames = video_clip.reader.nframes
@@ -224,12 +222,12 @@ def scale_bbox_from_center(bbox, scale_width, scale_height, image_width, image_h
     return scaled_bbox
 
 
-def laplacian_blending(A, B, m, num_levels=4):
+def laplacian_blending(A, B, m, num_levels=7):
     assert A.shape == B.shape
     assert B.shape == m.shape
     height = m.shape[0]
     width = m.shape[1]
-    size_list = np.array([4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096])
+    size_list = np.array([4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192])
     size = size_list[np.where(size_list > max(height, width))][0]
     GA = np.zeros((size, size, 3), dtype=np.float32)
     GA[:height, :width, :] = A
@@ -264,30 +262,42 @@ def laplacian_blending(A, B, m, num_levels=4):
     for i in range(1,num_levels):
         ls_ = cv2.pyrUp(ls_)
         ls_ = cv2.add(ls_, LS[i])
-    ls_ = …
-    …
-    def …
-    …
+    ls_ = ls_[:height, :width, :]
+    #ls_ = (ls_ - np.min(ls_)) * (255.0 / (np.max(ls_) - np.min(ls_)))
+    return ls_.clip(0, 255)
+
+
+def mask_crop(mask, crop):
+    top, bottom, left, right = crop
+    shape = mask.shape
+    top = int(top)
+    bottom = int(bottom)
+    if top + bottom < shape[1]:
+        if top > 0: mask[:top, :] = 0
+        if bottom > 0: mask[-bottom:, :] = 0
+
+    left = int(left)
+    right = int(right)
+    if left + right < shape[0]:
+        if left > 0: mask[:, :left] = 0
+        if right > 0: mask[:, -right:] = 0
+
+    return mask
+
+def create_image_grid(images, size=128):
+    num_images = len(images)
+    num_cols = int(np.ceil(np.sqrt(num_images)))
+    num_rows = int(np.ceil(num_images / num_cols))
+    grid = np.zeros((num_rows * size, num_cols * size, 3), dtype=np.uint8)
+
+    for i, image in enumerate(images):
+        row_idx = (i // num_cols) * size
+        col_idx = (i % num_cols) * size
+        image = cv2.resize(image.copy(), (size,size))
+        if image.dtype != np.uint8:
+            image = (image.astype('float32') * 255).astype('uint8')
+        if image.ndim == 2:
+            image = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
+        grid[row_idx:row_idx + size, col_idx:col_idx + size] = image
+
+    return grid