Harisreedhar committed
Commit db275a2 • 1 Parent(s): 638204f

update nsfw-checker
app.py CHANGED
@@ -12,16 +12,18 @@ import insightface
12
  import onnxruntime
13
  import numpy as np
14
  import gradio as gr
 
 
15
  from tqdm import tqdm
16
  import concurrent.futures
17
  from moviepy.editor import VideoFileClip
18
 
19
- from nsfw_detector import get_nsfw_detector
20
- from face_swapper import Inswapper, paste_to_whole, place_foreground_on_background
21
  from face_analyser import detect_conditions, get_analysed_data, swap_options_list
22
- from face_enhancer import get_available_enhancer_names, load_face_enhancer_model
23
- from face_parsing import init_parser, swap_regions, mask_regions, mask_regions_to_list, SoftErosion
24
- from utils import trim_video, StreamerThread, ProcessBar, open_directory, split_list_by_lengths, merge_img_sequence_from_ref
25
 
26
  ## ------------------------------ USER ARGS ------------------------------
27
 
@@ -39,7 +41,7 @@ user_args = parser.parse_args()
39
  USE_COLAB = user_args.colab
40
  USE_CUDA = user_args.cuda
41
  DEF_OUTPUT_PATH = user_args.out_dir
42
- BATCH_SIZE = user_args.batch_size
43
  WORKSPACE = None
44
  OUTPUT_FILE = None
45
  CURRENT_FRAME = None
@@ -60,8 +62,9 @@ MASK_INCLUDE = [
60
  "U-Lip"
61
  ]
62
  MASK_SOFT_KERNEL = 17
63
- MASK_SOFT_ITERATIONS = 7
64
- MASK_BLUR_AMOUNT = 20
 
65
 
66
  FACE_SWAPPER = None
67
  FACE_ANALYSER = None
@@ -70,7 +73,7 @@ FACE_PARSER = None
70
  NSFW_DETECTOR = None
71
  FACE_ENHANCER_LIST = ["NONE"]
72
  FACE_ENHANCER_LIST.extend(get_available_enhancer_names())
73
-
74
 
75
  ## ------------------------------ SET EXECUTION PROVIDER ------------------------------
76
  # Note: Non CUDA users may change settings here
@@ -113,12 +116,12 @@ def load_face_swapper_model(path="./assets/pretrained_models/inswapper_128.onnx"
113
  def load_face_parser_model(path="./assets/pretrained_models/79999_iter.pth"):
114
  global FACE_PARSER
115
  if FACE_PARSER is None:
116
- FACE_PARSER = init_parser(path, mode=device)
117
 
118
- def load_nsfw_detector_model(path="./assets/pretrained_models/nsfwmodel_281.pth"):
119
  global NSFW_DETECTOR
120
  if NSFW_DETECTOR is None:
121
- NSFW_DETECTOR = get_nsfw_detector(model_path=path, device=device)
122
 
123
 
124
  load_face_analyser_model()
@@ -145,6 +148,7 @@ def process(
145
  mask_soft_kernel,
146
  mask_soft_iterations,
147
  blur_amount,
 
148
  face_scale,
149
  enable_laplacian_blend,
150
  crop_top,
@@ -189,6 +193,7 @@ def process(
189
  get_finsh_text = lambda start_time: f"✔️ Completed in {int(total_exec_time(start_time)[0])} min {int(total_exec_time(start_time)[1])} sec."
190
 
191
  ## ------------------------------ PREPARE INPUTS & LOAD MODELS ------------------------------
 
192
  yield "### \n βŒ› Loading NSFW detector model...", *ui_before()
193
  load_nsfw_detector_model()
194
 
@@ -199,7 +204,8 @@ def process(
199
  load_face_swapper_model()
200
 
201
  if face_enhancer_name != "NONE":
202
- yield f"### \n βŒ› Loading {face_enhancer_name} model...", *ui_before()
 
203
  FACE_ENHANCER = load_face_enhancer_model(name=face_enhancer_name, device=device)
204
  else:
205
  FACE_ENHANCER = None
@@ -209,15 +215,19 @@ def process(
209
  load_face_parser_model()
210
 
211
  includes = mask_regions_to_list(mask_includes)
212
- smooth_mask = SoftErosion(kernel_size=17, threshold=0.9, iterations=int(mask_soft_iterations)).to(device) if mask_soft_iterations > 0 else None
213
  specifics = list(specifics)
214
  half = len(specifics) // 2
215
  sources = specifics[:half]
216
  specifics = specifics[half:]
217
-
218
- ## ------------------------------ ANALYSE & SWAP FUNC ------------------------------
 
 
 
219
 
220
  def swap_process(image_sequence):
 
 
221
  yield "### \n βŒ› Checking contents...", *ui_before()
222
  nsfw = NSFW_DETECTOR.is_nsfw(image_sequence)
223
  if nsfw:
@@ -227,6 +237,8 @@ def process(
227
  return False
228
  EMPTY_CACHE()
229
 
 
 
230
  yield "### \n βŒ› Analysing face data...", *ui_before()
231
  if condition != "Specific Face":
232
  source_data = source_path, age
@@ -241,81 +253,99 @@ def process(
241
  scale=face_scale
242
  )
243
 
244
- yield "### \n βŒ› Swapping faces...", *ui_before()
245
- preds, aimgs, matrs = FACE_SWAPPER.batch_forward(whole_frame_list, analysed_targets, analysed_sources)
246
- EMPTY_CACHE()
247
 
248
- if enable_face_parser:
249
- yield "### \n βŒ› Applying face-parsing mask...", *ui_before()
250
- for idx, (pred, aimg) in tqdm(enumerate(zip(preds, aimgs)), total=len(preds), desc="Face parsing"):
251
- preds[idx] = swap_regions(pred, aimg, FACE_PARSER, smooth_mask, includes=includes, blur=int(blur_amount))
252
- EMPTY_CACHE()
254
  if face_enhancer_name != "NONE":
255
- yield f"### \n βŒ› Enhancing faces with {face_enhancer_name}...", *ui_before()
256
- for idx, pred in tqdm(enumerate(preds), total=len(preds), desc=f"{face_enhancer_name}"):
257
  enhancer_model, enhancer_model_runner = FACE_ENHANCER
258
  pred = enhancer_model_runner(pred, enhancer_model)
259
  preds[idx] = cv2.resize(pred, (512,512))
260
- aimgs[idx] = cv2.resize(aimgs[idx], (512,512))
261
- matrs[idx] /= 0.25
262
-
263
  EMPTY_CACHE()
264
265
  split_preds = split_list_by_lengths(preds, num_faces_per_frame)
266
  del preds
267
- split_aimgs = split_list_by_lengths(aimgs, num_faces_per_frame)
268
- del aimgs
269
  split_matrs = split_list_by_lengths(matrs, num_faces_per_frame)
270
  del matrs
 
 
271
 
272
- yield "### \n βŒ› Post-processing...", *ui_before()
273
- def post_process(frame_idx, frame_img, split_preds, split_aimgs, split_matrs, enable_laplacian_blend, crop_top, crop_bott, crop_left, crop_right):
 
 
274
  whole_img_path = frame_img
275
  whole_img = cv2.imread(whole_img_path)
276
- for p, a, m in zip(split_preds[frame_idx], split_aimgs[frame_idx], split_matrs[frame_idx]):
277
- whole_img = paste_to_whole(p, a, m, whole_img, laplacian_blend=enable_laplacian_blend, crop_mask=(crop_top, crop_bott, crop_left, crop_right))
 
 
 
 
278
  cv2.imwrite(whole_img_path, whole_img)
279
 
280
- def concurrent_post_process(image_sequence, split_preds, split_aimgs, split_matrs, enable_laplacian_blend, crop_top, crop_bott, crop_left, crop_right):
281
  with concurrent.futures.ThreadPoolExecutor() as executor:
282
  futures = []
283
  for idx, frame_img in enumerate(image_sequence):
284
- future = executor.submit(
285
- post_process,
286
- idx,
287
- frame_img,
288
- split_preds,
289
- split_aimgs,
290
- split_matrs,
291
- enable_laplacian_blend,
292
- crop_top,
293
- crop_bott,
294
- crop_left,
295
- crop_right
296
- )
297
  futures.append(future)
298
 
299
- for future in tqdm(concurrent.futures.as_completed(futures), total=len(futures), desc="Post-Processing"):
300
- try:
301
- result = future.result()
302
- except Exception as e:
303
- print(f"An error occurred: {e}")
304
 
305
  concurrent_post_process(
306
  image_sequence,
307
  split_preds,
308
- split_aimgs,
309
  split_matrs,
 
310
  enable_laplacian_blend,
311
- crop_top,
312
- crop_bott,
313
- crop_left,
314
- crop_right
315
  )
316
 
317
 
318
-
319
  ## ------------------------------ IMAGE ------------------------------
320
 
321
  if input_type == "Image":
@@ -496,7 +526,7 @@ def stop_running():
496
  if hasattr(STREAMER, "stop"):
497
  STREAMER.stop()
498
  STREAMER = None
499
- yield "cancelled !"
500
 
501
 
502
  def slider_changed(show_frame, video_path, frame_index):
@@ -581,6 +611,18 @@ with gr.Blocks(css=css) as interface:
581
  )
582
 
583
  with gr.Tab("πŸͺ„ Other Settings"):
 
 
 
 
 
 
 
 
 
 
 
 
584
  with gr.Accordion("Advanced Mask", open=False):
585
  enable_face_parser_mask = gr.Checkbox(
586
  label="Enable Face Parsing",
@@ -609,26 +651,32 @@ with gr.Blocks(css=css) as interface:
609
  interactive=True,
610
 
611
  )
612
- blur_amount = gr.Number(
613
- label="Mask Blur",
614
- value=MASK_BLUR_AMOUNT,
 
 
 
 
 
 
 
 
615
  minimum=0,
 
 
 
616
  interactive=True,
617
  )
618
 
619
- face_scale = gr.Slider(
620
- label="Face Scale",
621
- minimum=0,
622
- maximum=2,
623
- value=1,
624
- interactive=True,
625
- )
626
-
627
- with gr.Accordion("Crop Mask", open=False):
628
- crop_top = gr.Number(label="Top", value=0, minimum=0, interactive=True)
629
- crop_bott = gr.Number(label="Bottom", value=0, minimum=0, interactive=True)
630
- crop_left = gr.Number(label="Left", value=0, minimum=0, interactive=True)
631
- crop_right = gr.Number(label="Right", value=0, minimum=0, interactive=True)
632
 
633
  enable_laplacian_blend = gr.Checkbox(
634
  label="Laplacian Blending",
@@ -636,9 +684,6 @@ with gr.Blocks(css=css) as interface:
636
  interactive=True,
637
  )
638
 
639
- face_enhancer_name = gr.Dropdown(
640
- FACE_ENHANCER_LIST, label="Face Enhancer", value="NONE", multiselect=False, interactive=True
641
- )
642
 
643
  source_image_input = gr.Image(
644
  label="Source face", type="filepath", interactive=True
@@ -830,6 +875,7 @@ with gr.Blocks(css=css) as interface:
830
  mask_soft_kernel,
831
  mask_soft_iterations,
832
  blur_amount,
 
833
  face_scale,
834
  enable_laplacian_blend,
835
  crop_top,
@@ -848,7 +894,7 @@ with gr.Blocks(css=css) as interface:
848
  ]
849
 
850
  swap_event = swap_button.click(
851
- fn=process, inputs=swap_inputs, outputs=swap_outputs, show_progress=True,
852
  )
853
 
854
  cancel_button.click(
 
12
  import onnxruntime
13
  import numpy as np
14
  import gradio as gr
15
+ import threading
16
+ import queue
17
  from tqdm import tqdm
18
  import concurrent.futures
19
  from moviepy.editor import VideoFileClip
20
 
21
+ from nsfw_checker import NSFWChecker
22
+ from face_swapper import Inswapper, paste_to_whole
23
  from face_analyser import detect_conditions, get_analysed_data, swap_options_list
24
+ from face_parsing import init_parsing_model, get_parsed_mask, mask_regions, mask_regions_to_list
25
+ from face_enhancer import get_available_enhancer_names, load_face_enhancer_model, cv2_interpolations
26
+ from utils import trim_video, StreamerThread, ProcessBar, open_directory, split_list_by_lengths, merge_img_sequence_from_ref, create_image_grid
27
 
28
  ## ------------------------------ USER ARGS ------------------------------
29
 
 
41
  USE_COLAB = user_args.colab
42
  USE_CUDA = user_args.cuda
43
  DEF_OUTPUT_PATH = user_args.out_dir
44
+ BATCH_SIZE = int(user_args.batch_size)
45
  WORKSPACE = None
46
  OUTPUT_FILE = None
47
  CURRENT_FRAME = None
 
62
  "U-Lip"
63
  ]
64
  MASK_SOFT_KERNEL = 17
65
+ MASK_SOFT_ITERATIONS = 10
66
+ MASK_BLUR_AMOUNT = 0.1
67
+ MASK_ERODE_AMOUNT = 0.15
68
 
69
  FACE_SWAPPER = None
70
  FACE_ANALYSER = None
 
73
  NSFW_DETECTOR = None
74
  FACE_ENHANCER_LIST = ["NONE"]
75
  FACE_ENHANCER_LIST.extend(get_available_enhancer_names())
76
+ FACE_ENHANCER_LIST.extend(cv2_interpolations)
77
 
78
  ## ------------------------------ SET EXECUTION PROVIDER ------------------------------
79
  # Note: Non CUDA users may change settings here
 
116
  def load_face_parser_model(path="./assets/pretrained_models/79999_iter.pth"):
117
  global FACE_PARSER
118
  if FACE_PARSER is None:
119
+ FACE_PARSER = init_parsing_model(path, device=device)
120
 
121
+ def load_nsfw_detector_model(path="./assets/pretrained_models/open-nsfw.onnx"):
122
  global NSFW_DETECTOR
123
  if NSFW_DETECTOR is None:
124
+ NSFW_DETECTOR = NSFWChecker(model_path=path, providers=PROVIDER)
125
 
126
 
127
  load_face_analyser_model()
 
148
  mask_soft_kernel,
149
  mask_soft_iterations,
150
  blur_amount,
151
+ erode_amount,
152
  face_scale,
153
  enable_laplacian_blend,
154
  crop_top,
 
193
  get_finsh_text = lambda start_time: f"✔️ Completed in {int(total_exec_time(start_time)[0])} min {int(total_exec_time(start_time)[1])} sec."
194
 
195
  ## ------------------------------ PREPARE INPUTS & LOAD MODELS ------------------------------
196
+
197
  yield "### \n βŒ› Loading NSFW detector model...", *ui_before()
198
  load_nsfw_detector_model()
199
 
 
204
  load_face_swapper_model()
205
 
206
  if face_enhancer_name != "NONE":
207
+ if face_enhancer_name not in cv2_interpolations:
208
+ yield f"### \n βŒ› Loading {face_enhancer_name} model...", *ui_before()
209
  FACE_ENHANCER = load_face_enhancer_model(name=face_enhancer_name, device=device)
210
  else:
211
  FACE_ENHANCER = None
 
215
  load_face_parser_model()
216
 
217
  includes = mask_regions_to_list(mask_includes)
 
218
  specifics = list(specifics)
219
  half = len(specifics) // 2
220
  sources = specifics[:half]
221
  specifics = specifics[half:]
222
+ if crop_top > crop_bott:
223
+ crop_top, crop_bott = crop_bott, crop_top
224
+ if crop_left > crop_right:
225
+ crop_left, crop_right = crop_right, crop_left
226
+ crop_mask = (crop_top, 511-crop_bott, crop_left, 511-crop_right)
227
 
228
  def swap_process(image_sequence):
229
+ ## ------------------------------ CONTENT CHECK ------------------------------
230
+
231
  yield "### \n βŒ› Checking contents...", *ui_before()
232
  nsfw = NSFW_DETECTOR.is_nsfw(image_sequence)
233
  if nsfw:
 
237
  return False
238
  EMPTY_CACHE()
239
 
240
+ ## ------------------------------ ANALYSE FACE ------------------------------
241
+
242
  yield "### \n βŒ› Analysing face data...", *ui_before()
243
  if condition != "Specific Face":
244
  source_data = source_path, age
 
253
  scale=face_scale
254
  )
255
 
256
+ ## ------------------------------ SWAP FUNC ------------------------------
 
 
257
 
258
+ yield "### \n βŒ› Generating faces...", *ui_before()
259
+ preds = []
260
+ matrs = []
261
+ count = 0
262
+ global PREVIEW
263
+ for batch_pred, batch_matr in FACE_SWAPPER.batch_forward(whole_frame_list, analysed_targets, analysed_sources):
264
+ preds.extend(batch_pred)
265
+ matrs.extend(batch_matr)
266
+ EMPTY_CACHE()
267
+ count += 1
268
+
269
+ if USE_CUDA:
270
+ image_grid = create_image_grid(batch_pred, size=128)
271
+ PREVIEW = image_grid[:, :, ::-1]
272
+ yield f"### \n βŒ› Generating face Batch {count}", *ui_before()
273
 
274
+ ## ------------------------------ FACE ENHANCEMENT ------------------------------
275
+
276
+ generated_len = len(preds)
277
  if face_enhancer_name != "NONE":
278
+ yield f"### \n βŒ› Upscaling faces with {face_enhancer_name}...", *ui_before()
279
+ for idx, pred in tqdm(enumerate(preds), total=generated_len, desc=f"Upscaling with {face_enhancer_name}"):
280
  enhancer_model, enhancer_model_runner = FACE_ENHANCER
281
  pred = enhancer_model_runner(pred, enhancer_model)
282
  preds[idx] = cv2.resize(pred, (512,512))
 
 
 
283
  EMPTY_CACHE()
284
 
285
+ ## ------------------------------ FACE PARSING ------------------------------
286
+
287
+ if enable_face_parser:
288
+ yield "### \n βŒ› Face-parsing mask...", *ui_before()
289
+ masks = []
290
+ count = 0
291
+ for batch_mask in get_parsed_mask(FACE_PARSER, preds, classes=includes, device=device, batch_size=BATCH_SIZE, softness=int(mask_soft_iterations)):
292
+ masks.append(batch_mask)
293
+ EMPTY_CACHE()
294
+ count += 1
295
+
296
+ if len(batch_mask) > 1:
297
+ image_grid = create_image_grid(batch_mask, size=128)
298
+ PREVIEW = image_grid[:, :, ::-1]
299
+ yield f"### \n βŒ› Face parsing Batch {count}", *ui_before()
300
+ masks = np.concatenate(masks, axis=0) if len(masks) >= 1 else masks
301
+ else:
302
+ masks = [None] * generated_len
303
+
304
+ ## ------------------------------ SPLIT LIST ------------------------------
305
+
306
  split_preds = split_list_by_lengths(preds, num_faces_per_frame)
307
  del preds
 
 
308
  split_matrs = split_list_by_lengths(matrs, num_faces_per_frame)
309
  del matrs
310
+ split_masks = split_list_by_lengths(masks, num_faces_per_frame)
311
+ del masks
312
 
313
+ ## ------------------------------ PASTE-BACK ------------------------------
314
+
315
+ yield "### \n βŒ› Pasting back...", *ui_before()
316
+ def post_process(frame_idx, frame_img, split_preds, split_matrs, split_masks, enable_laplacian_blend, crop_mask, blur_amount, erode_amount):
317
  whole_img_path = frame_img
318
  whole_img = cv2.imread(whole_img_path)
319
+ blend_method = 'laplacian' if enable_laplacian_blend else 'linear'
320
+ for p, m, mask in zip(split_preds[frame_idx], split_matrs[frame_idx], split_masks[frame_idx]):
321
+ p = cv2.resize(p, (512,512))
322
+ mask = cv2.resize(mask, (512,512)) if mask is not None else None
323
+ m /= 0.25
324
+ whole_img = paste_to_whole(p, whole_img, m, mask=mask, crop_mask=crop_mask, blend_method=blend_method, blur_amount=blur_amount, erode_amount=erode_amount)
325
  cv2.imwrite(whole_img_path, whole_img)
326
 
327
+ def concurrent_post_process(image_sequence, *args):
328
  with concurrent.futures.ThreadPoolExecutor() as executor:
329
  futures = []
330
  for idx, frame_img in enumerate(image_sequence):
331
+ future = executor.submit(post_process, idx, frame_img, *args)
332
  futures.append(future)
333
 
334
+ for future in tqdm(concurrent.futures.as_completed(futures), total=len(futures), desc="Pasting back"):
335
+ result = future.result()
 
 
 
336
 
337
  concurrent_post_process(
338
  image_sequence,
339
  split_preds,
 
340
  split_matrs,
341
+ split_masks,
342
  enable_laplacian_blend,
343
+ crop_mask,
344
+ blur_amount,
345
+ erode_amount
 
346
  )
347
 
348
 
 
349
  ## ------------------------------ IMAGE ------------------------------
350
 
351
  if input_type == "Image":
 
526
  if hasattr(STREAMER, "stop"):
527
  STREAMER.stop()
528
  STREAMER = None
529
+ return "Cancelled"
530
 
531
 
532
  def slider_changed(show_frame, video_path, frame_index):
 
611
  )
612
 
613
  with gr.Tab("πŸͺ„ Other Settings"):
614
+ face_scale = gr.Slider(
615
+ label="Face Scale",
616
+ minimum=0,
617
+ maximum=2,
618
+ value=1,
619
+ interactive=True,
620
+ )
621
+
622
+ face_enhancer_name = gr.Dropdown(
623
+ FACE_ENHANCER_LIST, label="Face Enhancer", value="NONE", multiselect=False, interactive=True
624
+ )
625
+
626
  with gr.Accordion("Advanced Mask", open=False):
627
  enable_face_parser_mask = gr.Checkbox(
628
  label="Enable Face Parsing",
 
651
  interactive=True,
652
 
653
  )
654
+
655
+
656
+ with gr.Accordion("Crop Mask", open=False):
657
+ crop_top = gr.Slider(label="Top", minimum=0, maximum=511, value=0, step=1, interactive=True)
658
+ crop_bott = gr.Slider(label="Bottom", minimum=0, maximum=511, value=511, step=1, interactive=True)
659
+ crop_left = gr.Slider(label="Left", minimum=0, maximum=511, value=0, step=1, interactive=True)
660
+ crop_right = gr.Slider(label="Right", minimum=0, maximum=511, value=511, step=1, interactive=True)
661
+
662
+
663
+ erode_amount = gr.Slider(
664
+ label="Mask Erode",
665
  minimum=0,
666
+ maximum=1,
667
+ value=MASK_ERODE_AMOUNT,
668
+ step=0.05,
669
  interactive=True,
670
  )
671
 
672
+ blur_amount = gr.Slider(
673
+ label="Mask Blur",
674
+ minimum=0,
675
+ maximum=1,
676
+ value=MASK_BLUR_AMOUNT,
677
+ step=0.05,
678
+ interactive=True,
679
+ )
 
 
 
 
 
680
 
681
  enable_laplacian_blend = gr.Checkbox(
682
  label="Laplacian Blending",
 
684
  interactive=True,
685
  )
686
 
 
 
 
687
 
688
  source_image_input = gr.Image(
689
  label="Source face", type="filepath", interactive=True
 
875
  mask_soft_kernel,
876
  mask_soft_iterations,
877
  blur_amount,
878
+ erode_amount,
879
  face_scale,
880
  enable_laplacian_blend,
881
  crop_top,
 
894
  ]
895
 
896
  swap_event = swap_button.click(
897
+ fn=process, inputs=swap_inputs, outputs=swap_outputs, show_progress=True
898
  )
899
 
900
  cancel_button.click(
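Editor's note on the app.py changes above: the paste-back stage now runs one post_process call per frame on a thread pool, forwarding the per-frame predictions, matrices and masks via *args. Below is a minimal, self-contained sketch of that control flow with a placeholder worker (not the repo's actual post_process):

    import concurrent.futures
    from tqdm import tqdm

    def post_process(frame_idx, frame_path, *args):
        # Placeholder worker; the real one pastes swapped faces back into the frame image.
        return frame_idx, frame_path

    def concurrent_post_process(image_sequence, *args):
        with concurrent.futures.ThreadPoolExecutor() as executor:
            futures = [executor.submit(post_process, idx, frame, *args)
                       for idx, frame in enumerate(image_sequence)]
            for future in tqdm(concurrent.futures.as_completed(futures),
                               total=len(futures), desc="Pasting back"):
                future.result()  # re-raises any exception from the worker

    concurrent_post_process(["frame_0.png", "frame_1.png"], "split_preds", "split_matrs")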
assets/pretrained_models/open-nsfw.onnx ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:864bb37bf8863564b87eb330ab8c785a79a773f4e7c43cb96db52ed8611305fa
3
+ size 23590724
face_analyser.py CHANGED
@@ -192,4 +192,3 @@ def get_analysed_data(face_analyser, image_sequence, source_data, swap_condition
192
  num_faces_per_frame.append(n_faces)
193
 
194
  return analysed_target_list, analysed_source_list, whole_frame_eql_list, num_faces_per_frame
195
-
 
192
  num_faces_per_frame.append(n_faces)
193
 
194
  return analysed_target_list, analysed_source_list, whole_frame_eql_list, num_faces_per_frame
 
face_enhancer.py CHANGED
@@ -1,4 +1,5 @@
1
  import os
 
2
  import torch
3
  import gfpgan
4
  from PIL import Image
@@ -22,6 +23,7 @@ supported_enhancers = {
22
  "REAL-ESRGAN 8x": ("./assets/pretrained_models/RealESRGAN_x8.pth", realesrgan_runner)
23
  }
24
 
 
25
 
26
  def get_available_enhancer_names():
27
  available = []
@@ -33,9 +35,10 @@ def get_available_enhancer_names():
33
 
34
 
35
  def load_face_enhancer_model(name='GFPGAN', device="cpu"):
36
- assert name in get_available_enhancer_names(), f"Face enhancer {name} unavailable."
37
- model_path, model_runner = supported_enhancers.get(name)
38
- model_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), model_path)
 
39
  if name == 'GFPGAN':
40
  model = gfpgan.GFPGANer(model_path=model_path, upscale=1, device=device)
41
  elif name == 'REAL-ESRGAN 2x':
@@ -47,6 +50,15 @@ def load_face_enhancer_model(name='GFPGAN', device="cpu"):
47
  elif name == 'REAL-ESRGAN 8x':
48
  model = RealESRGAN(device, scale=8)
49
  model.load_weights(model_path, download=False)
50
  else:
51
  model = None
52
  return (model, model_runner)
 
1
  import os
2
+ import cv2
3
  import torch
4
  import gfpgan
5
  from PIL import Image
 
23
  "REAL-ESRGAN 8x": ("./assets/pretrained_models/RealESRGAN_x8.pth", realesrgan_runner)
24
  }
25
 
26
+ cv2_interpolations = ["LANCZOS4", "CUBIC", "NEAREST"]
27
 
28
  def get_available_enhancer_names():
29
  available = []
 
35
 
36
 
37
  def load_face_enhancer_model(name='GFPGAN', device="cpu"):
38
+ assert name in get_available_enhancer_names() + cv2_interpolations, f"Face enhancer {name} unavailable."
39
+ if name in supported_enhancers.keys():
40
+ model_path, model_runner = supported_enhancers.get(name)
41
+ model_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), model_path)
42
  if name == 'GFPGAN':
43
  model = gfpgan.GFPGANer(model_path=model_path, upscale=1, device=device)
44
  elif name == 'REAL-ESRGAN 2x':
 
50
  elif name == 'REAL-ESRGAN 8x':
51
  model = RealESRGAN(device, scale=8)
52
  model.load_weights(model_path, download=False)
53
+ elif name == 'LANCZOS4':
54
+ model = None
55
+ model_runner = lambda img, _: cv2.resize(img, (512,512), interpolation=cv2.INTER_LANCZOS4)
56
+ elif name == 'CUBIC':
57
+ model = None
58
+ model_runner = lambda img, _: cv2.resize(img, (512,512), interpolation=cv2.INTER_CUBIC)
59
+ elif name == 'NEAREST':
60
+ model = None
61
+ model_runner = lambda img, _: cv2.resize(img, (512,512), interpolation=cv2.INTER_NEAREST)
62
  else:
63
  model = None
64
  return (model, model_runner)
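Editor's note: with this change, load_face_enhancer_model can also hand back a plain cv2 resize as the "enhancer"; the return value is always a (model, runner) pair, where model may be None. A minimal sketch of how such a runner behaves, mirroring the LANCZOS4 lambda above (not taken from the repo's tests):

    import cv2
    import numpy as np

    # Stand-in for an Inswapper output crop; the cv2 "enhancers" just resize it to 512x512.
    face_crop = np.zeros((128, 128, 3), dtype=np.uint8)
    lanczos_runner = lambda img, _model: cv2.resize(img, (512, 512), interpolation=cv2.INTER_LANCZOS4)
    upscaled = lanczos_runner(face_crop, None)   # the model argument is ignored (None)
    assert upscaled.shape == (512, 512, 3)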
face_parsing/__init__.py CHANGED
@@ -1,3 +1,3 @@
1
- from .swap import init_parser, swap_regions, mask_regions, mask_regions_to_list, SoftErosion
2
  from .model import BiSeNet
3
- from .parse_mask import init_parsing_model, get_parsed_mask
 
1
+ from .swap import init_parser, swap_regions, mask_regions, mask_regions_to_list
2
  from .model import BiSeNet
3
+ from .parse_mask import init_parsing_model, get_parsed_mask, SoftErosion
face_parsing/parse_mask.py CHANGED
@@ -10,12 +10,55 @@ import torchvision.transforms as transforms
10
 
11
  from . model import BiSeNet
12
13
  transform = transforms.Compose([
14
  transforms.Resize((512, 512)),
15
  transforms.ToTensor(),
16
  transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
17
  ])
18
 
 
 
19
  def init_parsing_model(model_path, device="cpu"):
20
  net = BiSeNet(19)
21
  net.to(device)
@@ -27,7 +70,10 @@ def transform_images(imgs):
27
  tensor_images = torch.stack([transform(Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))) for img in imgs], dim=0)
28
  return tensor_images
29
 
30
- def get_parsed_mask(net, imgs, classes=[1, 2, 3, 4, 5, 10, 11, 12, 13], device="cpu", batch_size=8):
 
 
 
31
  masks = []
32
  for i in tqdm(range(0, len(imgs), batch_size), total=len(imgs) // batch_size, desc="Face-parsing"):
33
  batch_imgs = imgs[i:i + batch_size]
@@ -35,16 +81,27 @@ def get_parsed_mask(net, imgs, classes=[1, 2, 3, 4, 5, 10, 11, 12, 13], device="
35
  tensor_images = transform_images(batch_imgs).to(device)
36
  with torch.no_grad():
37
  out = net(tensor_images)[0]
38
- parsing = out.argmax(dim=1).cpu().numpy()
39
- batch_masks = np.isin(parsing, classes)
 
 
 
 
40
 
41
- masks.append(batch_masks)
 
 
 
42
 
43
- masks = np.concatenate(masks, axis=0)
44
- # masks = np.repeat(np.expand_dims(masks, axis=1), 3, axis=1)
 
45
 
46
- for i, mask in enumerate(masks):
47
- cv2.imwrite(f"mask/{i}.jpg", (mask * 255).astype("uint8"))
 
48
 
49
- return masks
 
50
 
 
 
10
 
11
  from . model import BiSeNet
12
 
13
+ class SoftErosion(nn.Module):
14
+ def __init__(self, kernel_size=15, threshold=0.6, iterations=1):
15
+ super(SoftErosion, self).__init__()
16
+ r = kernel_size // 2
17
+ self.padding = r
18
+ self.iterations = iterations
19
+ self.threshold = threshold
20
+
21
+ # Create kernel
22
+ y_indices, x_indices = torch.meshgrid(torch.arange(0., kernel_size), torch.arange(0., kernel_size))
23
+ dist = torch.sqrt((x_indices - r) ** 2 + (y_indices - r) ** 2)
24
+ kernel = dist.max() - dist
25
+ kernel /= kernel.sum()
26
+ kernel = kernel.view(1, 1, *kernel.shape)
27
+ self.register_buffer('weight', kernel)
28
+
29
+ def forward(self, x):
30
+ batch_size = x.size(0) # Get the batch size
31
+ output = []
32
+
33
+ for i in tqdm(range(batch_size), desc="Soft-Erosion", leave=False):
34
+ input_tensor = x[i:i+1] # Take one input tensor from the batch
35
+ input_tensor = input_tensor.float() # Convert input to float tensor
36
+ input_tensor = input_tensor.unsqueeze(1) # Add a channel dimension
37
+
38
+ for _ in range(self.iterations - 1):
39
+ input_tensor = torch.min(input_tensor, F.conv2d(input_tensor, weight=self.weight,
40
+ groups=input_tensor.shape[1],
41
+ padding=self.padding))
42
+ input_tensor = F.conv2d(input_tensor, weight=self.weight, groups=input_tensor.shape[1],
43
+ padding=self.padding)
44
+
45
+ mask = input_tensor >= self.threshold
46
+ input_tensor[mask] = 1.0
47
+ input_tensor[~mask] /= input_tensor[~mask].max()
48
+
49
+ input_tensor = input_tensor.squeeze(1) # Remove the extra channel dimension
50
+ output.append(input_tensor.detach().cpu().numpy())
51
+
52
+ return np.array(output)
53
+
54
  transform = transforms.Compose([
55
  transforms.Resize((512, 512)),
56
  transforms.ToTensor(),
57
  transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
58
  ])
59
 
60
+
61
+
62
  def init_parsing_model(model_path, device="cpu"):
63
  net = BiSeNet(19)
64
  net.to(device)
 
70
  tensor_images = torch.stack([transform(Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))) for img in imgs], dim=0)
71
  return tensor_images
72
 
73
+ def get_parsed_mask(net, imgs, classes=[1, 2, 3, 4, 5, 10, 11, 12, 13], device="cpu", batch_size=8, softness=20):
74
+ if softness > 0:
75
+ smooth_mask = SoftErosion(kernel_size=17, threshold=0.9, iterations=softness).to(device)
76
+
77
  masks = []
78
  for i in tqdm(range(0, len(imgs), batch_size), total=len(imgs) // batch_size, desc="Face-parsing"):
79
  batch_imgs = imgs[i:i + batch_size]
 
81
  tensor_images = transform_images(batch_imgs).to(device)
82
  with torch.no_grad():
83
  out = net(tensor_images)[0]
84
+ # parsing = out.argmax(dim=1)
85
+ # target_classes = torch.tensor(classes).to(device)
86
+ # batch_masks = torch.isin(parsing, target_classes).to(device)
87
+ ## torch.isin was slightly slower in my test, so using np.isin
88
+ parsing = out.argmax(dim=1).detach().cpu().numpy()
89
+ batch_masks = np.isin(parsing, classes).astype('float32')
90
 
91
+ if softness > 0:
92
+ # batch_masks = smooth_mask(batch_masks).transpose(1,0,2,3)[0]
93
+ mask_tensor = torch.from_numpy(batch_masks.copy()).float().to(device)
94
+ batch_masks = smooth_mask(mask_tensor).transpose(1,0,2,3)[0]
95
 
96
+ yield batch_masks
97
+
98
+ #masks.append(batch_masks)
99
 
100
+ #if len(masks) >= 1:
101
+ # masks = np.concatenate(masks, axis=0)
102
+ # masks = np.repeat(np.expand_dims(masks, axis=1), 3, axis=1)
103
 
104
+ # for i, mask in enumerate(masks):
105
+ # cv2.imwrite(f"mask/{i}.jpg", (mask * 255).astype("uint8"))
106
 
107
+ #return masks
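Editor's note: get_parsed_mask is now a generator that yields one batch of soft masks at a time instead of returning a single array. The class-selection step itself is plain NumPy; a minimal sketch with fake label maps (shapes assumed from the 512x512 transform above):

    import numpy as np

    parsing = np.random.randint(0, 19, size=(2, 512, 512))     # fake BiSeNet argmax output, (N, H, W)
    classes = [1, 2, 3, 4, 5, 10, 11, 12, 13]                   # default face regions to keep
    batch_masks = np.isin(parsing, classes).astype("float32")   # 1.0 where the pixel belongs to a kept class
    print(batch_masks.shape)                                     # (2, 512, 512)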
face_swapper.py CHANGED
@@ -5,10 +5,13 @@ import cv2
5
  import onnxruntime
6
  import numpy as np
7
  from tqdm import tqdm
 
8
  from onnx import numpy_helper
9
  from skimage import transform as trans
10
  import torchvision.transforms.functional as F
11
- from utils import make_white_image, laplacian_blending
 
 
12
 
13
  arcface_dst = np.array(
14
  [[38.2946, 51.6963], [73.5318, 51.5014], [56.0252, 71.7366],
@@ -46,72 +49,53 @@ class Inswapper():
46
  model = onnx.load(self.model_file)
47
  graph = model.graph
48
  self.emap = numpy_helper.to_array(graph.initializer[-1])
49
- self.input_mean = 0.0
50
- self.input_std = 255.0
51
 
52
  self.session_options = onnxruntime.SessionOptions()
53
  self.session = onnxruntime.InferenceSession(self.model_file, sess_options=self.session_options, providers=providers)
54
 
55
- inputs = self.session.get_inputs()
56
- self.input_names = [inp.name for inp in inputs]
57
- outputs = self.session.get_outputs()
58
- self.output_names = [out.name for out in outputs]
59
- assert len(self.output_names) == 1
60
- self.output_shape = outputs[0].shape
61
- input_cfg = inputs[0]
62
- input_shape = input_cfg.shape
63
- self.input_shape = input_shape
64
- self.input_size = tuple(input_shape[2:4][::-1])
65
-
66
  def forward(self, imgs, latents):
67
  preds = []
68
  for img, latent in zip(imgs, latents):
69
- img = (img - self.input_mean) / self.input_std
70
- pred = self.session.run(self.output_names, {self.input_names[0]: img, self.input_names[1]: latent})[0]
71
  preds.append(pred)
72
 
73
  def get(self, imgs, target_faces, source_faces):
74
  imgs = list(imgs)
75
 
76
  preds = [None] * len(imgs)
77
- aimgs = [None] * len(imgs)
78
  matrs = [None] * len(imgs)
79
 
80
  for idx, (img, target_face, source_face) in enumerate(zip(imgs, target_faces, source_faces)):
81
- aimg, M, blob, latent = self.prepare_data(img, target_face, source_face)
82
- aimgs[idx] = aimg
83
- matrs[idx] = M
84
- pred = self.session.run(self.output_names, {self.input_names[0]: blob, self.input_names[1]: latent})[0]
85
  pred = pred.transpose((0, 2, 3, 1))[0]
86
  pred = np.clip(255 * pred, 0, 255).astype(np.uint8)[:, :, ::-1]
 
87
  preds[idx] = pred
 
88
 
89
- return (preds, aimgs, matrs)
90
 
91
  def prepare_data(self, img, target_face, source_face):
92
  if isinstance(img, str):
93
  img = cv2.imread(img)
94
 
95
- aimg, M = norm_crop2(img, target_face.kps, self.input_size[0])
96
 
97
- blob = cv2.dnn.blobFromImage(aimg, 1.0 / self.input_std, self.input_size,
98
- (self.input_mean, self.input_mean, self.input_mean), swapRB=True)
99
 
100
  latent = source_face.normed_embedding.reshape((1, -1))
101
  latent = np.dot(latent, self.emap)
102
  latent /= np.linalg.norm(latent)
103
 
104
- return (aimg, M, blob, latent)
105
 
106
  def batch_forward(self, img_list, target_f_list, source_f_list):
107
  num_samples = len(img_list)
108
  num_batches = (num_samples + self.batch_size - 1) // self.batch_size
109
 
110
- preds = []
111
- aimgs = []
112
- matrs = []
113
-
114
- for i in tqdm(range(num_batches), desc="Swapping face"):
115
  start_idx = i * self.batch_size
116
  end_idx = min((i + 1) * self.batch_size, num_samples)
117
 
@@ -119,47 +103,48 @@ class Inswapper():
119
  batch_target_f = target_f_list[start_idx:end_idx]
120
  batch_source_f = source_f_list[start_idx:end_idx]
121
 
122
- batch_pred, batch_aimg, batch_matr = self.get(batch_img, batch_target_f, batch_source_f)
123
- preds.extend(batch_pred)
124
- aimgs.extend(batch_aimg)
125
- matrs.extend(batch_matr)
126
-
127
- return (preds, aimgs, matrs)
128
-
129
 
130
- def paste_to_whole(bgr_fake, aimg, M, whole_img, laplacian_blend=True, crop_mask=(0,0,0,0)):
131
- IM = cv2.invertAffineTransform(M)
132
 
133
- img_white = make_white_image(aimg.shape[:2], crop=crop_mask, white_value=255)
134
 
135
- bgr_fake = cv2.warpAffine(bgr_fake, IM, (whole_img.shape[1], whole_img.shape[0]), borderValue=0.0)
136
- img_white = cv2.warpAffine(img_white, IM, (whole_img.shape[1], whole_img.shape[0]), borderValue=0.0)
 
 
 
137
 
138
- img_white[img_white > 20] = 255
139
- img_mask = img_white
140
- mask_h_inds, mask_w_inds = np.where(img_mask == 255)
141
- mask_size = int(np.sqrt(np.ptp(mask_h_inds) * np.ptp(mask_w_inds)))
142
-
143
- k = max(mask_size // 10, 10)
144
- img_mask = cv2.erode(img_mask, np.ones((k, k), np.uint8), iterations=1)
145
-
146
- k = max(mask_size // 20, 5)
147
- kernel_size = (k, k)
148
- blur_size = tuple(2 * i + 1 for i in kernel_size)
149
- img_mask = cv2.GaussianBlur(img_mask, blur_size, 0) / 255
150
- img_mask = np.tile(np.expand_dims(img_mask, axis=-1), (1, 1, 3))
151
-
152
- if laplacian_blend:
153
- bgr_fake = laplacian_blending(bgr_fake.astype("float32").clip(0,255), whole_img.astype("float32").clip(0,255), img_mask.clip(0,1))
154
- bgr_fake = bgr_fake.astype("float32")
155
-
156
- fake_merged = img_mask * bgr_fake + (1 - img_mask) * whole_img.astype(np.float32)
157
- return fake_merged.astype("uint8")
 
 
 
 
 
 
 
 
 
 
 
 
158
 
159
- def place_foreground_on_background(foreground, background, matrix):
160
- matrix = cv2.invertAffineTransform(matrix)
161
- mask = np.ones(foreground.shape, dtype="float32")
162
- foreground = cv2.warpAffine(foreground, matrix, (background.shape[1], background.shape[0]), borderValue=0.0)
163
- mask = cv2.warpAffine(mask, matrix, (background.shape[1], background.shape[0]), borderValue=0.0)
164
- composite_image = mask * foreground + (1 - mask) * background
165
- return composite_image
 
5
  import onnxruntime
6
  import numpy as np
7
  from tqdm import tqdm
8
+ import torch.nn as nn
9
  from onnx import numpy_helper
10
  from skimage import transform as trans
11
  import torchvision.transforms.functional as F
12
+ import torch.nn.functional as F
13
+ from utils import mask_crop, laplacian_blending
14
+
15
 
16
  arcface_dst = np.array(
17
  [[38.2946, 51.6963], [73.5318, 51.5014], [56.0252, 71.7366],
 
49
  model = onnx.load(self.model_file)
50
  graph = model.graph
51
  self.emap = numpy_helper.to_array(graph.initializer[-1])
 
 
52
 
53
  self.session_options = onnxruntime.SessionOptions()
54
  self.session = onnxruntime.InferenceSession(self.model_file, sess_options=self.session_options, providers=providers)
55
56
  def forward(self, imgs, latents):
57
  preds = []
58
  for img, latent in zip(imgs, latents):
59
+ img = img / 255
60
+ pred = self.session.run(['output'], {'target': img, 'source': latent})[0]
61
  preds.append(pred)
62
 
63
  def get(self, imgs, target_faces, source_faces):
64
  imgs = list(imgs)
65
 
66
  preds = [None] * len(imgs)
 
67
  matrs = [None] * len(imgs)
68
 
69
  for idx, (img, target_face, source_face) in enumerate(zip(imgs, target_faces, source_faces)):
70
+ matrix, blob, latent = self.prepare_data(img, target_face, source_face)
71
+ pred = self.session.run(['output'], {'target': blob, 'source': latent})[0]
 
 
72
  pred = pred.transpose((0, 2, 3, 1))[0]
73
  pred = np.clip(255 * pred, 0, 255).astype(np.uint8)[:, :, ::-1]
74
+
75
  preds[idx] = pred
76
+ matrs[idx] = matrix
77
 
78
+ return (preds, matrs)
79
 
80
  def prepare_data(self, img, target_face, source_face):
81
  if isinstance(img, str):
82
  img = cv2.imread(img)
83
 
84
+ aligned_img, matrix = norm_crop2(img, target_face.kps, 128)
85
 
86
+ blob = cv2.dnn.blobFromImage(aligned_img, 1.0 / 255, (128, 128), (0., 0., 0.), swapRB=True)
 
87
 
88
  latent = source_face.normed_embedding.reshape((1, -1))
89
  latent = np.dot(latent, self.emap)
90
  latent /= np.linalg.norm(latent)
91
 
92
+ return (matrix, blob, latent)
93
 
94
  def batch_forward(self, img_list, target_f_list, source_f_list):
95
  num_samples = len(img_list)
96
  num_batches = (num_samples + self.batch_size - 1) // self.batch_size
97
 
98
+ for i in tqdm(range(num_batches), desc="Generating face"):
 
 
 
 
99
  start_idx = i * self.batch_size
100
  end_idx = min((i + 1) * self.batch_size, num_samples)
101
 
 
103
  batch_target_f = target_f_list[start_idx:end_idx]
104
  batch_source_f = source_f_list[start_idx:end_idx]
105
 
106
+ batch_pred, batch_matr = self.get(batch_img, batch_target_f, batch_source_f)
 
 
 
 
 
 
107
 
108
+ yield batch_pred, batch_matr
 
109
 
 
110
 
111
+ def paste_to_whole(foreground, background, matrix, mask=None, crop_mask=(0,0,0,0), blur_amount=0.1, erode_amount = 0.15, blend_method='linear'):
112
+ inv_matrix = cv2.invertAffineTransform(matrix)
113
+ fg_shape = foreground.shape[:2]
114
+ bg_shape = (background.shape[1], background.shape[0])
115
+ foreground = cv2.warpAffine(foreground, inv_matrix, bg_shape, borderValue=0.0)
116
 
117
+ if mask is None:
118
+ mask = np.full(fg_shape, 1., dtype=np.float32)
119
+ mask = mask_crop(mask, crop_mask)
120
+ mask = cv2.warpAffine(mask, inv_matrix, bg_shape, borderValue=0.0)
121
+ else:
122
+ assert fg_shape == mask.shape[:2], "foreground & mask shape mismatch!"
123
+ mask = mask_crop(mask, crop_mask).astype('float32')
124
+ mask = cv2.warpAffine(mask, inv_matrix, (background.shape[1], background.shape[0]), borderValue=0.0)
125
+
126
+ _mask = mask.copy()
127
+ _mask[_mask > 0.05] = 1.
128
+ non_zero_points = cv2.findNonZero(_mask)
129
+ _, _, w, h = cv2.boundingRect(non_zero_points)
130
+ mask_size = int(np.sqrt(w * h))
131
+
132
+ if erode_amount > 0:
133
+ kernel_size = max(int(mask_size * erode_amount), 1)
134
+ structuring_element = cv2.getStructuringElement(cv2.MORPH_RECT, (kernel_size, kernel_size))
135
+ mask = cv2.erode(mask, structuring_element)
136
+
137
+ if blur_amount > 0:
138
+ kernel_size = max(int(mask_size * blur_amount), 3)
139
+ if kernel_size % 2 == 0:
140
+ kernel_size += 1
141
+ mask = cv2.GaussianBlur(mask, (kernel_size, kernel_size), 0)
142
+
143
+ mask = np.tile(np.expand_dims(mask, axis=-1), (1, 1, 3))
144
+
145
+ if blend_method == 'laplacian':
146
+ composite_image = laplacian_blending(foreground, background, mask.clip(0,1), num_levels=4)
147
+ else:
148
+ composite_image = mask * foreground + (1 - mask) * background
149
 
150
+ return composite_image.clip(0, 255).astype("uint8")
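Editor's note: the new paste_to_whole scales its erode and blur kernels with the mask's bounding-box size instead of using fixed kernels. A small self-contained sketch of that mask shaping with a synthetic mask (not the full paste-back):

    import cv2
    import numpy as np

    mask = np.zeros((512, 512), dtype=np.float32)
    mask[100:400, 150:380] = 1.0                                # synthetic warped-face mask
    erode_amount, blur_amount = 0.15, 0.1                       # the new UI defaults

    pts = cv2.findNonZero((mask > 0.05).astype(np.uint8))
    _, _, w, h = cv2.boundingRect(pts)
    mask_size = int(np.sqrt(w * h))                             # kernel sizes are fractions of this

    k = max(int(mask_size * erode_amount), 1)
    mask = cv2.erode(mask, cv2.getStructuringElement(cv2.MORPH_RECT, (k, k)))

    k = max(int(mask_size * blur_amount), 3)
    if k % 2 == 0:
        k += 1                                                  # GaussianBlur needs an odd kernel
    mask = cv2.GaussianBlur(mask, (k, k), 0)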
 
 
 
 
 
 
nsfw_checker/LICENSE.md ADDED
@@ -0,0 +1,11 @@
1
+
2
+ Copyright 2016, Yahoo Inc.
3
+
4
+ Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
5
+
6
+ 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
7
+
8
+ 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
9
+
10
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
11
+
nsfw_checker/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from . opennsfw import NSFWChecker
nsfw_checker/opennsfw.py ADDED
@@ -0,0 +1,37 @@
1
+ import cv2
2
+ import torch
3
+ import onnx
4
+ import onnxruntime
5
+ import numpy as np
6
+ from tqdm import tqdm
7
+
8
+ # https://github.com/yahoo/open_nsfw
9
+
10
+ class NSFWChecker:
11
+ def __init__(self, model_path=None, providers=["CPUExecutionProvider"]):
12
+ model = onnx.load(model_path)
13
+ self.input_name = model.graph.input[0].name
14
+ session_options = onnxruntime.SessionOptions()
15
+ self.session = onnxruntime.InferenceSession(model_path, sess_options=session_options, providers=providers)
16
+
17
+ def is_nsfw(self, img_paths, threshold = 0.85):
18
+ skip_step = 1
19
+ total_len = len(img_paths)
20
+ if total_len < 100: skip_step = 1
21
+ if total_len > 100 and total_len < 500: skip_step = 10
22
+ if total_len > 500 and total_len < 1000: skip_step = 20
23
+ if total_len > 1000 and total_len < 10000: skip_step = 50
24
+ if total_len > 10000: skip_step = 100
25
+
26
+ for idx in tqdm(range(0, total_len, skip_step), total=int(total_len // skip_step), desc="Checking for NSFW contents"):
27
+ img = cv2.imread(img_paths[idx])
28
+ img = cv2.resize(img, (224,224)).astype('float32')
29
+ img -= np.array([104, 117, 123], dtype=np.float32)
30
+ img = np.expand_dims(img, axis=0)
31
+
32
+ score = self.session.run(None, {self.input_name:img})[0][0][1]
33
+
34
+ if score > threshold:
35
+ print(f"Detected nsfw score:{score}")
36
+ return True
37
+ return False
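Editor's note: a minimal usage sketch of the new checker, assuming the repository root is on the import path, the open-nsfw.onnx weights have been pulled via git-lfs, and the listed frame paths exist (they are hypothetical here):

    from nsfw_checker import NSFWChecker

    checker = NSFWChecker(
        model_path="./assets/pretrained_models/open-nsfw.onnx",
        providers=["CPUExecutionProvider"],
    )
    frames = ["./frames/0001.png", "./frames/0002.png"]   # hypothetical frame paths
    if checker.is_nsfw(frames, threshold=0.85):
        raise SystemExit("NSFW content detected, aborting swap.")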
requirements.txt CHANGED
@@ -9,5 +9,4 @@ onnxruntime==1.15.0
9
  opencv-python>=4.7.0.72
10
  opencv-python-headless>=4.7.0.72
11
  gfpgan==1.3.8
12
- timm==0.9.2
13
 
 
9
  opencv-python>=4.7.0.72
10
  opencv-python-headless>=4.7.0.72
11
  gfpgan==1.3.8
 
12
 
utils.py CHANGED
@@ -2,7 +2,6 @@ import os
2
  import cv2
3
  import time
4
  import glob
5
- import torch
6
  import shutil
7
  import platform
8
  import datetime
@@ -11,7 +10,6 @@ import numpy as np
11
  from threading import Thread
12
  from moviepy.editor import VideoFileClip, ImageSequenceClip
13
  from moviepy.video.io.ffmpeg_tools import ffmpeg_extract_subclip
14
- from face_parsing import init_parser, swap_regions, mask_regions, mask_regions_to_list, SoftErosion
15
 
16
 
17
  logo_image = cv2.imread("./assets/images/logo.png", cv2.IMREAD_UNCHANGED)
@@ -69,7 +67,7 @@ def trim_video(video_path, output_path, start_frame, stop_frame):
69
  os.makedirs(temp_path, exist_ok=True)
70
  trimmed_video_file_path = os.path.join(temp_path, trimmed_video_filename)
71
 
72
- video = VideoFileClip(video_path)
73
  fps = video.fps
74
  start_time = start_frame / fps
75
  duration = (stop_frame - start_frame) / fps
@@ -174,7 +172,7 @@ def split_list_by_lengths(data, length_list):
174
 
175
 
176
  def merge_img_sequence_from_ref(ref_video_path, image_sequence, output_file_name):
177
- video_clip = VideoFileClip(ref_video_path)
178
  fps = video_clip.fps
179
  duration = video_clip.duration
180
  total_frames = video_clip.reader.nframes
@@ -224,12 +222,12 @@ def scale_bbox_from_center(bbox, scale_width, scale_height, image_width, image_h
224
  return scaled_bbox
225
 
226
 
227
- def laplacian_blending(A, B, m, num_levels=4):
228
  assert A.shape == B.shape
229
  assert B.shape == m.shape
230
  height = m.shape[0]
231
  width = m.shape[1]
232
- size_list = np.array([4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096])
233
  size = size_list[np.where(size_list > max(height, width))][0]
234
  GA = np.zeros((size, size, 3), dtype=np.float32)
235
  GA[:height, :width, :] = A
@@ -264,30 +262,42 @@ def laplacian_blending(A, B, m, num_levels=4):
264
  for i in range(1,num_levels):
265
  ls_ = cv2.pyrUp(ls_)
266
  ls_ = cv2.add(ls_, LS[i])
267
- ls_ = np.clip(ls_[:height, :width, :], 0, 255)
268
- return ls_
269
-
270
-
271
- def make_white_image(shape, crop=None, white_value=255):
272
- img_white = np.full((shape[0], shape[1]), white_value, dtype=np.float32)
273
- if crop is not None:
274
- top = int(crop[0])
275
- bottom = int(crop[1])
276
- if top + bottom < shape[1]:
277
- if top > 0: img_white[:top, :] = 0
278
- if bottom > 0: img_white[-bottom:, :] = 0
279
-
280
- left = int(crop[2])
281
- right = int(crop[3])
282
- if left + right < shape[0]:
283
- if left > 0: img_white[:, :left] = 0
284
- if right > 0: img_white[:, -right:] = 0
285
-
286
- return img_white
287
-
288
-
289
- def remove_hair(img, model=None):
290
- if model is None:
291
- path = "./assets/pretrained_models/79999_iter.pth"
292
- model = init_parser(path, mode="cuda" if torch.cuda.is_available() else "cpu")
293
-
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  import cv2
3
  import time
4
  import glob
 
5
  import shutil
6
  import platform
7
  import datetime
 
10
  from threading import Thread
11
  from moviepy.editor import VideoFileClip, ImageSequenceClip
12
  from moviepy.video.io.ffmpeg_tools import ffmpeg_extract_subclip
 
13
 
14
 
15
  logo_image = cv2.imread("./assets/images/logo.png", cv2.IMREAD_UNCHANGED)
 
67
  os.makedirs(temp_path, exist_ok=True)
68
  trimmed_video_file_path = os.path.join(temp_path, trimmed_video_filename)
69
 
70
+ video = VideoFileClip(video_path, fps_source="fps")
71
  fps = video.fps
72
  start_time = start_frame / fps
73
  duration = (stop_frame - start_frame) / fps
 
172
 
173
 
174
  def merge_img_sequence_from_ref(ref_video_path, image_sequence, output_file_name):
175
+ video_clip = VideoFileClip(ref_video_path, fps_source="fps")
176
  fps = video_clip.fps
177
  duration = video_clip.duration
178
  total_frames = video_clip.reader.nframes
 
222
  return scaled_bbox
223
 
224
 
225
+ def laplacian_blending(A, B, m, num_levels=7):
226
  assert A.shape == B.shape
227
  assert B.shape == m.shape
228
  height = m.shape[0]
229
  width = m.shape[1]
230
+ size_list = np.array([4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192])
231
  size = size_list[np.where(size_list > max(height, width))][0]
232
  GA = np.zeros((size, size, 3), dtype=np.float32)
233
  GA[:height, :width, :] = A
 
262
  for i in range(1,num_levels):
263
  ls_ = cv2.pyrUp(ls_)
264
  ls_ = cv2.add(ls_, LS[i])
265
+ ls_ = ls_[:height, :width, :]
266
+ #ls_ = (ls_ - np.min(ls_)) * (255.0 / (np.max(ls_) - np.min(ls_)))
267
+ return ls_.clip(0, 255)
268
+
269
+
270
+ def mask_crop(mask, crop):
271
+ top, bottom, left, right = crop
272
+ shape = mask.shape
273
+ top = int(top)
274
+ bottom = int(bottom)
275
+ if top + bottom < shape[1]:
276
+ if top > 0: mask[:top, :] = 0
277
+ if bottom > 0: mask[-bottom:, :] = 0
278
+
279
+ left = int(left)
280
+ right = int(right)
281
+ if left + right < shape[0]:
282
+ if left > 0: mask[:, :left] = 0
283
+ if right > 0: mask[:, -right:] = 0
284
+
285
+ return mask
286
+
287
+ def create_image_grid(images, size=128):
288
+ num_images = len(images)
289
+ num_cols = int(np.ceil(np.sqrt(num_images)))
290
+ num_rows = int(np.ceil(num_images / num_cols))
291
+ grid = np.zeros((num_rows * size, num_cols * size, 3), dtype=np.uint8)
292
+
293
+ for i, image in enumerate(images):
294
+ row_idx = (i // num_cols) * size
295
+ col_idx = (i % num_cols) * size
296
+ image = cv2.resize(image.copy(), (size,size))
297
+ if image.dtype != np.uint8:
298
+ image = (image.astype('float32') * 255).astype('uint8')
299
+ if image.ndim == 2:
300
+ image = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
301
+ grid[row_idx:row_idx + size, col_idx:col_idx + size] = image
302
+
303
+ return grid
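Editor's note: create_image_grid is what feeds the new batch previews in app.py. A minimal usage sketch, assuming the repository root is on PYTHONPATH (importing utils also pulls in moviepy and reads the logo asset):

    import numpy as np
    from utils import create_image_grid

    faces = [np.random.randint(0, 255, (128, 128, 3), dtype=np.uint8) for _ in range(5)]
    grid = create_image_grid(faces, size=128)
    print(grid.shape)   # (256, 384, 3): 5 images tiled into a 2x3 grid, empty cells left black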