Harisreedhar committed • Commit db275a2 • 1 Parent(s): 638204f
update nsfw-checker
Browse files:
- app.py +127 -81
- assets/pretrained_models/open-nsfw.onnx +3 -0
- face_analyser.py +0 -1
- face_enhancer.py +15 -3
- face_parsing/__init__.py +2 -2
- face_parsing/parse_mask.py +66 -9
- face_swapper.py +55 -70
- nsfw_checker/LICENSE.md +11 -0
- nsfw_checker/__init__.py +1 -0
- nsfw_checker/opennsfw.py +37 -0
- requirements.txt +0 -1
- utils.py +43 -33
app.py CHANGED
@@ -12,16 +12,18 @@ import insightface
 import onnxruntime
 import numpy as np
 import gradio as gr
+import threading
+import queue
 from tqdm import tqdm
 import concurrent.futures
 from moviepy.editor import VideoFileClip
 
-from …
-from face_swapper import Inswapper, paste_to_whole
+from nsfw_checker import NSFWChecker
+from face_swapper import Inswapper, paste_to_whole
 from face_analyser import detect_conditions, get_analysed_data, swap_options_list
-from …
-from …
-from utils import trim_video, StreamerThread, ProcessBar, open_directory, split_list_by_lengths, merge_img_sequence_from_ref
+from face_parsing import init_parsing_model, get_parsed_mask, mask_regions, mask_regions_to_list
+from face_enhancer import get_available_enhancer_names, load_face_enhancer_model, cv2_interpolations
+from utils import trim_video, StreamerThread, ProcessBar, open_directory, split_list_by_lengths, merge_img_sequence_from_ref, create_image_grid
 
 ## ------------------------------ USER ARGS ------------------------------
 
@@ -39,7 +41,7 @@ user_args = parser.parse_args()
 USE_COLAB = user_args.colab
 USE_CUDA = user_args.cuda
 DEF_OUTPUT_PATH = user_args.out_dir
-BATCH_SIZE = user_args.batch_size
+BATCH_SIZE = int(user_args.batch_size)
 WORKSPACE = None
 OUTPUT_FILE = None
 CURRENT_FRAME = None
@@ -60,8 +62,9 @@ MASK_INCLUDE = [
     "U-Lip"
 ]
 MASK_SOFT_KERNEL = 17
-MASK_SOFT_ITERATIONS = …
-MASK_BLUR_AMOUNT = …
+MASK_SOFT_ITERATIONS = 10
+MASK_BLUR_AMOUNT = 0.1
+MASK_ERODE_AMOUNT = 0.15
 
 FACE_SWAPPER = None
 FACE_ANALYSER = None
@@ -70,7 +73,7 @@ FACE_PARSER = None
 NSFW_DETECTOR = None
 FACE_ENHANCER_LIST = ["NONE"]
 FACE_ENHANCER_LIST.extend(get_available_enhancer_names())
-
+FACE_ENHANCER_LIST.extend(cv2_interpolations)
 
 ## ------------------------------ SET EXECUTION PROVIDER ------------------------------
 # Note: Non CUDA users may change settings here
@@ -113,12 +116,12 @@ def load_face_swapper_model(path="./assets/pretrained_models/inswapper_128.onnx"
 def load_face_parser_model(path="./assets/pretrained_models/79999_iter.pth"):
     global FACE_PARSER
     if FACE_PARSER is None:
-        FACE_PARSER = …
+        FACE_PARSER = init_parsing_model(path, device=device)
 
-def load_nsfw_detector_model(path="./assets/pretrained_models/…
+def load_nsfw_detector_model(path="./assets/pretrained_models/open-nsfw.onnx"):
     global NSFW_DETECTOR
     if NSFW_DETECTOR is None:
-        NSFW_DETECTOR = …
+        NSFW_DETECTOR = NSFWChecker(model_path=path, providers=PROVIDER)
 
 
 load_face_analyser_model()
@@ -145,6 +148,7 @@ def process(
     mask_soft_kernel,
     mask_soft_iterations,
     blur_amount,
+    erode_amount,
     face_scale,
     enable_laplacian_blend,
     crop_top,
@@ -189,6 +193,7 @@ def process(
     get_finsh_text = lambda start_time: f"✔️ Completed in {int(total_exec_time(start_time)[0])} min {int(total_exec_time(start_time)[1])} sec."
 
     ## ------------------------------ PREPARE INPUTS & LOAD MODELS ------------------------------
+
     yield "### \n ⌛ Loading NSFW detector model...", *ui_before()
     load_nsfw_detector_model()
 
@@ -199,7 +204,8 @@ def process(
     load_face_swapper_model()
 
     if face_enhancer_name != "NONE":
-        …
+        if face_enhancer_name not in cv2_interpolations:
+            yield f"### \n ⌛ Loading {face_enhancer_name} model...", *ui_before()
         FACE_ENHANCER = load_face_enhancer_model(name=face_enhancer_name, device=device)
     else:
         FACE_ENHANCER = None
@@ -209,15 +215,19 @@ def process(
     load_face_parser_model()
 
     includes = mask_regions_to_list(mask_includes)
-    smooth_mask = SoftErosion(kernel_size=17, threshold=0.9, iterations=int(mask_soft_iterations)).to(device) if mask_soft_iterations > 0 else None
     specifics = list(specifics)
     half = len(specifics) // 2
     sources = specifics[:half]
     specifics = specifics[half:]
-    …
+    if crop_top > crop_bott:
+        crop_top, crop_bott = crop_bott, crop_top
+    if crop_left > crop_right:
+        crop_left, crop_right = crop_right, crop_left
+    crop_mask = (crop_top, 511-crop_bott, crop_left, 511-crop_right)
 
     def swap_process(image_sequence):
+        ## ------------------------------ CONTENT CHECK ------------------------------
+
         yield "### \n ⌛ Checking contents...", *ui_before()
         nsfw = NSFW_DETECTOR.is_nsfw(image_sequence)
         if nsfw:
@@ -227,6 +237,8 @@ def process(
             return False
         EMPTY_CACHE()
 
+        ## ------------------------------ ANALYSE FACE ------------------------------
+
         yield "### \n ⌛ Analysing face data...", *ui_before()
         if condition != "Specific Face":
             source_data = source_path, age
@@ -241,81 +253,99 @@ def process(
             scale=face_scale
         )
 
-        …
-        preds, aimgs, matrs = FACE_SWAPPER.batch_forward(whole_frame_list, analysed_targets, analysed_sources)
-        EMPTY_CACHE()
+        ## ------------------------------ SWAP FUNC ------------------------------
 
-        …
+        yield "### \n ⌛ Generating faces...", *ui_before()
+        preds = []
+        matrs = []
+        count = 0
+        global PREVIEW
+        for batch_pred, batch_matr in FACE_SWAPPER.batch_forward(whole_frame_list, analysed_targets, analysed_sources):
+            preds.extend(batch_pred)
+            matrs.extend(batch_matr)
+            EMPTY_CACHE()
+            count += 1
+
+            if USE_CUDA:
+                image_grid = create_image_grid(batch_pred, size=128)
+                PREVIEW = image_grid[:, :, ::-1]
+                yield f"### \n ⌛ Generating face Batch {count}", *ui_before()
+
+        ## ------------------------------ FACE ENHANCEMENT ------------------------------
 
+        generated_len = len(preds)
         if face_enhancer_name != "NONE":
-            yield f"### \n ⌛ …
-            for idx, pred in tqdm(enumerate(preds), total=…
+            yield f"### \n ⌛ Upscaling faces with {face_enhancer_name}...", *ui_before()
+            for idx, pred in tqdm(enumerate(preds), total=generated_len, desc=f"Upscaling with {face_enhancer_name}"):
                 enhancer_model, enhancer_model_runner = FACE_ENHANCER
                 pred = enhancer_model_runner(pred, enhancer_model)
                 preds[idx] = cv2.resize(pred, (512,512))
-                aimgs[idx] = cv2.resize(aimgs[idx], (512,512))
-                matrs[idx] /= 0.25
-        …
         EMPTY_CACHE()
 
+        ## ------------------------------ FACE PARSING ------------------------------
+
+        if enable_face_parser:
+            yield "### \n ⌛ Face-parsing mask...", *ui_before()
+            masks = []
+            count = 0
+            for batch_mask in get_parsed_mask(FACE_PARSER, preds, classes=includes, device=device, batch_size=BATCH_SIZE, softness=int(mask_soft_iterations)):
+                masks.append(batch_mask)
+                EMPTY_CACHE()
+                count += 1
+
+                if len(batch_mask) > 1:
+                    image_grid = create_image_grid(batch_mask, size=128)
+                    PREVIEW = image_grid[:, :, ::-1]
+                    yield f"### \n ⌛ Face parsing Batch {count}", *ui_before()
+            masks = np.concatenate(masks, axis=0) if len(masks) >= 1 else masks
+        else:
+            masks = [None] * generated_len
+
+        ## ------------------------------ SPLIT LIST ------------------------------
+
         split_preds = split_list_by_lengths(preds, num_faces_per_frame)
         del preds
-        split_aimgs = split_list_by_lengths(aimgs, num_faces_per_frame)
-        del aimgs
         split_matrs = split_list_by_lengths(matrs, num_faces_per_frame)
         del matrs
+        split_masks = split_list_by_lengths(masks, num_faces_per_frame)
+        del masks
 
-        …
+        ## ------------------------------ PASTE-BACK ------------------------------
+
+        yield "### \n ⌛ Pasting back...", *ui_before()
+        def post_process(frame_idx, frame_img, split_preds, split_matrs, split_masks, enable_laplacian_blend, crop_mask, blur_amount, erode_amount):
             whole_img_path = frame_img
             whole_img = cv2.imread(whole_img_path)
-            …
+            blend_method = 'laplacian' if enable_laplacian_blend else 'linear'
+            for p, m, mask in zip(split_preds[frame_idx], split_matrs[frame_idx], split_masks[frame_idx]):
+                p = cv2.resize(p, (512,512))
+                mask = cv2.resize(mask, (512,512)) if mask is not None else None
+                m /= 0.25
+                whole_img = paste_to_whole(p, whole_img, m, mask=mask, crop_mask=crop_mask, blend_method=blend_method, blur_amount=blur_amount, erode_amount=erode_amount)
             cv2.imwrite(whole_img_path, whole_img)
 
-        def concurrent_post_process(image_sequence, …
+        def concurrent_post_process(image_sequence, *args):
             with concurrent.futures.ThreadPoolExecutor() as executor:
                 futures = []
                 for idx, frame_img in enumerate(image_sequence):
-                    future = executor.submit(
-                        post_process,
-                        idx,
-                        frame_img,
-                        split_preds,
-                        split_aimgs,
-                        split_matrs,
-                        enable_laplacian_blend,
-                        crop_top,
-                        crop_bott,
-                        crop_left,
-                        crop_right
-                    )
+                    future = executor.submit(post_process, idx, frame_img, *args)
                     futures.append(future)
 
-                for future in tqdm(concurrent.futures.as_completed(futures), total=len(futures), desc="…
-                    try:
-                        result = future.result()
-                    except Exception as e:
-                        print(f"An error occurred: {e}")
+                for future in tqdm(concurrent.futures.as_completed(futures), total=len(futures), desc="Pasting back"):
+                    result = future.result()
 
         concurrent_post_process(
             image_sequence,
             split_preds,
-            split_aimgs,
             split_matrs,
+            split_masks,
             enable_laplacian_blend,
-            …
-            crop_right
+            crop_mask,
+            blur_amount,
+            erode_amount
         )
 
-        …
         ## ------------------------------ IMAGE ------------------------------
 
         if input_type == "Image":
@@ -496,7 +526,7 @@ def stop_running():
     if hasattr(STREAMER, "stop"):
         STREAMER.stop()
     STREAMER = None
-    …
+    return "Cancelled"
 
 
 def slider_changed(show_frame, video_path, frame_index):
@@ -581,6 +611,18 @@ with gr.Blocks(css=css) as interface:
         )
 
         with gr.Tab("🪄 Other Settings"):
+            face_scale = gr.Slider(
+                label="Face Scale",
+                minimum=0,
+                maximum=2,
+                value=1,
+                interactive=True,
+            )
+
+            face_enhancer_name = gr.Dropdown(
+                FACE_ENHANCER_LIST, label="Face Enhancer", value="NONE", multiselect=False, interactive=True
+            )
+
             with gr.Accordion("Advanced Mask", open=False):
                 enable_face_parser_mask = gr.Checkbox(
                     label="Enable Face Parsing",
@@ -609,26 +651,32 @@ with gr.Blocks(css=css) as interface:
                 interactive=True,
 
             )
-            …
+
+
+            with gr.Accordion("Crop Mask", open=False):
+                crop_top = gr.Slider(label="Top", minimum=0, maximum=511, value=0, step=1, interactive=True)
+                crop_bott = gr.Slider(label="Bottom", minimum=0, maximum=511, value=511, step=1, interactive=True)
+                crop_left = gr.Slider(label="Left", minimum=0, maximum=511, value=0, step=1, interactive=True)
+                crop_right = gr.Slider(label="Right", minimum=0, maximum=511, value=511, step=1, interactive=True)
+
+
+            erode_amount = gr.Slider(
+                label="Mask Erode",
                 minimum=0,
+                maximum=1,
+                value=MASK_ERODE_AMOUNT,
+                step=0.05,
                 interactive=True,
             )
 
-            …
-            with gr.Accordion("Crop Mask", open=False):
-                crop_top = gr.Number(label="Top", value=0, minimum=0, interactive=True)
-                crop_bott = gr.Number(label="Bottom", value=0, minimum=0, interactive=True)
-                crop_left = gr.Number(label="Left", value=0, minimum=0, interactive=True)
-                crop_right = gr.Number(label="Right", value=0, minimum=0, interactive=True)
+            blur_amount = gr.Slider(
+                label="Mask Blur",
+                minimum=0,
+                maximum=1,
+                value=MASK_BLUR_AMOUNT,
+                step=0.05,
+                interactive=True,
+            )
 
             enable_laplacian_blend = gr.Checkbox(
                 label="Laplacian Blending",
@@ -636,9 +684,6 @@ with gr.Blocks(css=css) as interface:
                 interactive=True,
             )
 
-            face_enhancer_name = gr.Dropdown(
-                FACE_ENHANCER_LIST, label="Face Enhancer", value="NONE", multiselect=False, interactive=True
-            )
 
             source_image_input = gr.Image(
                 label="Source face", type="filepath", interactive=True
@@ -830,6 +875,7 @@ with gr.Blocks(css=css) as interface:
         mask_soft_kernel,
         mask_soft_iterations,
         blur_amount,
+        erode_amount,
         face_scale,
         enable_laplacian_blend,
         crop_top,
@@ -848,7 +894,7 @@ with gr.Blocks(css=css) as interface:
     ]
 
     swap_event = swap_button.click(
-        fn=process, inputs=swap_inputs, outputs=swap_outputs, show_progress=True
+        fn=process, inputs=swap_inputs, outputs=swap_outputs, show_progress=True
    )
 
    cancel_button.click(
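A minimal, self-contained sketch (not part of the commit) of how the new crop sliders in app.py are turned into the crop_mask tuple that is later handed to paste_to_whole(); the helper name build_crop_mask is hypothetical, but the swap-and-subtract logic mirrors the added lines above, where 511 comes from the 512x512 face crop.

def build_crop_mask(crop_top, crop_bott, crop_left, crop_right):
    # Sliders may be dragged past each other, so order each pair first.
    if crop_top > crop_bott:
        crop_top, crop_bott = crop_bott, crop_top
    if crop_left > crop_right:
        crop_left, crop_right = crop_right, crop_left
    # Bottom/right sliders default to 511, i.e. "no crop" maps to 0 here.
    return (crop_top, 511 - crop_bott, crop_left, 511 - crop_right)

print(build_crop_mask(10, 500, 0, 511))  # -> (10, 11, 0, 0)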
assets/pretrained_models/open-nsfw.onnx ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:864bb37bf8863564b87eb330ab8c785a79a773f4e7c43cb96db52ed8611305fa
+size 23590724
face_analyser.py CHANGED
@@ -192,4 +192,3 @@ def get_analysed_data(face_analyser, image_sequence, source_data, swap_condition
         num_faces_per_frame.append(n_faces)
 
     return analysed_target_list, analysed_source_list, whole_frame_eql_list, num_faces_per_frame
-
face_enhancer.py CHANGED
@@ -1,4 +1,5 @@
 import os
+import cv2
 import torch
 import gfpgan
 from PIL import Image
@@ -22,6 +23,7 @@ supported_enhancers = {
     "REAL-ESRGAN 8x": ("./assets/pretrained_models/RealESRGAN_x8.pth", realesrgan_runner)
 }
 
+cv2_interpolations = ["LANCZOS4", "CUBIC", "NEAREST"]
 
 def get_available_enhancer_names():
     available = []
@@ -33,9 +35,10 @@ def get_available_enhancer_names():
 
 
 def load_face_enhancer_model(name='GFPGAN', device="cpu"):
-    assert name in get_available_enhancer_names(), f"Face enhancer {name} unavailable."
-    …
+    assert name in get_available_enhancer_names() + cv2_interpolations, f"Face enhancer {name} unavailable."
+    if name in supported_enhancers.keys():
+        model_path, model_runner = supported_enhancers.get(name)
+        model_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), model_path)
     if name == 'GFPGAN':
         model = gfpgan.GFPGANer(model_path=model_path, upscale=1, device=device)
     elif name == 'REAL-ESRGAN 2x':
@@ -47,6 +50,15 @@ def load_face_enhancer_model(name='GFPGAN', device="cpu"):
     elif name == 'REAL-ESRGAN 8x':
         model = RealESRGAN(device, scale=8)
         model.load_weights(model_path, download=False)
+    elif name == 'LANCZOS4':
+        model = None
+        model_runner = lambda img, _: cv2.resize(img, (512,512), interpolation=cv2.INTER_LANCZOS4)
+    elif name == 'CUBIC':
+        model = None
+        model_runner = lambda img, _: cv2.resize(img, (512,512), interpolation=cv2.INTER_CUBIC)
+    elif name == 'NEAREST':
+        model = None
+        model_runner = lambda img, _: cv2.resize(img, (512,512), interpolation=cv2.INTER_NEAREST)
     else:
         model = None
     return (model, model_runner)
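A minimal, self-contained sketch (not part of the commit): the new OpenCV "enhancer" entries skip model loading entirely and register a runner with the same (image, model) calling convention as the neural enhancers, which just resizes the swapped face to 512x512. The dummy input below is a placeholder.

import cv2
import numpy as np

# Same shape of runner the commit registers for "LANCZOS4" / "CUBIC" / "NEAREST".
runner = lambda img, _: cv2.resize(img, (512, 512), interpolation=cv2.INTER_LANCZOS4)

face_128 = np.zeros((128, 128, 3), dtype=np.uint8)  # placeholder swapped face
face_512 = runner(face_128, None)                   # model argument is unused for cv2 modes
print(face_512.shape)                               # (512, 512, 3)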
face_parsing/__init__.py CHANGED
@@ -1,3 +1,3 @@
-from .swap import init_parser, swap_regions, mask_regions, mask_regions_to_list
+from .swap import init_parser, swap_regions, mask_regions, mask_regions_to_list
 from .model import BiSeNet
-from .parse_mask import init_parsing_model, get_parsed_mask
+from .parse_mask import init_parsing_model, get_parsed_mask, SoftErosion
face_parsing/parse_mask.py CHANGED
@@ -10,12 +10,55 @@ import torchvision.transforms as transforms
 
 from . model import BiSeNet
 
+class SoftErosion(nn.Module):
+    def __init__(self, kernel_size=15, threshold=0.6, iterations=1):
+        super(SoftErosion, self).__init__()
+        r = kernel_size // 2
+        self.padding = r
+        self.iterations = iterations
+        self.threshold = threshold
+
+        # Create kernel
+        y_indices, x_indices = torch.meshgrid(torch.arange(0., kernel_size), torch.arange(0., kernel_size))
+        dist = torch.sqrt((x_indices - r) ** 2 + (y_indices - r) ** 2)
+        kernel = dist.max() - dist
+        kernel /= kernel.sum()
+        kernel = kernel.view(1, 1, *kernel.shape)
+        self.register_buffer('weight', kernel)
+
+    def forward(self, x):
+        batch_size = x.size(0)  # Get the batch size
+        output = []
+
+        for i in tqdm(range(batch_size), desc="Soft-Erosion", leave=False):
+            input_tensor = x[i:i+1]  # Take one input tensor from the batch
+            input_tensor = input_tensor.float()  # Convert input to float tensor
+            input_tensor = input_tensor.unsqueeze(1)  # Add a channel dimension
+
+            for _ in range(self.iterations - 1):
+                input_tensor = torch.min(input_tensor, F.conv2d(input_tensor, weight=self.weight,
+                                                                groups=input_tensor.shape[1],
+                                                                padding=self.padding))
+            input_tensor = F.conv2d(input_tensor, weight=self.weight, groups=input_tensor.shape[1],
+                                    padding=self.padding)
+
+            mask = input_tensor >= self.threshold
+            input_tensor[mask] = 1.0
+            input_tensor[~mask] /= input_tensor[~mask].max()
+
+            input_tensor = input_tensor.squeeze(1)  # Remove the extra channel dimension
+            output.append(input_tensor.detach().cpu().numpy())
+
+        return np.array(output)
+
 transform = transforms.Compose([
     transforms.Resize((512, 512)),
     transforms.ToTensor(),
     transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
 ])
 
+
+
 def init_parsing_model(model_path, device="cpu"):
     net = BiSeNet(19)
     net.to(device)
@@ -27,7 +70,10 @@ def transform_images(imgs):
     tensor_images = torch.stack([transform(Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))) for img in imgs], dim=0)
     return tensor_images
 
-def get_parsed_mask(net, imgs, classes=[1, 2, 3, 4, 5, 10, 11, 12, 13], device="cpu", batch_size=8):
+def get_parsed_mask(net, imgs, classes=[1, 2, 3, 4, 5, 10, 11, 12, 13], device="cpu", batch_size=8, softness=20):
+    if softness > 0:
+        smooth_mask = SoftErosion(kernel_size=17, threshold=0.9, iterations=softness).to(device)
+
     masks = []
     for i in tqdm(range(0, len(imgs), batch_size), total=len(imgs) // batch_size, desc="Face-parsing"):
         batch_imgs = imgs[i:i + batch_size]
@@ -35,16 +81,27 @@ def get_parsed_mask(net, imgs, classes=[1, 2, 3, 4, 5, 10, 11, 12, 13], device="
         tensor_images = transform_images(batch_imgs).to(device)
         with torch.no_grad():
             out = net(tensor_images)[0]
-        parsing = out.argmax(dim=1)
-        …
+        # parsing = out.argmax(dim=1)
+        # arget_classes = torch.tensor(classes).to(device)
+        # batch_masks = torch.isin(parsing, target_classes).to(device)
+        ## torch.isin was slightly slower in my test, so using np.isin
+        parsing = out.argmax(dim=1).detach().cpu().numpy()
+        batch_masks = np.isin(parsing, classes).astype('float32')
 
-        …
+        if softness > 0:
+            # batch_masks = smooth_mask(batch_masks).transpose(1,0,2,3)[0]
+            mask_tensor = torch.from_numpy(batch_masks.copy()).float().to(device)
+            batch_masks = smooth_mask(mask_tensor).transpose(1,0,2,3)[0]
 
-        …
+        yield batch_masks
 
-        …
+        #masks.append(batch_masks)
 
-        …
+    #if len(masks) >= 1:
+    #    masks = np.concatenate(masks, axis=0)
+    #    masks = np.repeat(np.expand_dims(masks, axis=1), 3, axis=1)
 
+    #    for i, mask in enumerate(masks):
+    #        cv2.imwrite(f"mask/{i}.jpg", (mask * 255).astype("uint8"))
 
+    #return masks
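A minimal sketch (not part of the commit) of what the new softness path does: SoftErosion feathers a hard 0/1 parsing mask before it reaches paste_to_whole(). The mask contents and sizes below are placeholders; the reshape mirrors the transpose used in get_parsed_mask above.

import torch
import numpy as np
from face_parsing import SoftErosion  # re-exported by this commit

hard_mask = np.zeros((1, 512, 512), dtype="float32")
hard_mask[:, 100:400, 100:400] = 1.0                  # fake face region

smooth = SoftErosion(kernel_size=17, threshold=0.9, iterations=10)
soft = smooth(torch.from_numpy(hard_mask))            # numpy array of shape (1, 1, 512, 512)
soft = soft.transpose(1, 0, 2, 3)[0]                  # same reshaping the generator applies
print(soft.shape, soft.min(), soft.max())             # (1, 512, 512), values in [0, 1]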
face_swapper.py CHANGED
@@ -5,10 +5,13 @@ import cv2
 import onnxruntime
 import numpy as np
 from tqdm import tqdm
+import torch.nn as nn
 from onnx import numpy_helper
 from skimage import transform as trans
 import torchvision.transforms.functional as F
-…
+import torch.nn.functional as F
+from utils import mask_crop, laplacian_blending
+
 
 arcface_dst = np.array(
     [[38.2946, 51.6963], [73.5318, 51.5014], [56.0252, 71.7366],
@@ -46,72 +49,53 @@ class Inswapper():
         model = onnx.load(self.model_file)
         graph = model.graph
         self.emap = numpy_helper.to_array(graph.initializer[-1])
-        self.input_mean = 0.0
-        self.input_std = 255.0
 
         self.session_options = onnxruntime.SessionOptions()
         self.session = onnxruntime.InferenceSession(self.model_file, sess_options=self.session_options, providers=providers)
 
-        inputs = self.session.get_inputs()
-        self.input_names = [inp.name for inp in inputs]
-        outputs = self.session.get_outputs()
-        self.output_names = [out.name for out in outputs]
-        assert len(self.output_names) == 1
-        self.output_shape = outputs[0].shape
-        input_cfg = inputs[0]
-        input_shape = input_cfg.shape
-        self.input_shape = input_shape
-        self.input_size = tuple(input_shape[2:4][::-1])
-
     def forward(self, imgs, latents):
         preds = []
         for img, latent in zip(imgs, latents):
-            img = …
-            pred = self.session.run(…
+            img = img / 255
+            pred = self.session.run(['output'], {'target': img, 'source': latent})[0]
             preds.append(pred)
 
     def get(self, imgs, target_faces, source_faces):
         imgs = list(imgs)
 
         preds = [None] * len(imgs)
-        aimgs = [None] * len(imgs)
         matrs = [None] * len(imgs)
 
         for idx, (img, target_face, source_face) in enumerate(zip(imgs, target_faces, source_faces)):
-            …
-            matrs[idx] = M
-            pred = self.session.run(self.output_names, {self.input_names[0]: blob, self.input_names[1]: latent})[0]
+            matrix, blob, latent = self.prepare_data(img, target_face, source_face)
+            pred = self.session.run(['output'], {'target': blob, 'source': latent})[0]
             pred = pred.transpose((0, 2, 3, 1))[0]
             pred = np.clip(255 * pred, 0, 255).astype(np.uint8)[:, :, ::-1]
+
             preds[idx] = pred
+            matrs[idx] = matrix
 
-        return (preds, …
+        return (preds, matrs)
 
     def prepare_data(self, img, target_face, source_face):
         if isinstance(img, str):
             img = cv2.imread(img)
 
-        …
+        aligned_img, matrix = norm_crop2(img, target_face.kps, 128)
 
-        blob = cv2.dnn.blobFromImage(…
-            (self.input_mean, self.input_mean, self.input_mean), swapRB=True)
+        blob = cv2.dnn.blobFromImage(aligned_img, 1.0 / 255, (128, 128), (0., 0., 0.), swapRB=True)
 
         latent = source_face.normed_embedding.reshape((1, -1))
         latent = np.dot(latent, self.emap)
         latent /= np.linalg.norm(latent)
 
-        return (…
+        return (matrix, blob, latent)
 
     def batch_forward(self, img_list, target_f_list, source_f_list):
         num_samples = len(img_list)
         num_batches = (num_samples + self.batch_size - 1) // self.batch_size
 
-        preds = []
-        aimgs = []
-        matrs = []
-
-        for i in tqdm(range(num_batches), desc="Swapping face"):
+        for i in tqdm(range(num_batches), desc="Generating face"):
             start_idx = i * self.batch_size
             end_idx = min((i + 1) * self.batch_size, num_samples)
 
@@ -119,47 +103,48 @@ class Inswapper():
             batch_target_f = target_f_list[start_idx:end_idx]
             batch_source_f = source_f_list[start_idx:end_idx]
 
-            batch_pred, … = self.get(batch_img, batch_target_f, batch_source_f)
-            preds.extend(batch_pred)
-            aimgs.extend(batch_aimg)
-            matrs.extend(batch_matr)
-
-        return (preds, aimgs, matrs)
-
+            batch_pred, batch_matr = self.get(batch_img, batch_target_f, batch_source_f)
 
-…
-    IM = cv2.invertAffineTransform(M)
+            yield batch_pred, batch_matr
 
-    img_white = make_white_image(aimg.shape[:2], crop=crop_mask, white_value=255)
 
-    …
+def paste_to_whole(foreground, background, matrix, mask=None, crop_mask=(0,0,0,0), blur_amount=0.1, erode_amount = 0.15, blend_method='linear'):
+    inv_matrix = cv2.invertAffineTransform(matrix)
+    fg_shape = foreground.shape[:2]
+    bg_shape = (background.shape[1], background.shape[0])
+    foreground = cv2.warpAffine(foreground, inv_matrix, bg_shape, borderValue=0.0)
 
-    matrix = cv2.invertAffineTransform(matrix)
-    mask = np.ones(foreground.shape, dtype="float32")
-    foreground = cv2.warpAffine(foreground, matrix, (background.shape[1], background.shape[0]), borderValue=0.0)
-    mask = cv2.warpAffine(mask, matrix, (background.shape[1], background.shape[0]), borderValue=0.0)
-    composite_image = mask * foreground + (1 - mask) * background
-    return composite_image
+    if mask is None:
+        mask = np.full(fg_shape, 1., dtype=np.float32)
+        mask = mask_crop(mask, crop_mask)
+        mask = cv2.warpAffine(mask, inv_matrix, bg_shape, borderValue=0.0)
+    else:
+        assert fg_shape == mask.shape[:2], "foreground & mask shape mismatch!"
+        mask = mask_crop(mask, crop_mask).astype('float32')
+        mask = cv2.warpAffine(mask, inv_matrix, (background.shape[1], background.shape[0]), borderValue=0.0)
+
+    _mask = mask.copy()
+    _mask[_mask > 0.05] = 1.
+    non_zero_points = cv2.findNonZero(_mask)
+    _, _, w, h = cv2.boundingRect(non_zero_points)
+    mask_size = int(np.sqrt(w * h))
+
+    if erode_amount > 0:
+        kernel_size = max(int(mask_size * erode_amount), 1)
+        structuring_element = cv2.getStructuringElement(cv2.MORPH_RECT, (kernel_size, kernel_size))
+        mask = cv2.erode(mask, structuring_element)
+
+    if blur_amount > 0:
+        kernel_size = max(int(mask_size * blur_amount), 3)
+        if kernel_size % 2 == 0:
+            kernel_size += 1
+        mask = cv2.GaussianBlur(mask, (kernel_size, kernel_size), 0)
+
+    mask = np.tile(np.expand_dims(mask, axis=-1), (1, 1, 3))
+
+    if blend_method == 'laplacian':
+        composite_image = laplacian_blending(foreground, background, mask.clip(0,1), num_levels=4)
+    else:
+        composite_image = mask * foreground + (1 - mask) * background
 
+    return composite_image.astype("uint8").clip(0, 255)
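A minimal sketch (not part of the commit) of the reworked paste-back call: a 512x512 swapped face is warped back into the original frame with an optional parsing mask, crop mask, erode/blur softening and a choice of blend method. All arrays and the affine matrix below are placeholders; in app.py the matrix comes from norm_crop2 and is rescaled with m /= 0.25 first.

import numpy as np
from face_swapper import paste_to_whole

face_512 = np.zeros((512, 512, 3), dtype=np.uint8)        # enhanced swap result
frame = np.zeros((720, 1280, 3), dtype=np.uint8)          # original frame
matrix = np.array([[0.25, 0.0, 100.0],
                   [0.0, 0.25, 50.0]], dtype=np.float32)  # placeholder frame->crop affine

out = paste_to_whole(
    face_512, frame, matrix,
    mask=None,                  # falls back to a full white mask
    crop_mask=(0, 0, 0, 0),     # (top, bottom, left, right), 0 = no crop
    blur_amount=0.1, erode_amount=0.15,
    blend_method="linear",      # or "laplacian"
)
print(out.shape, out.dtype)     # (720, 1280, 3) uint8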
nsfw_checker/LICENSE.md ADDED
@@ -0,0 +1,11 @@
+
+Copyright 2016, Yahoo Inc.
+
+Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
nsfw_checker/__init__.py ADDED
@@ -0,0 +1 @@
+from . opennsfw import NSFWChecker
nsfw_checker/opennsfw.py ADDED
@@ -0,0 +1,37 @@
+import cv2
+import torch
+import onnx
+import onnxruntime
+import numpy as np
+from tqdm import tqdm
+
+# https://github.com/yahoo/open_nsfw
+
+class NSFWChecker:
+    def __init__(self, model_path=None, providers=["CPUExecutionProvider"]):
+        model = onnx.load(model_path)
+        self.input_name = model.graph.input[0].name
+        session_options = onnxruntime.SessionOptions()
+        self.session = onnxruntime.InferenceSession(model_path, sess_options=session_options, providers=providers)
+
+    def is_nsfw(self, img_paths, threshold = 0.85):
+        skip_step = 1
+        total_len = len(img_paths)
+        if total_len < 100: skip_step = 1
+        if total_len > 100 and total_len < 500: skip_step = 10
+        if total_len > 500 and total_len < 1000: skip_step = 20
+        if total_len > 1000 and total_len < 10000: skip_step = 50
+        if total_len > 10000: skip_step = 100
+
+        for idx in tqdm(range(0, total_len, skip_step), total=int(total_len // skip_step), desc="Checking for NSFW contents"):
+            img = cv2.imread(img_paths[idx])
+            img = cv2.resize(img, (224,224)).astype('float32')
+            img -= np.array([104, 117, 123], dtype=np.float32)
+            img = np.expand_dims(img, axis=0)
+
+            score = self.session.run(None, {self.input_name:img})[0][0][1]
+
+            if score > threshold:
+                print(f"Detected nsfw score:{score}")
+                return True
+        return False
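A minimal sketch (not part of the commit) of how app.py drives the new checker: it is constructed once with the bundled open-nsfw.onnx model and then handed the frame paths of the current job, sampling every skip_step-th frame. The frame paths below are hypothetical.

from nsfw_checker import NSFWChecker

checker = NSFWChecker(
    model_path="./assets/pretrained_models/open-nsfw.onnx",
    providers=["CPUExecutionProvider"],
)
frame_paths = ["frames/0001.jpg", "frames/0002.jpg"]  # placeholder image sequence
if checker.is_nsfw(frame_paths, threshold=0.85):
    print("NSFW content detected - aborting swap")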
requirements.txt CHANGED
@@ -9,5 +9,4 @@ onnxruntime==1.15.0
 opencv-python>=4.7.0.72
 opencv-python-headless>=4.7.0.72
 gfpgan==1.3.8
-timm==0.9.2
 
utils.py CHANGED
@@ -2,7 +2,6 @@ import os
 import cv2
 import time
 import glob
-import torch
 import shutil
 import platform
 import datetime
@@ -11,7 +10,6 @@ import numpy as np
 from threading import Thread
 from moviepy.editor import VideoFileClip, ImageSequenceClip
 from moviepy.video.io.ffmpeg_tools import ffmpeg_extract_subclip
-from face_parsing import init_parser, swap_regions, mask_regions, mask_regions_to_list, SoftErosion
 
 
 logo_image = cv2.imread("./assets/images/logo.png", cv2.IMREAD_UNCHANGED)
@@ -69,7 +67,7 @@ def trim_video(video_path, output_path, start_frame, stop_frame):
     os.makedirs(temp_path, exist_ok=True)
     trimmed_video_file_path = os.path.join(temp_path, trimmed_video_filename)
 
-    video = VideoFileClip(video_path)
+    video = VideoFileClip(video_path, fps_source="fps")
     fps = video.fps
     start_time = start_frame / fps
     duration = (stop_frame - start_frame) / fps
@@ -174,7 +172,7 @@ def split_list_by_lengths(data, length_list):
 
 
 def merge_img_sequence_from_ref(ref_video_path, image_sequence, output_file_name):
-    video_clip = VideoFileClip(ref_video_path)
+    video_clip = VideoFileClip(ref_video_path, fps_source="fps")
     fps = video_clip.fps
     duration = video_clip.duration
     total_frames = video_clip.reader.nframes
@@ -224,12 +222,12 @@ def scale_bbox_from_center(bbox, scale_width, scale_height, image_width, image_h
     return scaled_bbox
 
 
-def laplacian_blending(A, B, m, num_levels=4):
+def laplacian_blending(A, B, m, num_levels=7):
     assert A.shape == B.shape
     assert B.shape == m.shape
     height = m.shape[0]
     width = m.shape[1]
-    size_list = np.array([4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096])
+    size_list = np.array([4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192])
     size = size_list[np.where(size_list > max(height, width))][0]
     GA = np.zeros((size, size, 3), dtype=np.float32)
     GA[:height, :width, :] = A
@@ -264,30 +262,42 @@ def laplacian_blending(A, B, m, num_levels=4):
     for i in range(1,num_levels):
         ls_ = cv2.pyrUp(ls_)
         ls_ = cv2.add(ls_, LS[i])
-    ls_ = …
-    …
-    def …
-    …
+    ls_ = ls_[:height, :width, :]
+    #ls_ = (ls_ - np.min(ls_)) * (255.0 / (np.max(ls_) - np.min(ls_)))
+    return ls_.clip(0, 255)
+
+
+def mask_crop(mask, crop):
+    top, bottom, left, right = crop
+    shape = mask.shape
+    top = int(top)
+    bottom = int(bottom)
+    if top + bottom < shape[1]:
+        if top > 0: mask[:top, :] = 0
+        if bottom > 0: mask[-bottom:, :] = 0
+
+    left = int(left)
+    right = int(right)
+    if left + right < shape[0]:
+        if left > 0: mask[:, :left] = 0
+        if right > 0: mask[:, -right:] = 0
+
+    return mask
+
+def create_image_grid(images, size=128):
+    num_images = len(images)
+    num_cols = int(np.ceil(np.sqrt(num_images)))
+    num_rows = int(np.ceil(num_images / num_cols))
+    grid = np.zeros((num_rows * size, num_cols * size, 3), dtype=np.uint8)
+
+    for i, image in enumerate(images):
+        row_idx = (i // num_cols) * size
+        col_idx = (i % num_cols) * size
+        image = cv2.resize(image.copy(), (size,size))
+        if image.dtype != np.uint8:
+            image = (image.astype('float32') * 255).astype('uint8')
+        if image.ndim == 2:
+            image = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
+        grid[row_idx:row_idx + size, col_idx:col_idx + size] = image
+
+    return grid