russel0719 committed on
Commit
9d5fece
·
1 Parent(s): 55da56b

Delete preprocessing

Browse files
preprocessing/__init__.py DELETED
@@ -1 +0,0 @@
1
- from .face_detector import *
 
 
preprocessing/compress_videos.py DELETED
@@ -1,45 +0,0 @@
1
- import argparse
2
- import os
3
- import random
4
- import subprocess
5
-
6
- os.environ["MKL_NUM_THREADS"] = "1"
7
- os.environ["NUMEXPR_NUM_THREADS"] = "1"
8
- os.environ["OMP_NUM_THREADS"] = "1"
9
- from functools import partial
10
- from glob import glob
11
- from multiprocessing.pool import Pool
12
- from os import cpu_count
13
-
14
- import cv2
15
-
16
- cv2.ocl.setUseOpenCL(False)
17
- cv2.setNumThreads(0)
18
- from tqdm import tqdm
19
-
20
-
21
def compress_video(video, root_dir):
    """Re-encode one video with libx264 at a randomly chosen CRF level.

    The result is written to
    ``<root_dir>/compressed/<name of the video's parent directory>/<file name>``.
    This is a best-effort batch step: a failing ffmpeg run is reported on
    stdout and otherwise ignored.

    Args:
        video: path of the source video file.
        root_dir: directory under which the ``compressed`` tree is created.
    """
    parent_dir = os.path.basename(os.path.dirname(video))
    out_dir = os.path.join(root_dir, "compressed", parent_dir)
    os.makedirs(out_dir, exist_ok=True)
    out_path = os.path.join(out_dir, os.path.basename(video))
    lvl = random.choice([23, 28, 32])
    # Pass an argv list instead of a shell string so that paths containing
    # spaces or shell metacharacters are handed to ffmpeg verbatim.
    command = ["ffmpeg", "-i", video, "-c:v", "libx264", "-crf", str(lvl), "-threads", "1", out_path]
    try:
        subprocess.check_output(command, stderr=subprocess.STDOUT)
    except (subprocess.CalledProcessError, OSError) as e:
        # OSError covers a missing ffmpeg binary now that no shell is involved.
        print("Could not process video", str(e))
33
-
34
-
35
if __name__ == '__main__':
    # Script entry point: compress every "<root>/*/*.mp4" in a process pool.
    parser = argparse.ArgumentParser(
        description="Compresses videos with ffmpeg (libx264, random CRF)")
    parser.add_argument("--root-dir", help="root directory", default="/mnt/sota/datasets/deepfake")

    args = parser.parse_args()
    videos = glob(os.path.join(args.root_dir, "*/*.mp4"))
    # Leave two cores free, but never request fewer than one worker:
    # Pool rejects a non-positive process count, and cpu_count() may be None.
    workers = max(1, (cpu_count() or 1) - 2)
    with Pool(processes=workers) as p:
        with tqdm(total=len(videos)) as pbar:
            for _ in p.imap_unordered(partial(compress_video, root_dir=args.root_dir), videos):
                pbar.update()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
preprocessing/detect_original_faces.py DELETED
@@ -1,51 +0,0 @@
1
- import argparse
2
- import json
3
- import os
4
- from os import cpu_count
5
- from typing import Type
6
-
7
- from torch.utils.data.dataloader import DataLoader
8
- from tqdm import tqdm
9
-
10
- from preprocessing import face_detector, VideoDataset
11
- from preprocessing.face_detector import VideoFaceDetector
12
- from preprocessing.utils import get_original_video_paths
13
-
14
-
15
def parse_args():
    """Parse the command line of the original-video face detection script.

    Returns:
        argparse.Namespace with ``root_dir`` and ``detector_type``.
    """
    parser = argparse.ArgumentParser(
        description="Process original videos with a face detector")
    # Required: without it root_dir would be None and the first
    # os.path.join() downstream would fail with an opaque TypeError.
    parser.add_argument("--root-dir", help="root directory", required=True)
    parser.add_argument("--detector-type", help="type of the detector", default="FacenetDetector",
                        choices=["FacenetDetector"])
    args = parser.parse_args()
    return args
23
-
24
-
25
def process_videos(videos, root_dir, detector_cls: Type[VideoFaceDetector]):
    """Run a face detector over every video and dump the boxes as JSON.

    For each video a file ``<root_dir>/boxes/<video id>.json`` is written that
    maps frame index -> detection result for that frame.

    Args:
        videos: video paths, wrapped in a ``VideoDataset``.
        root_dir: directory that receives the ``boxes`` folder.
        detector_cls: name of a detector class in ``preprocessing.face_detector``
            (this is what the CLI passes); a detector class is accepted as well.
    """
    if isinstance(detector_cls, str):
        detector_cls = face_detector.__dict__[detector_cls]
    detector = detector_cls(device="cuda:0")
    dataset = VideoDataset(videos)
    # DataLoader rejects a negative worker count; fall back to in-process
    # loading (0 workers) on machines with two cores or fewer.
    workers = max(0, (cpu_count() or 0) - 2)
    loader = DataLoader(dataset, shuffle=False, num_workers=workers, batch_size=1, collate_fn=lambda x: x)
    batch_size = detector._batch_size
    for item in tqdm(loader):
        result = {}
        video, indices, frames = item[0]
        for start in range(0, len(frames), batch_size):
            # Slice the frame indices in step with the frames so every
            # detection is stored under the frame's real index, even when
            # some frames were dropped while decoding.
            batch_indices = indices[start:start + batch_size]
            detections = detector._detect_faces(frames[start:start + batch_size])
            result.update(zip(batch_indices, detections))
        video_id = os.path.splitext(os.path.basename(video))[0]
        out_dir = os.path.join(root_dir, "boxes")
        os.makedirs(out_dir, exist_ok=True)
        with open(os.path.join(out_dir, "{}.json".format(video_id)), "w") as f:
            json.dump(result, f)
40
-
41
-
42
-
43
-
44
def main():
    """Detect faces in all original videos found under the given root directory."""
    cli = parse_args()
    root = cli.root_dir
    process_videos(get_original_video_paths(root), root, cli.detector_type)


if __name__ == "__main__":
    main()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
preprocessing/extract_crops.py DELETED
@@ -1,86 +0,0 @@
1
- import argparse
2
- import json
3
- import os
4
- from os import cpu_count
5
- from pathlib import Path
6
-
7
- os.environ["MKL_NUM_THREADS"] = "1"
8
- os.environ["NUMEXPR_NUM_THREADS"] = "1"
9
- os.environ["OMP_NUM_THREADS"] = "1"
10
- from functools import partial
11
- from glob import glob
12
- from multiprocessing.pool import Pool
13
-
14
- import cv2
15
-
16
- cv2.ocl.setUseOpenCL(False)
17
- cv2.setNumThreads(0)
18
- from tqdm import tqdm
19
-
20
-
21
def extract_video(param, root_dir, crops_dir):
    """Save padded face crops for every 10th frame of one video.

    Crops are written as ``<root_dir>/<crops_dir>/<video id>/<frame>_<face>.png``.

    Args:
        param: ``(video_path, bboxes_json_path)``; the JSON file maps a frame
            index (as a string) to a list of ``[xmin, ymin, xmax, ymax]`` boxes
            or to ``None``.
        root_dir: dataset root directory.
        crops_dir: sub-directory of ``root_dir`` that receives the crops.
    """
    video, bboxes_path = param
    with open(bboxes_path, "r") as bbox_f:
        bboxes_dict = json.load(bbox_f)

    video_id = os.path.splitext(os.path.basename(video))[0]
    img_dir = os.path.join(root_dir, crops_dir, video_id)
    capture = cv2.VideoCapture(video)
    try:
        frames_num = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))
        for i in range(frames_num):
            capture.grab()
            if i % 10 != 0:
                continue
            success, frame = capture.retrieve()
            bboxes = bboxes_dict.get(str(i))
            if not success or bboxes is None:
                continue
            # Created lazily so videos without any usable frame leave no
            # empty directory behind.
            os.makedirs(img_dir, exist_ok=True)
            for j, bbox in enumerate(bboxes):
                # NOTE(review): boxes are scaled by 2, presumably because
                # detection ran on half-resolution frames - confirm.
                xmin, ymin, xmax, ymax = [int(b * 2) for b in bbox]
                # Pad each side by a third of the box size; only the
                # top/left edge needs clamping, slicing clips bottom/right.
                p_w = (xmax - xmin) // 3
                p_h = (ymax - ymin) // 3
                crop = frame[max(ymin - p_h, 0):ymax + p_h, max(xmin - p_w, 0):xmax + p_w]
                cv2.imwrite(os.path.join(img_dir, "{}_{}.png".format(i, j)), crop)
    finally:
        # Free the decoder handle; worker processes handle many videos.
        capture.release()
54
-
55
-
56
def get_video_paths(root_dir):
    """Pair every listed video with the box file of its original video.

    Reads each ``<root_dir>/*/metadata.json``. For every video key, the box
    file is ``<root_dir>/boxes/<original stem>.json``, where the original is
    the entry's ``"original"`` value or, when that is missing/empty, the video
    itself. Videos whose box file does not exist are skipped.

    Returns:
        list of ``(video_path, bboxes_path)`` tuples.
    """
    paths = []
    for json_path in glob(os.path.join(root_dir, "*/metadata.json")):
        part_dir = Path(json_path).parent
        with open(json_path, "r") as f:
            metadata = json.load(f)
        for k, v in metadata.items():
            original = v.get("original") or k
            # splitext instead of [:-4]: does not assume a 4-character
            # extension when stripping it from the file name.
            stem = os.path.splitext(original)[0]
            bboxes_path = os.path.join(root_dir, "boxes", stem + ".json")
            if not os.path.exists(bboxes_path):
                continue
            paths.append((os.path.join(part_dir, k), bboxes_path))

    return paths
72
-
73
-
74
if __name__ == '__main__':
    # Script entry point: extract face crops for all videos that have boxes.
    parser = argparse.ArgumentParser(
        description="Extracts crops from video")
    # Both paths are joined immediately below, so a missing value would only
    # surface as a TypeError from os.path.join; let argparse report it.
    parser.add_argument("--root-dir", help="root directory", required=True)
    parser.add_argument("--crops-dir", help="crops directory", required=True)

    args = parser.parse_args()
    os.makedirs(os.path.join(args.root_dir, args.crops_dir), exist_ok=True)
    params = get_video_paths(args.root_dir)
    worker = partial(extract_video, root_dir=args.root_dir, crops_dir=args.crops_dir)
    with Pool(processes=cpu_count()) as p:
        with tqdm(total=len(params)) as pbar:
            for _ in p.imap_unordered(worker, params):
                pbar.update()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
preprocessing/extract_images.py DELETED
@@ -1,42 +0,0 @@
1
- import argparse
2
- import os
3
- os.environ["MKL_NUM_THREADS"] = "1"
4
- os.environ["NUMEXPR_NUM_THREADS"] = "1"
5
- os.environ["OMP_NUM_THREADS"] = "1"
6
- from functools import partial
7
- from glob import glob
8
- from multiprocessing.pool import Pool
9
- from os import cpu_count
10
-
11
- import cv2
12
- cv2.ocl.setUseOpenCL(False)
13
- cv2.setNumThreads(0)
14
- from tqdm import tqdm
15
-
16
-
17
def extract_video(video, root_dir):
    """Dump every decodable frame of a video as a quality-100 JPEG.

    Frames are written to ``<root_dir>/jpegs/<video id>_<frame index>.jpg``;
    the ``jpegs`` directory is expected to exist already.

    Args:
        video: path of the video file.
        root_dir: directory that contains the ``jpegs`` folder.
    """
    video_id = os.path.splitext(os.path.basename(video))[0]
    capture = cv2.VideoCapture(video)
    try:
        frames_num = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))
        for i in range(frames_num):
            capture.grab()
            success, frame = capture.retrieve()
            if not success:
                continue
            out_path = os.path.join(root_dir, "jpegs", "{}_{}.jpg".format(video_id, i))
            cv2.imwrite(out_path, frame, [cv2.IMWRITE_JPEG_QUALITY, 100])
    finally:
        # Free the decoder handle; worker processes handle many videos.
        capture.release()
28
-
29
-
30
-
31
if __name__ == '__main__':
    # Script entry point: dump frames of every "<root>/*/*.mp4" as JPEGs.
    parser = argparse.ArgumentParser(
        description="Extracts jpegs from video")
    # Required: root_dir is joined right below and None would raise TypeError.
    parser.add_argument("--root-dir", help="root directory", required=True)

    args = parser.parse_args()
    os.makedirs(os.path.join(args.root_dir, "jpegs"), exist_ok=True)
    videos = glob(os.path.join(args.root_dir, "*/*.mp4"))
    # Leave two cores free, but never request fewer than one worker:
    # Pool rejects a non-positive process count, and cpu_count() may be None.
    workers = max(1, (cpu_count() or 1) - 2)
    with Pool(processes=workers) as p:
        with tqdm(total=len(videos)) as pbar:
            for _ in p.imap_unordered(partial(extract_video, root_dir=args.root_dir), videos):
                pbar.update()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
preprocessing/face_detector.py DELETED
@@ -1,72 +0,0 @@
1
- import os
2
- os.environ["MKL_NUM_THREADS"] = "1"
3
- os.environ["NUMEXPR_NUM_THREADS"] = "1"
4
- os.environ["OMP_NUM_THREADS"] = "1"
5
-
6
- from abc import ABC, abstractmethod
7
- from collections import OrderedDict
8
- from typing import List
9
-
10
- import cv2
11
- cv2.ocl.setUseOpenCL(False)
12
- cv2.setNumThreads(0)
13
-
14
- from PIL import Image
15
- from facenet_pytorch.models.mtcnn import MTCNN
16
- from torch.utils.data import Dataset
17
-
18
-
19
class VideoFaceDetector(ABC):
    """Abstract interface of a face detector that works on batches of frames."""

    def __init__(self, **kwargs) -> None:
        # Keyword arguments are accepted for subclasses' benefit and ignored here.
        super().__init__()

    @abstractmethod
    def _detect_faces(self, frames) -> List:
        """Return one detection result per frame in ``frames``."""

    @property
    @abstractmethod
    def _batch_size(self) -> int:
        """Number of frames to hand to ``_detect_faces`` per call."""
32
-
33
-
34
class FacenetDetector(VideoFaceDetector):
    """Face detector backed by facenet-pytorch's MTCNN."""

    def __init__(self, device="cuda:0") -> None:
        super().__init__()
        self.detector = MTCNN(margin=0, thresholds=[0.85, 0.95, 0.95], device=device)

    @property
    def _batch_size(self):
        """Frames per detection call."""
        return 32

    def _detect_faces(self, frames) -> List:
        """Return, per frame, the detected boxes as nested lists, or None."""
        batch_boxes, *_ = self.detector.detect(frames, landmarks=False)
        boxes = []
        for frame_boxes in batch_boxes:
            boxes.append(None if frame_boxes is None else frame_boxes.tolist())
        return boxes
47
-
48
-
49
class VideoDataset(Dataset):
    """Dataset that decodes a whole video into half-resolution RGB PIL frames."""

    def __init__(self, videos) -> None:
        super().__init__()
        self.videos = videos  # sequence of video file paths

    def __getitem__(self, index: int):
        """Decode video ``index``.

        Returns:
            ``(video_path, frame_indices, frames)``: ``frame_indices`` holds the
            positions of the frames that decoded successfully and ``frames``
            the matching PIL images, resized to half width and height.
        """
        video = self.videos[index]
        capture = cv2.VideoCapture(video)
        frames = OrderedDict()
        try:
            frames_num = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))
            for i in range(frames_num):
                capture.grab()
                success, frame = capture.retrieve()
                if not success:
                    continue
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                frame = Image.fromarray(frame)
                frame = frame.resize(size=[s // 2 for s in frame.size])
                frames[i] = frame
        finally:
            # Release the decoder; loader workers open many videos in a row.
            capture.release()
        return video, list(frames.keys()), list(frames.values())

    def __len__(self) -> int:
        return len(self.videos)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
preprocessing/face_encodings.py DELETED
@@ -1,55 +0,0 @@
1
- import argparse
2
- import os
3
- from functools import partial
4
- from multiprocessing.pool import Pool
5
-
6
- from tqdm import tqdm
7
-
8
- from preprocessing.utils import get_original_video_paths
9
-
10
- os.environ["MKL_NUM_THREADS"] = "1"
11
- os.environ["NUMEXPR_NUM_THREADS"] = "1"
12
- os.environ["OMP_NUM_THREADS"] = "1"
13
-
14
- import random
15
-
16
- import face_recognition
17
- import numpy as np
18
-
19
-
20
def write_face_encodings(video, root_dir):
    """Compute face encodings for up to 10 random crops of one video.

    Looks for crop images in ``<root_dir>/crops/<video stem>`` and stores the
    collected encodings in ``encodings.npy`` inside that directory. Nothing is
    written when the directory is missing or holds no crop images.

    Args:
        video: video file name; its extension is stripped to get the crop folder.
        root_dir: dataset root directory.
    """
    video_id = os.path.splitext(video)[0]
    crops_dir = os.path.join(root_dir, "crops", video_id)
    if not os.path.exists(crops_dir):
        return
    # The crop extractor of this pipeline writes ".png" files; filtering for
    # "jpg" only meant no crop was ever picked up. Accept both formats.
    crop_files = [f for f in os.listdir(crops_dir) if f.endswith((".png", ".jpg", ".jpeg"))]
    if not crop_files:
        return
    crop_files = random.sample(crop_files, min(10, len(crop_files)))
    encodings = []
    for crop_file in crop_files:
        img = face_recognition.load_image_file(os.path.join(crops_dir, crop_file))
        encoding = face_recognition.face_encodings(img, num_jitters=10)
        if encoding:
            # Keep only the first face found in the crop.
            encodings.append(encoding[0])
    np.save(os.path.join(crops_dir, "encodings"), encodings)
35
-
36
-
37
def parse_args():
    """Read the ``--root-dir`` option of the face-encoding script from argv."""
    cli = argparse.ArgumentParser(description="Extract 10 crops encodings for each video")
    cli.add_argument("--root-dir", default="/home/selim/datasets/deepfake", help="root directory")
    return cli.parse_args()
43
-
44
-
45
def main():
    """Write face encodings for every original video, using a process pool."""
    args = parse_args()
    originals = get_original_video_paths(args.root_dir, basename=True)
    # Keep four cores free, but never request fewer than one worker: Pool
    # rejects a non-positive process count, and os.cpu_count() may be None.
    workers = max(1, (os.cpu_count() or 1) - 4)
    with Pool(processes=workers) as p:
        with tqdm(total=len(originals)) as pbar:
            for _ in p.imap_unordered(partial(write_face_encodings, root_dir=args.root_dir), originals):
                pbar.update()


if __name__ == '__main__':
    main()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
preprocessing/generate_diffs.py DELETED
@@ -1,73 +0,0 @@
1
- import argparse
2
- import os
3
-
4
- os.environ["MKL_NUM_THREADS"] = "1"
5
- os.environ["NUMEXPR_NUM_THREADS"] = "1"
6
- os.environ["OMP_NUM_THREADS"] = "1"
7
- from skimage.measure import compare_ssim
8
-
9
- from functools import partial
10
- from multiprocessing.pool import Pool
11
-
12
- from tqdm import tqdm
13
-
14
- from preprocessing.utils import get_original_with_fakes
15
-
16
- import cv2
17
-
18
- cv2.ocl.setUseOpenCL(False)
19
- cv2.setNumThreads(0)
20
-
21
- import numpy as np
22
-
23
- cache = {}
24
-
25
-
26
def save_diffs(pair, root_dir):
    """Write SSIM difference masks between an original and a fake video's crops.

    For frames 0, 10, ..., 310 and face slots 0 and 1, the crops
    ``<root_dir>/crops/<id>/<frame>_<slot>.png`` of both videos are compared
    and ``<root_dir>/diffs/<fake id>/<frame>_<slot>_diff.png`` is written as a
    grayscale image of ``1 - SSIM``. Crop pairs that are missing, unreadable
    or fail to compare are skipped.

    Args:
        pair: ``(original_id, fake_id)``.
        root_dir: dataset root directory.
    """
    ori_id, fake_id = pair
    ori_dir = os.path.join(root_dir, "crops", ori_id)
    fake_dir = os.path.join(root_dir, "crops", fake_id)
    diff_dir = os.path.join(root_dir, "diffs", fake_id)
    os.makedirs(diff_dir, exist_ok=True)
    for frame in range(0, 320, 10):
        for actor in range(2):
            image_id = "{}_{}.png".format(frame, actor)
            diff_image_id = "{}_{}_diff.png".format(frame, actor)
            ori_path = os.path.join(ori_dir, image_id)
            fake_path = os.path.join(fake_dir, image_id)
            diff_path = os.path.join(diff_dir, diff_image_id)
            if not (os.path.exists(ori_path) and os.path.exists(fake_path)):
                continue
            img1 = cv2.imread(ori_path, cv2.IMREAD_COLOR)
            img2 = cv2.imread(fake_path, cv2.IMREAD_COLOR)
            if img1 is None or img2 is None:
                # cv2.imread signals an unreadable file by returning None.
                continue
            try:
                d, a = compare_ssim(img1, img2, multichannel=True, full=True)
                a = 1 - a
                diff = (a * 255).astype(np.uint8)
                diff = cv2.cvtColor(diff, cv2.COLOR_BGR2GRAY)
                cv2.imwrite(diff_path, diff)
            except Exception as e:
                # Still best-effort (e.g. crops of different sizes), but no
                # longer silent and no longer swallowing KeyboardInterrupt.
                print("Could not compute diff for {}: {}".format(fake_path, e))
52
-
53
def parse_args():
    """Read the ``--root-dir`` option of the diff-generation script from argv."""
    cli = argparse.ArgumentParser(description="Extract image diffs")
    cli.add_argument("--root-dir", default="/mnt/sota/datasets/deepfake", help="root directory")
    return cli.parse_args()
59
-
60
-
61
def main():
    """Generate SSIM diff masks for every (original, fake) pair in a process pool."""
    args = parse_args()
    pairs = get_original_with_fakes(args.root_dir)
    os.makedirs(os.path.join(args.root_dir, "diffs"), exist_ok=True)
    # Leave two cores free, but never request fewer than one worker: Pool
    # rejects a non-positive process count, and os.cpu_count() may be None.
    workers = max(1, (os.cpu_count() or 1) - 2)
    with Pool(processes=workers) as p:
        with tqdm(total=len(pairs)) as pbar:
            func = partial(save_diffs, root_dir=args.root_dir)
            for _ in p.imap_unordered(func, pairs):
                pbar.update()


if __name__ == '__main__':
    main()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
preprocessing/generate_folds.py DELETED
@@ -1,114 +0,0 @@
1
- import argparse
2
- import json
3
- import os
4
- import random
5
- from functools import partial
6
- from multiprocessing.pool import Pool
7
- from pathlib import Path
8
-
9
- os.environ["MKL_NUM_THREADS"] = "1"
10
- os.environ["NUMEXPR_NUM_THREADS"] = "1"
11
- os.environ["OMP_NUM_THREADS"] = "1"
12
- import pandas as pd
13
-
14
- from tqdm import tqdm
15
-
16
- from preprocessing.utils import get_original_with_fakes
17
-
18
- import cv2
19
-
20
- cv2.ocl.setUseOpenCL(False)
21
- cv2.setNumThreads(0)
22
-
23
-
24
def get_paths(vid, label, root_dir):
    """List the existing crop images of one video for the folds table.

    Checks frames 0, 10, ..., 310 and face slots 0 and 1 under
    ``<root_dir>/crops/<video>/<frame>_<slot>.png``.

    Args:
        vid: ``(original_video_id, fake_video_id)``.
        label: 0 to list the original video's crops, anything else for the fake's.
        root_dir: dataset root directory.

    Returns:
        list of ``[image_path, label, original_video_id]`` rows.
    """
    ori_vid, fake_vid = vid
    # Only one of the two crop folders is relevant for a given label.
    crop_dir = os.path.join(root_dir, "crops", ori_vid if label == 0 else fake_vid)
    data = []
    for frame in range(0, 320, 10):
        for actor in range(2):
            img_path = os.path.join(crop_dir, "{}_{}.png".format(frame, actor))
            if os.path.exists(img_path):
                data.append([img_path, label, ori_vid])
    return data
44
-
45
-
46
def parse_args():
    """Parse the command line of the fold-generation script.

    Returns:
        argparse.Namespace with ``root_dir``, ``out``, ``seed`` and ``n_splits``.
    """
    parser = argparse.ArgumentParser(
        description="Generate Folds")
    parser.add_argument("--root-dir", help="root directory", default="/mnt/sota/datasets/deepfake")
    parser.add_argument("--out", type=str, default="folds02.csv", help="CSV file to save")
    parser.add_argument("--seed", type=int, default=777, help="Seed to split, default 777")
    # The help text used to claim "default 10" while the default is 16.
    parser.add_argument("--n_splits", type=int, default=16, help="Num folds, default 16")
    args = parser.parse_args()

    return args
56
-
57
-
58
def _part_to_fold(n_splits, n_parts=50):
    """Split part numbers 0..n_parts-1 into n_splits contiguous groups (last takes the rest)."""
    sz = n_parts // n_splits
    folds = []
    for fold in range(n_splits):
        end = sz * fold + sz if fold < n_splits - 1 else n_parts
        folds.append(list(range(sz * fold, end)))
    return folds


def main():
    """Assign every crop image to a fold and write the table as CSV.

    Folds are built from the dataset parts: directories whose name contains
    "dfdc" and ends in ``_<part number>``, with part numbers 0..49 split into
    ``--n_splits`` contiguous groups. Every video inherits the fold of its
    part, so a fake and its original must live in parts of the same fold.
    The rows (video, file, label, original, frame, fold) are shuffled with
    ``--seed`` and saved to ``--out``.

    Raises:
        ValueError: a part number falls outside every fold, or a fake and its
            original ended up in different folds.
    """
    args = parse_args()
    ori_fakes = get_original_with_fakes(args.root_dir)
    folds = _part_to_fold(args.n_splits)
    print(folds)

    video_fold = {}
    for d in os.listdir(args.root_dir):
        if "dfdc" not in d:
            continue
        metadata_path = os.path.join(args.root_dir, d, "metadata.json")
        if not os.path.isfile(metadata_path):
            continue
        part = int(d.split("_")[-1])
        # The fold depends on the part only, so resolve it once per directory.
        fold = next((i for i, fold_dirs in enumerate(folds) if part in fold_dirs), None)
        with open(metadata_path) as metadata_json:
            metadata = json.load(metadata_json)
        if metadata and fold is None:
            raise ValueError("part {} of {} does not belong to any fold".format(part, d))
        for k in metadata:
            video_fold[os.path.splitext(k)[0]] = fold

    data = []
    ori_ori = {(ori, ori) for ori, fake in ori_fakes}
    with Pool(processes=os.cpu_count()) as p:
        with tqdm(total=len(ori_ori)) as pbar:
            func = partial(get_paths, label=0, root_dir=args.root_dir)
            for v in p.imap_unordered(func, ori_ori):
                pbar.update()
                data.extend(v)
        with tqdm(total=len(ori_fakes)) as pbar:
            func = partial(get_paths, label=1, root_dir=args.root_dir)
            for v in p.imap_unordered(func, ori_fakes):
                pbar.update()
                data.extend(v)

    fold_data = []
    for img_path, label, ori_vid in data:
        path = Path(img_path)
        video = path.parent.name
        file = path.name
        if video_fold[video] != video_fold[ori_vid]:
            raise ValueError("original video and fake have leak {} {}".format(ori_vid, video))
        fold_data.append([video, file, label, ori_vid, int(file.split("_")[0]), video_fold[video]])
    # --seed used to be parsed but ignored. Sort first because the pool yields
    # results in arbitrary order, then shuffle with a seeded generator so the
    # CSV is reproducible for a given seed.
    fold_data.sort()
    random.Random(args.seed).shuffle(fold_data)
    pd.DataFrame(fold_data, columns=["video", "file", "label", "original", "frame", "fold"]).to_csv(args.out, index=False)


if __name__ == '__main__':
    main()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
preprocessing/generate_landmarks.py DELETED
@@ -1,75 +0,0 @@
1
- import argparse
2
- import os
3
- from functools import partial
4
- from multiprocessing.pool import Pool
5
-
6
-
7
-
8
- os.environ["MKL_NUM_THREADS"] = "1"
9
- os.environ["NUMEXPR_NUM_THREADS"] = "1"
10
- os.environ["OMP_NUM_THREADS"] = "1"
11
-
12
- from tqdm import tqdm
13
-
14
-
15
- import cv2
16
-
17
- cv2.ocl.setUseOpenCL(False)
18
- cv2.setNumThreads(0)
19
- from preprocessing.utils import get_original_video_paths
20
-
21
- from PIL import Image
22
- from facenet_pytorch.models.mtcnn import MTCNN
23
- import numpy as np
24
-
25
- detector = MTCNN(margin=0, thresholds=[0.65, 0.75, 0.75], device="cpu")
26
-
27
-
28
def save_landmarks(ori_id, root_dir):
    """Detect and store facial landmarks for the crops of one original video.

    ``ori_id`` is the video file name; its last four characters are cut off to
    get the crop folder name. For frames 0, 10, ..., 310 and face slots 0 and 1
    the crop ``<root_dir>/crops/<id>/<frame>_<slot>.png`` is run through the
    module-level MTCNN detector, and the rounded landmarks of the first
    detection are saved with ``np.save`` to
    ``<root_dir>/landmarks/<id>/<frame>_<slot>``. Errors are printed and skipped.
    """
    ori_id = ori_id[:-4]
    crops_root = os.path.join(root_dir, "crops", ori_id)
    out_root = os.path.join(root_dir, "landmarks", ori_id)
    os.makedirs(out_root, exist_ok=True)
    for frame in range(0, 320, 10):
        for actor in (0, 1):
            stem = "{}_{}".format(frame, actor)
            crop_path = os.path.join(crops_root, stem + ".png")
            if not os.path.exists(crop_path):
                continue
            try:
                # OpenCV loads BGR; reverse the channel axis to get RGB.
                rgb = cv2.imread(crop_path, cv2.IMREAD_COLOR)[..., ::-1]
                _boxes, _conf, points = detector.detect(Image.fromarray(rgb), landmarks=True)
                if points is not None:
                    np.save(os.path.join(out_root, stem), np.around(points[0]).astype(np.int16))
            except Exception as e:
                print(e)
53
-
54
-
55
def parse_args():
    """Read the ``--root-dir`` option of the landmark-extraction script from argv."""
    cli = argparse.ArgumentParser(description="Extract image landmarks")
    cli.add_argument("--root-dir", default="/mnt/sota/datasets/deepfake", help="root directory")
    return cli.parse_args()
61
-
62
-
63
def main():
    """Extract landmarks for all original videos, one pool task per video."""
    cli = parse_args()
    root = cli.root_dir
    video_names = get_original_video_paths(root, basename=True)
    os.makedirs(os.path.join(root, "landmarks"), exist_ok=True)
    with Pool(processes=os.cpu_count()) as pool, tqdm(total=len(video_names)) as progress:
        worker = partial(save_landmarks, root_dir=root)
        for _ in pool.imap_unordered(worker, video_names):
            progress.update()


if __name__ == '__main__':
    main()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
preprocessing/utils.py DELETED
@@ -1,51 +0,0 @@
1
- import json
2
- import os
3
- from glob import glob
4
- from pathlib import Path
5
-
6
-
7
def get_original_video_paths(root_dir, basename=False):
    """Collect the original (non-fake) videos referenced by the metadata files.

    Reads every ``<root_dir>/*/metadata.json``. An entry labelled ``"REAL"``
    counts as its own original; any other entry contributes the video named
    in its ``"original"`` field. Entries without a resolvable original are
    skipped. The number of distinct paths is printed.

    Args:
        root_dir: dataset root directory.
        basename: return bare file names instead of paths.

    Returns:
        list of unique file names (``basename=True``) or of paths built by
        joining each metadata file's directory with the original's name.
    """
    originals = set()
    originals_v = set()
    for json_path in glob(os.path.join(root_dir, "*/metadata.json")):
        part_dir = Path(json_path).parent
        with open(json_path, "r") as f:
            metadata = json.load(f)
        for k, v in metadata.items():
            original = k if v["label"] == "REAL" else v.get("original")
            if not original:
                # A fake without an "original" field used to crash in
                # os.path.join(dir, None); there is nothing to record for it.
                continue
            originals_v.add(original)
            originals.add(os.path.join(part_dir, original))
    originals = list(originals)
    originals_v = list(originals_v)
    print(len(originals))
    return originals_v if basename else originals
24
-
25
-
26
def get_original_with_fakes(root_dir):
    """List ``(original_id, fake_id)`` pairs for every fake in the metadata.

    Reads every ``<root_dir>/*/metadata.json``; ids are file names without
    their extension. Fakes that do not name an original are skipped.

    Returns:
        list of ``(original_id, fake_id)`` tuples.
    """
    pairs = []
    for json_path in glob(os.path.join(root_dir, "*/metadata.json")):
        with open(json_path, "r") as f:
            metadata = json.load(f)
        for k, v in metadata.items():
            if v["label"] != "FAKE":
                continue
            original = v.get("original")
            if not original:
                # Slicing None used to raise TypeError here.
                continue
            pairs.append((os.path.splitext(original)[0], os.path.splitext(k)[0]))

    return pairs
37
-
38
-
39
def get_originals_and_fakes(root_dir):
    """Split all video ids listed in ``<root_dir>/*/metadata.json`` by label.

    An id is the metadata key minus its last four characters. Entries labelled
    ``"FAKE"`` go to the second list, everything else to the first.

    Returns:
        ``(originals, fakes)`` - two lists of ids.
    """
    originals, fakes = [], []
    pattern = os.path.join(root_dir, "*/metadata.json")
    for json_path in glob(pattern):
        with open(json_path, "r") as f:
            metadata = json.load(f)
        for name, info in metadata.items():
            bucket = fakes if info["label"] == "FAKE" else originals
            bucket.append(name[:-4])
    return originals, fakes