Spaces: Runtime error

russel0719 committed
Commit 9d5fece · Parent(s): 55da56b

Delete preprocessing

Files changed:
- preprocessing/__init__.py +0 -1
- preprocessing/compress_videos.py +0 -45
- preprocessing/detect_original_faces.py +0 -51
- preprocessing/extract_crops.py +0 -86
- preprocessing/extract_images.py +0 -42
- preprocessing/face_detector.py +0 -72
- preprocessing/face_encodings.py +0 -55
- preprocessing/generate_diffs.py +0 -73
- preprocessing/generate_folds.py +0 -114
- preprocessing/generate_landmarks.py +0 -75
- preprocessing/utils.py +0 -51
preprocessing/__init__.py
DELETED
@@ -1 +0,0 @@
from .face_detector import *
preprocessing/compress_videos.py
DELETED
@@ -1,45 +0,0 @@
import argparse
import os
import random
import subprocess

os.environ["MKL_NUM_THREADS"] = "1"
os.environ["NUMEXPR_NUM_THREADS"] = "1"
os.environ["OMP_NUM_THREADS"] = "1"
from functools import partial
from glob import glob
from multiprocessing.pool import Pool
from os import cpu_count

import cv2

cv2.ocl.setUseOpenCL(False)
cv2.setNumThreads(0)
from tqdm import tqdm


def compress_video(video, root_dir):
    parent_dir = video.split("/")[-2]
    out_dir = os.path.join(root_dir, "compressed", parent_dir)
    os.makedirs(out_dir, exist_ok=True)
    video_name = video.split("/")[-1]
    out_path = os.path.join(out_dir, video_name)
    lvl = random.choice([23, 28, 32])
    command = "ffmpeg -i {} -c:v libx264 -crf {} -threads 1 {}".format(video, lvl, out_path)
    try:
        subprocess.check_output(command, shell=True, stderr=subprocess.STDOUT)
    except Exception as e:
        print("Could not process video", str(e))


if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description="Compresses videos with random CRF levels")
    parser.add_argument("--root-dir", help="root directory", default="/mnt/sota/datasets/deepfake")

    args = parser.parse_args()
    videos = [video_path for video_path in glob(os.path.join(args.root_dir, "*/*.mp4"))]
    with Pool(processes=cpu_count() - 2) as p:
        with tqdm(total=len(videos)) as pbar:
            for v in p.imap_unordered(partial(compress_video, root_dir=args.root_dir), videos):
                pbar.update()
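
Note: the script above was a standalone CLI tool (e.g. python preprocessing/compress_videos.py --root-dir <dataset>). A minimal sketch of calling the helper directly, with placeholder paths that are not taken from this commit:

# Illustrative sketch only: compress a single clip with the deleted helper.
# Paths are hypothetical; output lands in <root>/compressed/<part>/ and ffmpeg must be on PATH.
from preprocessing.compress_videos import compress_video

compress_video("/path/to/dfdc/dfdc_train_part_0/clip.mp4", root_dir="/path/to/dfdc")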
preprocessing/detect_original_faces.py
DELETED
@@ -1,51 +0,0 @@
import argparse
import json
import os
from os import cpu_count
from typing import Type

from torch.utils.data.dataloader import DataLoader
from tqdm import tqdm

from preprocessing import face_detector, VideoDataset
from preprocessing.face_detector import VideoFaceDetector
from preprocessing.utils import get_original_video_paths


def parse_args():
    parser = argparse.ArgumentParser(
        description="Process original videos with a face detector")
    parser.add_argument("--root-dir", help="root directory")
    parser.add_argument("--detector-type", help="type of the detector", default="FacenetDetector",
                        choices=["FacenetDetector"])
    args = parser.parse_args()
    return args


def process_videos(videos, root_dir, detector_cls: Type[VideoFaceDetector]):
    detector = face_detector.__dict__[detector_cls](device="cuda:0")
    dataset = VideoDataset(videos)
    loader = DataLoader(dataset, shuffle=False, num_workers=cpu_count() - 2, batch_size=1, collate_fn=lambda x: x)
    for item in tqdm(loader):
        result = {}
        video, indices, frames = item[0]
        batches = [frames[i:i + detector._batch_size] for i in range(0, len(frames), detector._batch_size)]
        for j, frames in enumerate(batches):
            result.update({int(j * detector._batch_size) + i: b for i, b in zip(indices, detector._detect_faces(frames))})
        id = os.path.splitext(os.path.basename(video))[0]
        out_dir = os.path.join(root_dir, "boxes")
        os.makedirs(out_dir, exist_ok=True)
        with open(os.path.join(out_dir, "{}.json".format(id)), "w") as f:
            json.dump(result, f)


def main():
    args = parse_args()
    originals = get_original_video_paths(args.root_dir)
    process_videos(originals, args.root_dir, args.detector_type)


if __name__ == "__main__":
    main()
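
Note: each boxes/<video_id>.json written above maps a frame index to a list of [xmin, ymin, xmax, ymax] boxes (or null) at half the original frame resolution, since VideoDataset downscales frames by 2. A minimal read-back sketch (the path is a placeholder):

# Illustrative sketch only: consume the per-video detection file written by process_videos.
import json

with open("/path/to/dfdc/boxes/clip.json") as f:  # hypothetical path
    boxes_per_frame = json.load(f)                # {"0": [[xmin, ymin, xmax, ymax], ...], "1": null, ...}

for frame_idx, face_boxes in boxes_per_frame.items():
    if face_boxes is None:
        continue
    for xmin, ymin, xmax, ymax in face_boxes:
        print(frame_idx, xmin, ymin, xmax, ymax)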
preprocessing/extract_crops.py
DELETED
@@ -1,86 +0,0 @@
import argparse
import json
import os
from os import cpu_count
from pathlib import Path

os.environ["MKL_NUM_THREADS"] = "1"
os.environ["NUMEXPR_NUM_THREADS"] = "1"
os.environ["OMP_NUM_THREADS"] = "1"
from functools import partial
from glob import glob
from multiprocessing.pool import Pool

import cv2

cv2.ocl.setUseOpenCL(False)
cv2.setNumThreads(0)
from tqdm import tqdm


def extract_video(param, root_dir, crops_dir):
    video, bboxes_path = param
    with open(bboxes_path, "r") as bbox_f:
        bboxes_dict = json.load(bbox_f)

    capture = cv2.VideoCapture(video)
    frames_num = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))

    for i in range(frames_num):
        capture.grab()
        if i % 10 != 0:
            continue
        success, frame = capture.retrieve()
        if not success or str(i) not in bboxes_dict:
            continue
        id = os.path.splitext(os.path.basename(video))[0]
        crops = []
        bboxes = bboxes_dict[str(i)]
        if bboxes is None:
            continue
        for bbox in bboxes:
            xmin, ymin, xmax, ymax = [int(b * 2) for b in bbox]
            w = xmax - xmin
            h = ymax - ymin
            p_h = h // 3
            p_w = w // 3
            crop = frame[max(ymin - p_h, 0):ymax + p_h, max(xmin - p_w, 0):xmax + p_w]
            h, w = crop.shape[:2]
            crops.append(crop)
        img_dir = os.path.join(root_dir, crops_dir, id)
        os.makedirs(img_dir, exist_ok=True)
        for j, crop in enumerate(crops):
            cv2.imwrite(os.path.join(img_dir, "{}_{}.png".format(i, j)), crop)


def get_video_paths(root_dir):
    paths = []
    for json_path in glob(os.path.join(root_dir, "*/metadata.json")):
        dir = Path(json_path).parent
        with open(json_path, "r") as f:
            metadata = json.load(f)
        for k, v in metadata.items():
            original = v.get("original", None)
            if not original:
                original = k
            bboxes_path = os.path.join(root_dir, "boxes", original[:-4] + ".json")
            if not os.path.exists(bboxes_path):
                continue
            paths.append((os.path.join(dir, k), bboxes_path))

    return paths


if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description="Extracts crops from video")
    parser.add_argument("--root-dir", help="root directory")
    parser.add_argument("--crops-dir", help="crops directory")

    args = parser.parse_args()
    os.makedirs(os.path.join(args.root_dir, args.crops_dir), exist_ok=True)
    params = get_video_paths(args.root_dir)
    with Pool(processes=cpu_count()) as p:
        with tqdm(total=len(params)) as pbar:
            for v in p.imap_unordered(partial(extract_video, root_dir=args.root_dir, crops_dir=args.crops_dir), params):
                pbar.update()
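
Note on the crop geometry above: boxes come from detection on frames downscaled by 2 (see face_detector.VideoDataset), hence the int(b * 2) rescale, and each box is padded by a third of its width and height on every side before cropping. A self-contained sketch of that arithmetic with placeholder numbers:

# Illustrative sketch only: the same rescale-and-pad arithmetic used in extract_video.
import numpy as np

frame = np.zeros((1080, 1920, 3), dtype=np.uint8)            # stand-in full-resolution frame
half_res_box = (300.0, 200.0, 400.0, 330.0)                  # hypothetical detector output
xmin, ymin, xmax, ymax = [int(b * 2) for b in half_res_box]  # back to full resolution
w, h = xmax - xmin, ymax - ymin
p_w, p_h = w // 3, h // 3                                     # pad by a third of the box size
crop = frame[max(ymin - p_h, 0):ymax + p_h, max(xmin - p_w, 0):xmax + p_w]
print(crop.shape)                                             # (432, 332, 3) for these numbers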
preprocessing/extract_images.py
DELETED
@@ -1,42 +0,0 @@
import argparse
import os

os.environ["MKL_NUM_THREADS"] = "1"
os.environ["NUMEXPR_NUM_THREADS"] = "1"
os.environ["OMP_NUM_THREADS"] = "1"
from functools import partial
from glob import glob
from multiprocessing.pool import Pool
from os import cpu_count

import cv2

cv2.ocl.setUseOpenCL(False)
cv2.setNumThreads(0)
from tqdm import tqdm


def extract_video(video, root_dir):
    capture = cv2.VideoCapture(video)
    frames_num = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))

    for i in range(frames_num):
        capture.grab()
        success, frame = capture.retrieve()
        if not success:
            continue
        id = os.path.splitext(os.path.basename(video))[0]
        cv2.imwrite(os.path.join(root_dir, "jpegs", "{}_{}.jpg".format(id, i)), frame, [cv2.IMWRITE_JPEG_QUALITY, 100])


if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description="Extracts jpegs from video")
    parser.add_argument("--root-dir", help="root directory")

    args = parser.parse_args()
    os.makedirs(os.path.join(args.root_dir, "jpegs"), exist_ok=True)
    videos = [video_path for video_path in glob(os.path.join(args.root_dir, "*/*.mp4"))]
    with Pool(processes=cpu_count() - 2) as p:
        with tqdm(total=len(videos)) as pbar:
            for v in p.imap_unordered(partial(extract_video, root_dir=args.root_dir), videos):
                pbar.update()
preprocessing/face_detector.py
DELETED
@@ -1,72 +0,0 @@
import os

os.environ["MKL_NUM_THREADS"] = "1"
os.environ["NUMEXPR_NUM_THREADS"] = "1"
os.environ["OMP_NUM_THREADS"] = "1"

from abc import ABC, abstractmethod
from collections import OrderedDict
from typing import List

import cv2

cv2.ocl.setUseOpenCL(False)
cv2.setNumThreads(0)

from PIL import Image
from facenet_pytorch.models.mtcnn import MTCNN
from torch.utils.data import Dataset


class VideoFaceDetector(ABC):

    def __init__(self, **kwargs) -> None:
        super().__init__()

    @property
    @abstractmethod
    def _batch_size(self) -> int:
        pass

    @abstractmethod
    def _detect_faces(self, frames) -> List:
        pass


class FacenetDetector(VideoFaceDetector):

    def __init__(self, device="cuda:0") -> None:
        super().__init__()
        self.detector = MTCNN(margin=0, thresholds=[0.85, 0.95, 0.95], device=device)

    def _detect_faces(self, frames) -> List:
        batch_boxes, *_ = self.detector.detect(frames, landmarks=False)
        return [b.tolist() if b is not None else None for b in batch_boxes]

    @property
    def _batch_size(self):
        return 32


class VideoDataset(Dataset):

    def __init__(self, videos) -> None:
        super().__init__()
        self.videos = videos

    def __getitem__(self, index: int):
        video = self.videos[index]
        capture = cv2.VideoCapture(video)
        frames_num = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))
        frames = OrderedDict()
        for i in range(frames_num):
            capture.grab()
            success, frame = capture.retrieve()
            if not success:
                continue
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame = Image.fromarray(frame)
            frame = frame.resize(size=[s // 2 for s in frame.size])
            frames[i] = frame
        return video, list(frames.keys()), list(frames.values())

    def __len__(self) -> int:
        return len(self.videos)
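
Note: a minimal sketch of using the detector class above directly, outside the DataLoader pipeline in detect_original_faces.py (blank images and the "cpu" device are placeholders; facenet-pytorch must be installed):

# Illustrative sketch only: run the deleted FacenetDetector on a small batch of PIL frames.
from PIL import Image
from preprocessing.face_detector import FacenetDetector

detector = FacenetDetector(device="cpu")           # "cuda:0" in the real pipeline
frames = [Image.new("RGB", (960, 540))] * 4        # stand-ins for half-resolution frames
boxes = detector._detect_faces(frames)             # one list of boxes (or None) per frame
print(detector._batch_size, boxes)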
preprocessing/face_encodings.py
DELETED
@@ -1,55 +0,0 @@
import argparse
import os
from functools import partial
from multiprocessing.pool import Pool

from tqdm import tqdm

from preprocessing.utils import get_original_video_paths

os.environ["MKL_NUM_THREADS"] = "1"
os.environ["NUMEXPR_NUM_THREADS"] = "1"
os.environ["OMP_NUM_THREADS"] = "1"

import random

import face_recognition
import numpy as np


def write_face_encodings(video, root_dir):
    video_id, *_ = os.path.splitext(video)
    crops_dir = os.path.join(root_dir, "crops", video_id)
    if not os.path.exists(crops_dir):
        return
    crop_files = [f for f in os.listdir(crops_dir) if f.endswith("jpg")]
    if crop_files:
        crop_files = random.sample(crop_files, min(10, len(crop_files)))
        encodings = []
        for crop_file in crop_files:
            img = face_recognition.load_image_file(os.path.join(crops_dir, crop_file))
            encoding = face_recognition.face_encodings(img, num_jitters=10)
            if encoding:
                encodings.append(encoding[0])
        np.save(os.path.join(crops_dir, "encodings"), encodings)


def parse_args():
    parser = argparse.ArgumentParser(
        description="Extract 10 crops encodings for each video")
    parser.add_argument("--root-dir", help="root directory", default="/home/selim/datasets/deepfake")
    args = parser.parse_args()
    return args


def main():
    args = parse_args()
    originals = get_original_video_paths(args.root_dir, basename=True)
    with Pool(processes=os.cpu_count() - 4) as p:
        with tqdm(total=len(originals)) as pbar:
            for v in p.imap_unordered(partial(write_face_encodings, root_dir=args.root_dir), originals):
                pbar.update()


if __name__ == '__main__':
    main()
preprocessing/generate_diffs.py
DELETED
@@ -1,73 +0,0 @@
import argparse
import os

os.environ["MKL_NUM_THREADS"] = "1"
os.environ["NUMEXPR_NUM_THREADS"] = "1"
os.environ["OMP_NUM_THREADS"] = "1"
from skimage.measure import compare_ssim

from functools import partial
from multiprocessing.pool import Pool

from tqdm import tqdm

from preprocessing.utils import get_original_with_fakes

import cv2

cv2.ocl.setUseOpenCL(False)
cv2.setNumThreads(0)

import numpy as np

cache = {}


def save_diffs(pair, root_dir):
    ori_id, fake_id = pair
    ori_dir = os.path.join(root_dir, "crops", ori_id)
    fake_dir = os.path.join(root_dir, "crops", fake_id)
    diff_dir = os.path.join(root_dir, "diffs", fake_id)
    os.makedirs(diff_dir, exist_ok=True)
    for frame in range(320):
        if frame % 10 != 0:
            continue
        for actor in range(2):
            image_id = "{}_{}.png".format(frame, actor)
            diff_image_id = "{}_{}_diff.png".format(frame, actor)
            ori_path = os.path.join(ori_dir, image_id)
            fake_path = os.path.join(fake_dir, image_id)
            diff_path = os.path.join(diff_dir, diff_image_id)
            if os.path.exists(ori_path) and os.path.exists(fake_path):
                img1 = cv2.imread(ori_path, cv2.IMREAD_COLOR)
                img2 = cv2.imread(fake_path, cv2.IMREAD_COLOR)
                try:
                    d, a = compare_ssim(img1, img2, multichannel=True, full=True)
                    a = 1 - a
                    diff = (a * 255).astype(np.uint8)
                    diff = cv2.cvtColor(diff, cv2.COLOR_BGR2GRAY)
                    cv2.imwrite(diff_path, diff)
                except:
                    pass


def parse_args():
    parser = argparse.ArgumentParser(
        description="Extract image diffs")
    parser.add_argument("--root-dir", help="root directory", default="/mnt/sota/datasets/deepfake")
    args = parser.parse_args()
    return args


def main():
    args = parse_args()
    pairs = get_original_with_fakes(args.root_dir)
    os.makedirs(os.path.join(args.root_dir, "diffs"), exist_ok=True)
    with Pool(processes=os.cpu_count() - 2) as p:
        with tqdm(total=len(pairs)) as pbar:
            func = partial(save_diffs, root_dir=args.root_dir)
            for v in p.imap_unordered(func, pairs):
                pbar.update()


if __name__ == '__main__':
    main()
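
Note: skimage.measure.compare_ssim used above was removed in scikit-image 0.18. If this script is ever restored against a recent scikit-image, a sketch of the equivalent call (assuming scikit-image >= 0.19 for channel_axis) would be:

# Illustrative sketch only: modern replacement for the removed compare_ssim call.
import numpy as np
from skimage.metrics import structural_similarity

img1 = np.zeros((64, 64, 3), dtype=np.uint8)    # stand-ins for the two aligned face crops
img2 = np.zeros((64, 64, 3), dtype=np.uint8)
score, ssim_map = structural_similarity(img1, img2, channel_axis=2, full=True)
diff = ((1 - ssim_map) * 255).astype(np.uint8)  # same (1 - a) * 255 step as save_diffs above
print(score, diff.shape)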
preprocessing/generate_folds.py
DELETED
@@ -1,114 +0,0 @@
import argparse
import json
import os
import random
from functools import partial
from multiprocessing.pool import Pool
from pathlib import Path

os.environ["MKL_NUM_THREADS"] = "1"
os.environ["NUMEXPR_NUM_THREADS"] = "1"
os.environ["OMP_NUM_THREADS"] = "1"
import pandas as pd

from tqdm import tqdm

from preprocessing.utils import get_original_with_fakes

import cv2

cv2.ocl.setUseOpenCL(False)
cv2.setNumThreads(0)


def get_paths(vid, label, root_dir):
    ori_vid, fake_vid = vid
    ori_dir = os.path.join(root_dir, "crops", ori_vid)
    fake_dir = os.path.join(root_dir, "crops", fake_vid)
    data = []
    for frame in range(320):
        if frame % 10 != 0:
            continue
        for actor in range(2):
            image_id = "{}_{}.png".format(frame, actor)
            ori_img_path = os.path.join(ori_dir, image_id)
            fake_img_path = os.path.join(fake_dir, image_id)
            img_path = ori_img_path if label == 0 else fake_img_path
            try:
                # img = cv2.imread(img_path)[..., ::-1]
                if os.path.exists(img_path):
                    data.append([img_path, label, ori_vid])
            except:
                pass
    return data


def parse_args():
    parser = argparse.ArgumentParser(
        description="Generate Folds")
    parser.add_argument("--root-dir", help="root directory", default="/mnt/sota/datasets/deepfake")
    parser.add_argument("--out", type=str, default="folds02.csv", help="CSV file to save")
    parser.add_argument("--seed", type=int, default=777, help="Seed to split, default 777")
    parser.add_argument("--n_splits", type=int, default=16, help="Num folds, default 16")
    args = parser.parse_args()

    return args


def main():
    args = parse_args()
    ori_fakes = get_original_with_fakes(args.root_dir)
    sz = 50 // args.n_splits
    folds = []
    for fold in range(args.n_splits):
        folds.append(list(range(sz * fold, sz * fold + sz if fold < args.n_splits - 1 else 50)))
    print(folds)
    video_fold = {}
    for d in os.listdir(args.root_dir):
        if "dfdc" in d:
            part = int(d.split("_")[-1])
            for f in os.listdir(os.path.join(args.root_dir, d)):
                if "metadata.json" in f:
                    with open(os.path.join(args.root_dir, d, "metadata.json")) as metadata_json:
                        metadata = json.load(metadata_json)

                    for k, v in metadata.items():
                        fold = None
                        for i, fold_dirs in enumerate(folds):
                            if part in fold_dirs:
                                fold = i
                                break
                        assert fold is not None
                        video_id = k[:-4]
                        video_fold[video_id] = fold
    for fold in range(len(folds)):
        holdoutset = {k for k, v in video_fold.items() if v == fold}
        trainset = {k for k, v in video_fold.items() if v != fold}
        assert holdoutset.isdisjoint(trainset), "Folds have leaks"
    data = []
    ori_ori = set([(ori, ori) for ori, fake in ori_fakes])
    with Pool(processes=os.cpu_count()) as p:
        with tqdm(total=len(ori_ori)) as pbar:
            func = partial(get_paths, label=0, root_dir=args.root_dir)
            for v in p.imap_unordered(func, ori_ori):
                pbar.update()
                data.extend(v)
        with tqdm(total=len(ori_fakes)) as pbar:
            func = partial(get_paths, label=1, root_dir=args.root_dir)
            for v in p.imap_unordered(func, ori_fakes):
                pbar.update()
                data.extend(v)
    fold_data = []
    for img_path, label, ori_vid in data:
        path = Path(img_path)
        video = path.parent.name
        file = path.name
        assert video_fold[video] == video_fold[ori_vid], "original video and fake have leak {} {}".format(ori_vid,
                                                                                                          video)
        fold_data.append([video, file, label, ori_vid, int(file.split("_")[0]), video_fold[video]])
    random.shuffle(fold_data)
    pd.DataFrame(fold_data, columns=["video", "file", "label", "original", "frame", "fold"]).to_csv(args.out, index=False)


if __name__ == '__main__':
    main()
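
Note: a minimal sketch of consuming the CSV written above, using the script's default --out name and treating fold 0 as the hold-out split:

# Illustrative sketch only: read back the generated folds file and split train/hold-out.
import pandas as pd

df = pd.read_csv("folds02.csv")        # columns: video, file, label, original, frame, fold
holdout = df[df["fold"] == 0]
train = df[df["fold"] != 0]
print(len(train), len(holdout))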
preprocessing/generate_landmarks.py
DELETED
@@ -1,75 +0,0 @@
import argparse
import os
from functools import partial
from multiprocessing.pool import Pool

os.environ["MKL_NUM_THREADS"] = "1"
os.environ["NUMEXPR_NUM_THREADS"] = "1"
os.environ["OMP_NUM_THREADS"] = "1"

from tqdm import tqdm

import cv2

cv2.ocl.setUseOpenCL(False)
cv2.setNumThreads(0)
from preprocessing.utils import get_original_video_paths

from PIL import Image
from facenet_pytorch.models.mtcnn import MTCNN
import numpy as np

detector = MTCNN(margin=0, thresholds=[0.65, 0.75, 0.75], device="cpu")


def save_landmarks(ori_id, root_dir):
    ori_id = ori_id[:-4]
    ori_dir = os.path.join(root_dir, "crops", ori_id)
    landmark_dir = os.path.join(root_dir, "landmarks", ori_id)
    os.makedirs(landmark_dir, exist_ok=True)
    for frame in range(320):
        if frame % 10 != 0:
            continue
        for actor in range(2):
            image_id = "{}_{}.png".format(frame, actor)
            landmarks_id = "{}_{}".format(frame, actor)
            ori_path = os.path.join(ori_dir, image_id)
            landmark_path = os.path.join(landmark_dir, landmarks_id)

            if os.path.exists(ori_path):
                try:
                    image_ori = cv2.imread(ori_path, cv2.IMREAD_COLOR)[..., ::-1]
                    frame_img = Image.fromarray(image_ori)
                    batch_boxes, conf, landmarks = detector.detect(frame_img, landmarks=True)
                    if landmarks is not None:
                        landmarks = np.around(landmarks[0]).astype(np.int16)
                        np.save(landmark_path, landmarks)
                except Exception as e:
                    print(e)
                    pass


def parse_args():
    parser = argparse.ArgumentParser(
        description="Extract image landmarks")
    parser.add_argument("--root-dir", help="root directory", default="/mnt/sota/datasets/deepfake")
    args = parser.parse_args()
    return args


def main():
    args = parse_args()
    ids = get_original_video_paths(args.root_dir, basename=True)
    os.makedirs(os.path.join(args.root_dir, "landmarks"), exist_ok=True)
    with Pool(processes=os.cpu_count()) as p:
        with tqdm(total=len(ids)) as pbar:
            func = partial(save_landmarks, root_dir=args.root_dir)
            for v in p.imap_unordered(func, ids):
                pbar.update()


if __name__ == '__main__':
    main()
preprocessing/utils.py
DELETED
@@ -1,51 +0,0 @@
import json
import os
from glob import glob
from pathlib import Path


def get_original_video_paths(root_dir, basename=False):
    originals = set()
    originals_v = set()
    for json_path in glob(os.path.join(root_dir, "*/metadata.json")):
        dir = Path(json_path).parent
        with open(json_path, "r") as f:
            metadata = json.load(f)
        for k, v in metadata.items():
            original = v.get("original", None)
            if v["label"] == "REAL":
                original = k
                originals_v.add(original)
                originals.add(os.path.join(dir, original))
    originals = list(originals)
    originals_v = list(originals_v)
    print(len(originals))
    return originals_v if basename else originals


def get_original_with_fakes(root_dir):
    pairs = []
    for json_path in glob(os.path.join(root_dir, "*/metadata.json")):
        with open(json_path, "r") as f:
            metadata = json.load(f)
        for k, v in metadata.items():
            original = v.get("original", None)
            if v["label"] == "FAKE":
                pairs.append((original[:-4], k[:-4]))

    return pairs


def get_originals_and_fakes(root_dir):
    originals = []
    fakes = []
    for json_path in glob(os.path.join(root_dir, "*/metadata.json")):
        with open(json_path, "r") as f:
            metadata = json.load(f)
        for k, v in metadata.items():
            if v["label"] == "FAKE":
                fakes.append(k[:-4])
            else:
                originals.append(k[:-4])

    return originals, fakes
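
Note: the helpers above all parse the per-part metadata.json files. Inferred from how the keys are used, a minimal sketch of the expected shape (file names are placeholders) is:

# Illustrative sketch only: the metadata.json structure these helpers expect.
# REAL entries carry a "label"; FAKE entries additionally point at their "original".
metadata = {
    "aaabbbccc.mp4": {"label": "REAL"},
    "dddeeefff.mp4": {"label": "FAKE", "original": "aaabbbccc.mp4"},
}

fakes = [k[:-4] for k, v in metadata.items() if v["label"] == "FAKE"]
print(fakes)  # ['dddeeefff']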