Spaces:
Runtime error
Runtime error
File size: 6,276 Bytes
9f28a6a 96f1aac 9f28a6a 9eb4162 9f28a6a a12cb6b 9f28a6a e650d5e 548b88e 9f28a6a 5a475ef 9f28a6a 5a475ef 9f28a6a 0eb1453 9f28a6a 5a475ef 5145dd9 5a475ef 5d9dca0 5a475ef a12cb6b 50dba29 5a475ef 9f28a6a 50dba29 81f1fb5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 |
import gradio as gr
import cv2
import numpy as np
import tensorflow as tf
import tensorflow_addons
from facenet_pytorch import MTCNN
from PIL import Image
import moviepy.editor as mp
import os
import zipfile
# Unpack the bundled model archive into a directory of the same base name.
# The context manager closes the archive even if extraction raises.
local_zip = "FINAL-EFFICIENTNETV2-B0.zip"
with zipfile.ZipFile(local_zip, 'r') as zip_ref:
    zip_ref.extractall('FINAL-EFFICIENTNETV2-B0')

# Load face detector (runs on CPU; keep_all=True returns every detected face).
mtcnn = MTCNN(margin=14, keep_all=True, factor=0.7, device='cpu')
# Face detection pipeline. Reference: (Timesler, 2020); source link: https://www.kaggle.com/timesler/facial-recognition-model-in-pytorch
class DetectionPipeline:
    """Pipeline class for detecting faces in the frames of a video file.

    Sampled frames are read with OpenCV, converted from BGR to RGB, optionally
    resized, and handed to the detector in batches. For every sampled frame the
    first box returned by the detector is cropped and resized to 224x224.
    """

    def __init__(self, detector, n_frames=None, batch_size=60, resize=None):
        """Constructor for DetectionPipeline class.

        Arguments:
            detector -- Object exposing ``detect(frames)`` that returns a
                ``(boxes, probs)`` pair, where ``boxes[i]`` is ``None`` when no
                face was found in frame ``i``.

        Keyword Arguments:
            n_frames {int} -- Total number of frames to load. These will be evenly spaced
                throughout the video. If not specified (i.e., None), all frames will be loaded.
                (default: {None})
            batch_size {int} -- Batch size to use with the face detector. (default: {60})
            resize {float} -- Fraction by which to resize frames from original prior to face
                detection. A value less than 1 results in downsampling and a value greater than
                1 result in upsampling. (default: {None})
        """
        self.detector = detector
        self.n_frames = n_frames
        self.batch_size = batch_size
        self.resize = resize

    def _sample_indices(self, v_len):
        """Return the indices of the frames to sample from a video of ``v_len`` frames."""
        if v_len <= 0:
            # Nothing to sample from an empty or unreadable video.
            return np.array([], dtype=int)
        if self.n_frames is None:
            return np.arange(0, v_len)
        return np.linspace(0, v_len - 1, self.n_frames).astype(int)

    def _detect_batch(self, frames, faces, last_face):
        """Detect faces in ``frames`` and append one 224x224 crop per frame to ``faces``.

        A frame without a usable detection reuses the most recent crop. If no
        crop exists yet, the frame is skipped instead of raising.

        Returns the most recent crop (or ``None`` if there still is none) so the
        caller can carry it into the next batch.
        """
        boxes, _ = self.detector.detect(frames)
        for frame, frame_boxes in zip(frames, boxes):
            face = None
            if frame_boxes is not None:
                x1, y1, x2, y2 = frame_boxes[0].astype(int)
                # Clamp to the frame origin: a negative start index would
                # otherwise wrap around and yield an empty or wrong slice.
                face = frame[max(y1, 0):max(y2, 0), max(x1, 0):max(x2, 0)]
            # ``any()`` is False for an empty crop as well as an all-zero one.
            if face is not None and face.any():
                last_face = cv2.resize(face, (224, 224))
            if last_face is not None:
                faces.append(last_face)
        return last_face

    def __call__(self, filename):
        """Load frames from an MP4 video and detect faces.

        Arguments:
            filename {str} -- Path to video.

        Returns:
            list -- 224x224 RGB face crops, one per sampled frame, excluding
                any leading frames seen before the first successful detection.
        """
        # Create video reader and find length
        v_cap = cv2.VideoCapture(filename)
        try:
            v_len = int(v_cap.get(cv2.CAP_PROP_FRAME_COUNT))

            # Pick 'n_frames' evenly spaced frames to sample; a set gives O(1)
            # membership tests inside the frame loop.
            sample = set(self._sample_indices(v_len).tolist())

            faces = []
            frames = []
            last_face = None
            for j in range(v_len):
                # grab() advances the stream cheaply; only sampled frames are decoded.
                v_cap.grab()
                if j not in sample:
                    continue

                success, frame = v_cap.retrieve()
                if not success:
                    continue
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

                # Resize frame to desired size
                if self.resize is not None:
                    height, width = frame.shape[:2]
                    frame = cv2.resize(
                        frame,
                        (int(width * self.resize), int(height * self.resize)),
                    )
                frames.append(frame)

                # When batch is full, detect faces and reset frame list
                if len(frames) >= self.batch_size:
                    last_face = self._detect_batch(frames, faces, last_face)
                    frames = []

            # Flush the final partial batch, even when the last sampled frame
            # could not be decoded.
            if frames:
                self._detect_batch(frames, faces, last_face)
            return faces
        finally:
            # Always free the capture handle, including on errors.
            v_cap.release()
# Shared pipeline instance: samples 20 evenly spaced frames per video and
# passes them to the MTCNN detector in batches of up to 60.
detection_pipeline = DetectionPipeline(
    detector=mtcnn,
    n_frames=20,
    batch_size=60,
)

# Keras model loaded from the "FINAL-EFFICIENTNETV2-B0" path.
model = tf.keras.models.load_model("FINAL-EFFICIENTNETV2-B0")
def deepfakespredict(input_video):
    """Classify a video as real or fake from the face crops found in it.

    Each face crop returned by ``detection_pipeline`` is scaled to [0, 1] and
    scored by ``model``. A crop counts as fake when the score at index 1 of its
    prediction exceeds 0.5, and the video is reported as fake when at least
    half of the crops are fake. The crops are also written to ``results.gif``
    and converted to ``video.mp4``.

    Arguments:
        input_video -- Video passed to ``detection_pipeline`` (presumably a
            file path supplied by the Gradio video input -- confirm).

    Returns:
        tuple -- ``(verdict text, confidence text, "video.mp4")``.

    Raises:
        ValueError -- If the pipeline returned no face crops. Previously this
            surfaced as an unhelpful ZeroDivisionError.
    """
    faces = detection_pipeline(input_video)
    if not faces:
        raise ValueError("No faces could be detected in the input video.")

    # Score every crop in one predict call instead of one call per face.
    batch = np.stack(faces) / 255
    preds = model.predict(batch)
    total = len(faces)
    fake = int(np.count_nonzero(preds[:, 1] > 0.5))

    # Multiply before dividing so e.g. 11 of 20 prints as 55.0 rather than
    # 55.00000000000001.
    text2 = "Deepfakes Confidence: " + str(fake * 100 / total) + "%"
    if fake / total >= 0.5:
        text = "The video is FAKE."
    else:
        text = "The video is REAL."

    # Assemble the face crops into an animated GIF (250 ms per frame).
    face_frames = [Image.fromarray(face.astype('uint8'), 'RGB') for face in faces]
    face_frames[0].save(
        'results.gif',
        save_all=True,
        append_images=face_frames[1:],
        duration=250,
        loop=100,
    )

    # Convert the GIF to MP4, closing the clip's reader even if encoding fails.
    clip = mp.VideoFileClip("results.gif")
    try:
        clip.write_videofile("video.mp4")
    finally:
        clip.close()
    return text, text2, "video.mp4"
# Heading shown at the top of the demo page.
title = "EfficientNetV2 Deepfakes Video Detector"

# Description shown under the title, built from adjacent string literals;
# the resulting text is unchanged.
description = (
    "This is a demo implementation of EfficientNetV2 Deepfakes Image Detector by using frame-by-frame detection. "
    "To use it, simply upload your video, or click one of the examples to load them."
    'This demo and model represent the work of "Achieving Face Swapped Deepfakes Detection Using EfficientNetV2" by Lee Sheng Yeh. '
    'The examples were extracted from Celeb-DF(V2)(Li et al, 2020) and FaceForensics++(Rossler et al., 2019). Full reference details is available in "references.txt." '
    "The examples are used under fair use to demo the working of the model only. If any copyright is infringed, please contact the researcher via this email: tp054565@mail.apu.edu.my, the researcher will immediately take down the examples used."
)

# One single-element row per example video, matching the single video input.
examples = [
    [video_file]
    for video_file in (
        'Video1-fake-1-ff.mp4',
        'Video6-real-1-ff.mp4',
        'Video3-fake-3-ff.mp4',
        'Video8-real-3-ff.mp4',
        'real-1.mp4',
        'fake-1.mp4',
    )
]

# Build the interface (video in; verdict, confidence and face-sequence video
# out) and start serving it.
gr.Interface(
    fn=deepfakespredict,
    inputs=["video"],
    outputs=[
        "text",
        "text",
        gr.outputs.Video(label="Detected face sequence"),
    ],
    title=title,
    description=description,
    examples=examples,
).launch()