# !git clone https://github.com/polimi-ispl/icpr2020dfdc
# !pip install efficientnet-pytorch
# !pip install -U git+https://github.com/albu/albumentations > /dev/null
# %cd icpr2020dfdc/notebook

import sys

import torch
from torch.utils.model_zoo import load_url
from PIL import Image
from scipy.special import expit
import gradio as gr

sys.path.append('./icpr2020dfdc/')
from blazeface import FaceExtractor, BlazeFace, VideoReader
from architectures import fornet, weights
from isplutils import utils

"""
Choose an architecture between
- EfficientNetB4
- EfficientNetB4ST
- EfficientNetAutoAttB4
- EfficientNetAutoAttB4ST
- Xception
"""
net_model = 'EfficientNetAutoAttB4'

"""
Choose a training dataset between
- DFDC
- FFPP
"""
train_db = 'DFDC'

device = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')
face_policy = 'scale'
face_size = 224
frames_per_video = 32

# Download the pretrained weights for the chosen architecture/dataset pair
# and put the network in evaluation mode.
model_url = weights.weight_url['{:s}_{:s}'.format(net_model, train_db)]
net = getattr(fornet, net_model)().eval().to(device)
net.load_state_dict(load_url(model_url, map_location=device, check_hash=True))

# Face preprocessing transform matching the normalization used at training time.
transf = utils.get_transformer(face_policy, face_size, net.get_normalizer(), train=False)

# BlazeFace detector used to crop faces out of the sampled frames.
facedet = BlazeFace().to(device)
facedet.load_weights("./icpr2020dfdc/blazeface/blazeface.pth")
facedet.load_anchors("./icpr2020dfdc/blazeface/anchors.npy")

videoreader = VideoReader(verbose=False)
video_read_fn = lambda x: videoreader.read_frames(x, num_frames=frames_per_video)
face_extractor = FaceExtractor(video_read_fn=video_read_fn, facedet=facedet)

title = "FaceForensics++"


def inference(vid):
    # Sample frames_per_video frames from the clip and detect faces in each.
    vid_real_faces = face_extractor.process_video(vid)

    # Keep the first detected face per frame; skip frames with no detection.
    faces = [frame['faces'][0] for frame in vid_real_faces if len(frame['faces'])]
    if not faces:
        # torch.stack would fail on an empty list, so bail out early.
        return None, "No faces detected"
    faces_real_t = torch.stack([transf(image=face)['image'] for face in faces])

    # Average the per-face logits, then map the mean to a fake probability
    # with expit (the logistic sigmoid).
    with torch.no_grad():
        faces_real_pred = net(faces_real_t.to(device)).cpu().numpy().flatten()
    res = expit(faces_real_pred.mean())

    # Return the label card as a PIL image (the output is declared type="pil")
    # together with the fake probability as a percentage.
    if res >= 0.5:
        return Image.open("./Labels/Fake.png"), f"{res*100:.2f}%"
    else:
        return Image.open("./Labels/Real.jpg"), f"{res*100:.2f}%"


demo = gr.Interface(
    fn=inference,
    inputs=[gr.inputs.Video(type="mp4", label="In")],
    outputs=[gr.outputs.Image(type="pil"), "text"],
    title=title,
)
demo.launch(debug=True)
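
# Usage sketch (assumption, not part of the original demo): inference() can be
# called directly, bypassing the Gradio UI. "./samples/clip.mp4" is a
# hypothetical path; substitute any local video file. Left commented out
# because launch(debug=True) above blocks until the server is stopped.
#
# label_img, score = inference("./samples/clip.mp4")
# print(f"Fake probability: {score}")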
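
# Note: gr.inputs / gr.outputs belong to the legacy Gradio 2.x API and were
# removed in Gradio 3. A minimal sketch of an equivalent interface on
# Gradio 3.x (an assumption; check your installed version):
#
# demo = gr.Interface(
#     fn=inference,
#     inputs=gr.Video(label="In"),
#     outputs=[gr.Image(type="pil"), "text"],
#     title=title,
# )
# demo.launch(debug=True)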