# Spaces: Runtime error
# (The line above appears to be hosting-page status text captured together
# with the source; it is not part of the program.)
"""
Index Celeb-DF v2
Image and Sound Processing Lab - Politecnico di Milano
Nicolò Bonettini
Edoardo Daniele Cannas
Sara Mandelli
Luca Bondi
Paolo Bestagini
"""
import argparse
from multiprocessing import Pool
from pathlib import Path

import numpy as np
import pandas as pd

from isplutils.utils import extract_meta_av, extract_meta_cv
def _original_name(fake_name: str) -> str:
    """Derive the name of the source video from the name of a fake one.

    The result joins the first and third underscore-separated fields of
    ``fake_name`` (``'a_b_c'`` becomes ``'a_c'``).

    :param fake_name: file name (without extension) of a fake video
    :return: name expected for the matching original video
    :raises ValueError: if ``fake_name`` has fewer than three fields
    """
    fields = fake_name.split('_')
    if len(fields) < 3:
        raise ValueError(
            'Unable to derive the original video name from "{}"'.format(fake_name))
    return '_'.join([fields[0], fields[2]])


def _build_video_dataframe(source_dir: Path) -> pd.DataFrame:
    """Scan ``source_dir`` for ``*.mp4`` files and describe each one in a row.

    The returned DataFrame has the columns ``path`` (relative to
    ``source_dir``), ``height``, ``width``, ``frames``, ``class`` (first
    path component), ``label`` (True for ``Celeb-synthesis``), ``name``
    (file name without suffix), ``original`` (row index of the source
    video for fakes, -1 otherwise) and ``test`` (path listed in
    ``List_of_testing_videos.txt``).

    :param source_dir: root directory of the dataset
    :return: one row per video found under ``source_dir``
    :raises FileNotFoundError: if the test list or every video is missing
    :raises ValueError: if the original of a fake video cannot be found
    """
    split_file = source_dir.joinpath('List_of_testing_videos.txt')
    if not split_file.exists():
        raise FileNotFoundError('Unable to find "List_of_testing_videos.txt" in {}'.format(source_dir))
    # NOTE(review): header=0 makes pandas consume the first line of the list
    # as a header row, so that line never ends up in the index. Confirm the
    # file really starts with a header; otherwise header=None is needed.
    test_videos_df = pd.read_csv(split_file, delimiter=' ', header=0, index_col=1)

    video_paths = [f.relative_to(source_dir) for f in source_dir.rglob('*.mp4')]
    if not video_paths:
        # Fail early with a clear message instead of an obscure error from
        # np.stack() on an empty list further down.
        raise FileNotFoundError('Unable to find any "*.mp4" video in {}'.format(source_dir))

    df_videos = pd.DataFrame({'path': video_paths})

    # Pre-create the metadata columns, then fill them in parallel.
    df_videos['height'] = df_videos['width'] = df_videos['frames'] = np.zeros(len(df_videos), dtype=np.uint16)
    with Pool() as pool:
        meta = pool.map(extract_meta_av,
                        [str(source_dir.joinpath(rel_path)) for rel_path in video_paths])
    # Each result is stored as one (height, width, frames) row.
    df_videos.loc[:, ['height', 'width', 'frames']] = np.stack(meta)

    # Fix for videos that av cannot decode properly
    for idx, record in df_videos[df_videos['frames'] == 0].iterrows():
        df_videos.loc[idx, ['height', 'width', 'frames']] = \
            extract_meta_cv(str(source_dir.joinpath(record['path'])))

    class_names = df_videos['path'].map(lambda rel_path: rel_path.parts[0])
    df_videos['class'] = class_names.astype('category')
    # True is FAKE, False is REAL. A direct comparison always yields a plain
    # boolean column.
    df_videos['label'] = class_names == 'Celeb-synthesis'
    df_videos['name'] = df_videos['path'].map(lambda rel_path: rel_path.with_suffix('').name)

    # Map every name to the first row carrying it, so each fake video is
    # resolved with a dictionary lookup instead of a scan over all rows.
    first_index_by_name = {}
    for idx, name in df_videos['name'].items():
        first_index_by_name.setdefault(name, idx)

    def original_index(fake_name):
        original = _original_name(fake_name)
        try:
            return first_index_by_name[original]
        except KeyError:
            raise ValueError(
                'Unable to find the original video "{}" of fake video "{}"'.format(
                    original, fake_name)) from None

    df_videos['original'] = np.full(len(df_videos), -1, dtype=np.int16)
    is_fake = df_videos['label']
    if is_fake.any():
        df_videos.loc[is_fake, 'original'] = df_videos.loc[is_fake, 'name'].map(original_index)

    # Compare forward-slash paths so the match also works where the native
    # separator is a backslash.
    df_videos['test'] = df_videos['path'].map(lambda rel_path: rel_path.as_posix()).isin(
        test_videos_df.index)

    return df_videos


def main():
    """Index the videos under ``--source`` and cache the result as a pickle.

    If the file given by ``--videodataset`` already exists it is loaded;
    otherwise the DataFrame is built from the source directory and saved
    there. In both cases the number of real and fake videos is printed.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--source', type=Path, help='Source dir',
                        required=True)
    parser.add_argument('--videodataset', type=Path, default='data/celebdf_videos.pkl',
                        help='Path to save the videos DataFrame')
    args = parser.parse_args()

    ## Parameters parsing
    source_dir: Path = args.source
    videodataset_path: Path = args.videodataset

    # Create output folder (if it doesn't exist)
    videodataset_path.parent.mkdir(parents=True, exist_ok=True)

    ## DataFrame
    if videodataset_path.exists():
        print('Loading video DataFrame')
        df_videos = pd.read_pickle(videodataset_path)
    else:
        print('Creating video DataFrame')
        df_videos = _build_video_dataframe(source_dir)

        print('Saving video DataFrame to {}'.format(videodataset_path))
        df_videos.to_pickle(str(videodataset_path))

    print('Real videos: {:d}'.format(int((df_videos['label'] == 0).sum())))
    print('Fake videos: {:d}'.format(int((df_videos['label'] == 1).sum())))
if __name__ == '__main__':
    # Run the indexing only when executed as a script, not on import.
    main()