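"""Gradio app that matches a short source video against a longer target video by hashing frames (perceptual hashes) and searching the hashes with faiss."""
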
import tempfile
import urllib.request
import logging
import os
import hashlib
import shutil
import time

import gradio as gr
from moviepy.editor import VideoFileClip

import seaborn as sns
import matplotlib.pyplot as plt

import imagehash
from PIL import Image

import numpy as np
import pandas as pd
import faiss

FPS = 5            # Sampling rate (frames per second) at which video frames are hashed
MIN_DISTANCE = 4   # Smallest hash distance (in bits) tried when searching for matches
MAX_DISTANCE = 30  # Largest hash distance (in bits) tried when searching for matches

video_directory = tempfile.gettempdir()

def move_video_to_tempdir(input_dir, filename):
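    """Copy a local video from input_dir into the temp directory (unless it is already there) and return the new path."""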
    new_filename = os.path.join(video_directory, filename)
    input_file = os.path.join(input_dir, filename)
    if not os.path.exists(new_filename):
        shutil.copyfile(input_file, new_filename)
        logging.info(f"Copied {input_file} to {new_filename}.")
    else:
        logging.info(f"Skipping copying from {input_file} because {new_filename} already exists.")
    return new_filename

def download_video_from_url(url):
    """Download video from url or return md5 hash as video name"""
    filename = os.path.join(video_directory, hashlib.md5(url.encode()).hexdigest())
    if not os.path.exists(filename):
        with (urllib.request.urlopen(url)) as f, open(filename, 'wb') as fileout:
            fileout.write(f.read())
        logging.info(f"Downloaded video from {url} to {filename}.")
    else:
        logging.info(f"Skipping downloading from {url} because {filename} already exists.")
    return filename

def change_ffmpeg_fps(clip, fps=FPS):
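    """Patch the clip's ffmpeg reader so frames are read at `fps` frames per second instead of the clip's native frame rate."""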
    # Hacking the ffmpeg call based on 
    # https://github.com/Zulko/moviepy/blob/master/moviepy/video/io/ffmpeg_reader.py#L126
    import subprocess as sp

    cmd = [arg + ",fps=%d" % fps if arg.startswith("scale=") else arg for arg in clip.reader.proc.args]
    clip.reader.close()
    clip.reader.proc = sp.Popen(cmd, bufsize=clip.reader.bufsize, 
                                stdout=sp.PIPE, stderr=sp.PIPE, stdin=sp.DEVNULL)
    clip.fps = clip.reader.fps = fps
    clip.reader.lastread = clip.reader.read_frame()
    return clip

def compute_hash(frame, hash_size=16):
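    """Compute the perceptual hash (pHash) of a single frame; the result's `.hash` attribute is a (hash_size, hash_size) boolean array."""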
    image = Image.fromarray(np.array(frame))
    return imagehash.phash(image, hash_size)

def binary_array_to_uint8s(arr):
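    """Pack a 2D array of bits into a list of uint8 values, 8 bits per value.

    >>> binary_array_to_uint8s([[1, 0, 1, 0, 1, 0, 1, 0]])
    [170]
    """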
    bit_string = ''.join(str(1 * x) for l in arr for x in l)
    return [int(bit_string[i:i+8], 2) for i in range(0, len(bit_string), 8)]

def compute_hashes(clip, fps=FPS):
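    """Iterate over the clip at `fps` frames per second and yield, per frame, its frame number and packed hash."""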
    for index, frame in enumerate(change_ffmpeg_fps(clip, fps).iter_frames()):
        # Each frame is an array of shape (height, width, 3) since the video is RGB
        # The hash itself is a boolean array of shape (hash_size, hash_size)
        # Packed into uint8s the hash becomes a vector of hash_size**2 / 8 bytes, which is what gets indexed
        hashed = np.array(binary_array_to_uint8s(compute_hash(frame).hash), dtype='uint8')
        yield {"frame": 1+index*fps, "hash": hashed}

def index_hashes_for_video(url, is_file = False):
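    """Hash the frames of a video and store the hashes in a binary faiss index.

    The index is cached on disk as '<filename>.index', so each video only has to be hashed and indexed once.
    """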
    if not is_file:
        filename = download_video_from_url(url)
    else:
        filename = url
    if os.path.exists(f'{filename}.index'):
        logging.info(f"Loading indexed hashes from {filename}.index")
        binary_index = faiss.read_index_binary(f'{filename}.index') 
        logging.info(f"Index {filename}.index has in total {binary_index.ntotal} frames")
        return binary_index

    hash_vectors = np.array([x['hash'] for x in compute_hashes(VideoFileClip(filename))])
    logging.info(f"Computed hashes for {hash_vectors.shape} frames.")

    # Initializing the quantizer.
    quantizer = faiss.IndexBinaryFlat(hash_vectors.shape[1]*8)
    # Initializing index.
    index = faiss.IndexBinaryIVF(quantizer, hash_vectors.shape[1]*8, min(16, hash_vectors.shape[0]))
    index.nprobe = 1 # Number of nearest clusters to be searched per query. 
    # Training the quantizer.
    index.train(hash_vectors)
    #index = faiss.IndexBinaryFlat(64)
    index.add(hash_vectors)
    faiss.write_index_binary(index, f'{filename}.index')
    logging.info(f"Indexed hashes for {index.ntotal} frames to {filename}.index.")
    return index

def get_video_indices(url, target, MIN_DISTANCE = 4):
    """" The comparison between the target and the original video will be plotted based
    on the matches between the target and the original video over time. The matches are determined
    based on the minimum distance between hashes (as computed by faiss-vectors) before they're considered a match. 
    
    args: 
    - url: url of the source video (short video which you want to be checked)
    - target: url of the target video (longer video which is a superset of the source video)
    - MIN_DISTANCE: integer representing the minimum distance between hashes on bit-level before its considered a match
    """
    # TODO: Fix crash if no matches are found
    if url.endswith('dl=1'):
        is_file = False
    elif url.endswith('.mp4'):
        is_file = True
    else:
        is_file = False # Fall back to treating the input as a URL so is_file is always defined

    # Url (short video) 
    video_index = index_hashes_for_video(url, is_file)
    video_index.make_direct_map() # Make sure the index is indexable
    hash_vectors = np.array([video_index.reconstruct(i) for i in range(video_index.ntotal)]) # Retrieve original indices
    
    # Target video (long video)
    target_indices = [index_hashes_for_video(x) for x in [target]]

    return video_index, hash_vectors, target_indices    

def compare_videos(video_index, hash_vectors, target_indices, MIN_DISTANCE = 3): # , is_file = False):
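    """Search the source video's hash vectors against the target video's index and return all matches within MIN_DISTANCE."""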
    # The results are returned as a triplet of 1D arrays 
    # lims, D, I, where result for query i is in I[lims[i]:lims[i+1]] 
    # (indices of neighbors), D[lims[i]:lims[i+1]] (distances).
    lims, D, I = target_indices[0].range_search(hash_vectors, MIN_DISTANCE)
    return lims, D, I, hash_vectors

def get_decent_distance(url, target, MIN_DISTANCE, MAX_DISTANCE):
    """ To get a decent heurstic for a base distance check every distance from MIN_DISTANCE to MAX_DISTANCE
    until the number of matches found is equal to or higher than the number of frames in the source video"""
    for distance in np.arange(start = MIN_DISTANCE - 2, stop = MAX_DISTANCE + 2, step = 2, dtype=int):
        distance = int(distance)
        video_index, hash_vectors, target_indices = get_video_indices(url, target, MIN_DISTANCE = distance)
        lims, D, I, hash_vectors = compare_videos(video_index, hash_vectors, target_indices, MIN_DISTANCE = distance)
        nr_source_frames = video_index.ntotal
        nr_matches = len(D)
        logging.info(f"{(nr_matches/nr_source_frames) * 100.0:.1f}% of frames have a match for distance '{distance}' ({nr_matches} matches for {nr_source_frames} frames)")
        if nr_matches >= nr_source_frames:
            return distance
    logging.warning(f"No distance up to {MAX_DISTANCE} matched every source frame; returning None")
    return None

def plot_comparison(lims, D, I, hash_vectors, MIN_DISTANCE = 3):
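    """Scatter plot of the raw matches: time in the source video versus time in the target video,
    where closer matches (smaller hash distance) are drawn larger and more opaque."""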
    sns.set_theme()

    # Repeat each query frame index once per match it has (lims gives where each query's results start and end)
    x = [(lims[i+1]-lims[i]) * [i] for i in range(hash_vectors.shape[0])]
    x = [i/FPS for j in x for i in j] # Convert frame indices to seconds
    y = [i/FPS for i in I]            # Convert matched frame indices to seconds
    
    # Create figure and dataframe to plot with sns
    fig = plt.figure()
    # plt.tight_layout()
    df = pd.DataFrame(zip(x, y), columns = ['X', 'Y'])
    g = sns.scatterplot(data=df, x='X', y='Y', s=2*(1-D/(MIN_DISTANCE+1)), alpha=1-D/MIN_DISTANCE)

    # Set x-labels to be more readable
    x_locs, x_labels = plt.xticks() # Get original locations and labels for x ticks
    x_labels = [time.strftime('%H:%M:%S', time.gmtime(x)) for x in x_locs]
    plt.xticks(x_locs, x_labels)
    plt.xticks(rotation=90)
    plt.xlabel('Time in source video (H:M:S)')
    plt.xlim(0, None)

    # Set y-labels to be more readable
    y_locs, y_labels = plt.yticks() # Get original locations and labels for y ticks
    y_labels = [time.strftime('%H:%M:%S', time.gmtime(y)) for y in y_locs]
    plt.yticks(y_locs, y_labels)
    plt.ylabel('Time in target video (H:M:S)')

    # Adjust padding to fit gradio
    plt.subplots_adjust(bottom=0.25, left=0.20)
    return fig 

logging.basicConfig()
logging.getLogger().setLevel(logging.INFO)

def plot_multi_comparison(df):
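    """Plot the aggregated match dataframe: matched time, timeshift and offset (and their interpolated variants) against TARGET_S in a 3x2 grid."""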
    fig, ax_arr = plt.subplots(3, 2, figsize=(12, 6), dpi=100, sharex=True) # , ax=axes[1]
    # plt.scatter(x=df['TARGET_S'], y = df['SOURCE_S'], ax=ax_arr[0])
    # plt.scatter(x=df['TARGET_S'], y = df['SOURCE_S'], ax=ax_arr[1])
    sns.scatterplot(data = df, x='TARGET_S', y='SOURCE_S', ax=ax_arr[0,0])
    sns.lineplot(data = df, x='TARGET_S', y='SOURCE_LIP_S', ax=ax_arr[0,1])
    sns.scatterplot(data = df, x='TARGET_S', y='TIMESHIFT', ax=ax_arr[1,0])
    sns.lineplot(data = df, x='TARGET_S', y='TIMESHIFT_LIP', ax=ax_arr[1,1])
    sns.scatterplot(data = df, x='TARGET_S', y='OFFSET', ax=ax_arr[2,0])
    sns.lineplot(data = df, x='TARGET_S', y='OFFSET_LIP', ax=ax_arr[2,1])
    return fig


def get_videomatch_df(url, target, min_distance=MIN_DISTANCE, vanilla_df=False):
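    """Find a decent match distance, compare the videos and return a dataframe of matches.

    With vanilla_df=True the raw matches are returned; otherwise the matches are weighted by hash distance,
    aggregated per second, linearly interpolated, and extended with TIMESHIFT and OFFSET columns.
    """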
    distance = get_decent_distance(url, target, MIN_DISTANCE, MAX_DISTANCE)
    video_index, hash_vectors, target_indices = get_video_indices(url, target, MIN_DISTANCE = distance)
    lims, D, I, hash_vectors = compare_videos(video_index, hash_vectors, target_indices, MIN_DISTANCE = distance)

    # Repeat each query frame index once per match it has and convert frame indices to seconds
    target_frames = [(lims[i+1]-lims[i]) * [i] for i in range(hash_vectors.shape[0])]
    target_s = [i/FPS for j in target_frames for i in j]
    source_s = [i/FPS for i in I]

    # Make df
    df = pd.DataFrame(zip(target_s, source_s, D, I), columns = ['TARGET_S', 'SOURCE_S', 'DISTANCE', 'INDICES'])
    if vanilla_df:
        return df
        
    # Minimum distance dataframe ----
    # Group by X so for every second/x there will be 1 value of Y in the end
    # index_min_distance = df.groupby('TARGET_S')['DISTANCE'].idxmin()
    # df_min = df.loc[index_min_distance]
    # df_min
    # -------------------------------

    df['TARGET_WEIGHT'] = 1 - df['DISTANCE']/distance # Higher value means a better match    
    df['SOURCE_WEIGHTED_VALUE'] = df['SOURCE_S'] * df['TARGET_WEIGHT'] # Multiply the weight (which indicates a better match) with the value for Y and aggregate to get a less noisy estimate of Y
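    # For example, with distance=4 a match at DISTANCE 1 gets weight 0.75 and a match at DISTANCE 3 gets weight 0.25,
    # so the weighted average per second leans towards the closer match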

    # Group by X so for every second/x there will be 1 value of Y in the end
    grouped_X = df.groupby('TARGET_S').agg({'SOURCE_WEIGHTED_VALUE' : 'sum', 'TARGET_WEIGHT' : 'sum'})
    grouped_X['FINAL_SOURCE_VALUE'] = grouped_X['SOURCE_WEIGHTED_VALUE'] / grouped_X['TARGET_WEIGHT'] 

    # Remake the dataframe
    df = grouped_X.reset_index()
    df = df.drop(columns=['SOURCE_WEIGHTED_VALUE', 'TARGET_WEIGHT'])
    df = df.rename({'FINAL_SOURCE_VALUE' : 'SOURCE_S'}, axis='columns')

    # Add NAN to "missing" x values (base it off hash vector, not target_s)
    step_size = 1/FPS
    x_complete =  np.round(np.arange(start=0.0, stop = max(df['TARGET_S'])+step_size, step = step_size), 1) # More robust    
    df['TARGET_S'] = np.round(df['TARGET_S'], 1)
    df_complete = pd.DataFrame(x_complete, columns=['TARGET_S'])

    # Merge dataframes to get NAN values for every missing SOURCE_S
    df = df_complete.merge(df, on='TARGET_S', how='left')

    # Interpolate between frames since there are missing values
    df['SOURCE_LIP_S'] = df['SOURCE_S'].interpolate(method='linear', limit_direction='both', axis=0)
   
    # Add timeshift col and timeshift col with Linearly Interpolated Values
    df['TIMESHIFT'] = df['SOURCE_S'].shift(1) - df['SOURCE_S']
    df['TIMESHIFT_LIP'] = df['SOURCE_LIP_S'].shift(1) - df['SOURCE_LIP_S']

    # Add Offset col that assumes the video is played at the same speed as the other to do a "timeshift"
    df['OFFSET'] = df['SOURCE_S'] - df['TARGET_S'] - np.min(df['SOURCE_S'])
    df['OFFSET_LIP'] = df['SOURCE_LIP_S'] - df['TARGET_S'] - np.min(df['SOURCE_LIP_S'])
    return df

def get_comparison(url, target, MIN_DISTANCE = 4):
    """ Function for Gradio to combine all helper functions"""
    video_index, hash_vectors, target_indices = get_video_indices(url, target, MIN_DISTANCE = MIN_DISTANCE)
    lims, D, I, hash_vectors = compare_videos(video_index, hash_vectors, target_indices, MIN_DISTANCE = MIN_DISTANCE)
    fig = plot_comparison(lims, D, I, hash_vectors, MIN_DISTANCE = MIN_DISTANCE)
    return fig

def get_auto_comparison(url, target, MIN_DISTANCE = MIN_DISTANCE):
    """ Gradio callback that automatically picks a decent distance, builds the aggregated match dataframe and plots it"""
    # get_videomatch_df already finds a decent distance and runs the comparison internally,
    # so there is no need to repeat that work here
    df = get_videomatch_df(url, target, min_distance=MIN_DISTANCE, vanilla_df=False)
    fig = plot_multi_comparison(df)
    return fig

video_urls = ["https://www.dropbox.com/s/8c89a9aba0w8gjg/Ploumen.mp4?dl=1",
              "https://www.dropbox.com/s/rzmicviu1fe740t/Bram%20van%20Ojik%20krijgt%20reprimande.mp4?dl=1",
              "https://www.dropbox.com/s/wcot34ldmb84071/Baudet%20ontmaskert%20Omtzigt_%20u%20bent%20door%20de%20mand%20gevallen%21.mp4?dl=1",
              "https://www.dropbox.com/s/4ognq8lshcujk43/Plenaire_zaal_20200923132426_Omtzigt.mp4?dl=1"]

index_iface = gr.Interface(fn=lambda url: index_hashes_for_video(url).ntotal, 
                     inputs="text", outputs="text", 
                     examples=video_urls, cache_examples=True)

compare_iface = gr.Interface(fn=get_comparison,
                     inputs=["text", "text", gr.Slider(2, 30, 4, step=2)], outputs="plot", 
                     examples=[[x, video_urls[-1]] for x in video_urls[:-1]])

auto_compare_iface = gr.Interface(fn=get_auto_comparison,
                     inputs=["text", "text"], outputs="plot", 
                     examples=[[x, video_urls[-1]] for x in video_urls[:-1]])

iface = gr.TabbedInterface([index_iface, compare_iface, auto_compare_iface], ["Index", "Compare", "AutoCompare"])
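
# A minimal sketch of using the matching functions without the Gradio UI (assuming the example
# Dropbox URLs above are still reachable); not executed when the app runs:
#
#   df = get_videomatch_df(video_urls[0], video_urls[-1])
#   print(df[['TARGET_S', 'SOURCE_S', 'OFFSET']].head())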

if __name__ == "__main__":
    import matplotlib
    matplotlib.use('SVG') # To be able to plot in gradio

    logging.basicConfig()
    logging.getLogger().setLevel(logging.INFO)

    iface.launch()
    #iface.launch(auth=("test", "test"), share=True, debug=True)