Spaces:
Build error
Build error
Iskaj
committed on
Commit
·
727e567
1
Parent(s):
ed0180d
add documentation to data.py
Browse files
data.py
CHANGED
@@ -4,6 +4,9 @@ import shutil
|
|
4 |
|
5 |
from videohash import filepath_from_url
|
6 |
|
|
|
|
|
|
|
7 |
with open('apb2022.json') as filein:
|
8 |
urls, videos, url2video, video2url = [], [], {}, {}
|
9 |
for item in json.load(filein):
|
@@ -12,13 +15,14 @@ with open('apb2022.json') as filein:
|
|
12 |
url2video[item['url']] = item['mp4']
|
13 |
video2url[item['mp4']] = item['url']
|
14 |
|
|
|
15 |
for url in videos:
|
16 |
filepath = filepath_from_url(url) + '.index'
|
17 |
datapath = os.path.join('data', os.path.basename(filepath))
|
18 |
if not os.path.exists(filepath) and os.path.exists(datapath):
|
19 |
shutil.copyfile(datapath, filepath)
|
20 |
|
21 |
-
|
22 |
if __name__ == "__main__":
|
23 |
from videomatch import get_video_index
|
24 |
|
|
|
4 |
|
5 |
from videohash import filepath_from_url
|
6 |
|
7 |
+
# < Algemene Politieke Beschouwing 2022 >
|
8 |
+
# Load the reference videos to compare against from a .json file.
|
9 |
+
# This can be updated with any .json file containing other videos.
|
10 |
with open('apb2022.json') as filein:
|
11 |
urls, videos, url2video, video2url = [], [], {}, {}
|
12 |
for item in json.load(filein):
|
|
|
15 |
url2video[item['url']] = item['mp4']
|
16 |
video2url[item['mp4']] = item['url']
|
17 |
|
18 |
+
# Get the index filepath for each video URL in the dataset; if that file is missing but a copy exists in the data folder, copy it from there.
|
19 |
for url in videos:
|
20 |
filepath = filepath_from_url(url) + '.index'
|
21 |
datapath = os.path.join('data', os.path.basename(filepath))
|
22 |
if not os.path.exists(filepath) and os.path.exists(datapath):
|
23 |
shutil.copyfile(datapath, filepath)
|
24 |
|
25 |
+
# To manually build the indices for the above dataset.
|
26 |
if __name__ == "__main__":
|
27 |
from videomatch import get_video_index
|
28 |
|