Spaces:
Running
Running
Joshua Lochner
committed on
Commit
·
7dbc778
1
Parent(s):
508e8b2
Code formatting
Browse files
- src/preprocess.py +3 -4
src/preprocess.py
CHANGED
@@ -374,14 +374,13 @@ class PreprocessArguments:
|
|
374 |
# 1 = At least one positive vote
|
375 |
|
376 |
max_segment_duration: float = field(
|
377 |
-
default=180,
|
378 |
# >180 => 2.8%
|
379 |
# >200 => 2.1%
|
380 |
# >250 => 1.1%
|
381 |
# >300 => 0.06%
|
382 |
metadata={'help': 'Ignore all segments whose duration in seconds is longer than this value (negative means no limit)'})
|
383 |
|
384 |
-
|
385 |
min_views: int = field(
|
386 |
default=5, metadata={'help': 'Minimum number of views a segment must have to be considered. 0 = show all'})
|
387 |
|
@@ -934,7 +933,8 @@ def main():
|
|
934 |
for item in items:
|
935 |
parsed_item = json.loads(item) # TODO add uuid
|
936 |
|
937 |
-
matches = extract_sponsor_matches_from_text(
|
|
|
938 |
|
939 |
if matches:
|
940 |
for match in matches:
|
@@ -948,7 +948,6 @@ def main():
|
|
948 |
'label': none_category
|
949 |
}), file=fp)
|
950 |
|
951 |
-
|
952 |
logger.info('Write')
|
953 |
# Save excess items
|
954 |
# excess_path = os.path.join(
|
|
|
374 |
# 1 = At least one positive vote
|
375 |
|
376 |
max_segment_duration: float = field(
|
377 |
+
default=180, # 3 minutes
|
378 |
# >180 => 2.8%
|
379 |
# >200 => 2.1%
|
380 |
# >250 => 1.1%
|
381 |
# >300 => 0.06%
|
382 |
metadata={'help': 'Ignore all segments whose duration in seconds is longer than this value (negative means no limit)'})
|
383 |
|
|
|
384 |
min_views: int = field(
|
385 |
default=5, metadata={'help': 'Minimum number of views a segment must have to be considered. 0 = show all'})
|
386 |
|
|
|
933 |
for item in items:
|
934 |
parsed_item = json.loads(item) # TODO add uuid
|
935 |
|
936 |
+
matches = extract_sponsor_matches_from_text(
|
937 |
+
parsed_item['extracted'])
|
938 |
|
939 |
if matches:
|
940 |
for match in matches:
|
|
|
948 |
'label': none_category
|
949 |
}), file=fp)
|
950 |
|
|
|
951 |
logger.info('Write')
|
952 |
# Save excess items
|
953 |
# excess_path = os.path.join(
|