Spaces:
Running
Running
Joshua Lochner
committed on
Commit
•
d7725ec
1
Parent(s):
6a8bf30
Fix incorrect segment output format
Browse files - src/evaluate.py +16 -10
src/evaluate.py
CHANGED
@@ -261,7 +261,7 @@ def main():
|
|
261 |
# Check for incorrect segments using the classifier
|
262 |
|
263 |
segments_to_check = []
|
264 |
-
|
265 |
for sponsor_segment in sponsor_segments:
|
266 |
segment_words = extract_segment(
|
267 |
words, sponsor_segment['start'], sponsor_segment['end'])
|
@@ -280,17 +280,22 @@ def main():
|
|
280 |
if sponsor_segment['locked']:
|
281 |
continue
|
282 |
|
283 |
-
|
284 |
-
sponsor_segment['text'])
|
285 |
-
texts.append(sponsor_segment['cleaned_text'])
|
286 |
segments_to_check.append(sponsor_segment)
|
287 |
|
288 |
if segments_to_check: # Some segments to check
|
289 |
|
290 |
-
segments_scores = classifier(
|
291 |
|
292 |
num_correct = 0
|
293 |
for segment, scores in zip(segments_to_check, segments_scores):
|
|
|
|
|
|
|
|
|
|
|
|
|
294 |
all_metrics['classifier_segment_count'] += 1
|
295 |
|
296 |
prediction = max(scores, key=lambda x: x['score'])
|
@@ -302,7 +307,7 @@ def main():
|
|
302 |
|
303 |
segment.update({
|
304 |
'predicted': predicted_category,
|
305 |
-
'scores':
|
306 |
})
|
307 |
|
308 |
incorrect_segments.append(segment)
|
@@ -313,8 +318,9 @@ def main():
|
|
313 |
|
314 |
all_metrics['classifier_segment_correct'] += num_correct
|
315 |
|
316 |
-
|
317 |
-
all_metrics['
|
|
|
318 |
|
319 |
out_metrics.append(current_metrics)
|
320 |
progress.set_postfix(postfix_info)
|
@@ -383,9 +389,9 @@ def main():
|
|
383 |
safe_print('\t\tPredicted Category:',
|
384 |
incorrect_segment['predicted'])
|
385 |
safe_print('\t\tProbabilities:')
|
386 |
-
for
|
387 |
safe_print(
|
388 |
-
f"\t\t\t{
|
389 |
|
390 |
safe_print()
|
391 |
|
|
|
261 |
# Check for incorrect segments using the classifier
|
262 |
|
263 |
segments_to_check = []
|
264 |
+
cleaned_texts = [] # Texts to send through tokenizer
|
265 |
for sponsor_segment in sponsor_segments:
|
266 |
segment_words = extract_segment(
|
267 |
words, sponsor_segment['start'], sponsor_segment['end'])
|
|
|
280 |
if sponsor_segment['locked']:
|
281 |
continue
|
282 |
|
283 |
+
cleaned_texts.append(
|
284 |
+
clean_text(sponsor_segment['text']))
|
|
|
285 |
segments_to_check.append(sponsor_segment)
|
286 |
|
287 |
if segments_to_check: # Some segments to check
|
288 |
|
289 |
+
segments_scores = classifier(cleaned_texts)
|
290 |
|
291 |
num_correct = 0
|
292 |
for segment, scores in zip(segments_to_check, segments_scores):
|
293 |
+
|
294 |
+
fixed_scores = {
|
295 |
+
score['label']: score['score']
|
296 |
+
for score in scores
|
297 |
+
}
|
298 |
+
|
299 |
all_metrics['classifier_segment_count'] += 1
|
300 |
|
301 |
prediction = max(scores, key=lambda x: x['score'])
|
|
|
307 |
|
308 |
segment.update({
|
309 |
'predicted': predicted_category,
|
310 |
+
'scores': fixed_scores
|
311 |
})
|
312 |
|
313 |
incorrect_segments.append(segment)
|
|
|
318 |
|
319 |
all_metrics['classifier_segment_correct'] += num_correct
|
320 |
|
321 |
+
if all_metrics['classifier_segment_count'] > 0:
|
322 |
+
postfix_info['classifier_accuracy'] = all_metrics['classifier_segment_correct'] / \
|
323 |
+
all_metrics['classifier_segment_count']
|
324 |
|
325 |
out_metrics.append(current_metrics)
|
326 |
progress.set_postfix(postfix_info)
|
|
|
389 |
safe_print('\t\tPredicted Category:',
|
390 |
incorrect_segment['predicted'])
|
391 |
safe_print('\t\tProbabilities:')
|
392 |
+
for label, score in incorrect_segment['scores'].items():
|
393 |
safe_print(
|
394 |
+
f"\t\t\t{label}: {score}")
|
395 |
|
396 |
safe_print()
|
397 |
|