dh-mc committed on
Commit
95ba169
1 Parent(s): 007b986

translation time analyzed

Browse files
.gitattributes CHANGED
@@ -59,3 +59,7 @@ notebooks/00a_Data[[:space:]]Analysis_greedy_decoding.ipynb filter=lfs diff=lfs
59
  notebooks/00b_Data[[:space:]]Analysis_Few_Shots.ipynb filter=lfs diff=lfs merge=lfs -text
60
  notebooks/01_Few-shot_Prompting.ipynb filter=lfs diff=lfs merge=lfs -text
61
  notebooks/01a_Few-shot_Prompting.ipynb filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
59
  notebooks/00b_Data[[:space:]]Analysis_Few_Shots.ipynb filter=lfs diff=lfs merge=lfs -text
60
  notebooks/01_Few-shot_Prompting.ipynb filter=lfs diff=lfs merge=lfs -text
61
  notebooks/01a_Few-shot_Prompting.ipynb filter=lfs diff=lfs merge=lfs -text
62
+ logs/l40-1gpu.txt filter=lfs diff=lfs merge=lfs -text
63
+ logs/l40-4gpu.txt filter=lfs diff=lfs merge=lfs -text
64
+ logs/openai-gpt-4o-mini.txt filter=lfs diff=lfs merge=lfs -text
65
+ logs/openai-gpt-4o.txt filter=lfs diff=lfs merge=lfs -text
llm_toolkit/translation_utils.py CHANGED
@@ -1,5 +1,6 @@
1
  import os
2
  import re
 
3
  import pandas as pd
4
  import evaluate
5
  import seaborn as sns
@@ -482,3 +483,82 @@ def eval_openai(num_shots, datasets, model="gpt-4o-mini", max_new_tokens=300):
482
  predictions.append(output)
483
 
484
  return predictions
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import os
2
  import re
3
+ import glob
4
  import pandas as pd
5
  import evaluate
6
  import seaborn as sns
 
483
  predictions.append(output)
484
 
485
  return predictions
486
+
487
+
488
def convert_time_to_minutes(time_str):
    """Convert a colon-separated duration string into minutes.

    Accepted layouts are ``HH:MM:SS``, ``MM:SS`` and ``SS``. Every component
    must parse with ``int()``.

    Args:
        time_str: Duration text such as ``"28:52"`` or ``"1:02:03"``.

    Returns:
        The duration in minutes as a float; seconds contribute a fraction.

    Raises:
        ValueError: If a component is not an integer literal, or if the
            string has more than three colon-separated components.
    """
    time_parts = [int(part) for part in time_str.split(":")]

    # Fail loudly on unknown layouts rather than reporting a bogus
    # zero-minute duration.
    if len(time_parts) > 3:
        raise ValueError(f"Unsupported time format: {time_str!r}")

    # Left-pad with zeros so every accepted layout becomes (hours, minutes, seconds).
    hours, minutes, seconds = [0] * (3 - len(time_parts)) + time_parts
    return hours * 60 + minutes + seconds / 60
508
+
509
+
510
# Captures the elapsed-time field of a finished progress bar: the 5-10
# characters between "[" and "<00:00", e.g. "28:52" in "[28:52<00:00, ...]".
time_pattern = re.compile(r"\[(.{5,10})<00:00")
# Captures the model name and the shot count from "<model>/shots-<n> metrics:".
metrics_pattern = re.compile(r"(.*)/shots-(.*) metrics:")


def process_log_file(log_file):
    """Extract per-run evaluation times from one log file.

    The file is scanned line by line. A line matching ``time_pattern``
    records the elapsed time of a finished progress bar; the next line
    matching ``metrics_pattern`` is paired with that time and yields one row.

    Args:
        log_file: Path of the log file to read.

    Returns:
        A ``pandas.DataFrame`` with the columns ``model`` (str), ``shots``
        (str, exactly as written in the log) and ``eval_time`` (minutes).
        Metrics lines without a usable preceding time are reported on stdout
        and skipped, so the three columns always have the same length.

    Raises:
        OSError: If the file cannot be opened.
    """
    model = []
    shots = []
    eval_time = []

    # Elapsed-time text of the latest finished progress bar that has not yet
    # been paired with a metrics line.
    pending_time = None

    # The logs contain non-ASCII text (progress-bar glyphs, Chinese samples);
    # decode explicitly so the result does not depend on the platform default,
    # and tolerate stray undecodable bytes.
    with open(log_file, "r", encoding="utf-8", errors="replace") as f:
        try:
            for line in f:
                time_match = time_pattern.search(line)
                if time_match:
                    pending_time = time_match.group(1)
                    continue

                metrics_match = metrics_pattern.search(line)
                if not metrics_match:
                    continue

                model_name, num_shots = metrics_match.groups()

                # Consume the pending time so it is never reused for a later
                # metrics line that has no progress bar of its own.
                time_str, pending_time = pending_time, None
                if time_str is None:
                    print(
                        f"No evaluation time found for {model_name}/shots-{num_shots}"
                        f" in log file: {log_file}"
                    )
                    continue

                try:
                    minutes = convert_time_to_minutes(time_str)
                except ValueError as e:
                    print(f"Error processing log file: {log_file}")
                    print(e)
                    continue

                # Append all three values together so the columns stay aligned.
                model.append(model_name)
                shots.append(num_shots)
                eval_time.append(minutes)
        except OSError as e:
            # Best effort: keep the rows collected before the read failure.
            print(f"Error processing log file: {log_file}")
            print(e)

    df = pd.DataFrame(
        {
            "model": model,
            "shots": shots,
            "eval_time": eval_time,
        }
    )
    return df
549
+
550
+
551
def load_eval_times(logs_folder):
    """Collect evaluation times from every log file directly inside a folder.

    Args:
        logs_folder: Directory whose files are parsed with
            ``process_log_file``. Files are processed in sorted path order.

    Returns:
        A ``pandas.DataFrame`` with the columns ``model``, ``shots``
        (converted to int) and ``eval_time``, holding the rows of all log
        files. It is empty when the folder contains no files.
    """
    # glob("*") also returns sub-directories, which cannot be opened as logs.
    log_files = sorted(
        path
        for path in glob.glob(os.path.join(logs_folder, "*"))
        if os.path.isfile(path)
    )

    frames = []
    for log_file in log_files:
        print(f"Loading content of {log_file}")
        frames.append(process_log_file(log_file))

    # Concatenate once at the end rather than growing a frame inside the loop.
    if frames:
        time_df = pd.concat(frames, ignore_index=True)
    else:
        time_df = pd.DataFrame({"model": [], "shots": [], "eval_time": []})

    # Shot counts are parsed from the log as text such as "00" or "10".
    time_df["shots"] = time_df["shots"].astype(int)
    return time_df
logs/l40-1gpu.txt CHANGED
The diff for this file is too large to render. See raw diff
 
logs/l40-4gpu.txt CHANGED
The diff for this file is too large to render. See raw diff
 
logs/openai-gpt-4o-mini.txt CHANGED
The diff for this file is too large to render. See raw diff
 
logs/openai-gpt-4o.txt CHANGED
@@ -1,74 +1,3 @@
1
- loading env vars from: D:\code\projects\rapget-translation\.env
2
- Adding D:\code\projects\rapget-translation to sys.path
3
- C:\Users\dongh\.conda\envs\rapget\Lib\site-packages\threadpoolctl.py:1214: RuntimeWarning:
4
- Found Intel OpenMP ('libiomp') and LLVM OpenMP ('libomp') loaded at
5
- the same time. Both libraries are known to be incompatible and this
6
- can cause random crashes or deadlocks on Linux when loaded in the
7
- same Python program.
8
- Using threadpoolctl may cause crashes or deadlocks. For more
9
- information and possible workarounds, please see
10
- https://github.com/joblib/threadpoolctl/blob/master/multiple_openmp.md
11
-
12
- warnings.warn(msg, RuntimeWarning)
13
- [nltk_data] Downloading package wordnet to
14
- [nltk_data] C:\Users\dongh\AppData\Roaming\nltk_data...
15
- [nltk_data] Package wordnet is already up-to-date!
16
- [nltk_data] Downloading package punkt to
17
- [nltk_data] C:\Users\dongh\AppData\Roaming\nltk_data...
18
- [nltk_data] Package punkt is already up-to-date!
19
- [nltk_data] Downloading package omw-1.4 to
20
- [nltk_data] C:\Users\dongh\AppData\Roaming\nltk_data...
21
- [nltk_data] Package omw-1.4 is already up-to-date!
22
- loading: D:\code\projects\rapget-translation\eval_modules\calc_repetitions.py
23
- loading D:\code\projects\rapget-translation\llm_toolkit\translation_utils.py
24
- [nltk_data] Downloading package wordnet to
25
- [nltk_data] C:\Users\dongh\AppData\Roaming\nltk_data...
26
- [nltk_data] Package wordnet is already up-to-date!
27
- [nltk_data] Downloading package punkt to
28
- [nltk_data] C:\Users\dongh\AppData\Roaming\nltk_data...
29
- [nltk_data] Package punkt is already up-to-date!
30
- [nltk_data] Downloading package omw-1.4 to
31
- [nltk_data] C:\Users\dongh\AppData\Roaming\nltk_data...
32
- [nltk_data] Package omw-1.4 is already up-to-date!
33
- gpt-4o datasets/mac/mac.tsv results/mac-results_few_shots_openai.csv 300
34
- Evaluating model: gpt-4o
35
- loading train/test data files
36
- DatasetDict({
37
- train: Dataset({
38
- features: ['chinese', 'english'],
39
- num_rows: 4528
40
- })
41
- test: Dataset({
42
- test: Dataset({
43
- features: ['chinese', 'english'],
44
- num_rows: 1133
45
- })
46
- })
47
- --------------------------------------------------
48
- chinese: 老耿端起枪,眯缝起一只三角眼,一搂扳机响了枪,冰雹般的金麻雀劈哩啪啦往下落,铁砂子在柳枝间飞迸着,嚓嚓有声。
49
- chinese: 老耿端起枪,眯缝起一只三角眼,一搂扳机响了枪,冰雹般的金麻雀劈哩啪啦往下落,铁砂子在柳枝间飞迸着,嚓嚓有声。
50
- --------------------------------------------------
51
- english: Old Geng picked up his shotgun, squinted, and pulled the trigger. Two sparrows crashed to the ground like hailstones as shotgun pellets tore noisily through the branches.
52
- *** Evaluating with num_shots: 0
53
- *** Evaluating with num_shots: 0
54
- 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1133/1133 [28:52<00:00, 1.53s/it]
55
- gpt-4o/shots-00 metrics: {'meteor': 0.3797419877414444, 'bleu_scores': {'bleu': 0.12054600115274576, 'precisions': [0.4395170970950372, 0.1657507850413931, 0.08008175399479747, 0.041705426356589144], 'brevity_penalty': 0.965191371371961, 'length_ratio': 0.965783371977476, 'translation_length': 29157, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.42488525198918325, 'rouge2': 0.17659595999851255, 'rougeL': 0.37036814222422193, 'rougeLsum': 0.37043557409027883}, 'accuracy': 0.00088261253309797, 'correct_ids': [77]}
56
- *** Evaluating with num_shots: 1
57
- 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1133/1133 [22:44<00:00, 1.20s/it]
58
- gpt-4o/shots-01 metrics: {'meteor': 0.37588586538591867, 'bleu_scores': {'bleu': 0.12049862468096047, 'precisions': [0.4438186524872315, 0.16850617418861327, 0.08162258566387129, 0.043228692450813504], 'brevity_penalty': 0.9454338245859127, 'length_ratio': 0.9468698244451805, 'translation_length': 28586, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.4200247346821462, 'rouge2': 0.17611482166851536, 'rougeL': 0.36555347015620193, 'rougeLsum': 0.36597227925335113}, 'accuracy': 0.00088261253309797, 'correct_ids': [77]}
59
- *** Evaluating with num_shots: 3
60
- 100%|���████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1133/1133 [38:45<00:00, 2.05s/it]
61
- gpt-4o/shots-03 metrics: {'meteor': 0.3768512103553621, 'bleu_scores': {'bleu': 0.12408746322526747, 'precisions': [0.4504073680481757, 0.17455806915894748, 0.08641500730375952, 0.04606687515034881], 'brevity_penalty': 0.9329257300005195, 'length_ratio': 0.9350778403444849, 'translation_length': 28230, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.42185440095437376, 'rouge2': 0.18099296897772787, 'rougeL': 0.36683121325656565572, 'rougeLsum': 0.36692420445626067}, 'accuracy': 0.00088261253309797, 'correct_ids': [77]}
62
- *** Evaluating with num_shots: 5
63
- 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1133/1133 [31:48<00:00, 1.68s/it]
64
- gpt-4o/shots-05 metrics: {'meteor': 0.35772544915145654, 'bleu_scores': {'bleu': 0.12169683347842021, 'precisions': [0.45675271230826786, 0.1799429620658671, 0.0908092273892347, 0.04932145886344359], 'brevity_penalty': 0.8785850406914042, 'length_ratio': 0.8853925140775091, 'translation_length': 26730, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.3989536343087876, 'rouge2': 0.17450105082463535, 'rougeL': 0.348320055666115, 'rougeLsum': 0.3483328999510906}, 'accuracy': 0.00088261253309797, 'correct_ids': [77]}
65
- *** Evaluating with num_shots: 10
66
- 'rougeLsum': 0.3483328999510906}, 'accuracy': 0.00088261253309797, 'correct_ids': [77]}
67
- *** Evaluating with num_shots: 10
68
- 'rougeLsum': 0.3483328999510906}, 'accuracy': 0.00088261253309797, 'correct_ids': [77]}
69
- *** Evaluating with num_shots: 10
70
- 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1133/1133 [33:48<00:00, 1.79s/it]
71
- gpt-4o/shots-10 metrics: {'meteor': 0.3746444651189953, 'bleu_scores': {'bleu': 0.12498238983123719, 'precisions': [0.45538813929351135, 0.17677558937630558, 0.08810041971086585, 0.04747233145498034], 'brevity_penalty': 0.9226631755170949, 'length_ratio': 0.9255051341503809, 'translation_length': 27941, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.42057276805902843, 'rouge2': 0.182701868068981, 'rougeL': 0.3668754130715727, 'rougeLsum': 0.3673183260659394}, 'accuracy': 0.00176522506619594, 'correct_ids': [77, 364]}
72
- *** Evaluating with num_shots: 50
73
- 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1133/1133 [38:15<00:00, 2.03s/it]
74
- gpt-4o/shots-50 metrics: {'meteor': 0.40413933252744955, 'bleu_scores': {'bleu': 0.13782450337569063, 'precisions': [0.4695234708392603, 0.19261125727201986, 0.09873251410464487, 0.05424823410696267], 'brevity_penalty': 0.9290310787259491, 'length_ratio': 0.9314342497515734, 'translation_length': 28120, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.44343703034704307, 'rouge2': 0.20310004059554654, 'rougeL': 0.3908878454222482, 'rougeLsum': 0.39082492657743595}, 'accuracy': 0.00353045013239188, 'correct_ids': [77, 364, 567, 1000]}
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad80c611cc68e4bca4120d818e49a283694a72c8b1dc71abe908ec8dd68e4497
3
+ size 9795
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
notebooks/00b_Data Analysis_Few_Shots.ipynb CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4062aa5271a5e14210e73d9ce344cb49dcbb126429fa370c68f8e38725840121
3
- size 593498
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c9785edd77e41276edd7d949b04796bb3a2b98fbf4efc56049f074ef0c74e39
3
+ size 645130
results/mac-results_few_shots_metrics.csv CHANGED
@@ -1,23 +1,29 @@
1
- model,shots,meteor,bleu_1,rouge_l,ews_score,repetition_score,total_repetitions,rap,num_max_output_tokens
2
- 01-ai/Yi-1.5-9B-Chat,0,0.2624042529095214,0.052402107437040435,0.22702109917009206,0.0088261253309797,1.593115622241836,1.6019417475728155,0.24649759532229093,18
3
- 01-ai/Yi-1.5-9B-Chat,1,0.34870107586750904,0.08089424511255362,0.32734221074629044,0.0,0.41394527802294795,0.41394527802294795,0.3426649332614599,17
4
- 01-ai/Yi-1.5-9B-Chat,3,0.32640977691198636,0.055279846527263934,0.2928978370489262,0.0,0.8570167696381289,0.8570167696381289,0.3151554166830832,41
5
- 01-ai/Yi-1.5-9B-Chat,5,0.34766805202103457,0.08282971728232061,0.3267409773412665,0.0,0.1703442188879082,0.1703442188879082,0.3451362525721807,12
6
- 01-ai/Yi-1.5-9B-Chat,10,0.3404245874451134,0.0874799371333584,0.3186285587310857,0.0,0.33451015004413065,0.33451015004413065,0.335628491165567,9
7
- Qwen/Qwen2-72B-Instruct,0,0.4003638205699929,0.12223832517678616,0.3843308919636922,0.0,0.19593998234774934,0.19593998234774934,0.3970180421898014,1
8
- Qwen/Qwen2-72B-Instruct,1,0.4068727655718769,0.13151008586303575,0.39419477888585397,0.0,0.15798764342453664,0.15798764342453664,0.4041216347207881,1
9
- Qwen/Qwen2-72B-Instruct,3,0.4086244766794449,0.13771788946915253,0.3975872454980886,0.0,0.12709620476610767,0.12709620476610767,0.4063954239173824,0
10
- Qwen/Qwen2-72B-Instruct,5,0.4132330811975005,0.1439773872150899,0.40319922813685904,0.0,0.11915269196822595,0.11915269196822595,0.41111822769434864,0
11
- Qwen/Qwen2-72B-Instruct,10,0.41598174489789025,0.14493475334416772,0.4061550950232767,0.0,0.09620476610767872,0.09620476610767872,0.4142591929807702,0
12
- gpt-4o-mini,0,0.3797696357415517,0.1208238389018596,0.3703414668036082,0.0,0.09532215357458076,0.09532215357458076,0.37821133607113916,0
13
- gpt-4o-mini,1,0.37721414424357197,0.12013402254992751,0.3672849018610451,0.0,0.09179170344218888,0.09179170344218888,0.37572317024740703,0
14
- gpt-4o-mini,3,0.3772985230936086,0.12400311006855895,0.3678727405759652,0.0,0.09179170344218888,0.09179170344218888,0.3758072155821894,0
15
- gpt-4o-mini,5,0.35541821046691263,0.1202464326274801,0.3467666649149247,0.0,0.05030891438658429,0.05030891438658429,0.3546452926906339,0
16
- gpt-4o-mini,10,0.37335968903521094,0.1257600824824953,0.3655393297085069,0.0,0.0706090026478376,0.0706090026478376,0.37222227656264567,0
17
- gpt-4o-mini,50,0.4044690970661121,0.13972883920222515,0.39119808964775155,0.0,0.08473080317740513,0.08473080317740513,0.4029924080114739,0
18
- gpt-4o,0,0.3797419877414444,0.12054600115274576,0.3701547457064372,0.0,0.09532215357458076,0.09532215357458076,0.37818380151840997,0
19
- gpt-4o,1,0.37588586538591867,0.12049862468096047,0.3655088353382996,0.0,0.09179170344218888,0.09179170344218888,0.3744001415355042,0
20
- gpt-4o,3,0.3768512103553621,0.12408746322526747,0.36675999670221837,0.0,0.09355692850838482,0.09355692850838482,0.3753332737090981,0
21
- gpt-4o,5,0.35772544915145654,0.12169683347842021,0.348000637544411,0.0,0.0353045013239188,0.0353045013239188,0.3571787674657609,0
22
- gpt-4o,10,0.3746444651189953,0.12498238983123719,0.36675868342577317,0.0,0.0706090026478376,0.0706090026478376,0.37350313867182305,0
23
- gpt-4o,50,0.40413933252744955,0.13782450337569063,0.39068912530823663,0.0,0.07590467784642542,0.07590467784642542,0.402816463024093,0
 
 
 
 
 
 
 
1
+ model,shots,meteor,bleu_1,rouge_l,ews_score,repetition_score,total_repetitions,rap,num_max_output_tokens,eval_time
2
+ Qwen/Qwen2-72B-Instruct,0,0.4003638205699929,0.12223832517678616,0.3850228285100058,0.0,0.19593998234774934,0.19593998234774934,0.3970180421898014,1,167.96666666666667
3
+ Qwen/Qwen2-72B-Instruct,1,0.4068727655718769,0.13151008586303575,0.39473841744219135,0.0,0.15798764342453664,0.15798764342453664,0.4041216347207881,1,169.63333333333333
4
+ Qwen/Qwen2-72B-Instruct,3,0.4086244766794449,0.13771788946915253,0.3982274035420223,0.0,0.12709620476610767,0.12709620476610767,0.4063954239173824,0,220.13333333333333
5
+ Qwen/Qwen2-72B-Instruct,5,0.4132330811975005,0.1439773872150899,0.4038550675754348,0.0,0.11915269196822595,0.11915269196822595,0.41111822769434864,0,324.18333333333334
6
+ Qwen/Qwen2-72B-Instruct,10,0.41598174489789025,0.14493475334416772,0.40675496346798323,0.0,0.09620476610767872,0.09620476610767872,0.4142591929807702,0,561.3666666666667
7
+ Qwen/Qwen2-7B-Instruct,0,0.377477070949433,0.11783492823424507,0.36803838669677474,0.0,0.07149161518093557,0.07149161518093557,0.3763128359886437,0,18.516666666666666
8
+ Qwen/Qwen2-7B-Instruct,1,0.38000752971097884,0.11731917392837622,0.3717522235071834,0.0,0.07413945278022947,0.07413945278022947,0.37879237953430883,0,19.883333333333333
9
+ Qwen/Qwen2-7B-Instruct,3,0.38678180999660744,0.12368875746156333,0.37795982050404114,0.0,0.1412180052956752,0.1412180052956752,0.38444052153933106,0,30.233333333333334
10
+ Qwen/Qwen2-7B-Instruct,5,0.38784856371389564,0.1227725469820483,0.38271501720823375,0.0,0.09179170344218888,0.09179170344218888,0.38631555618548774,0,43.233333333333334
11
+ Qwen/Qwen2-7B-Instruct,10,0.38526484346757095,0.12535252418966952,0.381921964717894,0.0,0.10326566637246248,0.10326566637246248,0.3835535147682633,0,75.65
12
+ internlm/internlm2_5-7b-chat,0,0.36816799960793073,0.11360521358693174,0.3604145848797059,0.0,0.2144748455428067,0.2144748455428067,0.3648059323539847,0,23.116666666666667
13
+ internlm/internlm2_5-7b-chat,1,0.3719587471180722,0.1157707566176535,0.3640593681230251,0.0,0.14033539276257723,0.14033539276257723,0.36972107700643503,0,24.783333333333335
14
+ internlm/internlm2_5-7b-chat,3,0.3747105229822289,0.1154826016668525,0.3690935886519632,0.0,0.17740511915269197,0.17740511915269197,0.37187052462735126,0,35.083333333333336
15
+ internlm/internlm2_5-7b-chat,5,0.37285562384505977,0.11541534709366409,0.36890113755410703,0.0,0.14827890556045895,0.14827890556045895,0.37048732274065205,0,54.016666666666666
16
+ internlm/internlm2_5-7b-chat,10,0.3750895095392996,0.11696492920010637,0.3680889860873816,0.0,0.13062665489849956,0.13062665489849956,0.37298723763770353,0,108.06666666666666
17
+ internlm/internlm2_5-7b-chat,50,0.37213069871716603,0.11404688073207249,0.3626467635078598,0.0,0.16857899382171226,0.16857899382171226,0.3694484047441432,8,798.6166666666667
18
+ gpt-4o-mini,0,0.3797696357415517,0.1208238389018596,0.3705758962787884,0.0,0.09532215357458076,0.09532215357458076,0.37821133607113916,0,30.1
19
+ gpt-4o-mini,1,0.37721414424357197,0.12013402254992751,0.36747964103348624,0.0,0.09179170344218888,0.09179170344218888,0.37572317024740703,0,29.583333333333332
20
+ gpt-4o-mini,3,0.3772985230936086,0.12400311006855895,0.3682804376759824,0.0,0.09179170344218888,0.09179170344218888,0.3758072155821894,0,24.3
21
+ gpt-4o-mini,5,0.35541821046691263,0.1202464326274801,0.34720271618869103,0.0,0.05030891438658429,0.05030891438658429,0.3546452926906339,0,22.733333333333334
22
+ gpt-4o-mini,10,0.37335968903521094,0.1257600824824953,0.3659023214917385,0.0,0.0706090026478376,0.0706090026478376,0.37222227656264567,0,22.433333333333334
23
+ gpt-4o-mini,50,0.4044690970661121,0.13972883920222515,0.3916652577211933,0.0,0.08473080317740513,0.08473080317740513,0.4029924080114739,0,24.35
24
+ gpt-4o,0,0.3797419877414444,0.12054600115274576,0.3704105629830775,0.0,0.09532215357458076,0.09532215357458076,0.37818380151840997,0,28.866666666666667
25
+ gpt-4o,1,0.37588586538591867,0.12049862468096047,0.3658901615314057,0.0,0.09179170344218888,0.09179170344218888,0.3744001415355042,0,22.733333333333334
26
+ gpt-4o,3,0.3768512103553621,0.12408746322526747,0.3668998399542892,0.0,0.09355692850838482,0.09355692850838482,0.3753332737090981,0,38.75
27
+ gpt-4o,5,0.35772544915145654,0.12169683347842021,0.3483810267695463,0.0,0.0353045013239188,0.0353045013239188,0.3571787674657609,0,31.8
28
+ gpt-4o,10,0.3746444651189953,0.12498238983123719,0.36712547974906085,0.0,0.0706090026478376,0.0706090026478376,0.37350313867182305,0,33.8
29
+ gpt-4o,50,0.40413933252744955,0.13782450337569063,0.3909558691963182,0.0,0.07590467784642542,0.07590467784642542,0.402816463024093,0,38.25