open-agent-leaderboard / src /overall_math_score.json
qq-hzlh's picture
remove files
a1b0dda
raw
history blame
4.04 kB
{
"time": "2025-01-09 17:13:45",
"results": {
"IO": {
"META": {
"Algorithm": "IO",
"LLM": "gpt-3.5-turbo",
"Eval Date": "2025/01/07"
},
"gsm8k": {
"Score": 37.83,
"Cost($)": 0.3328
},
"AQuA": {
"Score": 38.98,
"Cost($)": 0.0380
}
},
"COT": {
"META": {
"Algorithm": "COT",
"LLM": "gpt-3.5-turbo",
"Eval Date": "2025/01/07"
},
"gsm8k": {
"Score": 78.70,
"Cost($)": 0.6788
},
"AQuA": {
"Score": 61.02,
"Cost($)": 0.0957
}
},
"SC-COT": {
"META": {
"Algorithm": "SC-COT",
"LLM": "gpt-3.5-turbo",
"Eval Date": "2025/01/07"
},
"gsm8k": {
"Score": 80.06,
"Cost($)": 5.0227
},
"AQuA": {
"Score": 67.32,
"Cost($)": 0.6491
}
},
"POT": {
"META": {
"Algorithm": "POT",
"LLM": "gpt-3.5-turbo",
"Eval Date": "2025/01/07"
},
"gsm8k": {
"Score": 76.88,
"Cost($)": 0.6902
},
"AQuA": {
"Score": 51.97,
"Cost($)": 0.1557
}
},
"ReAct-Pro*": {
"META": {
"Algorithm": "ReAct-Pro*",
"LLM": "gpt-3.5-turbo",
"Eval Date": "2025/01/07"
},
"gsm8k": {
"Score": 74.91,
"Cost($)": 3.4633
},
"AQuA": {
"Score": 64.57,
"Cost($)": 0.4928
}
},
"IO-Doubao": {
"META": {
"Algorithm": "IO",
"LLM": "Doubao-lite-32k",
"Eval Date": "2025/01/07"
},
"gsm8k": {
"Score": 72.02,
"Cost($)": 0.0354
},
"AQuA": {
"Score": 79.13,
"Cost($)": 0.0058
}
},
"COT-Doubao": {
"META": {
"Algorithm": "COT",
"LLM": "Doubao-lite-32k",
"Eval Date": "2025/01/07"
},
"gsm8k": {
"Score": 89.31,
"Cost($)": 0.0557
},
"AQuA": {
"Score": 82.68,
"Cost($)": 0.0066
}
},
"SC-COT-Doubao": {
"META": {
"Algorithm": "SC-COT",
"LLM": "Doubao-lite-32k",
"Eval Date": "2025/01/07"
},
"gsm8k": {
"Score": 88.63,
"Cost($)": 0.1533
},
"AQuA": {
"Score": 83.46,
"Cost($)": 0.0409
}
},
"POT-Doubao": {
"META": {
"Algorithm": "POT",
"LLM": "Doubao-lite-32k",
"Eval Date": "2025/01/07"
},
"gsm8k": {
"Score": 79.15,
"Cost($)": 0.0575
},
"AQuA": {
"Score": 52.36,
"Cost($)": 0.0142
}
},
"ReAct-Pro-Doubao": {
"META": {
"Algorithm": "ReAct-Pro",
"LLM": "Doubao-lite-32k",
"Eval Date": "2025/01/07"
},
"gsm8k": {
"Score": 85.60,
"Cost($)": 0.2513
},
"AQuA": {
"Score": 77.56,
"Cost($)": 0.0446
}
}
}
}