Spaces:
Running
Running
{ | |
"time": "2025-01-09 17:13:45", | |
"results": { | |
"IO": { | |
"META": { | |
"Algorithm": "IO", | |
"LLM": "gpt-3.5-turbo", | |
"Eval Date": "2025/01/07" | |
}, | |
"gsm8k": { | |
"Score": 37.83, | |
"Cost($)": 0.3328 | |
}, | |
"AQuA": { | |
"Score": 38.98, | |
"Cost($)": 0.0380 | |
} | |
}, | |
"CoT": { | |
"META": { | |
"Algorithm": "CoT", | |
"LLM": "gpt-3.5-turbo", | |
"Eval Date": "2025/01/07" | |
}, | |
"gsm8k": { | |
"Score": 78.70, | |
"Cost($)": 0.6788 | |
}, | |
"AQuA": { | |
"Score": 61.02, | |
"Cost($)": 0.0957 | |
} | |
}, | |
"SC-CoT": { | |
"META": { | |
"Algorithm": "SC-CoT", | |
"LLM": "gpt-3.5-turbo", | |
"Eval Date": "2025/01/07" | |
}, | |
"gsm8k": { | |
"Score": 80.06, | |
"Cost($)": 5.0227 | |
}, | |
"AQuA": { | |
"Score": 67.32, | |
"Cost($)": 0.6491 | |
} | |
}, | |
"PoT": { | |
"META": { | |
"Algorithm": "PoT", | |
"LLM": "gpt-3.5-turbo", | |
"Eval Date": "2025/01/07" | |
}, | |
"gsm8k": { | |
"Score": 76.88, | |
"Cost($)": 0.6902 | |
}, | |
"AQuA": { | |
"Score": 59.45, | |
"Cost($)": 0.1748 | |
} | |
}, | |
"ReAct-Pro*": { | |
"META": { | |
"Algorithm": "ReAct-Pro*", | |
"LLM": "gpt-3.5-turbo", | |
"Eval Date": "2025/01/07" | |
}, | |
"gsm8k": { | |
"Score": 74.91, | |
"Cost($)": 3.4633 | |
}, | |
"AQuA": { | |
"Score": 64.57, | |
"Cost($)": 0.4928 | |
} | |
}, | |
"IO-Doubao": { | |
"META": { | |
"Algorithm": "IO", | |
"LLM": "Doubao-lite-32k", | |
"Eval Date": "2025/01/07" | |
}, | |
"gsm8k": { | |
"Score": 72.02, | |
"Cost($)": 0.0354 | |
}, | |
"AQuA": { | |
"Score": 79.13, | |
"Cost($)": 0.0058 | |
} | |
}, | |
"CoT-Doubao": { | |
"META": { | |
"Algorithm": "CoT", | |
"LLM": "Doubao-lite-32k", | |
"Eval Date": "2025/01/07" | |
}, | |
"gsm8k": { | |
"Score": 89.31, | |
"Cost($)": 0.0557 | |
}, | |
"AQuA": { | |
"Score": 82.68, | |
"Cost($)": 0.0066 | |
} | |
}, | |
"SC-CoT-Doubao": { | |
"META": { | |
"Algorithm": "SC-CoT", | |
"LLM": "Doubao-lite-32k", | |
"Eval Date": "2025/01/07" | |
}, | |
"gsm8k": { | |
"Score": 88.63, | |
"Cost($)": 0.1533 | |
}, | |
"AQuA": { | |
"Score": 83.46, | |
"Cost($)": 0.0409 | |
} | |
}, | |
"PoT-Doubao": { | |
"META": { | |
"Algorithm": "PoT", | |
"LLM": "Doubao-lite-32k", | |
"Eval Date": "2025/01/07" | |
}, | |
"gsm8k": { | |
"Score": 79.61, | |
"Cost($)": 0.0576 | |
}, | |
"AQuA": { | |
"Score": 71.65, | |
"Cost($)": 0.0147 | |
} | |
}, | |
"ReAct-Pro-Doubao": { | |
"META": { | |
"Algorithm": "ReAct-Pro*", | |
"LLM": "Doubao-lite-32k", | |
"Eval Date": "2025/01/07" | |
}, | |
"gsm8k": { | |
"Score": 85.60, | |
"Cost($)": 0.2513 | |
}, | |
"AQuA": { | |
"Score": 77.56, | |
"Cost($)": 0.0446 | |
} | |
} | |
} | |
} | |