Spaces:
Running
Running
{ | |
"time": "2025-01-09 17:13:45", | |
"results": { | |
"IO": { | |
"META": { | |
"Algorithm": "IO", | |
"LLM": "gpt-3.5-turbo", | |
"Eval Date": "2025/01/07" | |
}, | |
"gsm8k": { | |
"Score": 37.83, | |
"Cost($)": 0.3328 | |
}, | |
"AQuA": { | |
"Score": 38.98, | |
"Cost($)": 0.0380 | |
} | |
}, | |
"COT": { | |
"META": { | |
"Algorithm": "COT", | |
"LLM": "gpt-3.5-turbo", | |
"Eval Date": "2025/01/07" | |
}, | |
"gsm8k": { | |
"Score": 78.70, | |
"Cost($)": 0.6788 | |
}, | |
"AQuA": { | |
"Score": 61.02, | |
"Cost($)": 0.0957 | |
} | |
}, | |
"SC-COT": { | |
"META": { | |
"Algorithm": "SC-COT", | |
"LLM": "gpt-3.5-turbo", | |
"Eval Date": "2025/01/07" | |
}, | |
"gsm8k": { | |
"Score": 80.06, | |
"Cost($)": 5.0227 | |
}, | |
"AQuA": { | |
"Score": 67.32, | |
"Cost($)": 0.6491 | |
} | |
}, | |
"POT": { | |
"META": { | |
"Algorithm": "POT", | |
"LLM": "gpt-3.5-turbo", | |
"Eval Date": "2025/01/07" | |
}, | |
"gsm8k": { | |
"Score": 76.88, | |
"Cost($)": 0.6902 | |
}, | |
"AQuA": { | |
"Score": 51.97, | |
"Cost($)": 0.1557 | |
} | |
}, | |
"ReAct-Pro*": { | |
"META": { | |
"Algorithm": "ReAct-Pro*", | |
"LLM": "gpt-3.5-turbo", | |
"Eval Date": "2025/01/07" | |
}, | |
"gsm8k": { | |
"Score": 74.91, | |
"Cost($)": 3.4633 | |
}, | |
"AQuA": { | |
"Score": 64.57, | |
"Cost($)": 0.4928 | |
} | |
}, | |
"IO-Doubao": { | |
"META": { | |
"Algorithm": "IO", | |
"LLM": "Doubao-lite-32k", | |
"Eval Date": "2025/01/07" | |
}, | |
"gsm8k": { | |
"Score": 72.02, | |
"Cost($)": 0.0354 | |
}, | |
"AQuA": { | |
"Score": 79.13, | |
"Cost($)": 0.0058 | |
} | |
}, | |
"COT-Doubao": { | |
"META": { | |
"Algorithm": "COT", | |
"LLM": "Doubao-lite-32k", | |
"Eval Date": "2025/01/07" | |
}, | |
"gsm8k": { | |
"Score": 89.31, | |
"Cost($)": 0.0557 | |
}, | |
"AQuA": { | |
"Score": 82.68, | |
"Cost($)": 0.0066 | |
} | |
}, | |
"SC-COT-Doubao": { | |
"META": { | |
"Algorithm": "SC-COT", | |
"LLM": "Doubao-lite-32k", | |
"Eval Date": "2025/01/07" | |
}, | |
"gsm8k": { | |
"Score": 88.63, | |
"Cost($)": 0.1533 | |
}, | |
"AQuA": { | |
"Score": 83.46, | |
"Cost($)": 0.0409 | |
} | |
}, | |
"POT-Doubao": { | |
"META": { | |
"Algorithm": "POT", | |
"LLM": "Doubao-lite-32k", | |
"Eval Date": "2025/01/07" | |
}, | |
"gsm8k": { | |
"Score": 79.15, | |
"Cost($)": 0.0575 | |
}, | |
"AQuA": { | |
"Score": 52.36, | |
"Cost($)": 0.0142 | |
} | |
}, | |
"ReAct-Pro-Doubao": { | |
"META": { | |
"Algorithm": "ReAct-Pro", | |
"LLM": "Doubao-lite-32k", | |
"Eval Date": "2025/01/07" | |
}, | |
"gsm8k": { | |
"Score": 85.60, | |
"Cost($)": 0.2513 | |
}, | |
"AQuA": { | |
"Score": 77.56, | |
"Cost($)": 0.0446 | |
} | |
} | |
} | |
} | |