{ "time": "2025-01-09 17:13:45", "results": { "IO": { "META": { "Algorithm": "IO", "LLM": "gpt-3.5-turbo", "Eval Date": "2025/01/07" }, "gsm8k": { "Score": 37.83, "Cost($)": 0.3328 }, "AQuA": { "Score": 38.98, "Cost($)": 0.0380 } }, "CoT": { "META": { "Algorithm": "CoT", "LLM": "gpt-3.5-turbo", "Eval Date": "2025/01/07" }, "gsm8k": { "Score": 78.70, "Cost($)": 0.6788 }, "AQuA": { "Score": 61.02, "Cost($)": 0.0957 } }, "SC-CoT": { "META": { "Algorithm": "SC-CoT", "LLM": "gpt-3.5-turbo", "Eval Date": "2025/01/07" }, "gsm8k": { "Score": 80.06, "Cost($)": 5.0227 }, "AQuA": { "Score": 67.32, "Cost($)": 0.6491 } }, "PoT": { "META": { "Algorithm": "PoT", "LLM": "gpt-3.5-turbo", "Eval Date": "2025/01/07" }, "gsm8k": { "Score": 76.88, "Cost($)": 0.6902 }, "AQuA": { "Score": 59.45, "Cost($)": 0.1748 } }, "ReAct-Pro*": { "META": { "Algorithm": "ReAct-Pro*", "LLM": "gpt-3.5-turbo", "Eval Date": "2025/01/07" }, "gsm8k": { "Score": 74.91, "Cost($)": 3.4633 }, "AQuA": { "Score": 64.57, "Cost($)": 0.4928 } }, "IO-Doubao": { "META": { "Algorithm": "IO", "LLM": "Doubao-lite-32k", "Eval Date": "2025/01/07" }, "gsm8k": { "Score": 72.02, "Cost($)": 0.0354 }, "AQuA": { "Score": 79.13, "Cost($)": 0.0058 } }, "CoT-Doubao": { "META": { "Algorithm": "CoT", "LLM": "Doubao-lite-32k", "Eval Date": "2025/01/07" }, "gsm8k": { "Score": 89.31, "Cost($)": 0.0557 }, "AQuA": { "Score": 82.68, "Cost($)": 0.0066 } }, "SC-CoT-Doubao": { "META": { "Algorithm": "SC-CoT", "LLM": "Doubao-lite-32k", "Eval Date": "2025/01/07" }, "gsm8k": { "Score": 88.63, "Cost($)": 0.1533 }, "AQuA": { "Score": 83.46, "Cost($)": 0.0409 } }, "PoT-Doubao": { "META": { "Algorithm": "PoT", "LLM": "Doubao-lite-32k", "Eval Date": "2025/01/07" }, "gsm8k": { "Score": 79.61, "Cost($)": 0.0576 }, "AQuA": { "Score": 71.65, "Cost($)": 0.0147 } }, "ReAct-Pro-Doubao": { "META": { "Algorithm": "ReAct-Pro*", "LLM": "Doubao-lite-32k", "Eval Date": "2025/01/07" }, "gsm8k": { "Score": 85.60, "Cost($)": 0.2513 }, "AQuA": { "Score": 77.56, "Cost($)": 0.0446 } } } }