# CONSTANTS-URL
URL = "http://opencompass.openxlab.space/assets/OpenVLM.json"
OVERALL_MATH_SCORE_FILE = "overall_math_score.json"
DETAIL_MATH_SCORE_FILE = "detail_math_score.json"
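
# --- Hedged sketch (not part of the original module) -------------------------
# One plausible way these constants could be consumed: fetch the remote results
# JSON and read the two local score files. `load_remote_results` and
# `load_local_scores` are illustrative helper names, not existing APIs; the
# sketch assumes URL returns JSON and the score files sit next to this module.
import json
import urllib.request


def load_remote_results() -> dict:
    """Download and parse the results JSON pointed to by URL."""
    with urllib.request.urlopen(URL) as resp:
        return json.load(resp)


def load_local_scores() -> tuple:
    """Read the overall and detailed math score files."""
    with open(OVERALL_MATH_SCORE_FILE) as f:
        overall = json.load(f)
    with open(DETAIL_MATH_SCORE_FILE) as f:
        detail = json.load(f)
    return overall, detail
# -----------------------------------------------------------------------------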
# CONSTANTS-TEXT
LEADERBOARD_INTRODUCTION = """# Open Agent Leaderboard
### Welcome to the Open Agent Leaderboard! We share the evaluation results of open agents: COT, SC_COT, POT, ReAct, etc. The agents are implemented with the open-source framework [*OmAgent*](https://github.com/om-ai-lab/OmAgent).
This leaderboard was last updated: {}.
To add your own agent to the leaderboard, please create a PR in [*OmAgent*](https://github.com/om-ai-lab/OmAgent); we will then help with the evaluation and update the leaderboard. For any questions or concerns, please feel free to contact us.
"""
DEFAULT_MATH_BENCH = [
    'gsm8k', 'AQuA'
]
# The README file for each benchmark
LEADERBOARD_MD = {}
LEADERBOARD_MD['MATH_MAIN'] = f"""
## Math Task Main Evaluation Results
- Metrics:
    - Avg Score: The average score across all math benchmarks (normalized to 0-100; higher is better).
    - Rank: The average rank across all math benchmarks (lower is better).
    - Score: The evaluation score on each math benchmark (higher is better).
    - Cost: The cost on each math benchmark (lower is better).
- By default, we present the overall evaluation results based on {', '.join(DEFAULT_MATH_BENCH)}, sorted in descending order of Avg Score.
"""
LEADERBOARD_MD['MATH_DETAIL'] = f"""
## Math Task Detailed Evaluation Results
- By default, we present the overall evaluation results based on {', '.join(DEFAULT_MATH_BENCH)}.
- Default parameters: temperature=0.0.
- LLM prices:
    - gpt-3.5-turbo:
        - $0.0005 / 1K tokens (input)
        - $0.0015 / 1K tokens (output)
    - Doubao-lite-32k (1 USD = 7.3249 CNY):
        - $0.00004096 / 1K tokens (input)
        - $0.0001 / 1K tokens (output)
- ReAct-Pro*: We modified ReAct into ReAct-Pro, following the Reflexion repository. Implementation details can be found in the [*OmAgent*](https://github.com/om-ai-lab/OmAgent) repository.
"""
META_FIELDS = [
    'Algorithm', 'LLM', 'Eval Date'
]
DATASETS = [
    'gsm8k', 'AQuA'
]
LLM = [
    'Doubao-lite-32k', 'gpt-3.5-turbo'
]
ALGORITHMS = [
    'IO', 'COT', 'SC_COT', 'POT', 'ReAct-Pro*'
]
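
# --- Hedged sketch (not part of the original module) -------------------------
# How META_FIELDS, LLM and ALGORITHMS might be used to validate a submitted
# leaderboard row. The flat-dict row layout is an assumption for illustration.
def is_valid_entry(row: dict) -> bool:
    """Check that a row carries all meta fields and uses known LLM/algorithm names."""
    return (
        all(field in row for field in META_FIELDS)
        and row.get('LLM') in LLM
        and row.get('Algorithm') in ALGORITHMS
    )
# -----------------------------------------------------------------------------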
CITATION_BUTTON_TEXT = r"""@article{zhang2024omagent,
  title={OmAgent: A Multi-modal Agent Framework for Complex Video Understanding with Task Divide-and-Conquer},
  author={Zhang, Lu and Zhao, Tiancheng and Ying, Heting and Ma, Yibo and Lee, Kyusong},
  journal={arXiv preprint arXiv:2406.16620},
  year={2024}
}"""