# open-agent-leaderboard / meta_data.py
# CONSTANTS-URL
URL = "http://opencompass.openxlab.space/assets/OpenVLM.json"
OVERALL_MATH_SCORE_FILE = "overall_math_score.json"
DETAIL_MATH_SCORE_FILE = "detail_math_score.json"
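
# Illustrative loader (an assumption, not part of the original app code): the score
# files named above are assumed to be plain JSON and could be read like this.
import json

def load_math_scores(path: str = OVERALL_MATH_SCORE_FILE) -> dict:
    """Load a score file such as OVERALL_MATH_SCORE_FILE or DETAIL_MATH_SCORE_FILE."""
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)
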
# CONSTANTS-TEXT
LEADERBORAD_INTRODUCTION = """# Open Agent Leaderboard
### Welcome to the Open Agent Leaderboard! We share the evaluation results of open agents: COT, SC_COT, POT, ReAct, etc. The agents are implemented with the open-source framework [*OmAgent*](https://github.com/om-ai-lab/OmAgent).
This leaderboard was last updated: {}.
To add your own agent to the leaderboard, please create a PR in [*OmAgent*](https://github.com/om-ai-lab/OmAgent); we will then help with the evaluation and update the leaderboard. For any questions or concerns, please feel free to contact us.
"""
DEFAULT_MATH_BENCH = [
'gsm8k', 'AQuA'
]
# Markdown description for each leaderboard table
LEADERBOARD_MD = {}
LEADERBOARD_MD['MATH_MAIN'] = f"""
## Main Evaluation Results on Math Tasks
- Metrics:
    - Avg Score: The average score across all math benchmarks (normalized to 0-100; higher is better).
    - Rank: The average rank across all math benchmarks (lower is better).
    - Score: The evaluation score on each math benchmark (higher is better).
    - Cost: The evaluation cost on each math benchmark (lower is better).
- By default, we present the overall evaluation results based on {', '.join(DEFAULT_MATH_BENCH)}, sorted in descending order of Avg Score.
"""
LEADERBOARD_MD['MATH_DETAIL'] = f"""
## Detailed Evaluation Results on Math Tasks
- By default, we present the overall evaluation results based on {', '.join(DEFAULT_MATH_BENCH)}.
- Default parameters: temperature=0.0.
- LLM prices:
    - gpt-3.5-turbo:
        - $0.0005 / 1K tokens (input)
        - $0.0015 / 1K tokens (output)
    - Doubao-lite-32k (1 USD = 7.3249 CNY):
        - $0.00004096 / 1K tokens (input)
        - $0.0001 / 1K tokens (output)
- ReAct-Pro*: We modified ReAct to ReAct-Pro, following the Reflexion repository. Implementation details can be found in the [*OmAgent*](https://github.com/om-ai-lab/OmAgent) repository.
"""
META_FIELDS = [
'Algorithm', 'LLM', 'Eval Date'
]
DATASETS = [
'gsm8k', 'AQuA'
]
LLM = [
'Doubao-lite-32k', 'gpt-3.5-turbo'
]
ALGORITHMS = [
'IO', 'COT', 'SC_COT', 'POT', 'ReAct-Pro*'
]
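
# Illustrative row schema (an assumption about how a leaderboard row is assembled
# from the fields above): one row per (Algorithm, LLM) pair, with the meta fields
# followed by a Score/Cost pair for every dataset.
def make_empty_row(algorithm: str, llm: str, eval_date: str) -> dict:
    """Return a leaderboard row with meta fields filled and per-dataset cells empty."""
    row = dict(zip(META_FIELDS, (algorithm, llm, eval_date)))
    for dataset in DATASETS:
        row[f"{dataset} Score"] = None
        row[f"{dataset} Cost"] = None
    return row
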
CITATION_BUTTON_TEXT = r"""@article{zhang2024omagent,
title={OmAgent: A Multi-modal Agent Framework for Complex Video Understanding with Task Divide-and-Conquer},
author={Zhang, Lu and Zhao, Tiancheng and Ying, Heting and Ma, Yibo and Lee, Kyusong},
journal={arXiv preprint arXiv:2406.16620},
year={2024}
}"""