# open-agent-leaderboard / meta_data.py
# CONSTANTS-URL
URL = "http://opencompass.openxlab.space/assets/OpenVLM.json"
OVERALL_MATH_SCORE_FILE = "overall_math_score.json"
DETAIL_MATH_SCORE_FILE = "detail_math_score.json"
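
# Illustrative loader (an assumption, not part of the original app code): the score
# files named above are assumed to be plain JSON and could be read like this.
import json

def load_math_scores(path: str = OVERALL_MATH_SCORE_FILE) -> dict:
    """Load a score file such as OVERALL_MATH_SCORE_FILE or DETAIL_MATH_SCORE_FILE."""
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)
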
# CONSTANTS-TEXT
LEADERBORAD_INTRODUCTION = """# Open Agent Leaderboard
### Welcome to the Open Agent Leaderboard! We share the evaluation results of open agents: COT, SC_COT, POT, ReAct, etc. The agents are implemented with the open-source framework [*OmAgent*](https://github.com/om-ai-lab/OmAgent).
This leaderboard was last updated: {}.
To add your own agent to the leaderboard, please create a PR in [*OmAgent*](https://github.com/om-ai-lab/OmAgent); we will then help with the evaluation and update the leaderboard. For any questions or concerns, please feel free to contact us.
"""
DEFAULT_MATH_BENCH = [
'gsm8k', 'AQuA'
]
# Markdown description for each leaderboard table
LEADERBOARD_MD = {}
LEADERBOARD_MD['MATH_MAIN'] = f"""
## Main Evaluation Results on Math Tasks
- Metrics:
    - Avg Score: The average score across all math benchmarks (normalized to 0-100; higher is better).
    - Rank: The average rank across all math benchmarks (lower is better).
    - Score: The evaluation score on each math benchmark (higher is better).
    - Cost: The evaluation cost on each math benchmark (lower is better).
- By default, we present the overall evaluation results based on {', '.join(DEFAULT_MATH_BENCH)}, sorted in descending order of Avg Score.
"""
LEADERBOARD_MD['MATH_DETAIL'] = f"""
## Detailed Evaluation Results on Math Tasks
- By default, we present the overall evaluation results based on {', '.join(DEFAULT_MATH_BENCH)}.
- Default parameters: temperature=0.0.
- LLM prices:
    - gpt-3.5-turbo:
        - $0.0005 / 1K tokens (input)
        - $0.0015 / 1K tokens (output)
    - Doubao-lite-32k (1 USD = 7.3249 CNY):
        - $0.00004096 / 1K tokens (input)
        - $0.0001 / 1K tokens (output)
- ReAct-Pro*: We modified ReAct to ReAct-Pro, following the Reflexion repository. Implementation details can be found in the [*OmAgent*](https://github.com/om-ai-lab/OmAgent) repository.
"""
META_FIELDS = [
'Algorithm', 'LLM', 'Eval Date'
]
DATASETS = [
'gsm8k', 'AQuA'
]
LLM = [
'Doubao-lite-32k', 'gpt-3.5-turbo'
]
ALGORITHMS = [
'IO', 'COT', 'SC_COT', 'POT', 'ReAct-Pro*'
]
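
# Illustrative row schema (an assumption about how a leaderboard row is assembled
# from the fields above): one row per (Algorithm, LLM) pair, with the meta fields
# followed by a Score/Cost pair for every dataset.
def make_empty_row(algorithm: str, llm: str, eval_date: str) -> dict:
    """Return a leaderboard row with meta fields filled and per-dataset cells empty."""
    row = dict(zip(META_FIELDS, (algorithm, llm, eval_date)))
    for dataset in DATASETS:
        row[f"{dataset} Score"] = None
        row[f"{dataset} Cost"] = None
    return row
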
CITATION_BUTTON_TEXT = r"""@article{zhang2024omagent,
title={OmAgent: A Multi-modal Agent Framework for Complex Video Understanding with Task Divide-and-Conquer},
author={Zhang, Lu and Zhao, Tiancheng and Ying, Heting and Ma, Yibo and Lee, Kyusong},
journal={arXiv preprint arXiv:2406.16620},
year={2024}
}"""