# Your leaderboard name
TITLE = """
AIR-Bench: Automated Heterogeneous Information Retrieval Benchmark
(Preview)
"""
# What does your leaderboard evaluate?
INTRODUCTION_TEXT = """
AIR-Bench evaluates embedding models and rerankers on heterogeneous information retrieval tasks spanning multiple task types, domains, and languages.
For more information, see [our GitHub repo](https://github.com/AIR-Bench/AIR-Bench).
"""
# Which evaluations are you running? How can people reproduce what you have?
BENCHMARKS_TEXT = """
## How it works
For details on how the benchmark is constructed and how submissions are evaluated, see [our GitHub repo](https://github.com/AIR-Bench/AIR-Bench).
"""
EVALUATION_QUEUE_TEXT = """
## Steps to submit to AIR-Bench
1. Install AIR-Bench
```bash
pip install air-benchmark
```
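A quick sanity check that the install succeeded (the `__version__` attribute is an assumption; the `getattr` fallback keeps the check safe either way):
```bash
# Confirm the package imports; print its version if the attribute exists
python -c "import air_benchmark; print(getattr(air_benchmark, '__version__', 'installed'))"
```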
2. Run the evaluation script
```bash
cd AIR-Bench/scripts
# Run all tasks
python run_air_benchmark.py \\
    --output_dir ./search_results \\
    --encoder BAAI/bge-m3 \\
    --reranker BAAI/bge-reranker-v2-m3 \\
    --search_top_k 1000 \\
    --rerank_top_k 100 \\
    --max_query_length 512 \\
    --max_passage_length 512 \\
    --batch_size 512 \\
    --pooling_method cls \\
    --normalize_embeddings True \\
    --use_fp16 True \\
    --add_instruction False \\
    --overwrite False

# Run the tasks in the specified task type
python run_air_benchmark.py \\
    --task_types long-doc \\
    --output_dir ./search_results \\
    --encoder BAAI/bge-m3 \\
    --reranker BAAI/bge-reranker-v2-m3 \\
    --search_top_k 1000 \\
    --rerank_top_k 100 \\
    --max_query_length 512 \\
    --max_passage_length 512 \\
    --batch_size 512 \\
    --pooling_method cls \\
    --normalize_embeddings True \\
    --use_fp16 True \\
    --add_instruction False \\
    --overwrite False

# Run the tasks in the specified task type and domains
python run_air_benchmark.py \\
    --task_types long-doc \\
    --domains arxiv book \\
    --output_dir ./search_results \\
    --encoder BAAI/bge-m3 \\
    --reranker BAAI/bge-reranker-v2-m3 \\
    --search_top_k 1000 \\
    --rerank_top_k 100 \\
    --max_query_length 512 \\
    --max_passage_length 512 \\
    --batch_size 512 \\
    --pooling_method cls \\
    --normalize_embeddings True \\
    --use_fp16 True \\
    --add_instruction False \\
    --overwrite False

# Run the tasks in the specified languages
python run_air_benchmark.py \\
    --languages en \\
    --output_dir ./search_results \\
    --encoder BAAI/bge-m3 \\
    --reranker BAAI/bge-reranker-v2-m3 \\
    --search_top_k 1000 \\
    --rerank_top_k 100 \\
    --max_query_length 512 \\
    --max_passage_length 512 \\
    --batch_size 512 \\
    --pooling_method cls \\
    --normalize_embeddings True \\
    --use_fp16 True \\
    --add_instruction False \\
    --overwrite False

# Run the tasks in the specified task type, domains, and languages
python run_air_benchmark.py \\
    --task_types qa \\
    --domains wiki web \\
    --languages en \\
    --output_dir ./search_results \\
    --encoder BAAI/bge-m3 \\
    --reranker BAAI/bge-reranker-v2-m3 \\
    --search_top_k 1000 \\
    --rerank_top_k 100 \\
    --max_query_length 512 \\
    --max_passage_length 512 \\
    --batch_size 512 \\
    --pooling_method cls \\
    --normalize_embeddings True \\
    --use_fp16 True \\
    --add_instruction False \\
    --overwrite False
```
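The runs above write per-model result folders under `--output_dir`. A quick way to check what was produced (the layout shown in the comment is an assumption inferred from the packaging step below):
```bash
# List result directories; a layout like the following is expected:
#   search_results/bge-m3/NoReranker
#   search_results/bge-m3/bge-reranker-v2-m3
find search_results -maxdepth 2 -type d
```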
3. Package the search results.
```bash
# Zip "Embedding Model + NoReranker" search results in "//NoReranker" to "/_NoReranker.zip".
python zip_results.py \\
--results_dir search_results \\
--model_name bge-m3 \\
--save_dir search_results/zipped_results
# Zip "Embedding Model + Reranker" search results in "//" to "/_.zip".
python zip_results.py \\
--results_dir search_results \\
--model_name bge-m3 \\
--reranker_name bge-reranker-v2-m3 \\
--save_dir search_results/zipped_results
```
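Before uploading, you can list the archive contents to confirm the packaging worked (the file names follow the `<model_name>_<reranker_name>.zip` pattern described in the comments above; adjust if your version names them differently):
```bash
# Inspect the packaged archives produced by zip_results.py
unzip -l search_results/zipped_results/bge-m3_NoReranker.zip
unzip -l search_results/zipped_results/bge-m3_bge-reranker-v2-m3.zip
```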
4. Upload the `.zip` file on this page and fill in the model information:
- Model Name: such as `bge-m3`.
- Model URL: such as `https://huggingface.co/BAAI/bge-m3`.
- Reranker Name: such as `bge-reranker-v2-m3`. Keep empty for `NoReranker`.
- Reranker URL: such as `https://huggingface.co/BAAI/bge-reranker-v2-m3`. Keep empty for `NoReranker`.
If you want to stay anonymous, fill in only the Model Name and Reranker Name (keep the latter empty for `NoReranker`), and check the selection box below before submission.
5. Congratulations! Your results will appear on the leaderboard within one hour.
"""
CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
CITATION_BUTTON_TEXT = r"""
"""