# Your leaderboard name
TITLE = """AIR-Bench: Automated Heterogeneous Information Retrieval Benchmark (Preview)"""
""" # What does your leaderboard evaluate? INTRODUCTION_TEXT = """ Check more information at [our GitHub repo](https://github.com/AIR-Bench/AIR-Bench) """ # Which evaluations are you running? how can people reproduce what you have? BENCHMARKS_TEXT = f""" ## How it works Check more information at [our GitHub repo](https://github.com/AIR-Bench/AIR-Bench) """ EVALUATION_QUEUE_TEXT = """ ## Steps for submit to AIR-Bench 1. Install AIR-Bench ```bash pip install air-benchmark ``` 2. Run the evaluation script ```bash cd AIR-Bench/scripts # Run all tasks python run_air_benchmark.py \\ --output_dir ./search_results \\ --encoder BAAI/bge-m3 \\ --reranker BAAI/bge-reranker-v2-m3 \\ --search_top_k 1000 \\ --rerank_top_k 100 \\ --max_query_length 512 \\ --max_passage_length 512 \\ --batch_size 512 \\ --pooling_method cls \\ --normalize_embeddings True \\ --use_fp16 True \\ --add_instruction False \\ --overwrite False # Run the tasks in the specified task type python run_air_benchmark.py \\ --task_types long-doc \\ --output_dir ./search_results \\ --encoder BAAI/bge-m3 \\ --reranker BAAI/bge-reranker-v2-m3 \\ --search_top_k 1000 \\ --rerank_top_k 100 \\ --max_query_length 512 \\ --max_passage_length 512 \\ --batch_size 512 \\ --pooling_method cls \\ --normalize_embeddings True \\ --use_fp16 True \\ --add_instruction False \\ --overwrite False # Run the tasks in the specified task type and domains python run_air_benchmark.py \\ --task_types long-doc \\ --domains arxiv book \\ --output_dir ./search_results \\ --encoder BAAI/bge-m3 \\ --reranker BAAI/bge-reranker-v2-m3 \\ --search_top_k 1000 \\ --rerank_top_k 100 \\ --max_query_length 512 \\ --max_passage_length 512 \\ --batch_size 512 \\ --pooling_method cls \\ --normalize_embeddings True \\ --use_fp16 True \\ --add_instruction False \\ --overwrite False # Run the tasks in the specified languages python run_air_benchmark.py \\ --languages en \\ --output_dir ./search_results \\ --encoder BAAI/bge-m3 \\ --reranker BAAI/bge-reranker-v2-m3 \\ --search_top_k 1000 \\ --rerank_top_k 100 \\ --max_query_length 512 \\ --max_passage_length 512 \\ --batch_size 512 \\ --pooling_method cls \\ --normalize_embeddings True \\ --use_fp16 True \\ --add_instruction False \\ --overwrite False # Run the tasks in the specified task type, domains, and languages python run_air_benchmark.py \\ --task_types qa \\ --domains wiki web \\ --languages en \\ --output_dir ./search_results \\ --encoder BAAI/bge-m3 \\ --reranker BAAI/bge-reranker-v2-m3 \\ --search_top_k 1000 \\ --rerank_top_k 100 \\ --max_query_length 512 \\ --max_passage_length 512 \\ --batch_size 512 \\ --pooling_method cls \\ --normalize_embeddings True \\ --use_fp16 True \\ --add_instruction False \\ --overwrite False ``` 3. Package the search results. ```bash # Zip "Embedding Model + NoReranker" search results in "//NoReranker" to "/_NoReranker.zip". python zip_results.py \\ --results_dir search_results \\ --model_name bge-m3 \\ --save_dir search_results/zipped_results # Zip "Embedding Model + Reranker" search results in "//" to "/_.zip". python zip_results.py \\ --results_dir search_results \\ --model_name bge-m3 \\ --reranker_name bge-reranker-v2-m3 \\ --save_dir search_results/zipped_results ``` 4. Upload the `.zip` file on this page and fill in the model information: - Model Name: such as `bge-m3`. - Model URL: such as `https://huggingface.co/BAAI/bge-m3`. - Reranker Name: such as `bge-reranker-v2-m3`. Keep empty for `NoReranker`. - Reranker URL: such as `https://huggingface.co/BAAI/bge-reranker-v2-m3`. 
4. Upload the `.zip` file on this page and fill in the model information:
   - Model Name: such as `bge-m3`.
   - Model URL: such as `https://huggingface.co/BAAI/bge-m3`.
   - Reranker Name: such as `bge-reranker-v2-m3`. Keep empty for `NoReranker`.
   - Reranker URL: such as `https://huggingface.co/BAAI/bge-reranker-v2-m3`. Keep empty for `NoReranker`.

   If you want to stay anonymous, fill in only the Model Name and Reranker Name (keep empty for `NoReranker`), and check the selection box below before submission.
5. Congratulations! Your results will appear on the leaderboard within an hour.
"""

CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
CITATION_BUTTON_TEXT = r"""
"""
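
# Illustrative sketch (an assumption, not part of the original file): constants
# like these are typically rendered by a Gradio leaderboard app on Hugging Face
# Spaces. The wiring below is hypothetical and shows only how the strings above
# might be consumed; the guard keeps the module importable as plain config.
if __name__ == "__main__":
    import gradio as gr

    with gr.Blocks() as demo:
        gr.Markdown(TITLE)
        gr.Markdown(INTRODUCTION_TEXT)
        with gr.Accordion("Citation", open=False):
            gr.Textbox(
                value=CITATION_BUTTON_TEXT,
                label=CITATION_BUTTON_LABEL,
                lines=6,
                show_copy_button=True,
            )

    demo.launch()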