AIR-Bench Space

Commit b9d42b4 (1 parent: c48db83), committed by nan

feat: update the about and submission tab

Files changed (2)
  1. app.py +2 -0
  2. src/about.py +117 -12
app.py CHANGED

```diff
@@ -202,6 +202,7 @@ with demo:
         with gr.Column():
             with gr.Row():
                 search_bar = gr.Textbox(
+                    info="Search the retrieval models",
                     placeholder=" 🔍 Search for retrieval models (separate multiple queries with `;`) and press ENTER...",
                     show_label=False,
                     elem_id="search-bar-long-doc",
@@ -276,6 +277,7 @@ with demo:
                         selected_langs,
                         selected_rerankings,
                         search_bar,
+                        show_anonymous,
                     ],
                     leaderboard_table_long_doc,
                 )
```
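For context, both additions feed the long-doc leaderboard tab: `info=` adds a caption to the search box, and `show_anonymous` becomes one more input to the callback that refreshes `leaderboard_table_long_doc`, alongside the language and reranker selectors and the search bar. The sketch below shows that wiring pattern in a self-contained Gradio app; the callback body, widget labels, and toy data are assumptions for illustration, not the Space's actual code.

```python
import gradio as gr
import pandas as pd

# Toy stand-in for the long-doc leaderboard results; the real Space builds this
# dataframe from the submitted evaluation files.
RESULTS = pd.DataFrame(
    {
        "retrieval_model": ["BAAI/bge-m3", "my-retriever", "anonymous-1234"],
        "reranker": ["BAAI/bge-reranker-v2-m3", "NoReranker", "NoReranker"],
        "lang": ["en", "en", "zh"],
        "is_anonymous": [False, False, True],
        "ndcg_at_10": [0.71, 0.63, 0.58],
    }
)


def update_table_long_doc(langs, rerankers, query, show_anonymous):
    """Hypothetical refresh callback: filter the results by the current widget values."""
    df = RESULTS[RESULTS["lang"].isin(langs) & RESULTS["reranker"].isin(rerankers)]
    if query:
        # The search bar accepts several queries separated by `;`.
        terms = [q.strip() for q in query.split(";") if q.strip()]
        df = df[df["retrieval_model"].str.contains("|".join(terms), case=False)]
    if not show_anonymous:
        df = df[~df["is_anonymous"]]
    return df


with gr.Blocks() as demo:
    with gr.Column():
        with gr.Row():
            search_bar = gr.Textbox(
                info="Search the retrieval models",
                placeholder=" 🔍 Search for retrieval models (separate multiple queries with `;`) and press ENTER...",
                show_label=False,
                elem_id="search-bar-long-doc",
            )
        selected_langs = gr.CheckboxGroup(["en", "zh"], value=["en", "zh"], label="Languages")
        selected_rerankings = gr.CheckboxGroup(
            ["NoReranker", "BAAI/bge-reranker-v2-m3"],
            value=["NoReranker", "BAAI/bge-reranker-v2-m3"],
            label="Rerankers",
        )
        show_anonymous = gr.Checkbox(value=False, label="Show anonymous submissions")
        leaderboard_table_long_doc = gr.Dataframe(value=RESULTS)

    # The commit adds `show_anonymous` to this inputs list, so its current value is
    # read every time a search is submitted.
    search_bar.submit(
        update_table_long_doc,
        [selected_langs, selected_rerankings, search_bar, show_anonymous],
        leaderboard_table_long_doc,
    )
    # Wiring the checkbox's own change event keeps the table in sync when it is toggled.
    show_anonymous.change(
        update_table_long_doc,
        [selected_langs, selected_rerankings, search_bar, show_anonymous],
        leaderboard_table_long_doc,
    )

if __name__ == "__main__":
    demo.launch()
```

Listing the checkbox as an input means its current value is read each time the search is submitted; the Space presumably also wires a change event on the checkbox itself so the table updates as soon as it is toggled.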
src/about.py CHANGED

````diff
@@ -10,24 +10,129 @@ AIR-Bench: Automated Heterogeneous Information Retrieval Benchmark
 BENCHMARKS_TEXT = f"""
 ## How it works

-## Reproducibility
-To reproduce our results, here is the commands you can run:
-
+See [our GitHub repo](https://github.com/AIR-Bench/AIR-Bench) for more information.
 """

 EVALUATION_QUEUE_TEXT = """
-## Some good practices before submitting a model
-
-### 1)
-### 2)
-### 3)
-### 4)
-
-## In case of model failure
-If your model is displayed in the `FAILED` category, its execution stopped.
-Make sure you have followed the above steps first.
-If everything is done, check you can launch the EleutherAIHarness on your model locally, using the above command without modifications (you can add `--limit` to limit the number of examples per task).
+## Steps to submit to AIR-Bench
+
+1. Install AIR-Bench
+```bash
+# Clone the repo
+git clone https://github.com/AIR-Bench/AIR-Bench.git
+
+# Install the package
+cd AIR-Bench
+pip install .
+```
+2. Run the evaluation script
+```bash
+cd AIR-Bench/scripts
+# Run all tasks
+python run_AIR-Bench.py \
+    --output_dir ./search_results \
+    --encoder BAAI/bge-m3 \
+    --encoder_link https://huggingface.co/BAAI/bge-m3 \
+    --reranker BAAI/bge-reranker-v2-m3 \
+    --reranker_link https://huggingface.co/BAAI/bge-reranker-v2-m3 \
+    --search_top_k 1000 \
+    --rerank_top_k 100 \
+    --max_query_length 512 \
+    --max_passage_length 512 \
+    --batch_size 512 \
+    --pooling_method cls \
+    --normalize_embeddings True \
+    --use_fp16 True \
+    --add_instruction False \
+    --overwrite False
+
+# Run the tasks of the specified task type
+python run_AIR-Bench.py \
+    --task_type long-doc \
+    --output_dir ./search_results \
+    --encoder BAAI/bge-m3 \
+    --encoder_link https://huggingface.co/BAAI/bge-m3 \
+    --reranker BAAI/bge-reranker-v2-m3 \
+    --reranker_link https://huggingface.co/BAAI/bge-reranker-v2-m3 \
+    --search_top_k 1000 \
+    --rerank_top_k 100 \
+    --max_query_length 512 \
+    --max_passage_length 512 \
+    --batch_size 512 \
+    --pooling_method cls \
+    --normalize_embeddings True \
+    --use_fp16 True \
+    --add_instruction False \
+    --overwrite False
+
+# Run the tasks of the specified task type and domains
+python run_AIR-Bench.py \
+    --task_type long-doc \
+    --domain arxiv book \
+    --output_dir ./search_results \
+    --encoder BAAI/bge-m3 \
+    --encoder_link https://huggingface.co/BAAI/bge-m3 \
+    --reranker BAAI/bge-reranker-v2-m3 \
+    --reranker_link https://huggingface.co/BAAI/bge-reranker-v2-m3 \
+    --search_top_k 1000 \
+    --rerank_top_k 100 \
+    --max_query_length 512 \
+    --max_passage_length 512 \
+    --batch_size 512 \
+    --pooling_method cls \
+    --normalize_embeddings True \
+    --use_fp16 True \
+    --add_instruction False \
+    --overwrite False
+
+# Run the tasks in the specified languages
+python run_AIR-Bench.py \
+    --language en \
+    --output_dir ./search_results \
+    --encoder BAAI/bge-m3 \
+    --encoder_link https://huggingface.co/BAAI/bge-m3 \
+    --reranker BAAI/bge-reranker-v2-m3 \
+    --reranker_link https://huggingface.co/BAAI/bge-reranker-v2-m3 \
+    --search_top_k 1000 \
+    --rerank_top_k 100 \
+    --max_query_length 512 \
+    --max_passage_length 512 \
+    --batch_size 512 \
+    --pooling_method cls \
+    --normalize_embeddings True \
+    --use_fp16 True \
+    --add_instruction False \
+    --overwrite False
+
+# Run the tasks of the specified task type, domains, and languages
+python run_AIR-Bench.py \
+    --task_type qa \
+    --domains wiki web \
+    --languages en \
+    --output_dir ./search_results \
+    --encoder BAAI/bge-m3 \
+    --encoder_link https://huggingface.co/BAAI/bge-m3 \
+    --reranker BAAI/bge-reranker-v2-m3 \
+    --reranker_link https://huggingface.co/BAAI/bge-reranker-v2-m3 \
+    --search_top_k 1000 \
+    --rerank_top_k 100 \
+    --max_query_length 512 \
+    --max_passage_length 512 \
+    --batch_size 512 \
+    --pooling_method cls \
+    --normalize_embeddings True \
+    --use_fp16 True \
+    --add_instruction False \
+    --overwrite False
+```
+3. Package the search results.
+```bash
+python zip_results.py \
+    --results_path search_results/bge-m3 \
+    --save_path search_results/zipped_results
+```
+4. Upload the `.zip` file on this page and fill in the model information.
+5. Congratulations! Your results will be shown on the leaderboard within an hour.
 """

 CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
````
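One note on step 3 of the new submission guide: `zip_results.py` itself is not part of this commit, so its internals are not shown here. Conceptually, the packaging step just bundles everything under `search_results/<model>` into a single `.zip` for upload. A rough stand-in sketch follows; the two flags match the guide, while the archive layout and naming are assumptions rather than the actual script's behavior.

```python
import argparse
import shutil
from pathlib import Path


def main() -> None:
    # Flags mirror the submission guide; everything else is an illustrative assumption.
    parser = argparse.ArgumentParser(description="Bundle search results into a .zip for upload.")
    parser.add_argument("--results_path", required=True, help="e.g. search_results/bge-m3")
    parser.add_argument("--save_path", required=True, help="e.g. search_results/zipped_results")
    args = parser.parse_args()

    results_dir = Path(args.results_path)
    save_dir = Path(args.save_path)
    save_dir.mkdir(parents=True, exist_ok=True)

    # shutil.make_archive appends ".zip" to the base name and zips the whole directory.
    archive = shutil.make_archive(str(save_dir / results_dir.name), "zip", root_dir=results_dir)
    print(f"Wrote {archive}; upload this file on the submission tab.")


if __name__ == "__main__":
    main()
```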