# pteb-leaderboard / utils.py
# tabedini's picture
# Update utils.py
# 60b2c57 verified
import json
import os
from datetime import datetime
import gradio as gr
import pandas as pd
from envs import API, EVAL_REQUESTS_PATH, TOKEN, QUEUE_REPO
# CSS overrides for the leaderboard UI: loads the Vazirmatn web font, applies
# it to the Gradio containers/controls, and styles the leaderboard tables,
# citation box, search bar, tab buttons, logo and filter widgets.
# Presumably handed to Gradio's `css` option by the app module — confirm.
# NOTE(review): `padding: 0.5;` in #filter-columns-type / #filter-columns-size
# has no unit; CSS requires a unit on non-zero lengths, so browsers are
# expected to ignore those two declarations — confirm the intended value.
custom_css = """
@import url('https://fonts.googleapis.com/css2?family=Vazirmatn&display=swap');
body, .gradio-container, .gr-button, .gr-input, .gr-slider, .gr-dropdown, .gr-markdown {
font-family: 'Vazirmatn', sans-serif !important;
}
.markdown-text {
font-size: 16px !important;
}
#models-to-add-text {
font-size: 18px !important;
}
#citation-button span {
font-size: 16px !important;
}
#citation-button textarea {
font-size: 16px !important;
}
#citation-button > label > button {
margin: 6px;
transform: scale(1.3);
}
#leaderboard-table {
margin-top: 15px;
text-align: center;
}
#leaderboard-table,
#leaderboard-table th,
#leaderboard-table td {
text-align: center;
vertical-align: middle;
border-collapse: collapse;
}
#leaderboard-table td:first-child,
#leaderboard-table th:first-child {
text-align: left;
max-width: 600px;
}
table > thead {
white-space: normal;
}
table > thead th,
table > tbody td {
text-align: center;
vertical-align: middle;
}
table > tbody td:first-child {
text-align: left;
max-width: 600px;
}
#leaderboard-table-lite {
margin-top: 15px;
}
#search-bar-table-box > div:first-child {
background: none;
border: none;
}
#search-bar {
padding: 0px;
}
.tab-buttons button {
font-size: 20px;
}
#scale-logo {
border-style: none !important;
box-shadow: none;
display: block;
margin-left: auto;
margin-right: auto;
max-width: 600px;
}
#scale-logo .download {
display: none;
}
#filter_type {
border: 0;
padding-left: 0;
padding-top: 0;
}
#filter_type label {
display: flex;
}
#filter_type label > span {
margin-top: var(--spacing-lg);
margin-right: 0.5em;
}
#filter_type label > .wrap {
width: 103px;
}
#filter_type label > .wrap .wrap-inner {
padding: 2px;
}
#filter_type label > .wrap .wrap-inner input {
width: 1px;
}
#filter-columns-type {
border: 0;
padding: 0.5;
}
#filter-columns-size {
border: 0;
padding: 0.5;
}
#box-filter > .form {
border: 0;
}
"""
# Markdown introduction shown for the benchmark: what it is, who maintains it
# and which version it is at. A plain string is used because the text
# contains no placeholders to interpolate.
ABOUT_TEXT = """
# Persian Text Embedding Benchmark (v1.0.0)
> The Persian Text Embedding Benchmark Leaderboard, developed by **Part DP AI**, provides a comprehensive benchmarking system specifically designed for Persian embedding models. This leaderboard, based on the open-source [MTEB](https://github.com/embeddings-benchmark/mteb), offers a unique platform for evaluating the performance of embedding models on datasets that demand linguistic proficiency in Persian.
> **Note:** This leaderboard is continuously updating its data and models, reflecting the latest developments in Persian embedding models. It is currently in version 1.0.0, serving as the initial benchmark for embedding model evaluation, with plans for future enhancements.
"""
# Markdown instructions for model submitters: the model must be public on
# Hugging Face, requests are queued and evaluated manually, and results are
# added to the leaderboard once evaluation finishes.
SUBMIT_TEXT = """## Submitting a Model for Evaluation
> To submit your open-source model for evaluation, follow these steps:
>
> 1. **Ensure your model is on Hugging Face**: Your model must be publicly available on [Hugging Face](https://huggingface.co/).
>
> 2. **Submit Request**: Send a request with your model's Hugging Face identifier.
>
> 3. **Manual Queue**: Please note that the evaluation process is currently handled manually. Submissions will be queued and processed as soon as possible.
>
> 4. **Results**: Once the evaluation is complete, your model’s results will be updated on the leaderboard.
>
> We appreciate your patience and contributions to the Persian LM ecosystem!
"""
# HTML snippet with the organisation avatar (horizontally centred, 30% wide)
# followed by a "Part DP AI" heading.
PART_LOGO = """
<img src="https://avatars.githubusercontent.com/u/39557177?v=4" style="width:30%;display:block;margin-left:auto;margin-right:auto">
<h1 style="font-size: 28px; margin-bottom: 2px;">Part DP AI</h1>
"""
# Static HTML table (despite the "markdown" in the name) listing each
# benchmark task next to the metric reported for it.
tasks_and_metrics_table_markdown = """
<div style="text-align: center;">
<p style="font-size: 16px; font-weight: bold;">Table of tasks and corresponding metrics</p>
<table style="margin: 0 auto; border-collapse: collapse; width: 30%; text-align: left; border: 1px solid #ddd; font-size: 14px;">
<thead>
<tr style="background-color: #f2f2f2;">
<th style="padding: 8px; border: 1px solid #ddd; width: 65%;">Task</th>
<th style="padding: 8px; border: 1px solid #ddd; width: 35%;">Metric</th>
</tr>
</thead>
<tbody>
<tr>
<td style="padding: 8px; border: 1px solid #ddd; word-wrap: break-word;">FarsTail-Pair-Classification</td>
<td style="padding: 8px; border: 1px solid #ddd;">Average precision</td>
</tr>
<tr>
<td style="padding: 8px; border: 1px solid #ddd; word-wrap: break-word;">MIRACL-Reranking</td>
<td style="padding: 8px; border: 1px solid #ddd;">NDCG@10</td>
</tr>
<tr>
<td style="padding: 8px; border: 1px solid #ddd; word-wrap: break-word;">Wikipedia-Multilingual-Reranking</td>
<td style="padding: 8px; border: 1px solid #ddd;">MAP</td>
</tr>
<tr>
<td style="padding: 8px; border: 1px solid #ddd; word-wrap: break-word;">NeuCLIR2023-Retrieval</td>
<td style="padding: 8px; border: 1px solid #ddd;">NDCG@20</td>
</tr>
<tr>
<td style="padding: 8px; border: 1px solid #ddd; word-wrap: break-word;">MIRACL-Retrieval</td>
<td style="padding: 8px; border: 1px solid #ddd;">NDCG@10</td>
</tr>
<tr>
<td style="padding: 8px; border: 1px solid #ddd; word-wrap: break-word;">Wikipedia-Multilingual-Retrieval</td>
<td style="padding: 8px; border: 1px solid #ddd;">NDCG@10</td>
</tr>
<tr>
<td style="padding: 8px; border: 1px solid #ddd; word-wrap: break-word;">Massive-Intent-Classification</td>
<td style="padding: 8px; border: 1px solid #ddd;">Accuracy</td>
</tr>
<tr>
<td style="padding: 8px; border: 1px solid #ddd; word-wrap: break-word;">Massive-Scenario-Classification</td>
<td style="padding: 8px; border: 1px solid #ddd;">Accuracy</td>
</tr>
<tr>
<td style="padding: 8px; border: 1px solid #ddd; word-wrap: break-word;">Multilingual-Sentiment-Classification</td>
<td style="padding: 8px; border: 1px solid #ddd;">Accuracy</td>
</tr>
<tr>
<td style="padding: 8px; border: 1px solid #ddd; word-wrap: break-word;">Persian-Food-Sentiment-Classification</td>
<td style="padding: 8px; border: 1px solid #ddd;">Accuracy</td>
</tr>
</tbody>
</table>
</div>
"""
def load_jsonl(input_file):
    """Read a JSON Lines file and return its records as a list.

    Args:
        input_file: Path of the ``.jsonl`` file to read.

    Returns:
        A list with one decoded JSON value per non-blank line, in file order.
        An empty file yields an empty list.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        OSError: If the file cannot be opened.
    """
    # Decode explicitly as UTF-8 so non-ASCII (e.g. Persian) text is read the
    # same way regardless of the platform's default locale encoding.
    with open(input_file, "r", encoding="utf-8") as f:
        # Skip blank lines (such as a stray trailing newline), which
        # json.loads would otherwise reject.
        return [json.loads(line) for line in f if line.strip()]
def jsonl_to_dataframe(input_file):
    """Load a JSON Lines file into a pandas DataFrame.

    The records are read with ``load_jsonl`` and passed to ``pd.DataFrame``.
    """
    return pd.DataFrame(load_jsonl(input_file))
def sort_dataframe_by_column(df, column_name):
    """Return ``df`` ordered by ``column_name``, largest values first.

    The result is a new DataFrame with a fresh 0..n-1 index.

    Raises:
        ValueError: If ``column_name`` is not one of ``df``'s columns.
    """
    if column_name in df.columns:
        ranked = df.sort_values(by=column_name, ascending=False)
        return ranked.reset_index(drop=True)
    raise ValueError(f"Column '{column_name}' does not exist in the DataFrame.")
def add_average_column_to_df(df, columns_to_average, index=3, average_column_name="Average Accuracy"):
    """Insert a column holding the row-wise mean of the given columns.

    Args:
        df: DataFrame to extend; it is modified in place.
        columns_to_average: Column labels whose per-row mean is computed.
        index: Position at which the new column is inserted.
        average_column_name: Label of the inserted column.

    Returns:
        The same ``df`` object, now containing the new column.
    """
    df.insert(index, average_column_name, df[columns_to_average].mean(axis=1))
    return df
def model_hyperlink(link, model_name):
    """Return an HTML anchor that shows ``model_name`` and opens ``link`` in a new tab."""
    anchor_style = (
        "color: var(--link-text-color); "
        "text-decoration: underline;"
        "text-decoration-style: dotted;"
    )
    return f'<a target="_blank" href="{link}" style="{anchor_style}">{model_name}</a>'
def make_clickable_model(model_name):
    """Return an HTML link to the model's page on huggingface.co.

    ``model_name`` is used both as the link text and as the path appended to
    ``https://huggingface.co/``.
    """
    return model_hyperlink(f"https://huggingface.co/{model_name}", model_name)
def center_align_markdown(text):
    """Wrap ``text`` in a ``<div align="center">`` element and return the markup."""
    return '<div align="center">{}</div>'.format(text)
def apply_markdown_format_for_columns(df, model_column_name):
    """Turn the model-name column of ``df`` into clickable links.

    Every value in ``model_column_name`` is replaced by the markup returned
    by ``make_clickable_model``. All other columns are left untouched.

    Args:
        df: DataFrame to update; it is modified in place.
        model_column_name: Label of the column holding model identifiers.

    Returns:
        The same ``df`` object.

    Raises:
        KeyError: If ``model_column_name`` is not a column of ``df``.
    """
    df[model_column_name] = df[model_column_name].apply(make_clickable_model)
    return df
def submit(model_name, model_id, contact_email):
    """Queue a model for evaluation by uploading a request file to the Hub.

    A small JSON file describing the request is written under
    ``EVAL_REQUESTS_PATH``, uploaded to the ``QUEUE_REPO`` dataset repository
    and then deleted locally. The outcome is reported through Gradio
    notifications.

    Args:
        model_name: Display name of the model.
        model_id: Model identifier, normally ``"<user>/<model>"``.
        contact_email: E-mail address of the submitter.

    Returns:
        None.

    Raises:
        gr.Error: If writing or uploading the request file fails. Gradio only
            shows an error to the user when ``gr.Error`` is raised, not when
            it is merely instantiated.
    """
    if not (model_name and model_id and contact_email):
        gr.Info("Please fill all the fields")
        return

    # NOTE(review): model_id is user-supplied and ends up in a filesystem
    # path; consider validating it against the Hub's repo-id rules.
    id_parts = model_id.split("/")
    if len(id_parts) > 1:
        user_name, model_path = id_parts[0], id_parts[1]
    else:
        # An id without a namespace has no user part; use the id itself as
        # the model part instead of failing on a missing second component.
        user_name, model_path = "", model_id

    eval_entry = {
        "model_name": model_name,
        "model_id": model_id,
        "contact_email": contact_email,
    }

    # The timestamp keeps repeated submissions of one model from colliding.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    if user_name:
        out_dir = f"{EVAL_REQUESTS_PATH}/{user_name}"
        file_name = f"{user_name}_{model_path}_{timestamp}.json"
    else:
        out_dir = EVAL_REQUESTS_PATH
        file_name = f"{model_path}_{timestamp}.json"
    out_path = f"{out_dir}/{file_name}"

    try:
        os.makedirs(out_dir, exist_ok=True)
        try:
            with open(out_path, "w", encoding="utf-8") as f:
                json.dump(eval_entry, f)

            print("Uploading eval file")
            API.upload_file(
                path_or_fileobj=out_path,
                # Assumes EVAL_REQUESTS_PATH contains an "eval-queue/"
                # segment (defined in envs) — TODO confirm.
                path_in_repo=out_path.split("eval-queue/")[1],
                repo_id=QUEUE_REPO,
                repo_type="dataset",
                commit_message=f"Add {model_name} to eval queue",
            )
        finally:
            # Remove the local file whether or not the upload succeeded.
            if os.path.exists(out_path):
                os.remove(out_path)
    except Exception as e:
        raise gr.Error(f"Error submitting the model: {e}") from e

    gr.Info("Successfully submitted", duration=10)