nouamanetazi's picture
nouamanetazi HF staff
Upload app.py with huggingface_hub
5ad2e6e verified
raw
history blame contribute delete
957 Bytes
import logging

import pandas as pd
import streamlit as st

from tabs import comparison, leaderboard, about
dataframe_path = "darija_tokenizers_leaderboard.jsonl"

# Columns every leaderboard record must provide. The same tuple defines the
# schema of the empty fallback table, so validation and fallback cannot drift.
LEADERBOARD_COLUMNS = (
    "Tokenizer",
    "Vocabulary Size",
    "Token Count",
    "Tokens/Character Ratio",
    "Latin Support",
    "Tokenizer Class",
)

logger = logging.getLogger(__name__)


def load_leaderboard(path=dataframe_path, columns=LEADERBOARD_COLUMNS):
    """Load the leaderboard from a JSON Lines file.

    Loading is best-effort: if the file cannot be read or parsed, or if any
    of the required columns is missing, a warning is logged and an empty
    DataFrame that has exactly ``columns`` is returned instead.

    Args:
        path: Location of the JSON Lines file to read.
        columns: Names of the columns the loaded table must contain.

    Returns:
        The loaded DataFrame, or an empty DataFrame with ``columns`` as its
        columns when loading or validation fails.
    """
    try:
        frame = pd.read_json(path, lines=True)
    except Exception:
        # Deliberately broad: a missing file, unreadable file, or malformed
        # JSON must all degrade to an empty table. Unlike a bare ``except:``,
        # this lets KeyboardInterrupt and SystemExit propagate.
        logger.warning(
            "Could not read leaderboard from %r; using an empty table",
            path,
            exc_info=True,
        )
        return pd.DataFrame(columns=list(columns))

    # Explicit check instead of ``assert`` so validation survives ``python -O``.
    missing = [col for col in columns if col not in frame.columns]
    if missing:
        logger.warning(
            "Invalid columns in leaderboard %r (missing: %s); using an empty table",
            path,
            missing,
        )
        return pd.DataFrame(columns=list(columns))

    return frame


df = load_leaderboard()
def main():
    """Render the page title and the Leaderboard, Comparison and About tabs.

    Each tab is filled by the matching function from the ``tabs`` package;
    the leaderboard and comparison tabs receive the module-level ``df``.
    """
    st.title("Darija Tokenizer Explorer 🧭")
    tab_labels = ["Leaderboard", "Comparison", "About"]
    board_pane, compare_pane, about_pane = st.tabs(tab_labels)

    # Pair each tab container with the callable that fills it. The lambdas
    # defer every lookup and call until the matching container is active.
    sections = (
        (board_pane, lambda: leaderboard.leaderboard_tab(df)),
        (compare_pane, lambda: comparison.comparison_tab(df)),
        (about_pane, lambda: about.about_tab()),
    )
    for pane, render in sections:
        with pane:
            render()


# Build the page only when this file is executed as the main script.
if __name__ == "__main__":
    main()