import streamlit as st | |
from tabs import comparison, leaderboard, about | |
import pandas as pd | |
# Path of the JSON Lines file the leaderboard is read from.
dataframe_path = "darija_tokenizers_leaderboard.jsonl"

# Columns a loaded leaderboard must contain to be accepted; also the schema
# of the empty fallback frame.
_LEADERBOARD_COLUMNS = [
    "Tokenizer",
    "Vocabulary Size",
    "Token Count",
    "Tokens/Character Ratio",
    "Latin Support",
    "Tokenizer Class",
]


def _load_leaderboard(path):
    """Return the leaderboard read from *path*, or an empty frame on failure.

    Args:
        path: Location of a JSON Lines file, passed to ``pd.read_json`` with
            ``lines=True``.

    Returns:
        The parsed DataFrame when it could be read and contains every column
        in ``_LEADERBOARD_COLUMNS``; otherwise an empty DataFrame that has
        exactly those columns.
    """
    try:
        frame = pd.read_json(path, lines=True)
    except Exception:
        # Best-effort load: a missing or unparsable file falls back to an
        # empty leaderboard. ``Exception`` (rather than a bare ``except``)
        # lets KeyboardInterrupt / SystemExit propagate.
        frame = None

    # Explicit check instead of ``assert`` so validation survives ``python -O``.
    if frame is None or any(
        col not in frame.columns for col in _LEADERBOARD_COLUMNS
    ):
        return pd.DataFrame(columns=_LEADERBOARD_COLUMNS)
    return frame


# Module-level leaderboard used by main() below.
df = _load_leaderboard(dataframe_path)
def main():
    """Render the app: a title followed by three tabs.

    The "Leaderboard" and "Comparison" tabs are each handed the module-level
    ``df``; the "About" tab takes no arguments.
    """
    st.title("Darija Tokenizer Explorer 🧭")

    tab_labels = ["Leaderboard", "Comparison", "About"]
    leaderboard_pane, comparison_pane, about_pane = st.tabs(tab_labels)

    with leaderboard_pane:
        leaderboard.leaderboard_tab(df)

    with comparison_pane:
        comparison.comparison_tab(df)

    with about_pane:
        about.about_tab()


# Only build the UI when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()