MohamedRashad committed on
Commit
0c0efc4
·
1 Parent(s): e4cac44

Add refresh functionality to update dataframe

Browse files
Files changed (1) hide show
  1. app.py +11 -1
app.py CHANGED
@@ -17,6 +17,7 @@ initial_list_of_models = [
17
  dataset = load_dataset("MohamedRashad/rasaif-translations", split="train")["arabic"]
18
 
19
  dataframe_path = Path(__file__).parent / "arabic_tokenizers_leaderboard.jsonl"
 
20
  if dataframe_path.exists():
21
  df = pd.read_json(dataframe_path, lines=True)
22
  else:
@@ -161,8 +162,14 @@ def tokenize_text(text, chosen_model, better_tokenization=False):
161
 
162
  return gr.HighlightedText(output, color_map)
163
 
 
 
 
 
 
164
  leaderboard_description = """The `Total Number of Tokens` in this leaderboard is based on the total number of tokens summed on the Arabic section of [rasaif-translations](https://huggingface.co/datasets/MohamedRashad/rasaif-translations) dataset.
165
  This dataset was chosen because it represents Arabic Fusha text in a small and consentrated manner.
 
166
  A tokenizer that scores high in this leaderboard will be efficient in parsing Arabic in its different dialects and forms.
167
  """
168
 
@@ -188,7 +195,9 @@ with gr.Blocks() as demo:
188
  model_name = gr.Textbox(
189
  label="Model Name from Hugging Face (e.g. Xenova/gpt-4o)"
190
  )
191
- submit_new_model_btn = gr.Button(value="Submit", variant="primary")
 
 
192
  with gr.Tab(label="Try tokenizers"):
193
  text = gr.Textbox(label="Enter a text", lines=5, value="السلام عليكم ورحمة الله", rtl=True, text_align="right")
194
  dropdown = gr.Dropdown(
@@ -202,6 +211,7 @@ with gr.Blocks() as demo:
202
  tokenized_textbox = gr.HighlightedText(label="Tokenized text")
203
 
204
  submit_new_model_btn.click(submit, model_name, outputs=[dataframe, barplot, dropdown])
 
205
  submit_text_btn.click(tokenize_text, inputs=[text, dropdown, checkbox], outputs=[tokenized_textbox])
206
 
207
 
 
17
  dataset = load_dataset("MohamedRashad/rasaif-translations", split="train")["arabic"]
18
 
19
  dataframe_path = Path(__file__).parent / "arabic_tokenizers_leaderboard.jsonl"
20
+
21
  if dataframe_path.exists():
22
  df = pd.read_json(dataframe_path, lines=True)
23
  else:
 
162
 
163
  return gr.HighlightedText(output, color_map)
164
 
165
def refresh():
    """Reload the leaderboard from disk and rebuild the dependent widgets.

    Re-reads the JSON-lines file at ``dataframe_path`` into the module-level
    ``df`` and returns fresh Gradio components for the click handler's three
    outputs (dataframe, bar plot, model dropdown).

    If the leaderboard file does not exist, the current in-memory ``df`` is
    kept instead of raising, mirroring the ``dataframe_path.exists()`` guard
    used when the module first loads the data.

    Returns:
        A ``(gr.Dataframe, gr.BarPlot, gr.Dropdown)`` tuple built from ``df``;
        the dropdown choices are the values of the "📛 Models" column.
    """
    global df
    # Only replace the in-memory table when there is a file to read; a
    # missing file would otherwise make the Refresh button raise.
    if dataframe_path.exists():
        df = pd.read_json(dataframe_path, lines=True)
    return (
        gr.Dataframe(df),
        gr.BarPlot(df),
        gr.Dropdown(choices=df["📛 Models"].tolist()),
    )
169
+
170
  leaderboard_description = """The `Total Number of Tokens` in this leaderboard is based on the total number of tokens summed on the Arabic section of [rasaif-translations](https://huggingface.co/datasets/MohamedRashad/rasaif-translations) dataset.
171
  This dataset was chosen because it represents Arabic Fusha text in a small and consentrated manner.
172
+
173
  A tokenizer that scores high in this leaderboard will be efficient in parsing Arabic in its different dialects and forms.
174
  """
175
 
 
195
  model_name = gr.Textbox(
196
  label="Model Name from Hugging Face (e.g. Xenova/gpt-4o)"
197
  )
198
+ with gr.Row():
199
+ submit_new_model_btn = gr.Button(value="Submit New Model", variant="primary", scale=3)
200
+ refresh_btn = gr.Button(value="Refresh", variant="secondary", scale=1)
201
  with gr.Tab(label="Try tokenizers"):
202
  text = gr.Textbox(label="Enter a text", lines=5, value="السلام عليكم ورحمة الله", rtl=True, text_align="right")
203
  dropdown = gr.Dropdown(
 
211
  tokenized_textbox = gr.HighlightedText(label="Tokenized text")
212
 
213
  submit_new_model_btn.click(submit, model_name, outputs=[dataframe, barplot, dropdown])
214
+ refresh_btn.click(refresh, outputs=[dataframe, barplot, dropdown])
215
  submit_text_btn.click(tokenize_text, inputs=[text, dropdown, checkbox], outputs=[tokenized_textbox])
216
 
217