Spaces:

HeshamHaroon
/

Arabic_Tokenizer

Runtime error

App Files Community

HeshamHaroon committed on May 20, 2024

Commit

b613c61

verified ·

1 Parent(s): 00fb4c8

Update app.py

Browse files

Files changed (1) hide show

app.py +6 -6

app.py CHANGED Viewed

@@ -38,7 +38,7 @@ tokenizer_options = [
     "inception-mbzuai/jais-13b",
     "aubmindlab/bert-base-arabertv2",
     "CohereForAI/c4ai-command-r-v01",
-    "CohereForAI/c4ai-command-r-plus"
 ]
 if meta_llama_tokenizer:
@@ -73,13 +73,13 @@ def compare_tokenizers(tokenizer_name, text):
     tokens_display = [token.encode('utf-8').decode('utf-8') if isinstance(token, bytes) else token for token in tokens]
     # Prepare the results to be displayed in HTML format
-    tokens_html = "".join([f"<span style='background-color:#D3D3D3; padding:2px; margin:2px; border-radius:5px;'>{token}</span>" for token in tokens_display])
-    encoded_html = "".join([f"<span style='background-color:#ADD8E6; padding:2px; margin:2px; border-radius:5px;'>{token}</span>" for token in encoded_output])
-    decoded_html = f"<div style='background-color:#90EE90; padding:10px; border-radius:5px;'>{decoded_text}</div>"
     results_html = f"""
     <div style='font-family: Arial, sans-serif;'>
-        <h3>Tokenizer: {tokenizer_name}</h3>
         <p><strong>Tokens:</strong> {tokens_html}</p>
         <p><strong>Encoded:</strong> {encoded_html}</p>
         <p><strong>Decoded:</strong> {decoded_html}</p>
@@ -89,7 +89,7 @@ def compare_tokenizers(tokenizer_name, text):
 # Define the Gradio interface components with a dropdown for model selection
 inputs_component = [
-    gr.Dropdown(choices=tokenizer_options, label="Select Tokenizer"),
     gr.Textbox(lines=2, placeholder="اكتب النص هنا...", label="Input Text")
 ]

     "inception-mbzuai/jais-13b",
     "aubmindlab/bert-base-arabertv2",
     "CohereForAI/c4ai-command-r-v01",
+    "CohereForAI/c4ai-command-r-plus"
 ]
 if meta_llama_tokenizer:
     tokens_display = [token.encode('utf-8').decode('utf-8') if isinstance(token, bytes) else token for token in tokens]
     # Prepare the results to be displayed in HTML format
+    tokens_html = "".join([f"<span style='background-color:#f0f0f0; padding:4px; margin:2px; border-radius:3px; border:1px solid #ccc;'>{token}</span>" for token in tokens_display])
+    encoded_html = "".join([f"<span style='background-color:#e0f7fa; padding:4px; margin:2px; border-radius:3px; border:1px solid #00acc1;'>{token}</span>" for token in encoded_output])
+    decoded_html = f"<div style='background-color:#e8f5e9; padding:10px; border-radius:3px; border:1px solid #4caf50;'>{decoded_text}</div>"
     results_html = f"""
     <div style='font-family: Arial, sans-serif;'>
+        <h3 style='color: #00796b;'>Tokenizer: {tokenizer_name}</h3>
         <p><strong>Tokens:</strong> {tokens_html}</p>
         <p><strong>Encoded:</strong> {encoded_html}</p>
         <p><strong>Decoded:</strong> {decoded_html}</p>
 # Define the Gradio interface components with a dropdown for model selection
 inputs_component = [
+    gr.Dropdown(choices=tokenizer_options, label="Select Tokenizer", type="index"),
     gr.Textbox(lines=2, placeholder="اكتب النص هنا...", label="Input Text")
 ]