|
import gradio as gr |
|
import torch |
|
from transformers import pipeline |
|
import os |
|
import spaces |
|
|
|
# Maps each UI dialect name to the Hugging Face checkpoint used for
# fill-mask inference (consumed by load_model()).
# NOTE(review): all five dialects currently point at the same checkpoint —
# presumably placeholders until per-dialect models are released; confirm.
MODELS = {

    "Moroccan": "BounharAbdelaziz/ModernBERT-Arabic-base-stage-1-pre-decay-predef-bkpt-ini-1024-mx8192",

    "Arabic": "BounharAbdelaziz/ModernBERT-Arabic-base-stage-1-pre-decay-predef-bkpt-ini-1024-mx8192",

    "Egyptian": "BounharAbdelaziz/ModernBERT-Arabic-base-stage-1-pre-decay-predef-bkpt-ini-1024-mx8192",

    "Tunisian": "BounharAbdelaziz/ModernBERT-Arabic-base-stage-1-pre-decay-predef-bkpt-ini-1024-mx8192",

    "Algerian": "BounharAbdelaziz/ModernBERT-Arabic-base-stage-1-pre-decay-predef-bkpt-ini-1024-mx8192",

}
|
|
|
# Per-dialect example prompts shown in the "Select Example" dropdown.
# Every string contains a [MASK] placeholder to be filled by the pipeline;
# the UI falls back to the "Arabic" list for unknown dialect keys.
EXAMPLES = {

    "Moroccan": [

        "الدار البيضاء [MASK]",

        "المغرب بلاد [MASK]",

        "كناكل [MASK] فالمغرب",

        "العاصمة د [MASK] هي الرباط",

        "المغرب [MASK] زوين",

        "انا سميتي مريم، و كنسكن ف[MASK] العاصمة دفلسطين"

    ],

    "Arabic": [

        "العاصمة الرسمية لمصر هي [MASK].",

        "أطول نهر في العالم هو نهر [MASK].",

        "الشاعر العربي المشهور [MASK] كتب قصيدة 'أراك عصي الدمع'.",

        "عندما أستيقظ في الصباح، أشرب فنجان من [MASK].",

        "في التأني [MASK] وفي العجلة الندامة.",

        "معركة [MASK] كانت من أهم المعارك في تاريخ الإسلام.",

        "يعتبر [MASK] من أهم العلماء في مجال الفيزياء.",

        "تقع جبال [MASK] في شمال إفريقيا.",

        "يعتبر [MASK] من أركان الإسلام الخمسة."

    ],

    "Egyptian": [

        "القاهرة مدينة [MASK]",

        "مصر بلاد [MASK]",

        "بنحب [MASK] فمصر"

    ],

    "Tunisian": [

        "تونس بلاد [MASK]",

        "المنستير مدينة [MASK]",

        "عيشتي في [MASK]"

    ],

    "Algerian": [

        "الجزائر بلاد [MASK]",

        "قسنطينة مدينة [MASK]",

        "نحبو [MASK] ف الجزائر"

    ],

}
|
|
|
# Hugging Face access token for gated/private checkpoints.
# Use .get() so the app still starts (with anonymous access) when the
# variable is absent, instead of crashing with a KeyError at import time.
TOKEN = os.environ.get("HF_TOKEN")

# Run on the first CUDA device when available, otherwise on CPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"
|
|
|
def load_model(dialect):
    """Build a fill-mask pipeline for the checkpoint mapped to *dialect*.

    Unknown dialect names fall back to the "Arabic" checkpoint.
    """
    checkpoint = MODELS.get(dialect, MODELS["Arabic"])
    return pipeline(
        task="fill-mask",
        model=checkpoint,
        token=TOKEN,
        device=device,
    )
|
|
|
# Lazily created fill-mask pipeline, shared across requests.
pipe = None

# Dialect the current `pipe` was loaded for; used to detect when a reload
# is needed. (Replaces the fragile function-attribute pattern
# `predict.current_dialect`, which was assigned only after the def.)
current_dialect = None


@spaces.GPU
def predict(text, dialect):
    """Run fill-mask inference on *text* with the model for *dialect*.

    Args:
        text: Input sentence containing a ``[MASK]`` placeholder.
        dialect: Key into ``MODELS``/``EXAMPLES`` selecting the checkpoint.

    Returns:
        dict mapping each predicted token string to its probability,
        in the shape expected by ``gr.Label``.

    Raises:
        gr.Error: if *text* does not contain the ``[MASK]`` token.
    """
    global pipe, current_dialect
    # Fail with a friendly UI message instead of a transformers traceback
    # when the mask placeholder is missing from the input.
    if "[MASK]" not in text:
        raise gr.Error("Input must contain the [MASK] token.")
    # (Re)load the pipeline only when the requested dialect changes.
    if pipe is None or dialect != current_dialect:
        pipe = load_model(dialect)
        current_dialect = dialect
    outputs = pipe(text)
    # One pass over the predictions instead of two parallel list builds.
    return {x["token_str"]: float(x["score"]) for x in outputs}
|
|
|
|
|
# --- UI definition -------------------------------------------------------
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():

            # Dialect picker: drives both the model used by predict() and
            # the contents of the example dropdown below.
            dialect_dropdown = gr.Dropdown(
                choices=["Arabic", "Tunisian", "Moroccan", "Algerian", "Egyptian"],
                label="Select Dialect",
                value="Arabic",
            )

            input_text = gr.Textbox(
                label="Input",
                placeholder="Enter text here...",
                rtl=True,  # Arabic text renders right-to-left
            )

            with gr.Row():
                clear_btn = gr.Button("Clear")
                submit_btn = gr.Button("Submit", variant="primary")

            # NOTE: the original also created an unused gr.State holding
            # EXAMPLES["Arabic"]; it was never read or written, so it has
            # been removed.
            example_dropdown = gr.Dropdown(
                choices=EXAMPLES["Arabic"],
                label="Select Example",
                interactive=True,
            )

            load_example_btn = gr.Button("Load Example")

        with gr.Column():

            output_labels = gr.Label(
                label="Prediction Results",
                show_label=False,
            )

    def update_example_choices(dialect):
        """Refresh the example dropdown when the dialect changes."""
        # Fetch the fallback-resolved list once and reuse it for both the
        # choices and the default value. The original indexed
        # EXAMPLES[dialect][0] directly, which would raise KeyError for a
        # dialect missing from EXAMPLES even though choices fell back.
        examples = EXAMPLES.get(dialect, EXAMPLES["Arabic"])
        return gr.update(choices=examples, value=examples[0])

    def load_example(selected_example):
        """Copy the chosen example into the input textbox."""
        return selected_example

    dialect_dropdown.change(
        update_example_choices,
        inputs=dialect_dropdown,
        outputs=example_dropdown,
    )

    load_example_btn.click(
        load_example,
        inputs=example_dropdown,
        outputs=input_text,
    )

    submit_btn.click(
        predict,
        inputs=[input_text, dialect_dropdown],
        outputs=output_labels,
    )

    clear_btn.click(
        lambda: "",
        outputs=input_text,
    )


demo.launch()