Spaces:
Running
on
Zero
Running
on
Zero
Added all supported languages
Browse files
app.py
CHANGED
@@ -4,36 +4,67 @@ from transformers import pipeline
|
|
4 |
import os
|
5 |
import spaces
|
6 |
|
7 |
-
|
8 |
-
|
9 |
-
|
|
|
|
|
|
|
|
|
|
|
10 |
|
|
|
|
|
11 |
|
12 |
-
|
13 |
-
|
|
|
|
|
14 |
task="fill-mask",
|
15 |
-
model=
|
16 |
token=TOKEN,
|
17 |
device=device
|
18 |
)
|
19 |
return pipe
|
20 |
|
21 |
-
|
22 |
-
pipe=
|
23 |
-
print("[INFO] Pipeline loaded!")
|
24 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
25 |
|
|
|
26 |
@spaces.GPU
|
27 |
-
def predict(text):
|
|
|
|
|
|
|
|
|
28 |
outputs = pipe(text)
|
29 |
-
scores= [x["score"] for x in outputs]
|
30 |
-
tokens= [x["token_str"] for x in outputs]
|
31 |
return {label: float(prob) for label, prob in zip(tokens, scores)}
|
32 |
|
|
|
|
|
|
|
33 |
# Create Gradio interface
|
34 |
with gr.Blocks() as demo:
|
35 |
with gr.Row():
|
36 |
with gr.Column():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
37 |
# Input text box
|
38 |
input_text = gr.Textbox(
|
39 |
label="Input",
|
@@ -46,10 +77,11 @@ with gr.Blocks() as demo:
|
|
46 |
clear_btn = gr.Button("Clear")
|
47 |
submit_btn = gr.Button("Submit", variant="primary")
|
48 |
|
49 |
-
# Examples section
|
50 |
-
gr.Examples(
|
51 |
-
examples=["
|
52 |
-
inputs=input_text
|
|
|
53 |
)
|
54 |
|
55 |
with gr.Column():
|
@@ -59,10 +91,20 @@ with gr.Blocks() as demo:
|
|
59 |
show_label=False
|
60 |
)
|
61 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
62 |
# Button actions
|
63 |
submit_btn.click(
|
64 |
predict,
|
65 |
-
inputs=input_text,
|
66 |
outputs=output_labels
|
67 |
)
|
68 |
|
|
|
4 |
import os
|
5 |
import spaces
|
6 |
|
7 |
+
# Define models for each dialect
|
8 |
+
# Maps the dialect name shown in the UI to the Hugging Face Hub checkpoint
# used for fill-mask inference on that dialect.
MODELS = {
    "Arabic": "BounharAbdelaziz/ModernBERT-Arabic-base-stage-3-decay-mx8192-MSA",
    # NOTE(review): "Tunisian" points at the same -ARY checkpoint as
    # "Moroccan" -- confirm whether a dedicated Tunisian checkpoint exists.
    "Tunisian": "BounharAbdelaziz/ModernBERT-Arabic-base-stage-3-decay-mx8192-ARY",
    "Moroccan": "BounharAbdelaziz/ModernBERT-Arabic-base-stage-3-decay-mx8192-ARY",
    "Algerian": "BounharAbdelaziz/ModernBERT-Arabic-base-stage-3-decay-mx8192-ALGERIAN",
    "Egyptian": "BounharAbdelaziz/ModernBERT-Arabic-base-stage-3-decay-mx8192-ARZ",
}
|
15 |
|
16 |
+
# Hub access token. Falls back to None (anonymous access) when HF_TOKEN is
# not set, instead of aborting the import with a bare KeyError.
TOKEN = os.environ.get("HF_TOKEN")
# Use the first CUDA device when torch reports one, otherwise run on CPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"
|
18 |
|
19 |
+
# Load the pipeline dynamically based on the selected dialect
|
20 |
+
def load_model(dialect):
    """Build the fill-mask pipeline for *dialect*.

    The checkpoint is looked up in ``MODELS``; a dialect that is not listed
    there falls back to the "Arabic" checkpoint. The pipeline is created
    with the module-level ``TOKEN`` and ``device``.
    """
    fallback_checkpoint = MODELS["Arabic"]
    checkpoint = MODELS.get(dialect, fallback_checkpoint)
    return pipeline(
        task="fill-mask",
        model=checkpoint,
        token=TOKEN,
        device=device,
    )


# Currently loaded pipeline, shared module-wide; None until one is loaded.
pipe = None
|
|
|
32 |
|
33 |
+
# Define examples for each dialect
|
34 |
+
# Example prompts offered in the UI, keyed by the same dialect names as MODELS.
# Every prompt contains a [MASK] placeholder for the model to fill.
EXAMPLES = {
    "Moroccan": [
        "الدار البيضاء [MASK]",
        "المغرب بلاد [MASK]",
        "كناكل [MASK] فالمغرب",
        "العاصمة د [MASK] هي الرباط",
        "المغرب [MASK] زوين",
        "انا سميتي مريم، و كنسكن ف[MASK] العاصمة دفلسطين",
    ],
    # NOTE(review): these three prompts are identical to the last three
    # "Moroccan" prompts -- confirm they are the intended "Arabic" examples.
    "Arabic": [
        "العاصمة د [MASK] هي الرباط",
        "المغرب [MASK] زوين",
        "انا سميتي مريم، و كنسكن ف[MASK] العاصمة دفلسطين",
    ],
    "Egyptian": [
        "القاهرة مدينة [MASK]",
        "مصر بلاد [MASK]",
        "بنحب [MASK] فمصر",
    ],
    "Tunisian": [
        "تونس بلاد [MASK]",
        "المنستير مدينة [MASK]",
        "عيشتي في [MASK]",
    ],
    "Algerian": [
        "الجزائر بلاد [MASK]",
        "قسنطينة مدينة [MASK]",
        "نحبو [MASK] ف الجزائر",
    ],
}
|
41 |
|
42 |
+
# Predict function
|
43 |
@spaces.GPU
def predict(text, dialect):
    """Return the fill-mask candidates for *text* as ``{token: score}``.

    Args:
        text: Input sentence containing the mask placeholder.
        dialect: Dialect name used to pick the checkpoint via ``load_model``.

    Returns:
        A dict mapping each candidate ``token_str`` to its ``score`` as a
        float. If the input holds several masks, only the candidates for the
        first mask are returned.
    """
    global pipe
    # Reload only when nothing is loaded yet or the user switched dialect.
    if pipe is None or dialect != predict.current_dialect:
        pipe = load_model(dialect)
        predict.current_dialect = dialect
    outputs = pipe(text)
    # With more than one mask the pipeline returns one candidate list per
    # mask (a list of lists); indexing those with "score" would raise
    # TypeError, so fall back to the first mask's candidates.
    if outputs and isinstance(outputs[0], list):
        outputs = outputs[0]
    return {
        candidate["token_str"]: float(candidate["score"])
        for candidate in outputs
    }


# Dialect of the pipeline currently held in `pipe`; None until the first load.
predict.current_dialect = None
|
56 |
+
|
57 |
# Create Gradio interface
|
58 |
with gr.Blocks() as demo:
|
59 |
with gr.Row():
|
60 |
with gr.Column():
|
61 |
+
# Dropdown for dialect selection
|
62 |
+
dialect_dropdown = gr.Dropdown(
|
63 |
+
choices=["Arabic", "Tunisian", "Moroccan", "Algerian", "Egyptian"],
|
64 |
+
label="Select Dialect",
|
65 |
+
value="Arabic"
|
66 |
+
)
|
67 |
+
|
68 |
# Input text box
|
69 |
input_text = gr.Textbox(
|
70 |
label="Input",
|
|
|
77 |
clear_btn = gr.Button("Clear")
|
78 |
submit_btn = gr.Button("Submit", variant="primary")
|
79 |
|
80 |
+
# Examples section (dynamic based on dialect)
|
81 |
+
examples = gr.Examples(
|
82 |
+
examples=EXAMPLES["Arabic"], # Default to Arabic examples
|
83 |
+
inputs=input_text,
|
84 |
+
label="Examples"
|
85 |
)
|
86 |
|
87 |
with gr.Column():
|
|
|
91 |
show_label=False
|
92 |
)
|
93 |
|
94 |
+
# Update examples when dialect changes
|
95 |
+
def update_examples(dialect):
    """Return a Dataset update holding the example prompts for *dialect*.

    Dialects missing from ``EXAMPLES`` fall back to the "Arabic" prompts.
    """
    prompts = EXAMPLES.get(dialect, EXAMPLES["Arabic"])
    # A Dataset takes one row per example and one cell per input component;
    # there is a single input (the text box), so each row is a 1-item list.
    return gr.Dataset(samples=[[prompt] for prompt in prompts])


dialect_dropdown.change(
    update_examples,
    inputs=dialect_dropdown,
    # gr.Examples is a helper object, not a component, so it cannot be an
    # event output; the rendered table it owns is exposed as `.dataset`.
    outputs=examples.dataset,
)
|
103 |
+
|
104 |
# Button actions
|
105 |
submit_btn.click(
|
106 |
predict,
|
107 |
+
inputs=[input_text, dialect_dropdown],
|
108 |
outputs=output_labels
|
109 |
)
|
110 |
|