BounharAbdelaziz committed on
Commit
afcf6db
·
verified ·
1 Parent(s): eabec88

Added all supported languages

Browse files
Files changed (1) hide show
  1. app.py +59 -17
app.py CHANGED
@@ -4,36 +4,67 @@ from transformers import pipeline
4
  import os
5
  import spaces
6
 
7
- MODEL_PATH = "BounharAbdelaziz/ModernBERT-Arabic-base-stage-3-decay-mx8192-ARY"
8
- TOKEN=os.environ["HF_TOKEN"]
9
- device = "cuda:0" if torch.cuda.is_available() else "cpu"
 
 
 
 
 
10
 
 
 
11
 
12
- def load_model():
13
- pipe=pipeline(
 
 
14
  task="fill-mask",
15
- model=MODEL_PATH,
16
  token=TOKEN,
17
  device=device
18
  )
19
  return pipe
20
 
21
- print("[INFO] Lading pipeline model ...")
22
- pipe=load_model()
23
- print("[INFO] Pipeline loaded!")
24
 
 
 
 
 
 
 
 
 
25
 
 
26
  @spaces.GPU
27
- def predict(text):
 
 
 
 
28
  outputs = pipe(text)
29
- scores= [x["score"] for x in outputs]
30
- tokens= [x["token_str"] for x in outputs]
31
  return {label: float(prob) for label, prob in zip(tokens, scores)}
32
 
 
 
 
33
  # Create Gradio interface
34
  with gr.Blocks() as demo:
35
  with gr.Row():
36
  with gr.Column():
 
 
 
 
 
 
 
37
  # Input text box
38
  input_text = gr.Textbox(
39
  label="Input",
@@ -46,10 +77,11 @@ with gr.Blocks() as demo:
46
  clear_btn = gr.Button("Clear")
47
  submit_btn = gr.Button("Submit", variant="primary")
48
 
49
- # Examples section
50
- gr.Examples(
51
- examples=["العاصمة د <mask> هي الرباط","المغرب <mask> زوين","انا سميتي مريم، و كنسكن ف<mask> العاصمة دفلسطين"],
52
- inputs=input_text
 
53
  )
54
 
55
  with gr.Column():
@@ -59,10 +91,20 @@ with gr.Blocks() as demo:
59
  show_label=False
60
  )
61
 
 
 
 
 
 
 
 
 
 
 
62
  # Button actions
63
  submit_btn.click(
64
  predict,
65
- inputs=input_text,
66
  outputs=output_labels
67
  )
68
 
 
4
  import os
5
  import spaces
6
 
7
# Hugging Face checkpoint used for each dialect offered in the UI.
# Every entry must be followed by a comma: without one, Python treats the next
# key as an adjacent string literal and then rejects the ':' that follows it,
# so the module fails to import with a SyntaxError.
MODELS = {
    "Arabic": "BounharAbdelaziz/ModernBERT-Arabic-base-stage-3-decay-mx8192-MSA",
    # NOTE(review): "Tunisian" points at the same ARY checkpoint as "Moroccan";
    # confirm whether a dedicated Tunisian checkpoint should be used instead.
    "Tunisian": "BounharAbdelaziz/ModernBERT-Arabic-base-stage-3-decay-mx8192-ARY",
    "Moroccan": "BounharAbdelaziz/ModernBERT-Arabic-base-stage-3-decay-mx8192-ARY",
    "Algerian": "BounharAbdelaziz/ModernBERT-Arabic-base-stage-3-decay-mx8192-ALGERIAN",
    "Egyptian": "BounharAbdelaziz/ModernBERT-Arabic-base-stage-3-decay-mx8192-ARZ",
}
15
 
16
+ TOKEN = os.environ["HF_TOKEN"]
17
+ device = "cuda:0" if torch.cuda.is_available() else "cpu"
18
 
19
+ # Load the pipeline dynamically based on the selected dialect
20
+ def load_model(dialect):
21
+ model_path = MODELS.get(dialect, MODELS["Arabic"]) # Default to Arabic if dialect not found
22
+ pipe = pipeline(
23
  task="fill-mask",
24
+ model=model_path,
25
  token=TOKEN,
26
  device=device
27
  )
28
  return pipe
29
 
30
+ # Store the pipeline globally
31
+ pipe = None
 
32
 
33
+ # Define examples for each dialect
34
+ EXAMPLES = {
35
+ "Moroccan": ["الدار البيضاء [MASK]", "المغرب بلاد [MASK]", "كناكل [MASK] فالمغرب", "العاصمة د [MASK] هي الرباط","المغرب [MASK] زوين","انا سميتي مريم، و كنسكن ف[MASK] العاصمة دفلسطين"],
36
+ "Arabic": ["العاصمة د [MASK] هي الرباط", "المغرب [MASK] زوين", "انا سميتي مريم، و كنسكن ف[MASK] العاصمة دفلسطين"],
37
+ "Egyptian": ["القاهرة مدينة [MASK]", "مصر بلاد [MASK]", "بنحب [MASK] فمصر"],
38
+ "Tunisian": ["تونس بلاد [MASK]", "المنستير مدينة [MASK]", "عيشتي في [MASK]"],
39
+ "Algerian": ["الجزائر بلاد [MASK]", "قسنطينة مدينة [MASK]", "نحبو [MASK] ف الجزائر"],
40
+ }
41
 
42
# Predict function
@spaces.GPU
def predict(text, dialect):
    """Return the fill-mask candidates for ``text`` as a ``{token: score}`` dict.

    Args:
        text: Input sentence containing the mask token.
        dialect: Dialect name; passed to ``load_model`` to pick the checkpoint.

    Returns:
        A dict mapping each candidate ``token_str`` to its ``score`` as a
        float. When ``text`` contains several mask tokens, only the candidates
        for the first mask are returned.
    """
    global pipe
    # The pipeline lives in a module-level global and is rebuilt only when the
    # requested dialect differs from the one used by the previous call.
    if pipe is None or dialect != predict.current_dialect:
        pipe = load_model(dialect)
        predict.current_dialect = dialect

    outputs = pipe(text)
    # With more than one mask token the fill-mask pipeline returns one list of
    # candidates per mask (a list of lists) instead of a flat list of dicts.
    # Indexing those inner lists with "score" would raise TypeError, so fall
    # back to the candidates of the first mask.
    if outputs and isinstance(outputs[0], list):
        outputs = outputs[0]

    return {x["token_str"]: float(x["score"]) for x in outputs}


# Initialize current dialect
predict.current_dialect = None
56
+
57
  # Create Gradio interface
58
  with gr.Blocks() as demo:
59
  with gr.Row():
60
  with gr.Column():
61
+ # Dropdown for dialect selection
62
+ dialect_dropdown = gr.Dropdown(
63
+ choices=["Arabic", "Tunisian", "Moroccan", "Algerian", "Egyptian"],
64
+ label="Select Dialect",
65
+ value="Arabic"
66
+ )
67
+
68
  # Input text box
69
  input_text = gr.Textbox(
70
  label="Input",
 
77
  clear_btn = gr.Button("Clear")
78
  submit_btn = gr.Button("Submit", variant="primary")
79
 
80
+ # Examples section (dynamic based on dialect)
81
+ examples = gr.Examples(
82
+ examples=EXAMPLES["Arabic"], # Default to Arabic examples
83
+ inputs=input_text,
84
+ label="Examples"
85
  )
86
 
87
  with gr.Column():
 
91
  show_label=False
92
  )
93
 
94
# Update examples when dialect changes.
# NOTE: in app.py these statements belong inside the `with gr.Blocks() as demo:`
# body (the diff view drops indentation); keep them at that nesting level.
def update_examples(dialect):
    """Return a Dataset update listing the example sentences for ``dialect``.

    Falls back to the "Arabic" examples when ``dialect`` has no entry in
    ``EXAMPLES``.
    """
    sentences = EXAMPLES.get(dialect, EXAMPLES["Arabic"])
    # A Dataset sample is a row with one cell per input component; the
    # examples feed a single textbox, so each sentence becomes a 1-item row.
    return gr.Dataset(samples=[[sentence] for sentence in sentences])


# `gr.Examples` is a helper object, not a component, so it cannot be used as an
# event output; the component that actually renders the rows is `.dataset`.
dialect_dropdown.change(
    update_examples,
    inputs=dialect_dropdown,
    outputs=examples.dataset,
)
103
+
104
  # Button actions
105
  submit_btn.click(
106
  predict,
107
+ inputs=[input_text, dialect_dropdown],
108
  outputs=output_labels
109
  )
110