musfiqdehan committed on
Commit
03e97cd
1 Parent(s): 6b83453

Refactor alignment and translation process

Browse files
Files changed (1) hide show
  1. app.py +131 -49
app.py CHANGED
@@ -2,25 +2,33 @@ import gradio as gr
2
  from gradio_rich_textbox import RichTextbox
3
 
4
  from helper.text_preprocess import space_punc
5
- from helper.alignment_mappers import select_model
6
- from helper.pos_taggers import select_pos_tagger
7
- from helper.translators import select_translator
8
 
9
 
10
def bn_postagger(src, translator, model_name, tagger):
    """
    Run the Bangla PoS tagging pipeline for one sentence.

    The sentence is passed through ``space_punc``, translated with the
    backend chosen by ``translator``, and then handed to the PoS tagger
    selected by ``tagger`` together with the resolved alignment model.

    Args:
        src: Sentence typed by the user.
        translator: Translator choice forwarded to ``select_translator``.
        model_name: Model choice; resolved through ``select_model``.
        tagger: PoS tagger choice forwarded to ``select_pos_tagger``.

    Returns:
        The tuple ``(tgt_base, result, pos_accuracy)``: the first value
        produced by ``select_translator`` followed by the two values
        produced by ``select_pos_tagger``.
    """
    prepared_src = space_punc(src)

    # select_translator yields two values; the first is returned to the
    # caller as-is, the second is what the tagger consumes.
    tgt_base, tgt = select_translator(prepared_src, translator)

    resolved_model = select_model(model_name)

    result, pos_accuracy = select_pos_tagger(
        prepared_src, tgt, resolved_model, tagger
    )

    return tgt_base, result, pos_accuracy
24
 
25
 
26
  with gr.Blocks(css="styles.css") as demo:
@@ -31,20 +39,121 @@ with gr.Blocks(css="styles.css") as demo:
31
  with gr.Column():
32
  inputs = [
33
  gr.Textbox(
34
- label="Enter Bangla Sentence",
35
- placeholder="বাংলা বাক্য লিখুন"
36
  ),
37
  gr.Dropdown(
38
- choices=["Google", "BanglaNMT", "MyMemory"],
39
- label="Select a Translator"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  ),
41
  gr.Dropdown(
42
  choices=["Google-mBERT (Base-Multilingual)", "Neulab-AwesomeAlign (Bn-En-0.5M)", "BUET-BanglaBERT (Large)", "SagorSarker-BanglaBERT (Base)", "SentenceTransformers-LaBSE (Multilingual)"],
43
  label="Select a Model"
44
- ),
45
- gr.Dropdown(
46
- choices=["spaCy", "NLTK", "Flair", "TextBlob"],
47
- label="Select a PoS Tagger"
48
  )
49
  ]
50
 
@@ -54,54 +163,27 @@ with gr.Blocks(css="styles.css") as demo:
54
  with gr.Column():
55
  outputs = [
56
  gr.Textbox(label="English Translation"),
57
- RichTextbox(label="PoS Tags"),
58
- gr.Textbox(label="PoS Tagging Accuracy (Based on Unknown(UNK) Tags)")
59
  ]
60
 
61
- btn.click(bn_postagger, inputs, outputs)
62
 
63
  gr.Examples([
64
  [
65
  "বাংলাদেশ দক্ষিণ এশিয়ার একটি সার্বভৌম রাষ্ট্র।",
66
- "Google",
67
- "Neulab-AwesomeAlign (Bn-En-0.5M)",
68
- "NLTK"
69
  ],
70
  [
71
  "বাংলাদেশের সংবিধানিক নাম কি?",
72
- "Google",
73
  "Google-mBERT (Base-Multilingual)",
74
- "spaCy"
75
  ],
76
  [
77
  "বাংলাদেশের সাংবিধানিক নাম গণপ্রজাতন্ত্রী বাংলাদেশ।",
78
- "Google",
79
  "Google-mBERT (Base-Multilingual)",
80
- "TextBlob"
81
- ],
82
- [
83
- "তিনজনের কেউই বাবার পথ ধরে প্রযুক্তি দুনিয়ায় হাঁটেননি।",
84
- "Google",
85
- "Neulab-AwesomeAlign (Bn-En-0.5M)",
86
- "spaCy"
87
- ],
88
- [
89
- "তিনজনের কেউই বাবার পথ ধরে প্রযুক্তি দুনিয়ায় হাঁটেননি।",
90
- "BanglaNMT",
91
- "Google-mBERT (Base-Multilingual)",
92
- "spaCy"
93
- ],
94
- [
95
- "তিনজনের কেউই বাবার পথ ধরে প্রযুক্তি দুনিয়ায় হাঁটেননি।",
96
- "MyMemory",
97
- "Google-mBERT (Base-Multilingual)",
98
- "spaCy"
99
- ],
100
- [
101
- "বিশ্বের আরও একটি সেরা ক্লাব।",
102
- "Google",
103
- "Neulab-AwesomeAlign (Bn-En-0.5M)",
104
- "Flair"
105
  ]
106
 
107
  ], inputs)
 
2
  from gradio_rich_textbox import RichTextbox
3
 
4
  from helper.text_preprocess import space_punc
5
+ from helper.alignment_mappers import select_model, get_alignments_table
6
+ from helper.translators import select_target_lang_code, google_translation
 
7
 
8
 
9
def process_alignments(src, language_name, model_name):
    """
    Translate a sentence and build its source-to-target alignment table.

    Pipeline: ``space_punc`` on the input, ``select_target_lang_code`` on
    the chosen language name, ``google_translation`` of the sentence into
    that target, ``select_model`` on the chosen model name, and finally
    ``get_alignments_table`` on the source/target pair.

    Args:
        src: Sentence typed by the user.
        language_name: Target language name picked in the UI; converted by
            ``select_target_lang_code`` (presumably to a language code --
            confirm in helper.translators).
        model_name: Alignment model choice; resolved through
            ``select_model``.

    Returns:
        The tuple ``(tgt, html_table, alignment_accuracy)``: the value
        returned by ``google_translation`` followed by the two values
        returned by ``get_alignments_table``.
    """
    src = space_punc(src)

    # Keep the language code and the translated text in separate names;
    # the original reused ``tgt`` for both.
    target_lang_code = select_target_lang_code(language_name)
    tgt = google_translation(src, target_lang_code)

    model_name = select_model(model_name)

    html_table, alignment_accuracy = get_alignments_table(
        source=src,
        target=tgt,
        model_name=model_name,
    )

    return tgt, html_table, alignment_accuracy
32
 
33
 
34
  with gr.Blocks(css="styles.css") as demo:
 
39
  with gr.Column():
40
  inputs = [
41
  gr.Textbox(
42
+ label="Enter a Sentence (Auto Detect Language)",
 
43
  ),
44
  gr.Dropdown(
45
+ choices=
46
+ [
47
+ "Afrikaans",
48
+ "Albanian",
49
+ "Arabic",
50
+ "Aragonese",
51
+ "Armenian",
52
+ "Asturian",
53
+ "Azerbaijani",
54
+ "Bashkir",
55
+ "Basque",
56
+ "Bavarian",
57
+ "Belarusian",
58
+ "Bengali",
59
+ "Bishnupriya Manipuri",
60
+ "Bosnian",
61
+ "Breton",
62
+ "Bulgarian",
63
+ "Burmese",
64
+ "Catalan",
65
+ "Cebuano",
66
+ "Chechen",
67
+ "Chinese (Simplified)",
68
+ "Chinese (Traditional)",
69
+ "Chuvash",
70
+ "Croatian",
71
+ "Czech",
72
+ "Danish",
73
+ "Dutch",
74
+ "English",
75
+ "Estonian",
76
+ "Finnish",
77
+ "French",
78
+ "Galician",
79
+ "Georgian",
80
+ "German",
81
+ "Greek",
82
+ "Gujarati",
83
+ "Haitian",
84
+ "Hebrew",
85
+ "Hindi",
86
+ "Hungarian",
87
+ "Icelandic",
88
+ "Ido",
89
+ "Indonesian",
90
+ "Irish",
91
+ "Italian",
92
+ "Japanese",
93
+ "Javanese",
94
+ "Kannada",
95
+ "Kazakh",
96
+ "Kirghiz",
97
+ "Korean",
98
+ "Latin",
99
+ "Latvian",
100
+ "Lithuanian",
101
+ "Lombard",
102
+ "Low Saxon",
103
+ "Luxembourgish",
104
+ "Macedonian",
105
+ "Malagasy",
106
+ "Malay",
107
+ "Malayalam",
108
+ "Marathi",
109
+ "Minangkabau",
110
+ "Nepali",
111
+ "Newar",
112
+ "Norwegian (Bokmal)",
113
+ "Norwegian (Nynorsk)",
114
+ "Occitan",
115
+ "Persian (Farsi)",
116
+ "Piedmontese",
117
+ "Polish",
118
+ "Portuguese",
119
+ "Punjabi",
120
+ "Romanian",
121
+ "Russian",
122
+ "Scots",
123
+ "Serbian",
124
+ "Serbo-Croatian",
125
+ "Sicilian",
126
+ "Slovak",
127
+ "Slovenian",
128
+ "South Azerbaijani",
129
+ "Spanish",
130
+ "Sundanese",
131
+ "Swahili",
132
+ "Swedish",
133
+ "Tagalog",
134
+ "Tajik",
135
+ "Tamil",
136
+ "Tatar",
137
+ "Telugu",
138
+ "Turkish",
139
+ "Ukrainian",
140
+ "Urdu",
141
+ "Uzbek",
142
+ "Vietnamese",
143
+ "Volapük",
144
+ "Waray-Waray",
145
+ "Welsh",
146
+ "West Frisian",
147
+ "Western Punjabi",
148
+ "Yoruba",
149
+ "Thai",
150
+ "Mongolian"
151
+ ],
152
+ label="Select Target Language"
153
  ),
154
  gr.Dropdown(
155
  choices=["Google-mBERT (Base-Multilingual)", "Neulab-AwesomeAlign (Bn-En-0.5M)", "BUET-BanglaBERT (Large)", "SagorSarker-BanglaBERT (Base)", "SentenceTransformers-LaBSE (Multilingual)"],
156
  label="Select a Model"
 
 
 
 
157
  )
158
  ]
159
 
 
163
  with gr.Column():
164
  outputs = [
165
  gr.Textbox(label="English Translation"),
166
+ RichTextbox(label="Alignments Mapping (Source to Target)"),
167
+ gr.Textbox(label="Alignment Accuracy (Based on Unknown(UNK) Tags)")
168
  ]
169
 
170
+ btn.click(process_alignments, inputs, outputs)
171
 
172
  gr.Examples([
173
  [
174
  "বাংলাদেশ দক্ষিণ এশিয়ার একটি সার্বভৌম রাষ্ট্র।",
175
+ "English",
176
+ "SentenceTransformers-LaBSE (Multilingual)",
 
177
  ],
178
  [
179
  "বাংলাদেশের সংবিধানিক নাম কি?",
180
+ "English",
181
  "Google-mBERT (Base-Multilingual)",
 
182
  ],
183
  [
184
  "বাংলাদেশের সাংবিধানিক নাম গণপ্রজাতন্ত্রী বাংলাদেশ।",
185
+ "Hindi",
186
  "Google-mBERT (Base-Multilingual)",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
187
  ]
188
 
189
  ], inputs)