Spaces:
Running
Running
wissamantoun
committed on
Commit
•
3036e92
1
Parent(s):
29dc4f2
fixes
Browse files
- .gitignore +3 -0
- app.py +8 -8
- pages/processor.py +20 -16
.gitignore
CHANGED
@@ -127,3 +127,6 @@ dmypy.json
|
|
127 |
|
128 |
# Pyre type checker
|
129 |
.pyre/
|
|
|
|
|
|
|
|
127 |
|
128 |
# Pyre type checker
|
129 |
.pyre/
|
130 |
+
|
131 |
+
|
132 |
+
.vscode/
|
app.py
CHANGED
@@ -11,14 +11,14 @@ st.set_page_config(
|
|
11 |
PAGES = {"Home": pages.home, "Arabic Text Preprocessor": pages.processor}
|
12 |
|
13 |
|
14 |
-
|
15 |
-
|
16 |
-
st.sidebar.title("Navigation")
|
17 |
-
selection = st.sidebar.radio("Pages", list(PAGES.keys()))
|
18 |
|
19 |
-
|
|
|
20 |
ast.shared.components.write_page(page)
|
21 |
|
22 |
-
|
23 |
-
|
24 |
-
|
|
|
|
11 |
PAGES = {"Home": pages.home, "Arabic Text Preprocessor": pages.processor}
|
12 |
|
13 |
|
14 |
+
st.sidebar.title("Navigation")
|
15 |
+
selection = st.sidebar.radio("Pages", list(PAGES.keys()))
|
|
|
|
|
16 |
|
17 |
+
page = PAGES[selection]
|
18 |
+
with st.spinner(f"Loading {selection} ..."):
|
19 |
ast.shared.components.write_page(page)
|
20 |
|
21 |
+
st.sidebar.header("Info")
|
22 |
+
st.sidebar.write("Made by [Wissam Antoun](https://twitter.com/wissam_antoun)")
|
23 |
+
st.sidebar.write("[Models Repo](https://github.com/aub-mind/arabert)")
|
24 |
+
st.sidebar.write("Source Code [GitHub](https://github.com/WissamAntoun/Arabic-NLP-app)")
|
pages/processor.py
CHANGED
@@ -101,7 +101,7 @@ def _desegmentword(orig_word: str) -> str:
|
|
101 |
|
102 |
|
103 |
def write():
|
104 |
-
col1, _ = st.columns(
|
105 |
|
106 |
with col1:
|
107 |
col1.title("Arabic Text Pre-Processor")
|
@@ -120,26 +120,30 @@ def write():
|
|
120 |
value="ولن نبالغ إذا قلنا: إن 'هاتف' أو 'كمبيوتر المكتب' في زمننا هذا ضروري",
|
121 |
)
|
122 |
|
123 |
-
|
124 |
-
model_selector =
|
125 |
-
|
126 |
-
|
|
|
127 |
if model_selector == "None":
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
|
|
|
136 |
"Remove non-digit repetition", True
|
137 |
)
|
138 |
-
replace_slash_with_dash =
|
139 |
-
map_hindi_numbers_to_arabic =
|
140 |
"Map hindi numbers to arabic", None
|
141 |
)
|
142 |
-
apply_farasa_segmentation =
|
|
|
|
|
143 |
|
144 |
run_preprocessor = st.button("Run Pre-Processor")
|
145 |
|
|
|
101 |
|
102 |
|
103 |
def write():
|
104 |
+
_, col1, _ = st.columns(3)
|
105 |
|
106 |
with col1:
|
107 |
col1.title("Arabic Text Pre-Processor")
|
|
|
120 |
value="ولن نبالغ إذا قلنا: إن 'هاتف' أو 'كمبيوتر المكتب' في زمننا هذا ضروري",
|
121 |
)
|
122 |
|
123 |
+
st.sidebar.title("Model Selector")
|
124 |
+
model_selector = st.sidebar.selectbox(
|
125 |
+
"""Select None to enable further filters""",
|
126 |
+
options=MODELS_to_SELECT,
|
127 |
+
)
|
128 |
if model_selector == "None":
|
129 |
+
keep_emojis = st.sidebar.checkbox("Keep emojis", False)
|
130 |
+
remove_html_markup = st.sidebar.checkbox("Remove html markup", True)
|
131 |
+
strip_tashkeel = st.sidebar.checkbox("Strip tashkeel", True)
|
132 |
+
replace_urls_emails_mentions = st.sidebar.checkbox(
|
133 |
+
"Replace urls and emails", True
|
134 |
+
)
|
135 |
+
strip_tatweel = st.sidebar.checkbox("Strip tatweel", True)
|
136 |
+
insert_white_spaces = st.sidebar.checkbox("Insert white spaces", True)
|
137 |
+
remove_non_digit_repetition = st.sidebar.checkbox(
|
138 |
"Remove non-digit repetition", True
|
139 |
)
|
140 |
+
replace_slash_with_dash = st.sidebar.checkbox("Replace slash with dash", None)
|
141 |
+
map_hindi_numbers_to_arabic = st.sidebar.checkbox(
|
142 |
"Map hindi numbers to arabic", None
|
143 |
)
|
144 |
+
apply_farasa_segmentation = st.sidebar.checkbox(
|
145 |
+
"Apply farasa segmentation", None
|
146 |
+
)
|
147 |
|
148 |
run_preprocessor = st.button("Run Pre-Processor")
|
149 |
|