Spaces:
Runtime error
Runtime error
HugoLaurencon
committed on
Commit
·
fa81556
1
Parent(s):
0610f9d
visualization: small step for the slider on flagged words ratio
Browse files
app.py
CHANGED
@@ -282,9 +282,16 @@ class Visualization_for_lang:
|
|
282 |
new_flagged_words,
|
283 |
)
|
284 |
cutoff_def = "If the flagged words ratio of a document is higher than this number, the document is removed."
|
285 |
-
max_fwr =
|
|
|
|
|
286 |
cutoff_flagged_words_ratio = st.slider(
|
287 |
-
cutoff_def,
|
|
|
|
|
|
|
|
|
|
|
288 |
)
|
289 |
new_key = ("flagged_words_ratio", cutoff_flagged_words_ratio, True)
|
290 |
keys.append(new_key)
|
@@ -481,7 +488,9 @@ class Visualization_for_lang:
|
|
481 |
for i in range(len(self.words["incorrect_substrings"]))
|
482 |
]
|
483 |
)
|
484 |
-
Visualization_for_lang.print_discarded_by_cond(
|
|
|
|
|
485 |
conds_words["incorrect_substrings"] = cond_incorrect_substrings
|
486 |
|
487 |
all_conds_words = np.all(list(conds_words.values()), axis=0)
|
@@ -723,7 +732,10 @@ class Visualization:
|
|
723 |
)
|
724 |
|
725 |
def choose_lang(self):
|
726 |
-
options = [
|
|
|
|
|
|
|
727 |
index = options.index("English") if ("English" in options) else 0
|
728 |
lang_chosen = st.selectbox(
|
729 |
label="Select the language for visualization",
|
@@ -731,23 +743,37 @@ class Visualization:
|
|
731 |
index=index,
|
732 |
)
|
733 |
if lang_chosen != "None":
|
734 |
-
lang_chosen_dataset_id = langs_id.loc[
|
|
|
|
|
735 |
visualization_for_lang = Visualization_for_lang(
|
736 |
-
path_data
|
737 |
-
lang
|
738 |
-
num_docs
|
739 |
-
num_docs_for_words
|
740 |
-
|
741 |
-
|
742 |
-
|
743 |
-
|
744 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
745 |
)
|
746 |
visualization_for_lang.visualization_for_lang()
|
747 |
|
748 |
def visualization(self):
|
749 |
self.preamble()
|
750 |
-
self.warning_preamble()
|
751 |
self.choose_lang()
|
752 |
|
753 |
|
|
|
282 |
new_flagged_words,
|
283 |
)
|
284 |
cutoff_def = "If the flagged words ratio of a document is higher than this number, the document is removed."
|
285 |
+
max_fwr = np.max(self.docs["flagged_words_ratio"])
|
286 |
+
max_fwr = np.ceil(max_fwr * 1000) / 1000
|
287 |
+
max_fwr = float(max_fwr)
|
288 |
cutoff_flagged_words_ratio = st.slider(
|
289 |
+
cutoff_def,
|
290 |
+
0.000,
|
291 |
+
max_fwr,
|
292 |
+
max_fwr,
|
293 |
+
step=0.001,
|
294 |
+
format="%f",
|
295 |
)
|
296 |
new_key = ("flagged_words_ratio", cutoff_flagged_words_ratio, True)
|
297 |
keys.append(new_key)
|
|
|
488 |
for i in range(len(self.words["incorrect_substrings"]))
|
489 |
]
|
490 |
)
|
491 |
+
Visualization_for_lang.print_discarded_by_cond(
|
492 |
+
cond_incorrect_substrings
|
493 |
+
)
|
494 |
conds_words["incorrect_substrings"] = cond_incorrect_substrings
|
495 |
|
496 |
all_conds_words = np.all(list(conds_words.values()), axis=0)
|
|
|
732 |
)
|
733 |
|
734 |
def choose_lang(self):
|
735 |
+
options = [
|
736 |
+
self.param_visu_langs[lang_dataset_id]["lang"]
|
737 |
+
for lang_dataset_id in self.param_visu_langs
|
738 |
+
]
|
739 |
index = options.index("English") if ("English" in options) else 0
|
740 |
lang_chosen = st.selectbox(
|
741 |
label="Select the language for visualization",
|
|
|
743 |
index=index,
|
744 |
)
|
745 |
if lang_chosen != "None":
|
746 |
+
lang_chosen_dataset_id = langs_id.loc[
|
747 |
+
langs_id["lang"] == lang_chosen, "dataset_id"
|
748 |
+
].iloc[0]
|
749 |
visualization_for_lang = Visualization_for_lang(
|
750 |
+
path_data=self.param_visu_langs[lang_chosen_dataset_id]["path_data"],
|
751 |
+
lang=self.param_visu_langs[lang_chosen_dataset_id]["lang"],
|
752 |
+
num_docs=self.param_visu_langs[lang_chosen_dataset_id]["num_docs"],
|
753 |
+
num_docs_for_words=self.param_visu_langs[lang_chosen_dataset_id][
|
754 |
+
"num_docs_for_words"
|
755 |
+
],
|
756 |
+
max_len_text_display=self.param_visu_langs[lang_chosen_dataset_id][
|
757 |
+
"max_len_text_display"
|
758 |
+
],
|
759 |
+
lang_dataset_id=self.param_visu_langs[lang_chosen_dataset_id][
|
760 |
+
"lang_dataset_id"
|
761 |
+
],
|
762 |
+
path_fasttext_model=self.param_visu_langs[lang_chosen_dataset_id][
|
763 |
+
"path_fasttext_model"
|
764 |
+
],
|
765 |
+
path_sentencepiece_model=self.param_visu_langs[lang_chosen_dataset_id][
|
766 |
+
"path_sentencepiece_model"
|
767 |
+
],
|
768 |
+
path_kenlm_model=self.param_visu_langs[lang_chosen_dataset_id][
|
769 |
+
"path_kenlm_model"
|
770 |
+
],
|
771 |
)
|
772 |
visualization_for_lang.visualization_for_lang()
|
773 |
|
774 |
def visualization(self):
|
775 |
self.preamble()
|
776 |
+
# self.warning_preamble()
|
777 |
self.choose_lang()
|
778 |
|
779 |
|