Spaces:
Runtime error
Runtime error
ncoop57
committed on
Commit
•
a2dd03e
1
Parent(s):
e0be252
Add additional checks
Browse files
- app.py +42 -5
- requirements.txt +1 -1
app.py
CHANGED
@@ -42,9 +42,12 @@ for name in dataset_names:
|
|
42 |
)
|
43 |
dataset_data[name] = {
|
44 |
"ds": ds,
|
45 |
-
"
|
46 |
"check_char_repetition_criteria": np.array(ds["check_char_repetition_criteria"]),
|
47 |
"check_flagged_words_criteria": np.array(ds["check_flagged_words_criteria"]),
|
|
|
|
|
|
|
48 |
}
|
49 |
|
50 |
def plt_plot(criteria, dataset, threshold):
|
@@ -83,6 +86,18 @@ def check_filtered(criteria, dataset, threshold):
|
|
83 |
with gr.Blocks() as demo:
|
84 |
dataset = gr.Radio(dataset_names, label="Dataset", value="arXiv")
|
85 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
86 |
with gr.Tab("Character Repetition Criteria"):
|
87 |
# plot some random data
|
88 |
plot = gr.Plot()
|
@@ -95,18 +110,18 @@ with gr.Blocks() as demo:
|
|
95 |
check_fn = partial(check_filtered, "check_char_repetition_criteria")
|
96 |
check.click(check_fn, [dataset, threshold], filtered_data)
|
97 |
|
98 |
-
with gr.Tab("Word
|
99 |
plot = gr.Plot()
|
100 |
threshold = gr.Slider(minimum=0, maximum=1, label="Threshold")
|
101 |
calculate = gr.Button("Calculate")
|
102 |
check = gr.Button("Check Filtered Data")
|
103 |
filtered_data = gr.Textbox(lines=5, label="Filtered Data")
|
104 |
-
plot_fn = partial(plt_plot, "
|
105 |
calculate.click(plot_fn, [dataset, threshold], plot)
|
106 |
-
check_fn = partial(check_filtered, "
|
107 |
check.click(check_fn, [dataset, threshold], filtered_data)
|
108 |
|
109 |
-
with gr.Tab("Flagged Word Criteria")
|
110 |
plot = gr.Plot()
|
111 |
threshold = gr.Slider(minimum=0, maximum=1, label="Threshold")
|
112 |
calculate = gr.Button("Calculate")
|
@@ -117,5 +132,27 @@ with gr.Blocks() as demo:
|
|
117 |
check_fn = partial(check_filtered, "check_flagged_words_criteria")
|
118 |
check.click(check_fn, [dataset, threshold], filtered_data)
|
119 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
120 |
if __name__ == "__main__":
|
121 |
demo.launch()
|
|
|
42 |
)
|
43 |
dataset_data[name] = {
|
44 |
"ds": ds,
|
45 |
+
"check_word_number_criteria": np.array(ds["check_word_number_criteria"]),
|
46 |
"check_char_repetition_criteria": np.array(ds["check_char_repetition_criteria"]),
|
47 |
"check_flagged_words_criteria": np.array(ds["check_flagged_words_criteria"]),
|
48 |
+
"check_stop_word_ratio_criteria": np.array(ds["check_stop_word_ratio_criteria"]),
|
49 |
+
"check_perplexity_criteria": np.array(ds["check_perplexity_criteria"]),
|
50 |
+
"check_language_criteria": np.array(ds["check_language_criteria"]),
|
51 |
}
|
52 |
|
53 |
def plt_plot(criteria, dataset, threshold):
|
|
|
86 |
with gr.Blocks() as demo:
|
87 |
dataset = gr.Radio(dataset_names, label="Dataset", value="arXiv")
|
88 |
|
89 |
+
with gr.Tab("Number of Words Criteria"):
|
90 |
+
# plot some random data
|
91 |
+
plot = gr.Plot()
|
92 |
+
threshold = gr.Slider(minimum=0, maximum=1, label="Threshold")
|
93 |
+
calculate = gr.Button("Calculate")
|
94 |
+
check = gr.Button("Check Filtered Data")
|
95 |
+
filtered_data = gr.Textbox(lines=5, label="Filtered Data")
|
96 |
+
plot_fn = partial(plt_plot, "check_word_number_criteria")
|
97 |
+
calculate.click(plot_fn, [dataset, threshold], plot)
|
98 |
+
check_fn = partial(check_filtered, "check_word_number_criteria")
|
99 |
+
check.click(check_fn, [dataset, threshold], filtered_data)
|
100 |
+
|
101 |
with gr.Tab("Character Repetition Criteria"):
|
102 |
# plot some random data
|
103 |
plot = gr.Plot()
|
|
|
110 |
check_fn = partial(check_filtered, "check_char_repetition_criteria")
|
111 |
check.click(check_fn, [dataset, threshold], filtered_data)
|
112 |
|
113 |
+
with gr.Tab("Stop Word Ratio Criteria"):
|
114 |
plot = gr.Plot()
|
115 |
threshold = gr.Slider(minimum=0, maximum=1, label="Threshold")
|
116 |
calculate = gr.Button("Calculate")
|
117 |
check = gr.Button("Check Filtered Data")
|
118 |
filtered_data = gr.Textbox(lines=5, label="Filtered Data")
|
119 |
+
plot_fn = partial(plt_plot, "check_stop_word_ratio_criteria")
|
120 |
calculate.click(plot_fn, [dataset, threshold], plot)
|
121 |
+
check_fn = partial(check_filtered, "check_stop_word_ratio_criteria")
|
122 |
check.click(check_fn, [dataset, threshold], filtered_data)
|
123 |
|
124 |
+
with gr.Tab("Flagged Word Criteria"):
|
125 |
plot = gr.Plot()
|
126 |
threshold = gr.Slider(minimum=0, maximum=1, label="Threshold")
|
127 |
calculate = gr.Button("Calculate")
|
|
|
132 |
check_fn = partial(check_filtered, "check_flagged_words_criteria")
|
133 |
check.click(check_fn, [dataset, threshold], filtered_data)
|
134 |
|
135 |
+
with gr.Tab("Perplexity Criteria"):
|
136 |
+
plot = gr.Plot()
|
137 |
+
threshold = gr.Slider(minimum=0, maximum=1, label="Threshold")
|
138 |
+
calculate = gr.Button("Calculate")
|
139 |
+
check = gr.Button("Check Filtered Data")
|
140 |
+
filtered_data = gr.Textbox(lines=5, label="Filtered Data")
|
141 |
+
plot_fn = partial(plt_plot, "check_perplexity_criteria")
|
142 |
+
calculate.click(plot_fn, [dataset, threshold], plot)
|
143 |
+
check_fn = partial(check_filtered, "check_perplexity_criteria")
|
144 |
+
check.click(check_fn, [dataset, threshold], filtered_data)
|
145 |
+
|
146 |
+
with gr.Tab("Language Detection Criteria"):
|
147 |
+
plot = gr.Plot()
|
148 |
+
threshold = gr.Slider(minimum=0, maximum=1, label="Threshold")
|
149 |
+
calculate = gr.Button("Calculate")
|
150 |
+
check = gr.Button("Check Filtered Data")
|
151 |
+
filtered_data = gr.Textbox(lines=5, label="Filtered Data")
|
152 |
+
plot_fn = partial(plt_plot, "check_language_criteria")
|
153 |
+
calculate.click(plot_fn, [dataset, threshold], plot)
|
154 |
+
check_fn = partial(check_filtered, "check_language_criteria")
|
155 |
+
check.click(check_fn, [dataset, threshold], filtered_data)
|
156 |
+
|
157 |
if __name__ == "__main__":
|
158 |
demo.launch()
|
requirements.txt
CHANGED
@@ -1,2 +1,2 @@
|
|
1 |
scrubadub
|
2 |
-
squeakily
|
|
|
1 |
scrubadub
|
2 |
+
git+https://github.com/CarperAI/squeakily.git
|