Spaces:
Running
Running
🚧 streamline vars
Browse files
Signed-off-by: peter szemraj <peterszemraj@gmail.com>
app.py
CHANGED
@@ -241,21 +241,24 @@ def proc_submission(
|
|
241 |
|
242 |
st = time.perf_counter()
|
243 |
history = {}
|
244 |
-
|
245 |
-
|
246 |
logging.info(
|
247 |
-
f"pre-truncation word count: {len(contraction_aware_tokenize(
|
|
|
|
|
|
|
248 |
)
|
249 |
-
truncation_validated = truncate_word_count(clean_text, max_words=max_input_length)
|
250 |
|
251 |
if truncation_validated["was_truncated"]:
|
252 |
model_input_text = truncation_validated["processed_text"]
|
253 |
# create elaborate HTML warning
|
254 |
-
input_wc =
|
255 |
msg = f"""
|
256 |
<div style="background-color: #FFA500; color: white; padding: 20px;">
|
257 |
<h3>Warning</h3>
|
258 |
<p>Input text was truncated to {max_input_length} words. That's about {100*max_input_length/len(input_wc):.2f}% of the submission.</p>
|
|
|
259 |
</div>
|
260 |
"""
|
261 |
logging.warning(msg)
|
|
|
241 |
|
242 |
st = time.perf_counter()
|
243 |
history = {}
|
244 |
+
cln_text = clean(input_text, lower=False)
|
245 |
+
parsed_cln_text = remove_stopwords(cln_text) if predrop_stopwords else cln_text
|
246 |
logging.info(
|
247 |
+
f"pre-truncation word count: {len(contraction_aware_tokenize(parsed_cln_text))}"
|
248 |
+
)
|
249 |
+
truncation_validated = truncate_word_count(
|
250 |
+
parsed_cln_text, max_words=max_input_length
|
251 |
)
|
|
|
252 |
|
253 |
if truncation_validated["was_truncated"]:
|
254 |
model_input_text = truncation_validated["processed_text"]
|
255 |
# create elaborate HTML warning
|
256 |
+
input_wc = len(contraction_aware_tokenize(parsed_cln_text))
|
257 |
msg = f"""
|
258 |
<div style="background-color: #FFA500; color: white; padding: 20px;">
|
259 |
<h3>Warning</h3>
|
260 |
<p>Input text was truncated to {max_input_length} words. That's about {100*max_input_length/len(input_wc):.2f}% of the submission.</p>
|
261 |
+
<p>Dropping stopwords is set to {predrop_stopwords}. If this is not what you intended, please validate the advanced settings.</p>
|
262 |
</div>
|
263 |
"""
|
264 |
logging.warning(msg)
|