added input text preprocessor
Browse files
app.py
CHANGED
@@ -1,3 +1,4 @@
|
|
|
|
1 |
import streamlit as st
|
2 |
from qg_pipeline import Pipeline
|
3 |
|
@@ -5,6 +6,11 @@ from qg_pipeline import Pipeline
|
|
5 |
import nltk
|
6 |
nltk.download('punkt')
|
7 |
|
|
|
|
|
|
|
|
|
|
|
8 |
|
9 |
# Add a model selector to the sidebar
|
10 |
q_model = st.sidebar.selectbox(
|
@@ -30,7 +36,7 @@ pipeline = Pipeline(
|
|
30 |
)
|
31 |
|
32 |
if len(txt) >= 1:
|
33 |
-
autocards = pipeline(txt)
|
34 |
else:
|
35 |
autocards = []
|
36 |
|
|
|
1 |
+
import re
|
2 |
import streamlit as st
|
3 |
from qg_pipeline import Pipeline
|
4 |
|
|
|
6 |
import nltk
|
7 |
nltk.download('punkt')
|
8 |
|
9 |
+
def preprocess_text(text: str) -> str:
    """Clean raw input text.

    Three steps are applied in order:

    1. Bracketed numeric markers such as ``[3]`` or ``[12]`` are removed.
    2. Every run of two or more whitespace characters (spaces, tabs,
       newlines) is collapsed into a single space. A lone whitespace
       character is left as it is.
    3. Leading and trailing whitespace is stripped.

    Args:
        text: The raw text to clean.

    Returns:
        The cleaned text; may be empty if the input held nothing but
        whitespace and numeric markers.
    """
    # Raw string literals keep the regex escapes away from Python's own
    # string-escape handling; the non-raw forms are invalid escape sequences
    # and trigger a warning on recent interpreters.
    text = re.sub(r'\[[0-9]+\]', '', text)
    # Runs after marker removal so the gap a removed marker leaves behind
    # (e.g. "word [1] next" -> "word  next") is collapsed as well.
    text = re.sub(r'\s{2,}', ' ', text)
    return text.strip()
|
14 |
|
15 |
# Add a model selector to the sidebar
|
16 |
q_model = st.sidebar.selectbox(
|
|
|
36 |
)
|
37 |
|
38 |
# Preprocess first and decide on the cleaned text: input that consists only of
# whitespace or bracketed numeric markers is empty after preprocessing and
# should not be sent to the pipeline.
cleaned_txt = preprocess_text(txt)
if cleaned_txt:
    autocards = pipeline(cleaned_txt)
else:
    autocards = []
|
42 |
|