Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -64,8 +64,6 @@ class callback(CallbackAny2Vec):
|
|
64 |
|
65 |
|
66 |
|
67 |
-
|
68 |
-
|
69 |
def spacy_lemmatize_text(text):
|
70 |
text = nlp(text)
|
71 |
text = ' '.join([word.lemma_ if word.lemma_ != '-PRON-' else word.text for word in text])
|
@@ -105,25 +103,34 @@ def pre_process():
|
|
105 |
|
106 |
return nltk.word_tokenize(sentenceLemStopped)
|
107 |
|
108 |
-
def classify(new_column = True):
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
for
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
117 |
|
118 |
-
value = reconstructed_model_CNN.predict(MCTIinput_vector)[0]
|
119 |
-
|
120 |
-
# if value >= 0.5:
|
121 |
-
# return Image.open(r"elegivel.png")
|
122 |
-
# else:
|
123 |
-
# return Image.open(r"inelegivel.png")
|
124 |
|
125 |
-
|
126 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
127 |
|
128 |
def gen_output(data):
|
129 |
data.to_excel("output.xlsx", index=False)
|
@@ -148,22 +155,22 @@ def app(operacao, resultado, dados):
|
|
148 |
data.to_excel("output.xlsx")
|
149 |
return "output.xlsx"
|
150 |
|
151 |
-
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
-
|
165 |
|
166 |
-
|
167 |
|
168 |
iface = gr.Interface(
|
169 |
fn=app,
|
|
|
64 |
|
65 |
|
66 |
|
|
|
|
|
67 |
def spacy_lemmatize_text(text):
|
68 |
text = nlp(text)
|
69 |
text = ' '.join([word.lemma_ if word.lemma_ != '-PRON-' else word.text for word in text])
|
|
|
103 |
|
104 |
return nltk.word_tokenize(sentenceLemStopped)
|
105 |
|
106 |
+
def _parse_token_list(serialized):
    """Parse one serialized token list (e.g. "['a', 'b']") into a Python list."""
    import ast  # local import: the file's import block is outside this view

    try:
        # Handles Python-style list reprs, including tokens that contain
        # apostrophes or double quotes, which the quote-swapping parse breaks on.
        return ast.literal_eval(serialized)
    except (ValueError, SyntaxError):
        # Not a Python literal: fall back to the original quote-swapping JSON
        # parse so inputs that used to work still do.
        return json.loads(serialized.replace("'", '"'))


def classify(df, new_column=True):
    """Classify every row of *df* with the CNN model and store the labels.

    Each cell of the ``opo_pre_tkn`` column is parsed as a serialized list of
    tokens. Every token is replaced by its word2vec vector (a zero vector when
    the token is not in the vocabulary), the sequences are padded to a fixed
    length, and each CNN prediction is thresholded at 0.5.

    Args:
        df: DataFrame with an ``opo_pre_tkn`` column of serialized token
            lists. It is modified in place.
        new_column: Currently unused by this function; kept so existing
            callers keep working.

    Returns:
        The same ``df`` object, with a ``classification`` column holding 1
        for predictions >= 0.5 and 0 otherwise.
    """
    serialized_sentences = df['opo_pre_tkn']
    if serialized_sentences.empty:
        # Nothing to classify: skip the model call, which would otherwise
        # receive an array without the embedding dimension.
        df['classification'] = []
        return df

    word_vectors = reloaded_w2v_model.wv
    # `wv.vocab` no longer exists in gensim 4; `vector_size` gives the
    # embedding width in both gensim 3.x and 4.x.
    unknown_vector = np.zeros(word_vectors.vector_size)

    MCTIinput_vector = []
    for serialized in serialized_sentences:
        sentence_vectors = []
        for word in _parse_token_list(serialized):
            try:
                sentence_vectors.append(word_vectors[word])
            except KeyError:
                # Out-of-vocabulary token: represent it with zeros.
                sentence_vectors.append(unknown_vector)
        MCTIinput_vector.append(sentence_vectors)

    # NOTE(review): pad_sequences defaults to dtype='int32', which truncates
    # float embeddings. Confirm this matches how the model was trained before
    # changing it. 2726 is presumably the sequence length the CNN expects --
    # TODO confirm.
    MCTIinput_padded = pad_sequences(MCTIinput_vector, maxlen=2726, padding='pre')

    predictions = reconstructed_model_CNN.predict(MCTIinput_padded)
    df['classification'] = [
        1 if prediction >= 0.5 else 0 for prediction in predictions
    ]
    return df
|
134 |
|
135 |
def gen_output(data):
|
136 |
data.to_excel("output.xlsx", index=False)
|
|
|
155 |
data.to_excel("output.xlsx")
|
156 |
return "output.xlsx"
|
157 |
|
158 |
+
if operacao == "Pré-processamento + Classificação" :
|
159 |
+
pre_process()
|
160 |
+
classify(resultado == "Nova Coluna")
|
161 |
+
output = gen_output()
|
162 |
|
163 |
+
return output
|
164 |
+
elif operacao == "Apenas Pré-processamento" :
|
165 |
+
pre_process()
|
166 |
+
output = gen_output()
|
167 |
|
168 |
+
return output
|
169 |
+
elif operacao == "Apenas Classificação" :
|
170 |
+
df = classify(data, resultado == "Nova Coluna")
|
171 |
+
output = gen_output(df)
|
172 |
|
173 |
+
return output
|
174 |
|
175 |
iface = gr.Interface(
|
176 |
fn=app,
|