Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -1,15 +1,163 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import gradio as gr
|
2 |
import pandas as pd
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
|
4 |
def app(operacao, resultado, dados):
    """Write a fixed demo table of boxes to ``output.xlsx``.

    The three arguments are accepted but not used by this version of the
    function; nothing is returned.
    """
    colors = ['Green','Green','Green','Blue','Blue','Red','Red','Red']
    shapes = ['Rectangle','Rectangle','Square','Rectangle','Square','Square','Square','Rectangle']
    prices = [10,15,5,5,10,15,15,5]

    table = pd.DataFrame(
        {'Color': colors, 'Shape': shapes, 'Price': prices},
        columns=['Color', 'Shape', 'Price'],
    )
    table.to_excel("output.xlsx")
|
11 |
-
|
12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
|
14 |
iface = gr.Interface(
|
15 |
fn=app,
|
|
|
1 |
+
import os
|
2 |
+
import re
|
3 |
+
import contractions
|
4 |
+
import unicodedata
|
5 |
+
import spacy
|
6 |
+
import keras
|
7 |
+
import requests
|
8 |
+
import shutil
|
9 |
+
import json
|
10 |
import gradio as gr
|
11 |
import pandas as pd
|
12 |
+
import numpy as np
|
13 |
+
from PIL import Image
|
14 |
+
from keras import backend as K
|
15 |
+
from keras.utils.data_utils import pad_sequences
|
16 |
+
from gensim.models import Word2Vec
|
17 |
+
from gensim.models.callbacks import CallbackAny2Vec
|
18 |
+
|
19 |
+
import nltk
|
20 |
+
# Fetch the NLTK resources used by the tokenizer and the stop-word filter.
nltk.download('punkt')
nltk.download('stopwords')

# Install the small English spaCy pipeline at start-up so it can be imported
# right below. (The original line ended with a stray `]`, a SyntaxError.)
os.system('python -m spacy download en_core_web_sm')

import en_core_web_sm

# Shared spaCy pipeline used for lemmatization.
nlp = en_core_web_sm.load()
|
27 |
+
|
28 |
+
|
29 |
+
def recall_m(y_true, y_pred):
    """Return the recall of ``y_pred`` against ``y_true`` as a Keras tensor.

    Both the element-wise product ``y_true * y_pred`` and ``y_true`` are
    clipped to [0, 1] and rounded before being summed. ``K.epsilon()`` is
    added to the denominator so that a batch without positives does not
    divide by zero.
    """
    hits = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    actual_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    return hits / (actual_positives + K.epsilon())
|
34 |
+
|
35 |
+
def precision_m(y_true, y_pred):
    """Return the precision of ``y_pred`` against ``y_true`` as a Keras tensor.

    Both the element-wise product ``y_true * y_pred`` and ``y_pred`` are
    clipped to [0, 1] and rounded before being summed. ``K.epsilon()`` is
    added to the denominator so that a batch without predicted positives does
    not divide by zero.
    """
    hits = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    flagged_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
    return hits / (flagged_positives + K.epsilon())
|
40 |
+
|
41 |
+
def f1_m(y_true, y_pred):
    """Return the F1 score built from ``precision_m`` and ``recall_m``.

    Computed as ``2 * (p * r) / (p + r + K.epsilon())``; the epsilon keeps the
    division defined when precision and recall are both zero.
    """
    p = precision_m(y_true, y_pred)
    r = recall_m(y_true, y_pred)
    numerator = p * r
    denominator = p + r + K.epsilon()
    return 2 * (numerator / denominator)
|
45 |
+
|
46 |
+
|
47 |
+
#initialise callback class
|
48 |
+
class callback(CallbackAny2Vec):
    """Training callback that prints the per-epoch loss every 100th epoch."""

    def __init__(self):
        # The loss reported by the model is treated as cumulative, so the
        # previous reading is kept in order to print only the difference.
        self.loss_previous_step = 0
        self.epoch = 0

    def on_epoch_end(self, model):
        """Print the loss delta on every 100th epoch, then advance the counters."""
        cumulative_loss = model.get_latest_training_loss()
        should_report = self.epoch % 100 == 0
        if should_report:
            delta = cumulative_loss - self.loss_previous_step
            print('Loss after epoch {}: {}'.format(self.epoch, delta))

        self.loss_previous_step = cumulative_loss
        self.epoch += 1
|
64 |
+
|
65 |
+
|
66 |
+
|
67 |
+
|
68 |
+
|
69 |
+
def spacy_lemmatize_text(text):
    """Return ``text`` with each token replaced by its lemma, joined by spaces.

    Tokens whose lemma is the placeholder ``'-PRON-'`` keep their original
    surface form instead.
    """
    lemmas = []
    for token in nlp(text):
        lemma = token.lemma_
        lemmas.append(token.text if lemma == '-PRON-' else lemma)
    return ' '.join(lemmas)
|
73 |
+
|
74 |
+
def remove_accented_chars(text):
    """Return ``text`` with accents stripped and non-ASCII characters dropped.

    The text is decomposed with NFKD so that an accented letter becomes its
    base letter followed by combining marks; encoding to ASCII with
    ``'ignore'`` then discards only the marks ("café" -> "cafe").

    The previous implementation normalized with NFC, which keeps accented
    letters as single composed code points, so the ASCII step deleted the
    whole letter ("café" -> "caf").

    Args:
        text: Input string.

    Returns:
        An ASCII-only string.
    """
    decomposed = unicodedata.normalize('NFKD', text)
    return decomposed.encode('ascii', 'ignore').decode('utf-8', 'ignore')
|
77 |
+
|
78 |
+
def remove_special_characters(text, remove_digits=False):
    """Delete every character that is not an ASCII letter, digit or whitespace.

    Args:
        text: Input string.
        remove_digits: When true, digits are deleted as well.

    Returns:
        The filtered string.
    """
    if remove_digits:
        disallowed = r'[^a-zA-Z\s]'
    else:
        disallowed = r'[^a-zA-Z0-9\s]'
    return re.sub(disallowed, '', text)
|
82 |
+
|
83 |
+
def remove_stopwords(text, is_lower_case=False, stopwords=None):
    """Return ``text`` with stop words removed, tokens joined by single spaces.

    Args:
        text: Input string; it is split with ``nltk.word_tokenize``.
        is_lower_case: When true, tokens are compared to the stop-word list
            as-is; otherwise each token is lower-cased for the comparison
            only (the kept token retains its original casing).
        stopwords: Collection of stop words. A falsy value selects NLTK's
            English stop-word list.

    Returns:
        The remaining tokens joined with a space.
    """
    if not stopwords:
        stopwords = nltk.corpus.stopwords.words('english')

    kept = []
    for raw_token in nltk.word_tokenize(text):
        token = raw_token.strip()
        probe = token if is_lower_case else token.lower()
        if probe not in stopwords:
            kept.append(token)

    return ' '.join(kept)
|
96 |
+
|
97 |
+
|
98 |
+
def pre_process(text=None):
    """Clean one sentence and return it as a list of tokens.

    Steps, in order: strip accents, expand contractions, remove every
    character that is not a letter or whitespace, lower-case, lemmatize with
    spaCy, drop English stop words, and tokenize with NLTK.

    Args:
        text: The sentence to clean. The original function took no argument
            and read a module-level name ``sentence`` that is not defined in
            the visible part of this file. Passing the text explicitly avoids
            that dependency. When omitted, the legacy lookup of ``sentence``
            is kept so existing zero-argument calls behave as before.

    Returns:
        A list of token strings.

    Raises:
        NameError: If ``text`` is omitted and no module-level ``sentence``
            exists.
    """
    if text is None:
        # Legacy call path: fall back to the module-level `sentence`.
        text = sentence

    ascii_text = remove_accented_chars(text)
    expanded = contractions.fix(ascii_text)
    letters_only = remove_special_characters(expanded, remove_digits=True)
    lowered = letters_only.lower()
    lemmatized = spacy_lemmatize_text(lowered)
    without_stopwords = remove_stopwords(lemmatized, is_lower_case=False)

    return nltk.word_tokenize(without_stopwords)
|
107 |
+
|
108 |
+
def classify(new_column=True, words=None):
    """Classify one tokenized sentence and return the matching result image.

    Each word is looked up in the loaded Word2Vec model, the resulting
    sequence is padded to length 2726 and fed to the CNN. A prediction of
    0.5 or more yields ``elegivel.png``, anything lower ``inelegivel.png``.

    Args:
        new_column: Accepted for the callers' sake; not used by this function.
        words: List of tokens to classify. The original function read a
            module-level name ``sentence`` (a string holding a list literal)
            that is not defined in the visible part of this file. Passing the
            tokens explicitly avoids that dependency. When omitted, the legacy
            parsing of ``sentence`` is kept so existing calls behave as before.

    Returns:
        A ``PIL.Image`` opened from ``elegivel.png`` or ``inelegivel.png``.

    Raises:
        NameError: If ``words`` is omitted and no module-level ``sentence``
            exists.
        KeyError: If a word is missing from the Word2Vec vocabulary.
    """
    if words is None:
        # Legacy call path: `sentence` is expected to look like "['a', 'b']";
        # single quotes are swapped for double quotes so it parses as JSON.
        words = json.loads(sentence.replace("'", '"'))

    vectors = [reloaded_w2v_model.wv[word] for word in words]

    # NOTE(review): pad_sequences is called without `dtype`; its documented
    # default is an integer type, which would truncate float embeddings.
    # Left unchanged here because the model may have been trained on inputs
    # prepared the same way -- confirm against the training pipeline.
    model_input = pad_sequences([vectors], maxlen=2726, padding='pre')

    value = reconstructed_model_CNN.predict(model_input)[0]

    image_path = r"elegivel.png" if value >= 0.5 else r"inelegivel.png"
    return Image.open(image_path)
|
124 |
+
|
125 |
+
def gen_output(data=None):
    """Return the path of the generated spreadsheet.

    Args:
        data: Currently unused. It defaults to ``None`` because the callers
            in this file invoke ``gen_output()`` without an argument, which
            raised ``TypeError`` while the parameter was required.

    Returns:
        The string ``"output.xlsx"``.
    """
    return "output.xlsx"
|
127 |
+
|
128 |
+
|
129 |
+
# Word2Vec embeddings, loaded once at import time from the working directory.
reloaded_w2v_model = Word2Vec.load('word2vec_xp8.model')

# CNN classifier; the custom metric functions must be supplied by name so
# Keras can resolve them while deserializing the saved model.
reconstructed_model_CNN = keras.models.load_model(
    "best weights CNN.h5",
    custom_objects={
        'f1_m': f1_m,
        "precision_m": precision_m,
        "recall_m": recall_m,
    },
)
|
135 |
|
136 |
def app(operacao, resultado, dados):
    """Gradio entry point: run the selected operation and return the output path.

    A fixed demo table is always written to ``output.xlsx`` first. The
    selected operation then runs pre-processing, classification, or both.

    Fixes over the original: ``===`` (not a Python operator, a SyntaxError)
    is replaced by ``==``, and ``gen_output`` now receives ``dados`` instead
    of being called without its required argument.

    Args:
        operacao: One of "Pré-processamento + Classificação",
            "Apenas Pré-processamento" or "Apenas Classificação".
        resultado: Compared with "Nova Coluna"; the resulting boolean is
            passed to ``classify``.
        dados: Forwarded to ``gen_output``.

    Returns:
        The value returned by ``gen_output`` for a recognized operation,
        otherwise ``None``.
    """
    boxes = {
        'Color': ['Green','Green','Green','Blue','Blue','Red','Red','Red'],
        'Shape': ['Rectangle','Rectangle','Square','Rectangle','Square','Square','Square','Rectangle'],
        'Price': [10,15,5,5,10,15,15,5],
    }
    df = pd.DataFrame(boxes, columns=['Color', 'Shape', 'Price'])
    df.to_excel("output.xlsx")

    # NOTE(review): pre_process() and classify() are called without the text
    # to process and their return values are discarded; wiring `dados`
    # through them is still to be done.
    if operacao == "Pré-processamento + Classificação":
        pre_process()
        classify(resultado == "Nova Coluna")
    elif operacao == "Apenas Pré-processamento":
        pre_process()
    elif operacao == "Apenas Classificação":
        classify(resultado == "Nova Coluna")
    else:
        # Unrecognized operation: the original fell through and returned None.
        return None

    return gen_output(dados)
|
161 |
|
162 |
iface = gr.Interface(
|
163 |
fn=app,
|