Joshua1808 committed on
Commit
c2c681e
1 Parent(s): 2158a3c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +171 -252
app.py CHANGED
@@ -1,29 +1,22 @@
1
  import tweepy as tw
2
  import streamlit as st
3
  import pandas as pd
4
- import torch
5
- import numpy as np
6
  import regex as re
 
7
  import pysentimiento
8
  import geopy
9
  import matplotlib.pyplot as plt
 
 
10
 
11
  from pysentimiento.preprocessing import preprocess_tweet
12
  from geopy.geocoders import Nominatim
 
 
13
 
14
- from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
15
- from transformers import AutoTokenizer, AutoModelForSequenceClassification,AdamW
16
- tokenizer = AutoTokenizer.from_pretrained('hackathon-pln-es/twitter_sexismo-finetuned-robertuito-exist2021')
17
- model = AutoModelForSequenceClassification.from_pretrained("hackathon-pln-es/twitter_sexismo-finetuned-robertuito-exist2021")
18
 
19
- import torch
20
- if torch.cuda.is_available():
21
- device = torch.device( "cuda")
22
- print('I will use the GPU:', torch.cuda.get_device_name(0))
23
-
24
- else:
25
- print('No GPU available, using the CPU instead.')
26
- device = torch.device("cpu")
27
 
28
 
29
  consumer_key = "BjipwQslVG4vBdy4qK318KnoA"
@@ -33,34 +26,23 @@ access_token_secret = "pqQ5aFSJxzJ2xnI6yhVtNjQO36FOu8DBOH6DtUrPAU54J"
33
  auth = tw.OAuthHandler(consumer_key, consumer_secret)
34
  auth.set_access_token(access_token, access_token_secret)
35
  api = tw.API(auth, wait_on_rate_limit=True)
36
-
37
- def preprocess(text):
38
- #text=text.lower()
39
- # remove hyperlinks
40
- text = re.sub(r'https?:\/\/.*[\r\n]*', '', text)
41
- text = re.sub(r'http?:\/\/.*[\r\n]*', '', text)
42
- #Replace &amp, &lt, &gt with &,<,> respectively
43
- text=text.replace(r'&amp;?',r'and')
44
- text=text.replace(r'&lt;',r'<')
45
- text=text.replace(r'&gt;',r'>')
46
- #remove hashtag sign
47
- #text=re.sub(r"#","",text)
48
- #remove mentions
49
- text = re.sub(r"(?:\@)\w+", '', text)
50
- #text=re.sub(r"@","",text)
51
- #remove non ascii chars
52
- text=text.encode("ascii",errors="ignore").decode()
53
- #remove some puncts (except . ! ?)
54
- text=re.sub(r'[:"#$%&\*+,-/:;<=>@\\^_`{|}~]+','',text)
55
- text=re.sub(r'[!]+','!',text)
56
- text=re.sub(r'[?]+','?',text)
57
- text=re.sub(r'[.]+','.',text)
58
- text=re.sub(r"'","",text)
59
- text=re.sub(r"\(","",text)
60
- text=re.sub(r"\)","",text)
61
- text=" ".join(text.split())
62
- return text
63
-
64
 
65
  def highlight_survived(s):
66
  return ['background-color: red']*len(s) if (s.Sexista == 1) else ['background-color: green']*len(s)
@@ -73,231 +55,168 @@ def color_survived(val):
73
  st.set_page_config(layout="wide")
74
  st.markdown('<style>body{background-color: Blue;}</style>',unsafe_allow_html=True)
75
 
76
- colT1,colT2 = st.columns([2,8])
77
- with colT2:
78
- # st.title('Analisis de comentarios sexistas en Twitter')
79
- st.markdown(""" <style> .font {
80
- font-size:40px ; font-family: 'Cooper Black'; color: #06bf69;}
81
- </style> """, unsafe_allow_html=True)
82
- st.markdown('<p class="font">Análisis de comentarios sexistas en Twitter</p>', unsafe_allow_html=True)
83
-
84
- st.markdown(""" <style> .font1 {
85
- font-size:28px ; font-family: 'Times New Roman'; color: #8d33ff;}
86
- </style> """, unsafe_allow_html=True)
87
-
88
- st.markdown(""" <style> .font2 {
89
- font-size:16px ; font-family: 'Times New Roman'; color: #3358ff;}
90
- </style> """, unsafe_allow_html=True)
91
-
92
-
93
-
94
-
95
-
96
- def analizar_tweets(search_words, number_of_tweets ):
97
- tweets = api.user_timeline(screen_name = search_words, count= number_of_tweets)
98
- tweet_list = [i.text for i in tweets]
99
- text= pd.DataFrame(tweet_list)
100
- text[0] = text[0].apply(preprocess_tweet)
101
- text1=text[0].values
102
- indices1=tokenizer.batch_encode_plus(text1.tolist(), max_length=128,add_special_tokens=True, return_attention_mask=True,pad_to_max_length=True,truncation=True)
103
- input_ids1=indices1["input_ids"]
104
- attention_masks1=indices1["attention_mask"]
105
- prediction_inputs1= torch.tensor(input_ids1)
106
- prediction_masks1 = torch.tensor(attention_masks1)
107
- batch_size = 25
108
- # Create the DataLoader.
109
- prediction_data1 = TensorDataset(prediction_inputs1, prediction_masks1)
110
- prediction_sampler1 = SequentialSampler(prediction_data1)
111
- prediction_dataloader1 = DataLoader(prediction_data1, sampler=prediction_sampler1, batch_size=batch_size)
112
- #print('Predicting labels for {:,} test sentences...'.format(len(prediction_inputs1)))
113
- # Put model in evaluation mode
114
- model.eval()
115
- # Tracking variables
116
- predictions = []
117
- for batch in prediction_dataloader1:
118
- batch = tuple(t.to(device) for t in batch)
119
- # Unpack the inputs from our dataloader
120
- b_input_ids1, b_input_mask1 = batch
121
-
122
- #Telling the model not to compute or store gradients, saving memory and # speeding up prediction
123
- with torch.no_grad():
124
- # Forward pass, calculate logit predictions
125
- outputs1 = model(b_input_ids1, token_type_ids=None,attention_mask=b_input_mask1)
126
- logits1 = outputs1[0]
127
- # Move logits and labels to CPU
128
- logits1 = logits1.detach().cpu().numpy()
129
- # Store predictions and true labels
130
- predictions.append(logits1)
131
-
132
- #flat_predictions = [item for sublist in predictions for item in sublist]
133
- flat_predictions = [item for sublist in predictions for item in sublist]
134
-
135
- flat_predictions = np.argmax(flat_predictions, axis=1).flatten()
136
-
137
- probability = np.amax(logits1,axis=1).flatten()
138
- Tweets =['Últimos '+ str(number_of_tweets)+' Tweets'+' de '+search_words]
139
- df = pd.DataFrame(list(zip(text1, flat_predictions,probability)), columns = ['Tweets' , 'Prediccion','Probabilidad'])
140
-
141
- df['Prediccion']= np.where(df['Prediccion']== 0, 'No Sexista', 'Sexista')
142
- df['Tweets'] = df['Tweets'].str.replace('RT|@', '')
143
- #df['Tweets'] = df['Tweets'].apply(lambda x: re.sub(r'[:;][-o^]?[)\]DpP3]|[(/\\]|[\U0001f600-\U0001f64f]|[\U0001f300-\U0001f5ff]|[\U0001f680-\U0001f6ff]|[\U0001f1e0-\U0001f1ff]','', x))
144
-
145
- tabla = st.table(df.reset_index(drop=True).head(30).style.applymap(color_survived, subset=['Prediccion']))
146
 
147
- return tabla
148
-
149
- def analizar_frase(frase):
150
- #palabra = frase.split()
151
- palabra = [frase]
152
-
153
- indices1=tokenizer.batch_encode_plus(palabra,max_length=128,add_special_tokens=True,
154
- return_attention_mask=True,
155
- pad_to_max_length=True,
156
- truncation=True)
157
- input_ids1=indices1["input_ids"]
158
- attention_masks1=indices1["attention_mask"]
159
- prediction_inputs1= torch.tensor(input_ids1)
160
- prediction_masks1 = torch.tensor(attention_masks1)
161
- batch_size = 25
162
- prediction_data1 = TensorDataset(prediction_inputs1, prediction_masks1)
163
- prediction_sampler1 = SequentialSampler(prediction_data1)
164
- prediction_dataloader1 = DataLoader(prediction_data1, sampler=prediction_sampler1, batch_size=batch_size)
165
- model.eval()
166
- predictions = []
167
- # Predict
168
- for batch in prediction_dataloader1:
169
- batch = tuple(t.to(device) for t in batch)
170
- # Unpack the inputs from our dataloader
171
- b_input_ids1, b_input_mask1 = batch
172
- # Telling the model not to compute or store gradients, saving memory and # speeding up prediction
173
- with torch.no_grad():
174
- # Forward pass, calculate logit predictions
175
- outputs1 = model(b_input_ids1, token_type_ids=None,attention_mask=b_input_mask1)
176
- logits1 = outputs1[0]
177
- # Move logits and labels to CPU
178
- logits1 = logits1.detach().cpu().numpy()
179
- # Store predictions and true labels
180
- predictions.append(logits1)
181
- flat_predictions = [item for sublist in predictions for item in sublist]
182
- flat_predictions = np.argmax(flat_predictions, axis=1).flatten()
183
- tokens = tokenizer.tokenize(frase)
184
- # Convertir los tokens a un formato compatible con el modelo
185
- input_ids = tokenizer.convert_tokens_to_ids(tokens)
186
- attention_masks = [1] * len(input_ids)
187
-
188
- # Pasar los tokens al modelo
189
- outputs = model(torch.tensor([input_ids]), token_type_ids=None, attention_mask=torch.tensor([attention_masks]))
190
- scores = outputs[0]
191
- #prediccion = scores.argmax(dim=1).item()
192
- # Obtener la probabilidad de que la frase sea "sexista"
193
- probabilidad_sexista = scores.amax(dim=1).item()
194
- #print(probabilidad_sexista)
195
-
196
- # Crear un Dataframe
197
- text= pd.DataFrame({'Frase': [frase], 'Prediccion':[flat_predictions], 'Probabilidad':[probabilidad_sexista]})
198
- text['Prediccion'] = np.where(text['Prediccion'] == 0 , 'No Sexista', 'Sexista')
199
 
 
 
 
200
 
201
- tabla = st.table(text.reset_index(drop=True).head(50).style.applymap(color_survived, subset=['Prediccion']))
202
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
203
  return tabla
204
 
205
  def tweets_localidad(buscar_localidad):
206
- geolocator = Nominatim(user_agent="nombre_del_usuario")
207
- location = geolocator.geocode(buscar_localidad)
208
- radius = "10km"
209
- tweets = api.search_tweets(q="",lang="es",geocode=f"{location.latitude},{location.longitude},{radius}", count = 50)
210
- localidad = [i.user.location for i in tweets]
211
- text_localidad = pd.DataFrame(localidad)
212
- username = [i.user.screen_name for i in tweets]
213
- text_user= pd.DataFrame(username)
214
-
215
- tweet_list = [i.text for i in tweets]
216
- text= pd.DataFrame(tweet_list)
217
- text[0] = text[0].apply(preprocess_tweet)
218
- text1=text[0].values
219
- print(text1)
220
- indices1=tokenizer.batch_encode_plus(text1.tolist(), max_length=128,add_special_tokens=True, return_attention_mask=True,pad_to_max_length=True,truncation=True)
221
- input_ids1=indices1["input_ids"]
222
- attention_masks1=indices1["attention_mask"]
223
- prediction_inputs1= torch.tensor(input_ids1)
224
- prediction_masks1 = torch.tensor(attention_masks1)
225
- batch_size = 25
226
- # Create the DataLoader.
227
- prediction_data1 = TensorDataset(prediction_inputs1, prediction_masks1)
228
- prediction_sampler1 = SequentialSampler(prediction_data1)
229
- prediction_dataloader1 = DataLoader(prediction_data1, sampler=prediction_sampler1, batch_size=batch_size)
230
- #print('Predicting labels for {:,} test sentences...'.format(len(prediction_inputs1)))
231
- # Put model in evaluation mode
232
- model.eval()
233
- # Tracking variables
234
- predictions = []
235
- for batch in prediction_dataloader1:
236
- batch = tuple(t.to(device) for t in batch)
237
- # Unpack the inputs from our dataloader
238
- b_input_ids1, b_input_mask1 = batch
239
-
240
- #Telling the model not to compute or store gradients, saving memory and # speeding up prediction
241
- with torch.no_grad():
242
- # Forward pass, calculate logit predictions
243
- outputs1 = model(b_input_ids1, token_type_ids=None,attention_mask=b_input_mask1)
244
- logits1 = outputs1[0]
245
- # Move logits and labels to CPU
246
- logits1 = logits1.detach().cpu().numpy()
247
- # Store predictions and true labels
248
- predictions.append(logits1)
249
-
250
- #flat_predictions = [item for sublist in predictions for item in sublist]
251
- flat_predictions = [item for sublist in predictions for item in sublist]
252
-
253
- flat_predictions = np.argmax(flat_predictions, axis=1).flatten()
254
-
255
- probability = np.amax(logits1,axis=1).flatten()
256
- Tweets =['Últimos 50 Tweets'+' de '+ buscar_localidad]
257
- df = pd.DataFrame(list(zip(text1, localidad,username, flat_predictions,probability)), columns = ['Tweets' ,'Localidad' , 'Usuario','Prediccion','Probabilidad'])
258
-
259
- df['Prediccion']= np.where(df['Prediccion']== 0, 'No Sexista', 'Sexista')
260
- #df['Tweets'] = df['Tweets'].str.replace('RT|@', '')
261
- df_filtrado = df[df["Prediccion"]=="Sexista" ]
262
- #df['Tweets'] = df['Tweets'].apply(lambda x: re.sub(r'[:;][-o^]?[)\]DpP3]|[(/\\]|[\U0001f600-\U0001f64f]|[\U0001f300-\U0001f5ff]|[\U0001f680-\U0001f6ff]|[\U0001f1e0-\U0001f1ff]','', x))
263
-
264
- tabla = st.table(df.reset_index(drop=True).head(50).style.applymap(color_survived, subset=['Prediccion']))
 
265
 
266
- df_sexista = df[df['Prediccion']== 'Sexista']
267
- df_no_sexista = df[df['Probabilidad'] > 0]
268
- sexista = len(df_sexista)
269
- no_sexista = len(df_no_sexista)
270
-
271
- # Crear un gráfico de barras
272
- labels = ['Sexista ', ' No sexista']
273
- counts = [sexista, no_sexista]
274
- plt.bar(labels, counts)
275
- plt.xlabel('Categoría')
276
- plt.ylabel('Cantidad de tweets')
277
- plt.title('Cantidad de tweets sexistas y no sexistas')
278
- plt.show()
279
- st.pyplot()
280
- st.set_option('deprecation.showPyplotGlobalUse', False)
 
 
 
281
 
282
- return df
283
-
284
-
285
  def run():
286
  with st.form("my_form"):
287
  col,buff1, buff2 = st.columns([2,2,1])
288
  st.write("Escoja una Opción")
289
- search_words = col.text_input("Introduzca el termino, usuario o localidad para analizar y pulse el check correspondiente")
290
- number_of_tweets = col.number_input('Introduzca número de tweets a analizar. Máximo 50', 0,50,0)
291
- termino=st.checkbox('Término')
292
  usuario=st.checkbox('Usuario')
293
  localidad=st.checkbox('Localidad')
294
  submit_button = col.form_submit_button(label='Analizar')
295
  error =False
296
- submit_button = st.sidebar.button('Submit')
297
- clear_button = st.sidebar.button('Clear')
298
-
299
- st.sidebar.row(submit_button, clear_button)
300
-
301
  if submit_button:
302
  # Condición para el caso de que esten dos check seleccionados
303
  if ( termino == False and usuario == False and localidad == False):
@@ -312,8 +231,8 @@ def run():
312
  analizar_frase(search_words)
313
 
314
  elif (usuario):
315
- analizar_tweets(search_words,number_of_tweets)
316
  elif (localidad):
317
  tweets_localidad(search_words)
318
-
319
  run()
 
1
  import tweepy as tw
2
  import streamlit as st
3
  import pandas as pd
 
 
4
  import regex as re
5
+ import numpy as np
6
  import pysentimiento
7
  import geopy
8
  import matplotlib.pyplot as plt
9
+ import langdetect
10
+
11
 
12
  from pysentimiento.preprocessing import preprocess_tweet
13
  from geopy.geocoders import Nominatim
14
+ from transformers import pipeline
15
+ from langdetect import detect
16
 
 
 
 
 
17
 
18
+ model_checkpoint = "hackathon-pln-es/twitter_sexismo-finetuned-robertuito-exist2021"
19
+ pipeline_nlp = pipeline("text-classification", model=model_checkpoint)
 
 
 
 
 
 
20
 
21
 
22
  consumer_key = "BjipwQslVG4vBdy4qK318KnoA"
 
26
  auth = tw.OAuthHandler(consumer_key, consumer_secret)
27
  auth.set_access_token(access_token, access_token_secret)
28
  api = tw.API(auth, wait_on_rate_limit=True)
29
+
# Patterns are compiled once at import time because limpieza_datos runs once
# per tweet.
# Emoji ranges: emoticons, symbols & pictographs, transport & map symbols,
# and regional-indicator (flag) letters.
_PATRON_EMOJIS = re.compile(
    r'[\U0001F600-\U0001F64F\U0001F300-\U0001F5FF'
    r'\U0001F680-\U0001F6FF\U0001F1E0-\U0001F1FF]'
)
_PATRON_MENCIONES = re.compile(r'@\w+')
_PATRON_URLS = re.compile(r'http\S+')
_PATRON_HASHTAGS = re.compile(r'#\w+')
# Whitelist: ASCII letters/digits, space, newline, period and the accented
# letters used in Spanish (including ü/Ü, e.g. "vergüenza", "pingüino").
_PATRON_NO_PERMITIDOS = re.compile(r'[^a-zA-Z0-9 \n.áéíóúüÁÉÍÓÚÜñÑ]')


def limpieza_datos(tweet):
    """Return *tweet* cleaned for classification.

    Removes, in this order: emojis, @mentions, URLs (anything starting with
    ``http``), #hashtags, and every character outside the whitelist of
    letters, digits, spaces, newlines, periods and Spanish accented letters.
    Leading and trailing whitespace is stripped, so a tweet that consisted
    only of removed elements yields ``""`` and callers can detect it with a
    plain emptiness check.

    Args:
        tweet: Raw tweet text.

    Returns:
        str: The cleaned text, possibly empty.
    """
    for patron in (
        _PATRON_EMOJIS,
        _PATRON_MENCIONES,
        _PATRON_URLS,
        _PATRON_HASHTAGS,
        _PATRON_NO_PERMITIDOS,
    ):
        tweet = patron.sub('', tweet)
    return tweet.strip()
 
 
 
 
 
 
 
 
 
 
 
46
 
def highlight_survived(s):
    """Return one background-colour CSS rule per element of the row *s*.

    The whole row is red when ``s.Sexista == 1`` and green otherwise.
    """
    color = 'red' if s.Sexista == 1 else 'green'
    return [f'background-color: {color}'] * len(s)
 
55
  st.set_page_config(layout="wide")
56
  st.markdown('<style>body{background-color: Blue;}</style>',unsafe_allow_html=True)
57
 
58
+ #st.markdown('<style>body{background-color: Blue;}</style>',unsafe_allow_html=True)
59
+ #colT1,colT2 = st.columns([2,8])
60
+ st.markdown(""" <style> .fondo {
61
+ background-image: url("https://www.google.com/url?sa=i&url=https%3A%2F%2Flasmujereseneldeportemexicano.wordpress.com%2F2016%2F11%2F17%2Fpor-que-es-importante-hablar-de-genero%2F&psig=AOvVaw0xG7SVXtJoEpwt-fF5Kykt&ust=1676431557056000&source=images&cd=vfe&ved=0CBAQjRxqFwoTCJiu-a6IlP0CFQAAAAAdAAAAABAJ");
62
+ background-size: 180%;}
63
+ </style> """, unsafe_allow_html=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65
 
66
+ st.markdown(""" <style> .font {
67
+ font-size:40px ; font-family: 'Cooper Black'; color: #301E67;}
68
+ </style> """, unsafe_allow_html=True)
69
 
70
+ st.markdown('<p class="font">Análisis de comentarios sexistas en linea</p>', unsafe_allow_html=True)
71
 
72
+ st.markdown(""" <style> .font1 {
73
+ font-size:28px ; font-family: 'Times New Roman'; color: #8d33ff;}
74
+ </style> """, unsafe_allow_html=True)
75
+
76
+ st.markdown(""" <style> .font2 {
77
+ font-size:16px ; font-family: 'Times New Roman'; color: #5B8FB9;}
78
+ </style> """, unsafe_allow_html=True)
79
+
80
+ st.markdown('<p class="font2">Este proyecto consiste en una aplicación web que utiliza la biblioteca Tweepy de Python para descargar tweets de Twitter, permitiendo buscar Tweets por usuario y por localidad. Luego, utiliza modelos de lenguaje basados en Transformers para analizar los tweets y detectar comentarios sexistas. Los resultados se almacenan en un dataframe para su posterior visualización y análisis. El objetivo del proyecto es identificar y proporcionar información sobre el discurso sexista en línea para combatir la discriminación y el acoso hacia las mujeres y otros grupos marginados, y así informar políticas y prácticas que promuevan la igualdad de género y la inclusión.</p>',unsafe_allow_html=True)
81
+
82
+
def tweets_usuario(usuario, cant_de_tweets):
    """Classify a user's recent tweets and show the result as a table.

    Fetches the user's timeline, skips retweets (text starting with ``RT``),
    tweets not detected as Spanish and tweets that are empty after cleaning,
    then runs the remaining texts through ``pipeline_nlp``. Up to 30 rows are
    rendered with ``st.table``, with ``LABEL_1`` rows (shown as "Sexista")
    first and higher scores first.

    Args:
        usuario: Twitter screen name whose timeline is fetched.
        cant_de_tweets: Number of tweets requested; must be greater than 0.

    Returns:
        list: The values returned by the ``st.text`` / ``st.table`` calls
        made here (a single table or a single message).
    """
    tabla = []
    if cant_de_tweets <= 0 or usuario == "":
        tabla.append(st.text("Ingrese los parametros correspondientes"))
        return tabla

    # Only the Twitter calls sit inside the try, so the "account does not
    # exist" message is not shown for unrelated failures further down.
    try:
        # Looked up only to validate the account; the result is not needed.
        api.get_user(screen_name=usuario)
        tweets = api.user_timeline(
            screen_name=usuario, tweet_mode="extended", count=cant_de_tweets
        )
    except tw.TweepyException:
        # The original handler formatted `search_words`, a name that does not
        # exist in this scope, and so raised NameError instead of reporting.
        tabla.append(st.text(f"La cuenta {usuario} no existe."))
        return tabla

    result = []
    for tweet in tweets:
        text = tweet.full_text
        if text.startswith('RT'):
            continue
        try:
            language = detect(text)
        except langdetect.LangDetectException:
            # Text without detectable language features (e.g. only links).
            continue
        if language != 'es':
            continue
        datos = limpieza_datos(text)
        if not datos.strip():
            continue
        try:
            prediction = pipeline_nlp(datos)
        except Exception:
            # Best effort, as in the original: one tweet the model cannot
            # handle must not abort the analysis of the rest.
            continue
        result.extend(
            {'Tweets': datos, 'Prediccion': predic['label'], 'Probabilidad': predic['score']}
            for predic in prediction
        )

    df = pd.DataFrame(result)
    if df.empty:
        tabla.append(st.text("No hay tweets Sexistas a analizar"))
        return tabla

    # Sort on the raw labels ('LABEL_1' > 'LABEL_0') before renaming them.
    df.sort_values(by=['Prediccion', 'Probabilidad'], ascending=[False, False], inplace=True)
    df['Prediccion'] = np.where(df['Prediccion'] == 'LABEL_1', 'Sexista', 'No Sexista')
    df['Probabilidad'] = df['Probabilidad'].round(3)
    estilo = df.reset_index(drop=True).head(30).style.applymap(color_survived, subset=['Prediccion'])
    tabla.append(st.table(estilo))
    return tabla
126
 
# Fixed colour per category, so "Sexista" is always red whichever category
# happens to be the most frequent.
_COLORES_PREDICCION = {'Sexista': "#EE3555", 'No Sexista': "#aae977"}


def _graficar_resumen(resultado):
    """Render a pie chart and a bar chart of the category counts in *resultado*."""
    colores = [_COLORES_PREDICCION[etiqueta] for etiqueta in resultado.index]
    # rc_context keeps the small-font settings local to these two figures
    # instead of mutating matplotlib's global rcParams.
    with plt.rc_context({'font.size': 4, 'font.weight': 'bold'}):
        fig_pie, ax_pie = plt.subplots()
        fig_pie.set_size_inches(2, 2)
        ax_pie.pie(resultado, labels=resultado.index, autopct='%1.1f%%',
                   colors=colores, textprops={'fontsize': 4})
        ax_pie.set_title("Porcentajes por Categorias", fontsize=5, fontweight="bold")
        ax_pie.legend()
        st.pyplot(fig_pie)
        plt.close(fig_pie)

        # The bar chart gets its own figure; reusing the pie's axes after
        # rendering left the bar chart without title or axis labels.
        fig_bar, ax_bar = plt.subplots()
        fig_bar.set_size_inches(2, 2)
        ax_bar.bar(resultado.index, resultado, color=colores)
        ax_bar.set_title("Porcentajes por Categorias", fontsize=5, fontweight="bold")
        ax_bar.set_xlabel("Categoría")
        # The bars show tweet counts, not probabilities.
        ax_bar.set_ylabel("Cantidad de tweets")
        st.pyplot(fig_bar)
        plt.close(fig_bar)


def tweets_localidad(buscar_localidad):
    """Classify recent Spanish tweets posted near a place and show a summary.

    Geocodes *buscar_localidad* with Nominatim, searches tweets within 15 km,
    skips retweets and tweets that are empty before or after cleaning, and
    classifies the rest with ``pipeline_nlp``. The first 10 rows (``LABEL_1``
    / "Sexista" first) are rendered as a table, followed by a pie chart and a
    bar chart of the categories in those rows.

    Args:
        buscar_localidad: Free-text place name to geocode.

    Returns:
        list: The values returned by the ``st.text`` / ``st.table`` calls
        made here (a single table or a single message).
    """
    tabla = []
    geolocator = Nominatim(user_agent="nombre_del_usuario")
    location = geolocator.geocode(buscar_localidad)
    if location is None:
        # Checked explicitly rather than by catching the AttributeError from
        # `None.latitude`, which also hid unrelated bugs in the code below.
        tabla.append(st.text("No existe ninguna localidad con ese nombre"))
        return tabla

    radius = "15km"
    # 100 is the documented maximum page size of the standard search endpoint.
    tweets = api.search_tweets(
        q="",
        lang="es",
        geocode=f"{location.latitude},{location.longitude},{radius}",
        count=100,
        tweet_mode="extended",
    )

    result = []
    for tweet in tweets:
        texto = tweet.full_text
        if texto.startswith('RT') or not texto.strip():
            continue
        datos = limpieza_datos(texto)
        if not datos.strip():
            # Nothing left after removing mentions, links, hashtags, emojis.
            continue
        result.extend(
            {'Tweets': datos, 'Prediccion': predic['label'], 'Probabilidad': predic['score']}
            for predic in pipeline_nlp(datos)
        )

    df = pd.DataFrame(result)
    if df.empty:
        tabla.append(st.text("No se encontraron tweets sexistas dentro de la localidad"))
        return tabla

    # Sort on the raw labels ('LABEL_1' > 'LABEL_0') before renaming them.
    df.sort_values(by='Prediccion', ascending=False, inplace=True)
    df['Prediccion'] = np.where(df['Prediccion'] == 'LABEL_1', 'Sexista', 'No Sexista')
    df['Probabilidad'] = df['Probabilidad'].round(3)
    estilo = df.reset_index(drop=True).head(10).style.applymap(color_survived, subset=['Prediccion'])
    tabla.append(st.table(estilo))

    with st.container():
        # Summarise the same 10 rows that the table shows.
        _graficar_resumen(df['Prediccion'].head(10).value_counts())

    return tabla
188
 
def analizar_frase(frase):
    """Classify a single Spanish sentence and show the result.

    Args:
        frase: Sentence typed by the user.

    Returns:
        The value returned by the Streamlit call that rendered the outcome:
        a one-row table with the prediction, or a message when the input is
        empty or not detected as Spanish.
    """
    # Validate before detecting the language: langdetect raises on text with
    # no features, so detecting first made this message unreachable.
    if not frase.strip():
        return st.text("Ingrese una frase")

    try:
        language = detect(frase)
    except langdetect.LangDetectException:
        # e.g. input made only of digits or symbols.
        language = None
    if language != 'es':
        return st.text("Solo Frase en español")

    # One row per prediction returned by the pipeline.
    data = [
        {'Texto': frase, 'Prediccion': prediction['label'], 'Probabilidad': prediction['score']}
        for prediction in pipeline_nlp(frase)
    ]
    df = pd.DataFrame(data)
    df['Prediccion'] = np.where(df['Prediccion'] == 'LABEL_1', 'Sexista', 'No Sexista')
    estilo = df.reset_index(drop=True).head(1).style.applymap(color_survived, subset=['Prediccion'])
    return st.table(estilo)
207
 
 
 
 
208
  def run():
209
  with st.form("my_form"):
210
  col,buff1, buff2 = st.columns([2,2,1])
211
  st.write("Escoja una Opción")
212
+ search_words = col.text_input("Introduzca la frase, el usuario o localidad para analizar y pulse el check correspondiente")
213
+ number_of_tweets = col.number_input('Introduzca número de tweets a analizar del usuario Máximo 50', 0,50,0)
214
+ termino=st.checkbox('Frase')
215
  usuario=st.checkbox('Usuario')
216
  localidad=st.checkbox('Localidad')
217
  submit_button = col.form_submit_button(label='Analizar')
218
  error =False
219
+
 
 
 
 
220
  if submit_button:
221
  # Condición para el caso de que esten dos check seleccionados
222
  if ( termino == False and usuario == False and localidad == False):
 
231
  analizar_frase(search_words)
232
 
233
  elif (usuario):
234
+ tweets_usuario(search_words,number_of_tweets)
235
  elif (localidad):
236
  tweets_localidad(search_words)
237
+
238
  run()