LordCoffee commited on
Commit
354fa5d
verified
1 Parent(s): f974173

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +87 -58
app.py CHANGED
@@ -1,73 +1,102 @@
1
- from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
2
- from sklearn.metrics.pairwise import cosine_similarity
3
- from sklearn.feature_extraction.text import TfidfVectorizer
4
  import numpy as np
5
  import gradio as gr
 
 
6
 
7
- # Modelos para análisis de texto y similitud
8
- sentiment_analysis = pipeline("sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english")
9
- similarity_model = AutoModelForSequenceClassification.from_pretrained("bert-base-uncased")
10
- similarity_tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
 
12
- # Respuestas esperadas para las preguntas
13
- expected_responses = {
14
- 'question1': ['respuesta1a', 'respuesta1b', 'respuesta1c'],
15
- 'question2': ['respuesta2a', 'respuesta2b', 'respuesta2c'],
16
- 'question3': ['respuesta3a', 'respuesta3b', 'respuesta3c'],
17
- 'question4': ['respuesta4a', 'respuesta4b', 'respuesta4c'],
18
- }
19
 
20
- # Función para calcular la similitud de respuestas usando TF-IDF y cosine similarity
21
- def calculate_similarity(response, expected_responses):
22
- vectorizer = TfidfVectorizer().fit_transform([response] + expected_responses)
23
- vectors = vectorizer.toarray()
24
- cosine_similarities = cosine_similarity(vectors[0:1], vectors[1:])
25
- return float(max(cosine_similarities[0]))
26
 
27
- # Función para evaluar una sola respuesta
28
- def evaluate_response(response, expected_responses):
29
- return calculate_similarity(response, expected_responses)
 
 
 
 
 
 
 
 
30
 
31
- # Función para evaluar análisis de texto
32
- def analyze_text(response):
33
- analysis = sentiment_analysis(response)
34
- return analysis[0]
35
 
36
- # Función para evaluar todas las respuestas
37
- def evaluate_all_responses(responses):
38
- results = {}
39
- for i, response in enumerate(responses[:4]):
40
- question_key = f'question{i+1}'
41
- results[question_key] = evaluate_response(response, expected_responses[question_key])
42
 
43
- for i, response in enumerate(responses[4:]):
44
- question_key = f'question{i+5}'
45
- results[question_key] = analyze_text(response)
46
 
47
- return results
 
 
48
 
49
- # Interfaz Gradio
50
- def evaluate(*responses):
51
- if len(responses) != 6:
52
- return {'error': 'Se esperan 6 respuestas'}
53
- results = evaluate_all_responses(responses)
54
- return results
55
 
56
- # Definir los cuadros de texto para las respuestas de las preguntas
57
- input_texts = [gr.Textbox(label=f"Pregunta {i+1}") for i in range(6)]
58
 
59
- # Definir la salida como un JSON para mostrar los resultados
60
- output_text = gr.JSON(label="Resultados")
 
61
 
62
- # Crear la interfaz Gradio
63
- demo = gr.Interface(
64
- fn=evaluate,
65
- inputs=input_texts,
66
- outputs=output_text,
67
- title="Evaluación de Respuestas y Análisis de Texto",
68
- description="Ingrese las respuestas para las 6 preguntas y obtenga evaluaciones de similitud y análisis de sentimientos.",
69
- )
70
 
71
- # Lanzar la aplicación
72
- if __name__ == "__main__":
73
- demo.launch()
 
 
 
 
 
 
 
 
 
1
+ from huggingface_hub import from_pretrained_keras
 
 
2
  import numpy as np
3
  import gradio as gr
4
+ import transformers
5
+ import tensorflow as tf
6
 
7
class BertSemanticDataGenerator(tf.keras.utils.Sequence):
    """Generates batches of BERT-encoded sentence-pair data.

    Each batch is ``[input_ids, attention_masks, token_type_ids]`` — int32
    numpy arrays of shape (batch_size, 128) — and, when ``include_targets``
    is true, the matching int32 label array as well.
    """

    def __init__(
        self,
        sentence_pairs,
        labels,
        batch_size=32,
        shuffle=True,
        include_targets=True,
    ):
        # sentence_pairs: array-like of shape (num_pairs, 2) holding the two
        # sentences of every pair.
        # labels: integer class ids, or None when include_targets is False
        # (inference-only use).
        self.sentence_pairs = sentence_pairs
        self.labels = labels
        self.shuffle = shuffle
        self.batch_size = batch_size
        self.include_targets = include_targets
        # Load the BERT tokenizer to encode the text.
        # We use the bert-base-uncased pretrained model.
        self.tokenizer = transformers.BertTokenizer.from_pretrained(
            "bert-base-uncased", do_lower_case=True
        )
        self.indexes = np.arange(len(self.sentence_pairs))
        # NOTE(review): tf.keras.utils.Sequence.on_epoch_end is a no-op and
        # no override is visible here, so `shuffle` currently has no effect
        # — confirm whether reshuffling was intended.
        self.on_epoch_end()

    def __len__(self):
        # Number of full batches per epoch (a trailing partial batch is
        # dropped by the integer division).
        return len(self.sentence_pairs) // self.batch_size

    def __getitem__(self, idx):
        """Return the encoded features for batch ``idx``."""
        indexes = self.indexes[idx * self.batch_size : (idx + 1) * self.batch_size]
        sentence_pairs = self.sentence_pairs[indexes]

        # batch_encode_plus encodes both sentences of each pair together,
        # separated by the [SEP] token.  `pad_to_max_length=True` is
        # deprecated in transformers; `padding="max_length"` plus explicit
        # `truncation=True` is the supported equivalent.
        encoded = self.tokenizer.batch_encode_plus(
            sentence_pairs.tolist(),
            add_special_tokens=True,
            max_length=128,
            truncation=True,
            return_attention_mask=True,
            return_token_type_ids=True,
            padding="max_length",
            return_tensors="tf",
        )

        # Convert the batch of encoded features to int32 numpy arrays.
        input_ids = np.array(encoded["input_ids"], dtype="int32")
        attention_masks = np.array(encoded["attention_mask"], dtype="int32")
        token_type_ids = np.array(encoded["token_type_ids"], dtype="int32")

        # Targets are only returned when the generator feeds training or
        # validation; at inference time only the features are needed.
        if self.include_targets:
            labels = np.array(self.labels[indexes], dtype="int32")
            return [input_ids, attention_masks, token_type_ids], labels
        else:
            return [input_ids, attention_masks, token_type_ids]
62
 
63
# Three-way NLI labels, in the order the model's softmax emits them.
labels = ["contradiction", "entailment", "neutral"]

# Keras BERT model fine-tuned for semantic similarity (SNLI).
model = from_pretrained_keras("keras-io/bert-semantic-similarity")
 
65
 
66
def predict(*sentences):
    """Score a submitted answer against its expected response with BERT NLI.

    Returns a dict mapping each NLI label ("contradiction", "entailment",
    "neutral") to its probability, or an error dict when the number of
    submitted sentences is not exactly 6.
    """
    if len(sentences) != 6:
        return {'error': 'Se esperan 6 oraciones'}

    # Pair every submitted answer with its reference answer.
    sentence_pairs = np.array(
        [[str(answer), str(reference)]
         for answer, reference in zip(sentences, expected_responses)]
    )
    test_data = BertSemanticDataGenerator(
        sentence_pairs, labels=None, batch_size=1, shuffle=False, include_targets=False,
    )
    # NOTE(review): only batch 0 — i.e. the FIRST of the six pairs — is
    # actually scored here, even though all six pairs are built; confirm
    # whether per-question scoring was intended.
    probs = model.predict(test_data[0])[0]

    return {label: float(p) for label, p in zip(labels, probs)}
78
 
79
# Reference answer for each of the six questions, in input order; the
# i-th submitted sentence is paired with the i-th entry by predict().
expected_responses = [
    'respuesta1a',
    'respuesta2a',
    'respuesta3a',
    'respuesta4a',
    'respuesta5a',
    'respuesta6a',
]
82
 
83
# Example rows for the Gradio interface.  gr.Interface requires one value
# per input component, and this app has six Textbox inputs, so each row
# must hold six candidate answers.  The reference answers are supplied
# internally by predict() via `expected_responses` — they must NOT appear
# in the row, or the row length would mismatch the inputs.
examples = [
    [
        "Two women are observing something together.",
        "A smiling costumed woman is holding an umbrella",
        "A soccer game with multiple males playing",
        "Some men are playing a sport",
        "Another example sentence",
        "One more example for the sixth input",
    ]
]
91
 
92
# Gradio interface: six free-text answers in, one label distribution out.
# Named `demo` per the Hugging Face Spaces convention.
demo = gr.Interface(
    fn=predict,
    title="Semantic Similarity with BERT",
    # Repaired mojibake: the trailing emoji had been double-encoded.
    description="Natural Language Inference by fine-tuning BERT model on SNLI Corpus 📰",
    inputs=[gr.Textbox(label=f"Input {i+1}") for i in range(6)],
    examples=examples,
    # Top-level gr.Label replaces the deprecated gr.outputs.Label namespace,
    # matching the top-level gr.Textbox usage above.
    outputs=gr.Label(num_top_classes=3, label='Semantic similarity'),
    cache_examples=False,
    article="Author: <a href=\"https://huggingface.co/vumichien\">Vu Minh Chien</a>. Based on the keras example from <a href=\"https://keras.io/examples/nlp/semantic_similarity_with_bert/\">Mohamad Merchant</a>",
)
# NOTE(review): `enable_queue` was removed in Gradio 4 (use demo.queue());
# kept here for compatibility with the Gradio 3.x this Space appears to use.
demo.launch(debug=True, enable_queue=True)