Spaces:

LordCoffee
/

english-aa

Runtime error

App Files Files Community

english-aa / app.py

LordCoffee

Update app.py

354fa5d verified 8 months ago

raw

history blame contribute delete

4.05 kB

	from huggingface_hub import from_pretrained_keras
	import numpy as np
	import gradio as gr
	import transformers
	import tensorflow as tf

	class BertSemanticDataGenerator(tf.keras.utils.Sequence):
	    """Generate batches of BERT-encoded sentence pairs.

	    Args:
	        sentence_pairs: Collection of sentence pairs. It is indexed with an
	            integer index array and converted with ``.tolist()``, so it is
	            expected to be a NumPy array (assumed shape ``(n, 2)``).
	        labels: Targets aligned with ``sentence_pairs``; only read when
	            ``include_targets`` is true.
	        batch_size: Number of pairs per batch.
	        shuffle: When true, the sample order is shuffled at construction
	            and again at the end of every epoch.
	        include_targets: When true, ``__getitem__`` returns
	            ``(features, labels)``; otherwise only the features.
	    """

	    def __init__(
	        self,
	        sentence_pairs,
	        labels,
	        batch_size=32,
	        shuffle=True,
	        include_targets=True,
	    ):
	        # Let the Keras base class set up whatever state it needs.
	        super().__init__()
	        self.sentence_pairs = sentence_pairs
	        self.labels = labels
	        self.shuffle = shuffle
	        self.batch_size = batch_size
	        self.include_targets = include_targets
	        # Load our BERT tokenizer to encode the text.
	        # We use the bert-base-uncased pretrained vocabulary.
	        self.tokenizer = transformers.BertTokenizer.from_pretrained(
	            "bert-base-uncased", do_lower_case=True
	        )
	        self.indexes = np.arange(len(self.sentence_pairs))
	        self.on_epoch_end()

	    def __len__(self):
	        """Return the number of full batches per epoch.

	        Floor division is used, so a trailing partial batch is dropped.
	        """
	        return len(self.sentence_pairs) // self.batch_size

	    def __getitem__(self, idx):
	        """Return batch number ``idx``.

	        Returns:
	            ``[input_ids, attention_masks, token_type_ids]`` as int32 NumPy
	            arrays, together with an int32 label array when
	            ``include_targets`` is true.
	        """
	        indexes = self.indexes[idx * self.batch_size : (idx + 1) * self.batch_size]
	        sentence_pairs = self.sentence_pairs[indexes]

	        # With the BERT tokenizer's batch_encode_plus, both sentences of a
	        # pair are encoded together and separated by the [SEP] token.
	        # NOTE(review): `pad_to_max_length` is reported as deprecated by
	        # recent transformers releases in favour of padding="max_length";
	        # kept unchanged here - confirm against the pinned version.
	        encoded = self.tokenizer.batch_encode_plus(
	            sentence_pairs.tolist(),
	            add_special_tokens=True,
	            max_length=128,
	            return_attention_mask=True,
	            return_token_type_ids=True,
	            pad_to_max_length=True,
	            return_tensors="tf",
	        )

	        # Convert the batch of encoded features to NumPy arrays.
	        input_ids = np.array(encoded["input_ids"], dtype="int32")
	        attention_masks = np.array(encoded["attention_mask"], dtype="int32")
	        token_type_ids = np.array(encoded["token_type_ids"], dtype="int32")

	        # Targets are only attached when the generator feeds training or
	        # validation; inference callers get the features alone.
	        if self.include_targets:
	            labels = np.array(self.labels[indexes], dtype="int32")
	            return [input_ids, attention_masks, token_type_ids], labels
	        return [input_ids, attention_masks, token_type_ids]

	    def on_epoch_end(self):
	        """Shuffle the sample order in place when ``shuffle`` is enabled.

	        Without this override the ``shuffle`` argument was stored but never
	        acted upon. A fixed seed keeps the order reproducible across runs.
	        """
	        if self.shuffle:
	            np.random.RandomState(42).shuffle(self.indexes)

	# Fetch the Keras model from the Hugging Face Hub once, at import time.
	model = from_pretrained_keras("keras-io/bert-semantic-similarity")
	# Class names used as keys of the prediction dict: predict() pairs labels[i]
	# with the i-th model output, so the order presumably has to match the
	# model's output classes - TODO confirm against the model card.
	labels = ["contradiction", "entailment", "neutral"]

	def predict(*sentences):
	    """Score the submitted sentences against ``expected_responses``.

	    Exactly six positional sentences are required; any other count yields
	    an error dict instead of a prediction. Each sentence is paired with the
	    expected response at the same position, but only the first pair is run
	    through the model (batch size 1, batch 0, first output row).

	    Returns:
	        A dict mapping every entry of ``labels`` to the model's float score
	        for the first pair, or ``{'error': ...}`` on a wrong sentence count.
	    """
	    if len(sentences) != 6:
	        return {'error': 'Se esperan 6 oraciones'}

	    pairs = [
	        [str(sentences[idx]), str(expected_responses[idx])]
	        for idx in range(6)
	    ]
	    sentence_pairs = np.array(pairs)

	    # Inference only: no targets, fixed order, one pair per batch.
	    generator = BertSemanticDataGenerator(
	        sentence_pairs,
	        labels=None,
	        batch_size=1,
	        shuffle=False,
	        include_targets=False,
	    )
	    first_batch = generator[0]
	    probs = model.predict(first_batch)[0]

	    labels_probs = {}
	    for position, name in enumerate(labels):
	        labels_probs[name] = float(probs[position])
	    return labels_probs

	# Reference answers, one per input box; predict() pairs the i-th submitted
	# sentence with the i-th entry of this list.
	expected_responses = [
	    "respuesta1a",
	    "respuesta2a",
	    "respuesta3a",
	    "respuesta4a",
	    "respuesta5a",
	    "respuesta6a",
	]

	# Sample rows offered in the Gradio UI. Gradio fills the input components
	# positionally from each row, so every row carries one sentence per
	# textbox (six in total). The expected responses are not inputs and
	# therefore do not belong in these rows.
	examples = [
	    [
	        "Two women are observing something together.",
	        "A smiling costumed woman is holding an umbrella",
	        "A soccer game with multiple males playing",
	        "Some men are playing a sport",
	        "Another example sentence",
	        "One more example for the sixth input",
	    ],
	]

	# Gradio interface: six sentence inputs, one label-distribution output.
	# The output uses the top-level gr.Label component (matching gr.Textbox on
	# the input side) instead of the legacy gr.outputs namespace, and queuing
	# is enabled through .queue() rather than the legacy launch(enable_queue=...)
	# keyword.
	gr.Interface(
	    fn=predict,
	    title="Semantic Similarity with BERT",
	    description="Natural Language Inference by fine-tuning BERT model on SNLI Corpus 📰",
	    inputs=[gr.Textbox(label=f"Input {i+1}") for i in range(6)],
	    examples=examples,
	    outputs=gr.Label(num_top_classes=3, label='Semantic similarity'),
	    cache_examples=False,
	    article="Author: <a href=\"https://huggingface.co/vumichien\">Vu Minh Chien</a>. Based on the keras example from <a href=\"https://keras.io/examples/nlp/semantic_similarity_with_bert/\">Mohamad Merchant</a>",
	).queue().launch(debug=True)