Shea vumichien committed on
Commit
da1b0ae
0 Parent(s):

Duplicate from keras-io/bert-semantic-similarity

Browse files

Co-authored-by: vumichien <vumichien@users.noreply.huggingface.co>

Files changed (4) hide show
  1. .gitattributes +27 -0
  2. README.md +14 -0
  3. app.py +100 -0
  4. requirements.txt +6 -0
.gitattributes ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ftz filter=lfs diff=lfs merge=lfs -text
6
+ *.gz filter=lfs diff=lfs merge=lfs -text
7
+ *.h5 filter=lfs diff=lfs merge=lfs -text
8
+ *.joblib filter=lfs diff=lfs merge=lfs -text
9
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
10
+ *.model filter=lfs diff=lfs merge=lfs -text
11
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
12
+ *.onnx filter=lfs diff=lfs merge=lfs -text
13
+ *.ot filter=lfs diff=lfs merge=lfs -text
14
+ *.parquet filter=lfs diff=lfs merge=lfs -text
15
+ *.pb filter=lfs diff=lfs merge=lfs -text
16
+ *.pt filter=lfs diff=lfs merge=lfs -text
17
+ *.pth filter=lfs diff=lfs merge=lfs -text
18
+ *.rar filter=lfs diff=lfs merge=lfs -text
19
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
20
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
21
+ *.tflite filter=lfs diff=lfs merge=lfs -text
22
+ *.tgz filter=lfs diff=lfs merge=lfs -text
23
+ *.wasm filter=lfs diff=lfs merge=lfs -text
24
+ *.xz filter=lfs diff=lfs merge=lfs -text
25
+ *.zip filter=lfs diff=lfs merge=lfs -text
26
+ *.zstandard filter=lfs diff=lfs merge=lfs -text
27
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Semantic Similarity with BERT
3
+ emoji: 🌇🌆
4
+ colorFrom: red
5
+ colorTo: blue
6
+ sdk: gradio
7
+ sdk_version: 3.0.24
8
+ app_file: app.py
9
+ pinned: false
10
+ license: apache-2.0
11
+ duplicated_from: keras-io/bert-semantic-similarity
12
+ ---
13
+
14
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from huggingface_hub import from_pretrained_keras
2
+ import numpy as np
3
+ import gradio as gr
4
+ import transformers
5
+ import tensorflow as tf
6
+
7
class BertSemanticDataGenerator(tf.keras.utils.Sequence):
    """Generates batches of BERT-encoded sentence-pair data.

    Wraps an array of (premise, hypothesis) string pairs (and optional
    integer labels) and yields batches of
    ``[input_ids, attention_masks, token_type_ids]`` numpy arrays ready
    to feed the fine-tuned BERT model.
    """

    def __init__(
        self,
        sentence_pairs,
        labels,
        batch_size=32,
        shuffle=True,
        include_targets=True,
    ):
        """
        Args:
            sentence_pairs: np.ndarray of shape ``(num_pairs, 2)`` holding
                sentence-pair strings.
            labels: array of integer class labels, or ``None`` at inference.
            batch_size: number of pairs per generated batch.
            shuffle: reshuffle the pair order at the end of every epoch.
            include_targets: if True, ``__getitem__`` also returns labels
                (training/validation); if False, inputs only (inference).
        """
        self.sentence_pairs = sentence_pairs
        self.labels = labels
        self.shuffle = shuffle
        self.batch_size = batch_size
        self.include_targets = include_targets
        # Load the BERT tokenizer to encode the text.
        # We use the bert-base-uncased pretrained model.
        # (Fixed comment typo: original said "base-base-uncased".)
        self.tokenizer = transformers.BertTokenizer.from_pretrained(
            "bert-base-uncased", do_lower_case=True
        )
        self.indexes = np.arange(len(self.sentence_pairs))
        self.on_epoch_end()

    def __len__(self):
        # Number of full batches per epoch (remainder pairs are dropped).
        return len(self.sentence_pairs) // self.batch_size

    def __getitem__(self, idx):
        # Retrieve the batch of pairs at position `idx`.
        indexes = self.indexes[idx * self.batch_size : (idx + 1) * self.batch_size]
        sentence_pairs = self.sentence_pairs[indexes]

        # With the tokenizer's batch_encode_plus, both sentences of each
        # pair are encoded together, separated by the [SEP] token.
        encoded = self.tokenizer.batch_encode_plus(
            sentence_pairs.tolist(),
            add_special_tokens=True,
            max_length=128,
            return_attention_mask=True,
            return_token_type_ids=True,
            # `pad_to_max_length=True` is deprecated in transformers;
            # padding="max_length" + truncation=True reproduces the old
            # pad-and-truncate-to-128 behavior explicitly.
            padding="max_length",
            truncation=True,
            return_tensors="tf",
        )

        # Convert batch of encoded features to numpy arrays.
        input_ids = np.array(encoded["input_ids"], dtype="int32")
        attention_masks = np.array(encoded["attention_mask"], dtype="int32")
        token_type_ids = np.array(encoded["token_type_ids"], dtype="int32")

        # Targets are returned only when the generator feeds training/validation.
        if self.include_targets:
            labels = np.array(self.labels[indexes], dtype="int32")
            return [input_ids, attention_masks, token_type_ids], labels
        else:
            return [input_ids, attention_masks, token_type_ids]

    def on_epoch_end(self):
        # Bug fix: `shuffle` was stored but never acted on — the base-class
        # on_epoch_end is a no-op, so epoch order was never reshuffled.
        # Seeded shuffle matches the upstream Keras example this app is
        # based on; inference (shuffle=False) is unaffected.
        if self.shuffle:
            np.random.RandomState(42).shuffle(self.indexes)
62
+
63
# Load the fine-tuned Keras BERT classifier from the Hugging Face Hub.
model = from_pretrained_keras("keras-io/bert-semantic-similarity")
# Class names, index-aligned with the model's output probabilities
# (predict() maps probs[i] -> labels[i]).
labels = ["contradiction", "entailment", "neutral"]
65
+
66
def predict(sentence1, sentence2):
    """Classify the semantic relation between two sentences.

    Returns a dict mapping each class name ("contradiction",
    "entailment", "neutral") to the model's predicted probability.
    """
    pair = np.array([[str(sentence1), str(sentence2)]])
    generator = BertSemanticDataGenerator(
        pair, labels=None, batch_size=1, shuffle=False, include_targets=False,
    )
    # Single pair -> single batch; take the first (only) probability row.
    probs = model.predict(generator[0])[0]
    return {name: float(p) for name, p in zip(labels, probs)}
75
+
76
# NOTE(review): removed a block of commented-out argmax/percentage formatting
# code and an unused `inputs` variable that configured a gr.Audio upload
# widget — leftover from a different app; the Interface below takes the two
# text inputs it actually declares.

# Example sentence pairs shown (and pre-cached) in the demo UI.
examples = [["Two women are observing something together.", "Two women are standing with their eyes closed."],
            ["A smiling costumed woman is holding an umbrella", "A happy woman in a fairy costume holds an umbrella"],
            ["A soccer game with multiple males playing", "Some men are playing a sport"],
            ]

# Build and launch the Gradio demo: two text boxes in, a 3-class
# probability label out.
gr.Interface(
    fn=predict,
    title="Semantic Similarity with BERT",
    description="Natural Language Inference by fine-tuning BERT model on SNLI Corpus 📰",
    inputs=["text", "text"],
    examples=examples,
    outputs=gr.outputs.Label(num_top_classes=3, label='Semantic similarity'),
    cache_examples=True,
    article="Author: <a href=\"https://huggingface.co/vumichien\">Vu Minh Chien</a>. Based on the keras example from <a href=\"https://keras.io/examples/nlp/semantic_similarity_with_bert/\">Mohamad Merchant</a>",
).launch(debug=True, enable_queue=True)
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ tensorflow
2
+ keras
3
+ gradio
4
+ tensorflow_addons
5
+ tensorflow-io
6
+ transformers