Upload TFBilma
Files changed:
- config.json             +1 -0
- configuration_bilma.py  +6 -1
- modeling_bilma.py       +16 -6
- tf_model.h5             +1 -1
config.json CHANGED
@@ -9,6 +9,7 @@
   },
   "hidden_dropout_prob": 0.1,
   "hidden_size": 512,
+  "include_head": null,
   "include_top": true,
   "model_type": "bilma",
   "num_attention_heads": 4,
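A quick sketch of what the new key corresponds to on the Python side (hypothetical local usage; it assumes configuration_bilma.py from this repo is on the import path):

    from configuration_bilma import BilmaConfig

    # Default configuration: keep the LM top, add no extra head.
    cfg = BilmaConfig()
    assert cfg.include_top is True and cfg.include_head is None

    # to_dict()/save_pretrained() serialise the new attribute, which is
    # where the `"include_head": null` entry in config.json comes from.
    print(cfg.to_dict().get("include_head"))   # None -> written as null in JSON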
configuration_bilma.py CHANGED
@@ -6,7 +6,8 @@ class BilmaConfig(PretrainedConfig):
     def __init__(
         self,
         weights="MX",
-        include_top=True,
+        include_top = True,
+        include_head = None,
         num_attention_heads: int = 4,
         num_hidden_layers: int = 2,
         seq_max_length: int = 280,
@@ -18,9 +19,12 @@ class BilmaConfig(PretrainedConfig):
         countries = ["MX"]
         if weights not in countries:
             raise ValueError(f"`weights` must be one of {countries}, got {weights}.")
+        if include_head is not None and include_top == True:
+            raise ValueError(f"To include a head, 'include_top' must be False")
         if weights is not None:
             self.weights = weights
             self.include_top = include_top
+            self.include_head = include_head
             self.num_attention_heads = 4
             self.num_hidden_layers = 2
             self.seq_max_length = 280
@@ -32,6 +36,7 @@ class BilmaConfig(PretrainedConfig):
 
         self.weights = weights
         self.include_top = include_top
+        self.include_head = include_head
         self.num_attention_heads = num_attention_heads
         self.num_hidden_layers = num_hidden_layers
         self.seq_max_length = seq_max_length
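Usage sketch for the new constructor arguments (the values are hypothetical; judging from how modeling_bilma.py consumes it, include_head is a list of Dense-layer widths whose last entry is the size of the final projection):

    from configuration_bilma import BilmaConfig

    # Raw encoder output: no LM top and no extra head.
    cfg_encoder = BilmaConfig(include_top=False)

    # Encoder plus a small task head: Dense(64, relu) -> Dense(32, relu) -> Dense(3).
    cfg_classifier = BilmaConfig(include_top=False, include_head=[64, 32, 3])

    # Asking for the extra head while keeping the LM top is rejected.
    try:
        BilmaConfig(include_top=True, include_head=[3])
    except ValueError as err:
        print(err)   # To include a head, 'include_top' must be False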
modeling_bilma.py CHANGED
@@ -9,7 +9,7 @@ from typing import Dict
 import re
 import unicodedata
 
-from
+from configuration_bilma import BilmaConfig
 
 # copied from preprocessing.py
 BLANK = ' '
@@ -47,7 +47,8 @@ class TFBilma(TFPreTrainedModel):
                                 ff_dim=config.hidden_size,
                                 vocab_size=config.vocab_size,
                                 rate=config.hidden_dropout_prob,
-                                include_top = config.include_top
+                                include_top = config.include_top,
+                                include_head = config.include_head)
 
     @property
     def dummy_inputs(self) -> Dict[str, tf.Tensor]:
@@ -74,7 +75,10 @@ class TFBilma(TFPreTrainedModel):
         if self.include_top:
             output = {"logits":self.model(ins)}
         else:
-
+            if self.include_head is None:
+                output = {"last_hidden_state":self.model(ins)}
+            else:
+                output = {"logits":self.model(ins)}
         return output
 
 # copied from bilma_model.py
@@ -105,7 +109,7 @@ def accuracy_function(ignore_id=0):
         return tf.math.divide_no_nan(tf.reduce_sum(accuracies), tf.reduce_sum(mask))
     return acc_mlm
 
-def bilma(num_enc=6, embed_dim=300, max_length=50, num_heads=6, ff_dim=512, vocab_size=9739, rate=0.1, include_top=True):
+def bilma(num_enc=6, embed_dim=300, max_length=50, num_heads=6, ff_dim=512, vocab_size=9739, rate=0.1, include_top=True, include_head=None):
     capt_inputs_ids = Input(shape=(max_length, ), name='input_ids')
     capt_embedding = Embedding(vocab_size, embed_dim, mask_zero=False, name="bilma/embedding")
     capt_inputs = capt_embedding(capt_inputs_ids)
@@ -115,9 +119,15 @@ def bilma(num_enc=6, embed_dim=300, max_length=50, num_heads=6, ff_dim=512, voca
     if include_top:
         fin_output = Dense(vocab_size, use_bias=True, name="bilma/dense_final")(enc_output)
     else:
-
+        if include_head is None:
+            fin_output = enc_output
+        else:
+            x = enc_output
+            for i, m in enumerate(include_head[:-1]):
+                x = Dense(m, use_bias=True, activation="relu", name=f"bilma/dense_ex_{i}")(x)
+            fin_output = [Dense(include_head[-1], use_bias=True, name=f"bilma/dense_ex_final")(x), enc_output]
 
-    caption_model = Model(inputs=capt_inputs_ids, outputs=
+    caption_model = Model(inputs=capt_inputs_ids, outputs=fin_output, name="bilma_model")
     return caption_model
 
 def load(model_file):
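A hedged end-to-end sketch of how the options above change what TFBilma and the underlying bilma() builder return (it assumes the rest of modeling_bilma.py builds the encoder with these config values; pretrained weights would still need to be loaded separately, e.g. via from_pretrained):

    from configuration_bilma import BilmaConfig
    from modeling_bilma import TFBilma, bilma

    # include_top=True (default): call() returns {"logits": ...} from the vocabulary head.
    lm_model = TFBilma(BilmaConfig())

    # include_top=False, include_head=None: call() returns {"last_hidden_state": ...}.
    encoder_model = TFBilma(BilmaConfig(include_top=False))

    # include_top=False, include_head=[64, 3]: the Keras graph gains a
    # Dense(64, relu) -> Dense(3) head, and call() returns {"logits": ...}
    # where the value is [head_output, enc_output].
    classifier_model = TFBilma(BilmaConfig(include_top=False, include_head=[64, 3]))

    # The same head wiring is available directly on the raw Keras builder:
    keras_classifier = bilma(include_top=False, include_head=[64, 3])
    keras_classifier.summary()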
tf_model.h5 CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:f83fdad7da418dac337cc4df40cb630f3145ff66b48188148e899214539e2db5
 size 156875820