Build error
PeteBleackley committed · Commit f16a715 · Parent(s): eef3bd6
More work on models
qarac/models/QaracDecoderModel.py
CHANGED
@@ -7,31 +7,111 @@ Created on Tue Sep 5 10:29:03 2023
 """
 
 import keras
+import tensorflow
 import transformers
 
 class QaracDecoderHead(keras.layers.Layer):
     
     def __init__(self,config):
+        """
+        Creates the Decoder head
+
+        Parameters
+        ----------
+        config : transformers.RobertaConfig
+            Config for the RobertaModel that this head will be attached to.
+
+        Returns
+        -------
+        None.
+
+        """
         super(QaracDecoderHead,self).__init__()
         self.concat = keras.layers.Concatenate(axis=1)
         self.layer_0 = transformers.TFRobertaLayer(config)
         self.layer_1 = transformers.TFRobertalayer(config)
         self.head = transformers.TFRobertaLMHead(config)
         
+    def build(self,input_shape):
+        """
+        Parameters
+        ----------
+        input_shape : tuple
+            Input shape.
+
+        Returns
+        -------
+        None.
+
+        """
+        self.built = True
+
     def call(self,inputs):
+        """
+        Predicts text from vector and hidden states of base model
+
+        Parameters
+        ----------
+        inputs : tuple of tensorflow.Tensors
+            Vector to be decoded and last hidden states of base model
+
+        Returns
+        -------
+        transformers.modeling_tf_outputs.TFCausalLMOutputWithCrossAttentions
+            Predicted text
+
+        """
         vectors = self.concat(inputs)
         l0 = self.layer_0(vectors)
         return self.head(self.layer1(l0.last_hidden_state[:,1:]))
     
-class QaracDecoderModel(transformers.TFPretrainedModel):
+class QaracDecoderModel(transformers.TFPretrainedModel,transformers.TFGenerationMixin):
     
-    def __init__(self,base_model):
+    def __init__(self,base_model,tokenizer):
+        """
+        Creates decoder model from base model
+
+        Parameters
+        ----------
+        base_model : transformers.TFRobertaModel
+            The base model
+
+        Returns
+        -------
+        None.
+
+        """
         super(QaracDecoderModel,self).__init__()
         self.base_model = base_model
         self.decoder_head = QaracDecoderHead(self.base_model.config)
+        self.tokenizer = tokenizer
+        self.start=None
+        self.end=None
+        self.pad=None
         
-    def call(self,inputs):
-        
-        
+    def call(self,inputs,**kwargs):
+        """
+        Predicts text from inputs
+
+        Parameters
+        ----------
+        inputs : tuple of tensorflow.Tensors OR tensorflow.Tensor
+            Vector to be converted to text and seed text OR tokenized seed text
+        kwargs : optional keyword arguments
+            vector : tensorflow.Tensor vector to be decoded. May be supplied
+                     via a keyword argument when this is invoked by .generate
+
+        Returns
+        -------
+        transformers.modeling_tf_outputs.TFCausalLMOutputWithCrossAttentions
+            Predicted text
+
+        """
+        (v,s) = (kwargs['vector'],inputs) if 'vector' in kwargs else inputs
+        return self.decoder_head((v,self.base_model(s).last_hidden_state))
+        
 
 
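Note: the decoder head prepends the encoder's vector to the base model's hidden states with Concatenate(axis=1) and then drops position 0 after the first added layer (l0.last_hidden_state[:,1:]). A minimal shape sketch of that step with random tensors, assuming the vector arrives with an explicit length-1 sequence axis (the batch size, sequence length and hidden width here are illustrative, not from the diff):

import tensorflow as tf
import keras

vector = tf.random.normal((2, 1, 8))         # encoder vector as a length-1 "sequence"
hidden_states = tf.random.normal((2, 5, 8))  # stand-in for base_model(...).last_hidden_state

combined = keras.layers.Concatenate(axis=1)([vector, hidden_states])  # shape (2, 6, 8)
decoded_positions = combined[:, 1:]          # shape (2, 5, 8), mirrors the [:,1:] slice
print(combined.shape, decoded_positions.shape)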
qarac/models/QaracEncoderModel.py
CHANGED
@@ -7,16 +7,59 @@ Created on Tue Sep 5 10:01:39 2023
 """
 
 import transformers
-import qarac.layers.GlobalAttentionPoolingHead
+import qarac.models.layers.GlobalAttentionPoolingHead
 
 class QaracEncoderModel(transformers.TFPretrainedModel):
     
     def __init__(self,base_model):
+        """
+        Creates the encoder model
+
+        Parameters
+        ----------
+        base_model : transformers.TFRobertaModel
+            The base model
+
+        Returns
+        -------
+        None.
+
+        """
         super(QaracEncoderModel,self).__init__()
         self.base_model = base_model
-        self.head = qarac.layers.GlobalAttentionPoolingHead.GlobalAttentionPoolingHead()
+        self.head = qarac.models.layers.GlobalAttentionPoolingHead.GlobalAttentionPoolingHead()
+
+    def build(self,input_shape):
+        """
+        Parameters
+        ----------
+        input_shape : tuple
+            Shape of input data.
+
+        Returns
+        -------
+        None.
+
+        """
+        self.built=True
         
     def call(self,inputs):
+        """
+        Vectorizes a tokenised text
+
+        Parameters
+        ----------
+        inputs : tensorflow.Tensor
+            Tokenized text to encode
+
+        Returns
+        -------
+        tensorflow.Tensor
+            Vector representing the document
+
+        """
         return self.head(self.base_model(inputs).last_hidden_state)
 
 
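The encoder wraps a TFRobertaModel and pools its last hidden state into one vector per document. An illustrative call pattern only (the checkpoint name is an assumption, and since the Space currently shows a build error this is the intended usage rather than something verified against this commit):

import transformers
from qarac.models.QaracEncoderModel import QaracEncoderModel

base = transformers.TFRobertaModel.from_pretrained("roberta-base")        # assumed checkpoint
tokenizer = transformers.RobertaTokenizer.from_pretrained("roberta-base")  # assumed tokenizer

encoder = QaracEncoderModel(base)
tokens = tokenizer("An example document.", return_tensors="tf")
vector = encoder(tokens["input_ids"])  # one pooled vector per document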
qarac/models/QaracTrainerModel.py
CHANGED
@@ -12,14 +12,56 @@ import QaracDecoderModel
 
 class QuaracTrainerModel(keras.Model):
     
-    def __init__(self,base_encoder_model,base_decoder_model):
-        
+    def __init__(self,base_encoder_model,base_decoder_model,tokenizer):
+        """
+        Sets up the Trainer model
+
+        Parameters
+        ----------
+        base_encoder_model : transformers.TFRobertaModel
+            Base model for encoders.
+        base_decoder_model : transformers.TFRobertaModel
+            Base model for decoder
+        tokenizer : transformers.RobertaTokenizer
+            Tokenizer for decoder
+        Returns
+        -------
+        None.
+
+        """
         self.question_encoder = QaracEncoderModel.QaracEncoderModel(base_encoder_model)
         self.answer_encoder = QaracEncoderModel.QaracEncoderModel(base_encoder_model)
-        self.decoder = QaracDecoderModel.QaracDecoderModel(base_decoder_model)
+        self.decoder = QaracDecoderModel.QaracDecoderModel(base_decoder_model,tokenizer)
         self.consistency = keras.layers.Dot(axes=1,normalize=True)
         
     def call(self,inputs,training=None):
+        """
+        Generates training objective outputs from training data
+
+        Parameters
+        ----------
+        inputs : dict[str,tensorflow.Tensor]
+            Fields are
+            'all_text': Tokenized text to train answer encoder to produce vectors
+                        and decoder to convert them back to text
+            'offset_text': Same text as in 'all_text', but preceded by <s>
+            'question': Tokenized text of questions for question answering
+                        objective
+            'answer': Tokenized text of answers for question answering objective
+            'proposition0': tokenized proposition for reasoning objective
+            'proposition1': tokenized proposition for reasoning objective
+            'conclusion_offset': tokenized text of conclusions for reasoning
+                                 objective, prefixed by '<s>'
+            'statement0': tokenized statement for consistency objective
+        training : Bool, optional
+            Not used. The default is None.
+
+        Returns
+        -------
+        results : TYPE
+            DESCRIPTION.
+
+        """
         results = {}
         results['encode_decode'] = self.decoder((self.answer_encoder(inputs['all_text']),
                                                  inputs['offset_text']))
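For the consistency objective, keras.layers.Dot(axes=1, normalize=True) L2-normalises both inputs before taking the dot product, i.e. it returns the cosine similarity of the two encoded statements. A small self-contained check with random stand-ins (shapes are illustrative):

import tensorflow as tf
import keras

a = tf.random.normal((4, 8))   # stand-in for encoded statement0
b = tf.random.normal((4, 8))   # stand-in for encoded statement1
cosine = keras.layers.Dot(axes=1, normalize=True)([a, b])
print(cosine.shape)            # (4, 1), one similarity per pair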
qarac/models/layers/GlobalAttentionPoolingHead.py
CHANGED
@@ -12,37 +12,66 @@ import tensorflow
 class GlobalAttentionPoolingHead(keras.layers.Layer):
     
     def __init__(self):
+        """
+        Creates the layer
+
+        Returns
+        -------
+        None.
+
+        """
         super(GlobalAttentionPoolingHead,self).__init__()
         self.global_projection = None
         self.local_projection = None
         
         
     def build(self,input_shape):
+        """
+        Initialises layer weights
+
+        Parameters
+        ----------
+        input_shape : tuple
+            Shape of the input layer
+
+        Returns
+        -------
+        None.
+
+        """
         width = input_shape[-1]
         self.global_projection = self.add_weight('global projection',shape=(width,width))
         self.local_projection = self.add_weight('local projection',shape=(width,width))
-        self.
+        self.built=True
         
-    @tensorflow.function
-    def project(self,X):
-        return tensorflow.tensordot(X,self.local_projection,axes=1)
         
-    def attention_function(self,gp):
-        @tensorflow.function
-        def inner(lp):
-            return tensorflow.tensordot(lp,gp,axes=1)
-        return inner
         
     def call(self,X,training=None):
+        """
+        Parameters
+        ----------
+        X : tensorflow.Tensor
+            Base model vectors to apply pooling to.
+        training : bool, optional
+            Not used. The default is None.
+
+        Returns
+        -------
+        tensorflow.Tensor
+            The pooled value.
+
+        """
         gp = tensorflow.linalg.l2_normalize(tensorflow.tensordot([tensorflow.reduce_sum(X,
                                                                                         axis=1),
                                                                   self.global_projection],
                                                                  axes=1),
                                             axis=1)
-        lp = tensorflow.linalg.l2_normalize(tensorflow.
-                                            
+        lp = tensorflow.linalg.l2_normalize(tensorflow.tensordot(X,
+                                                                  self.local_projection,
+                                                                  axes=1),
                                             axis=2)
-        attention = tensorflow.
-                                lp)
+        attention = tensorflow.tensordot(lp,gp,axes=1)
         return tensorflow.reduce_sum(attention *X,
                                      axis=1)
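The completed call method projects the summed token vectors (global) and each token vector (local), normalises both, and weights every token by the agreement between the two before summing. A reading of that computation with plain TensorFlow ops and random stand-ins for the learned projections (the einsum is my interpretation of the intended per-token attention weight, not a line from the commit):

import tensorflow as tf

batch, seq_len, width = 2, 5, 8
X = tf.random.normal((batch, seq_len, width))
Wg = tf.random.normal((width, width))   # stand-in for 'global projection'
Wl = tf.random.normal((width, width))   # stand-in for 'local projection'

# Global summary vector: project the summed tokens, then L2-normalise.
gp = tf.linalg.l2_normalize(tf.tensordot(tf.reduce_sum(X, axis=1), Wg, axes=1), axis=1)
# Local views: project every token vector, then L2-normalise.
lp = tf.linalg.l2_normalize(tf.tensordot(X, Wl, axes=1), axis=2)
# One attention weight per token: dot of each local view against the global summary.
attention = tf.einsum('bsw,bw->bs', lp, gp)
# Attention-weighted sum over the sequence axis gives one vector per document.
pooled = tf.reduce_sum(attention[..., tf.newaxis] * X, axis=1)
print(pooled.shape)  # (2, 8)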
scripts.py
CHANGED
@@ -9,7 +9,10 @@ import qarac.models.qarac_base_model
 import keras
 import tensorflow
 
-
+def decoder_loss(y_true,y_pred):
+    return keras.losses.sparse_categorical_crossentropy(y_true,
+                                                        y_pred.logits,
+                                                        logits=True)
 
 
 
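Note on decoder_loss: keras.losses.sparse_categorical_crossentropy takes a from_logits flag rather than logits, so the keyword as committed would raise a TypeError. A hedged sketch of what the function presumably intends (y_pred.logits follows the committed code and assumes a transformers output object):

import keras

def decoder_loss(y_true, y_pred):
    # y_pred is assumed to carry raw logits on a .logits attribute
    return keras.losses.sparse_categorical_crossentropy(y_true,
                                                        y_pred.logits,
                                                        from_logits=True)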