PeteBleackley committed · Commit 83d5adb · 1 Parent(s): 75ef467

Code for building and training base models.
Files changed:

- .gitignore +1 -0
- {src → qarac}/__init__.py +0 -0
- qarac/__pycache__/__init__.cpython-310.pyc +0 -0
- {src → qarac}/corpora/BNCorpus.py +50 -26
- qarac/corpora/Batcher.py +50 -0
- qarac/corpora/__init__.py +1 -0
- qarac/corpora/__pycache__/BNCorpus.cpython-310.pyc +0 -0
- qarac/corpora/__pycache__/Batcher.cpython-310.pyc +0 -0
- qarac/corpora/__pycache__/__init__.cpython-310.pyc +0 -0
- {src → qarac}/models/__init__.py +0 -0
- qarac/models/__pycache__/__init__.cpython-310.pyc +0 -0
- qarac/models/__pycache__/qarac_base_model.cpython-310.pyc +0 -0
- {src → qarac}/models/layers/HierarchicalLogits.py +3 -2
- {src → qarac}/models/layers/HierarchicalSoftMax.py +0 -0
- {src → qarac}/models/layers/HyenaLayer.py +45 -16
- qarac/models/layers/__init__.py +0 -0
- qarac/models/layers/__pycache__/HierarchicalLogits.cpython-310.pyc +0 -0
- qarac/models/layers/__pycache__/HyenaLayer.cpython-310.pyc +0 -0
- qarac/models/layers/__pycache__/__init__.cpython-310.pyc +0 -0
- qarac/models/qarac_base_model.py +23 -0
- scripts.py +50 -0
- src/models/quarac_base_model.py +0 -18
.gitignore ADDED

@@ -0,0 +1 @@
+*.json
{src → qarac}/__init__.py RENAMED
File without changes
qarac/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (128 Bytes)
{src → qarac}/corpora/BNCorpus.py RENAMED

@@ -5,6 +5,7 @@ Created on Thu Aug 24 10:38:48 2023
 
 @author: peter
 """
+import os
 import numpy
 import numpy.random
 import nltk.corpus
@@ -16,12 +17,20 @@ def detokenize(sentences):
 class BNCorpus(object):
 
     def __init__(self,fileids=None,tokenizer=None,task=None):
-        self.bnc = nltk.corpus.reader.bnc.BNCCorpusReader('…
+        self.bnc = nltk.corpus.reader.bnc.BNCCorpusReader('/'.join([os.environ['HOME'],
+                                                                    'BNC',
+                                                                    'Texts']),
+                                                          fileids=r'[A-K]/\w*/\w*\.xml')
         self.file_ids = self.bnc.fileids() if fileids is None else fileids
         self.n_docs = len(self.file_ids)
         self.rng = numpy.random.default_rng()
         self.tokenizer = tokenizer
         self.task = task
+        if self.tokenizer is not None:
+            self.mask = self.tokenizer.token_to_id('<mask>')
+            self.start = self.tokenizer.token_to_id('<start>')
+            self.end = self.tokenizer.token_to_id('<end>')
+            self.pad = numpy.array([self.tokenizer.token_to_id('<pad>')])
 
     def __len__(self):
         return self.n_docs
@@ -29,8 +38,8 @@
     def split(self,p=0.8):
         n = int(p*self.n_docs)
         self.rng.shuffle(self.file_ids)
-        train = BNCorpus(self.…
-        test = BNCorpus(self.…
+        train = BNCorpus(self.file_ids[:n],self.tokenizer,self.task)
+        test = BNCorpus(self.file_ids[n:],self.tokenizer,self.task)
         return (train,test)
 
     def __iter__(self):
@@ -40,32 +49,47 @@
         if self.task is None:
             yield detokenize(doc)
         elif self.task=='encode':
-            yield self.…
+            yield self.encoder_example(doc)
         else:
             yield self.decoder_example(doc)
 
     def encoder_example(self,doc):
-        …
-            masked_sentences.…
-        …
+        sentences = self.encode(doc)
+        masked_sentences = [sentence.copy()
+                            for sentence in sentences]
+        sample_weights = [numpy.zeros_like(sentence)
+                          for sentence in sentences]
+        masks = self.rng.integers([sentence.shape[0]
+                                   for sentence in sentences])
+        for (i,n) in enumerate(masks):
+            masked_sentences[i][n]=self.mask
+            sample_weights[i][n]=1
+        if sum((sentence.shape[0] for sentence in sentences))%2 ==1:
+            masked_sentences.append(self.pad)
+            sentences.append(self.pad)
+            sample_weights.append(numpy.zeros(1))
+        return (numpy.concatenate(masked_sentences),
+                numpy.concatenate(sentences),
                 numpy.concatenate(sample_weights))
+
+    def decoder_example(self,doc):
+        sentences = self.encode(doc)
+        before = [numpy.array([self.start])]+sentences
+        sentences.append(numpy.array([self.end]))
+        sample_weights = numpy.ones(sum([sentence.shape[0]
+                                         for sentence in sentences]))
+        sample_weights[:4]=0
+        return (numpy.concatenate(before),
+                numpy.concatenate(sentences),
+                sample_weights)
+
+    def encode(self,doc):
+        return [numpy.array(self.tokenizer.encode(''.join(sentence)).ids)
+                for sentence in doc
+                if len(sentence)>0]
 
-        …
-        x = ['START'] + doc
-        y = doc + ['END']
-        sample_weights = [numpy.zeros(len(sentence)) if i==0
-                          else numpy.ones(len(sentence))
-                          for (i,sentence) in enumerate(y)]
-        return (self.tokenizer.encode(detokenize(x)),
-                self.tokenizer.encode(detokenize(y)),
-                numpy.concatenate(sample_weights))
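The new encoder_example implements a masked-token objective: one randomly chosen token per sentence is replaced by the <mask> id, the unmasked ids are the targets, and the sample weights are 1 only at masked positions (with a <pad> appended when the total length is odd). A minimal self-contained sketch of the same masking scheme, using toy token ids rather than the BNC and a real tokenizer:

import numpy

rng = numpy.random.default_rng(0)
MASK = 99  # stand-in for the real tokenizer's <mask> id

# Toy "document": two tokenized sentences as arrays of token ids.
sentences = [numpy.array([5, 7, 2]), numpy.array([4, 9])]

masked = [s.copy() for s in sentences]
weights = [numpy.zeros_like(s) for s in sentences]

# One random position per sentence, as in BNCorpus.encoder_example.
positions = rng.integers([s.shape[0] for s in sentences])
for i, n in enumerate(positions):
    masked[i][n] = MASK
    weights[i][n] = 1

x = numpy.concatenate(masked)     # model input: ids with masks applied
y = numpy.concatenate(sentences)  # target: the original ids
w = numpy.concatenate(weights)    # loss weight: 1 at masked positions only
print(x, y, w)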
qarac/corpora/Batcher.py ADDED

@@ -0,0 +1,50 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Mon Aug 28 11:25:26 2023
+
+@author: peter
+"""
+import keras
+import tensorflow
+import tqdm
+
+class Batcher(keras.utils.Sequence):
+
+    def __init__(self,source,batch_size=32):
+        self.batches = None
+        self.source=source
+        self.batch_size=batch_size
+        self.on_epoch_end()
+
+    def __len__(self):
+        return len(self.batches)
+
+    def __getitem__(self, index):
+        return self.batches[index]
+
+    def on_epoch_end(self):
+        self.batches = []
+        n=0
+        X=[]
+        Y=[]
+        Z=[]
+        for (x,y,z) in tqdm.tqdm(self.source):
+            X.append(x)
+            Y.append(y)
+            Z.append(z)
+            n+=1
+            if n==self.batch_size:
+                self.batches.append((tensorflow.ragged.constant(X),
+                                     tensorflow.ragged.constant(Y),
+                                     tensorflow.ragged.constant(Z)))
+                n=0
+                X=[]
+                Y=[]
+                Z=[]
+        if n!=0:
+            self.batches.append((tensorflow.ragged.constant(X),
+                                 tensorflow.ragged.constant(Y),
+                                 tensorflow.ragged.constant(Z)))
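Batcher eagerly groups (input, target, sample_weight) triples from any iterable into ragged-tensor batches, rebuilding them on each epoch via on_epoch_end. A usage sketch with a dummy source (the data here is made up purely for illustration):

import qarac.corpora.Batcher

# Hypothetical source: any iterable of (input, target, weight) triples.
dummy_source = [([1, 2, 3], [2, 3, 4], [1.0, 1.0, 1.0])
                for _ in range(100)]

batches = qarac.corpora.Batcher.Batcher(dummy_source, batch_size=32)
print(len(batches))   # 4: three full batches of 32, plus a remainder of 4
x, y, w = batches[0]  # each is a tf.RaggedTensor holding 32 examples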
qarac/corpora/__init__.py ADDED

@@ -0,0 +1 @@
+

qarac/corpora/__pycache__/BNCorpus.cpython-310.pyc ADDED
Binary file (3.91 kB)

qarac/corpora/__pycache__/Batcher.cpython-310.pyc ADDED
Binary file (1.37 kB)

qarac/corpora/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (136 Bytes)
{src → qarac}/models/__init__.py RENAMED
File without changes

qarac/models/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (135 Bytes)

qarac/models/__pycache__/qarac_base_model.cpython-310.pyc ADDED
Binary file (899 Bytes)
{src → qarac}/models/layers/HierarchicalLogits.py RENAMED

@@ -11,7 +11,8 @@ import tensorflow
 
 class LeafNode(keras.layers.Layer):
     def __init__(self):
-        self.…
+        super(LeafNode,self).__init__()
+        self.bias = self.add_weight(
                 initializer='random_normal',
                 trainable=True)
 
@@ -55,7 +56,7 @@ class HierarchicalLogits(keras.layers.Layer):
     def call(self,X,training=None):
 
         y=tensorflow.tensordot(X,self.normal,1)
-        result = self.concat([self.left(X)+y,self.right(X)…
+        result = self.concat([self.left(X)+y,self.right(X)-y])
         return result
 
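The two fixes here are the standard ones for a custom Keras layer: call the base-class constructor before creating any weights, and close the bracket on the concatenated left/right logits. A minimal sketch of the constructor pattern with a generic layer (not the commit's class):

import keras

class Scale(keras.layers.Layer):
    """Toy custom layer: multiplies its input by one learned scalar."""

    def __init__(self):
        # keras.layers.Layer.__init__ must run before add_weight,
        # otherwise the layer's weight tracking is not set up.
        super(Scale, self).__init__()
        self.factor = self.add_weight(shape=(),
                                      initializer='ones',
                                      trainable=True)

    def call(self, inputs):
        return inputs * self.factor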
{src → qarac}/models/layers/HierarchicalSoftMax.py RENAMED
File without changes
{src → qarac}/models/layers/HyenaLayer.py RENAMED

@@ -9,15 +9,23 @@ Created on Tue Aug 22 09:34:14 2023
 import keras
 import keras_nlp
 import tensorflow
+import warnings
 
 def convolve(x,y):
-    xT = tensorflow.transpose(x,[0,2,1])
-    yT = tensorflow.transpose(y,[0,2,1])
-    z = tensorflow.signal.irfft(tensorflow.signal.rfft(xT)*tensorflow.signal.rfft(yT))
-    return tensorflow.transpose(z,[0,2,1])
-
-
+    fx = tensorflow.vectorized_map(fft, x, warn=False)
+    fy = tensorflow.vectorized_map(fft, y, warn=False)
+    fz = fx*fy
+    return tensorflow.vectorized_map(ifft,fz,warn=False)
+
+@tensorflow.function
+def fft(x):
+    return tensorflow.signal.rfft(tensorflow.transpose(x))
+
+@tensorflow.function
+def ifft(x):
+    return tensorflow.transpose(tensorflow.signal.irfft(x))
 
 class HyenaLayer(keras.layers.Layer):
     """Keras implementation of Hyena layer. Unlike in the original paper,
@@ -40,29 +48,50 @@
         None.
 
         """
-        …
+        super(HyenaLayer,self).__init__()
         self.stages = stages
         self.causal = causal
         self.data_projection = None
         self.filters = None
-        …
+
+    def positional_encoding(self,X):
+        t = tensorflow.dtypes.saturate_cast(tensorflow.ragged.range(X.row_lengths()),
+                                            tensorflow.float32)
+        width = X.shape[-1]//2
+        f = 10000**tensorflow.expand_dims(-tensorflow.range(width,
+                                                            dtype=tensorflow.float32)/width,
+                                          axis=0)
+        phi = tensorflow.RaggedTensor.from_row_lengths(t.flat_values * f,
+                                                       X.row_lengths())
+        return tensorflow.concat([tensorflow.sin(phi),
+                                  tensorflow.cos(phi)],
+                                 axis=-1)
 
     def build(self,input_shape):
-        …
+        width = input_shape[-1]
+        self.data_projection = self.add_weight(shape=(width,width,self.stages+1),
+                                               trainable=True)
+        self.filters = self.add_weight(shape=(width,width,self.stages),
+                                       trainable=True)
 
     def call(self,X,training=None):
-        …
+        x_flat = tensorflow.tensordot(X.flat_values,
+                                      self.data_projection,
+                                      axes=1)
+        f_flat = tensorflow.tensordot(self.positional_encoding(X).flat_values,
+                                      self.filters,
+                                      axes=1)
+        x = tensorflow.RaggedTensor.from_row_lengths(x_flat,X.row_lengths())
+        f = tensorflow.RaggedTensor.from_row_lengths(f_flat,X.row_lengths())
         if self.causal:
             concat = keras.layers.Concatenate()
             x = concat(x,tensorflow.zeros_like(x))
             f = concat(f,tensorflow.zeros_like(f))
-        y = x[0]
+        y = x[:,:,:,0]
         for i in range(self.stages):
-            y = convolve(y,f[i])*x[i+1]
+            y = convolve(y,f[:,:,:,i])*x[:,:,:,i+1]
         if self.causal:
             for (i,n) in enumerate(X.row_lengths()):
                 y[i] = y[i,:n]
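The rewritten convolve still relies on the convolution theorem: multiplying rFFTs and inverting gives a circular convolution, and the zero-padding in the causal branch (concatenating zeros_like) is what keeps the wrap-around from leaking future tokens into the past. A quick numerical check of that identity, independent of the ragged-tensor plumbing above:

import numpy
import tensorflow

# irfft(rfft(x) * rfft(y)) is the circular convolution of x and y;
# compare against a direct O(n^2) evaluation on random data.
n = 8
rng = numpy.random.default_rng(1)
x = rng.standard_normal(n).astype('float32')
y = rng.standard_normal(n).astype('float32')

fft_conv = tensorflow.signal.irfft(tensorflow.signal.rfft(x)
                                   * tensorflow.signal.rfft(y)).numpy()
direct = numpy.array([sum(x[j] * y[(i - j) % n] for j in range(n))
                      for i in range(n)])
print(numpy.allclose(fft_conv, direct, atol=1e-4))  # True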
qarac/models/layers/__init__.py ADDED
File without changes

qarac/models/layers/__pycache__/HierarchicalLogits.cpython-310.pyc ADDED
Binary file (2.43 kB)

qarac/models/layers/__pycache__/HyenaLayer.cpython-310.pyc ADDED
Binary file (3.19 kB)

qarac/models/layers/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (142 Bytes)
qarac/models/qarac_base_model.py ADDED

@@ -0,0 +1,23 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Wed Aug 23 09:50:14 2023
+
+@author: peter
+"""
+
+import keras
+import qarac.models.layers.HierarchicalLogits
+import qarac.models.layers.HyenaLayer
+
+def qarac_base_model(vocab_size,width,depth,decoder=True):
+    print('Building','decoder' if decoder else 'encoder','model with vocab size',
+          vocab_size,',',depth,'layers and vector width',width)
+    stack = [keras.layers.Input(shape=(None,),ragged=True),
+             keras.layers.Embedding(vocab_size,width,name='Embedding')]
+    for _ in range(depth):
+        stack.append(qarac.models.layers.HyenaLayer.HyenaLayer(causal=decoder))
+    #stack.append(keras.layers.TimeDistributed(qarac.models.layers.HierarchicalLogits.HierarchicalLogits(vocab_size)))
+    #stack.append(keras.layers.TimeDistributed(keras.layers.Softmax()))
+    stack.append(keras.layers.TimeDistributed(keras.layers.Dense(vocab_size,activation='softmax')))
+    return keras.models.Sequential(stack)
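With the HierarchicalLogits head commented out, the output head is a plain per-token softmax over the vocabulary. A toy sketch of that head in isolation (small made-up sizes, not the 768-wide, 12-layer configuration trained in scripts.py):

import numpy
import keras

VOCAB, WIDTH = 100, 16  # toy sizes for illustration only

# Same output head as qarac_base_model: one softmax over the
# vocabulary at every position of the sequence.
model = keras.models.Sequential([
    keras.layers.Embedding(VOCAB, WIDTH),
    keras.layers.TimeDistributed(keras.layers.Dense(VOCAB,
                                                    activation='softmax')),
])
tokens = numpy.array([[5, 17, 3, 99]])  # batch of one 4-token sequence
probs = model.predict(tokens)           # shape (1, 4, 100)
print(probs.shape, probs.sum(axis=-1))  # each position sums to 1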
scripts.py ADDED

@@ -0,0 +1,50 @@
+
+import os
+import argparse
+import pickle
+import tokenizers
+import qarac.corpora.BNCorpus
+import qarac.corpora.Batcher
+import qarac.models.qarac_base_model
+import keras
+
+
+def train_base_model(task,filename):
+    tokenizer = tokenizers.Tokenizer.from_pretrained('xlm-roberta-base')
+    tokenizer.add_special_tokens(['<start>','<end>','<pad>'])
+    tokenizer.save('/'.join([os.environ['HOME'],
+                             'QARAC',
+                             'models',
+                             'tokenizer.json']))
+    bnc = qarac.corpora.BNCorpus.BNCorpus(tokenizer=tokenizer,
+                                          task=task)
+    (train,test)=bnc.split(0.01)
+    train_data=qarac.corpora.Batcher.Batcher(train)
+    model = qarac.models.qarac_base_model.qarac_base_model(tokenizer.get_vocab_size(),
+                                                           768,
+                                                           12,
+                                                           task=='decode')
+    optimizer = keras.optimizers.Nadam(learning_rate=keras.optimizers.schedules.ExponentialDecay(1.0e-5, 100, 0.99))
+    model.compile(optimizer=optimizer,loss='sparse_categorical_crossentropy',metrics='accuracy')
+    model.fit(train_data,
+              epochs=100,
+              workers = 16)
+    test_data=qarac.corpora.Batcher.Batcher(test)
+    print(model.evaluate(test_data))
+    model.save(filename)
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(prog='QARAC',
+                                     description='Experimental NLP system, aimed at improving factual accuracy')
+    parser.add_argument('task')
+    parser.add_argument('-f','--filename')
+    parser.add_argument('-t','--training-task')
+    args = parser.parse_args()
+    if args.task == 'train_base_model':
+        train_base_model(args.training_task,args.filename)
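Given the argparse setup, the entry point would presumably be invoked as something like: python scripts.py train_base_model -t encode -f encoder.model (or -t decode for the causal decoder), with -f naming wherever the trained model should be saved; the example filename here is hypothetical, not part of the commit.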
src/models/quarac_base_model.py DELETED

@@ -1,18 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Created on Wed Aug 23 09:50:14 2023
-
-@author: peter
-"""
-
-import keras
-import layers
-
-def quarac_base_model(vocab_size,width,depth,decoder=True):
-    stack = [keras.layers.Embedding(vocab_size,width)]
-    for _ in range(depth):
-        stack.append(layers.HyenaLayer(causal=decoder))
-    stack.append(keras.layers.Timedistributed(layers.HierarchicalLogits()))
-    stack.append(keras.layers.Timedistributed(keras.layers.Softmax()))
-    return keras.models.Sequential(stack)