ierhon committed
Commit b18d732
1 parent: 7bcdff2

Make the model larger and add PReLU alpha constraint

Files changed (1)
  1. chatbot_constructor.py +18 -8
chatbot_constructor.py CHANGED
@@ -4,14 +4,24 @@ from keras.models import Model
 from keras.saving import load_model
 from keras.layers import *
 from keras.regularizers import L1
+from keras.constraints import Constraint
 from tensorflow.keras.optimizers import RMSprop
 from keras.preprocessing.text import Tokenizer
+import keras.backend as K
 import os
 import hashlib
 import keras
 
 os.makedirs("cache", exist_ok=True)  # bare os.mkdir raises FileExistsError once cache/ persists between runs
 
+class ValueConstraint(Constraint):
+    def __init__(self, min_value: float = -1, max_value: float = 1):
+        self.min_value = min_value
+        self.max_value = max_value
+
+    def __call__(self, w):
+        return K.clip(w, self.min_value, self.max_value)
+
 def todset(text: str):
     lines = [x.rstrip("\n").lower().split("→") for x in text.split("\n")]
     lines = [(x[0].replace("\\n", "\n"), x[1].replace("\\n", "\n")) for x in lines]
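Keras's built-in constraints (MaxNorm, UnitNorm, MinMaxNorm, NonNeg) act on weight norms or signs, not raw values, so clamping PReLU's learned slopes takes a custom Constraint like the one added above. Keras applies it to the alpha weights after every optimizer step, keeping each negative-side slope inside [-1, 1] so the negative branch can never amplify its input. A standalone check of the clipping it performs (the alpha values are made up for illustration):

import keras.backend as K

alphas = K.constant([-2.5, -0.3, 0.1, 4.0])  # pretend per-unit PReLU alphas after an update
print(K.eval(K.clip(alphas, -1, 1)))         # -> [-1.  -0.3  0.1  1. ]

Tighter bounds would read PReLU(alpha_constraint=ValueConstraint(-0.5, 0.5)).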
@@ -33,8 +43,8 @@ def hash_str(data: str):
 def train(message: str = "", regularization: float = 0.0001, dropout: float = 0.1, learning_rate: float = 0.001, epochs: int = 16, emb_size: int = 128, input_len: int = 16, kernels_count: int = 8, kernel_size: int = 8, left_padding: bool = True, end_activation: str = "softmax", data: str = ""):
     data_hash = None
     if "→" not in data or "\n" not in data:
-        if data in os.listdir("cache"):
-            data_hash = data
+        if data in os.listdir("cache"):  # data = filename
+            data_hash = data  # set the hash to the file name
         else:
             return "Dataset example:\nquestion→answer\nquestion→answer\netc."
     dset, responses = todset(data)
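When `data` contains no "→" or newline it cannot be a dataset, so the new comments make the intent explicit: the caller is passing the name of an existing file under cache/, i.e. a dataset hash from an earlier run, and training is skipped in favor of the cached model. The body of hash_str sits outside this diff, so the digest below is an assumption (the module does import hashlib); a sketch of the round-trip:

import hashlib
from keras.saving import load_model

def hash_str(data: str) -> str:
    # assumption: the repo's hash_str may use a different digest
    return hashlib.sha256(data.encode("utf-8")).hexdigest()

data = "hi→hello\nbye→goodbye"
data_hash = hash_str(data)
# a first call trains and runs model.save(f"cache/{data_hash}");
# later calls can pass data=data_hash and reload instead of retraining:
model = load_model(f"cache/{data_hash}")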
@@ -68,14 +78,14 @@ def train(message: str = "", regularization: float = 0.0001, dropout: float = 0.
     conv3_flatten_layer = Flatten()(conv3_layer)
     concat1_layer = Concatenate()([flatten_layer, attn_flatten_layer, conv1_flatten_layer, conv2_flatten_layer, conv3_flatten_layer])
     dropout2_layer = Dropout(dropout)(concat1_layer)
-    dense1_layer = Dense(512, activation="linear", kernel_regularizer=L1(regularization))(dropout2_layer)
-    prelu1_layer = PReLU()(dense1_layer)
+    dense1_layer = Dense(2048, activation="linear", kernel_regularizer=L1(regularization))(dropout2_layer)
+    prelu1_layer = PReLU(alpha_constraint=ValueConstraint())(dense1_layer)
     dropout3_layer = Dropout(dropout)(prelu1_layer)
-    dense2_layer = Dense(256, activation="relu", kernel_regularizer=L1(regularization))(dropout3_layer)
+    dense2_layer = Dense(1024, activation="relu", kernel_regularizer=L1(regularization))(dropout3_layer)
     dropout4_layer = Dropout(dropout)(dense2_layer)
-    dense3_layer = Dense(256, activation="relu", kernel_regularizer=L1(regularization))(dropout4_layer)
+    dense3_layer = Dense(512, activation="relu", kernel_regularizer=L1(regularization))(dropout4_layer)
     dropout5_layer = Dropout(dropout)(dense3_layer)
-    dense4_layer = Dense(100, activation="relu", kernel_regularizer=L1(regularization))(dropout5_layer)
+    dense4_layer = Dense(256, activation="relu", kernel_regularizer=L1(regularization))(dropout5_layer)
     concat2_layer = Concatenate()([dense4_layer, prelu1_layer, attn_flatten_layer, conv1_flatten_layer])
     dense4_layer = Dense(resps_len, activation=end_activation, kernel_regularizer=L1(regularization))(concat2_layer)  # rebinds dense4_layer; the 256-unit tensor stays reachable via concat2_layer
     model = Model(inputs=input_layer, outputs=dense4_layer)
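This is the "make the model larger" half of the commit: the dense funnel grows from 512→256→256→100 to 2048→1024→512→256. Counting only the dense-to-dense links (the first Dense's fan-in depends on the conv/attention concatenation, which sits outside this hunk), the widening is roughly a 12x jump in parameters:

# Parameters of the dense-to-dense links only; the first Dense's fan-in
# (the conv/attention concat width) is not visible in this hunk.
def dense_params(widths):
    return sum(w_in * w_out + w_out for w_in, w_out in zip(widths, widths[1:]))

old = dense_params([512, 256, 256, 100])    # 222,820
new = dense_params([2048, 1024, 512, 256])  # 2,754,304
print(round(new / old, 1))                  # -> 12.4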
@@ -103,7 +113,7 @@ def train(message: str = "", regularization: float = 0.0001, dropout: float = 0.
     model.save(f"cache/{data_hash}")
     tokens = tokenizer.texts_to_sequences([message,])[0]
     prediction = model.predict(np.array([(list(tokens)+[0,]*inp_len)[:inp_len],]))[0]
-    keras.backend.clear_session()
+    K.clear_session()
     return responses[np.argmax(prediction)]
 
 if __name__ == "__main__":
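One detail worth noting in the inference path above: `(list(tokens)+[0,]*inp_len)[:inp_len]` always pads (and truncates) on the right, regardless of the train() signature's left_padding flag, so a left-padded model would see a different token layout at prediction time than it did in training. A small helper, hypothetical and not part of this commit, that honors the flag:

def pad_tokens(tokens, inp_len: int, left_padding: bool = True):
    # hypothetical helper, not in the commit: pad/truncate to inp_len
    # on the side the left_padding flag selects, with 0 as the padding id
    tokens = list(tokens)[:inp_len]
    pad = [0] * (inp_len - len(tokens))
    return pad + tokens if left_padding else tokens + pad

print(pad_tokens([3, 7, 2], 6))                      # [0, 0, 0, 3, 7, 2]
print(pad_tokens([3, 7, 2], 6, left_padding=False))  # [3, 7, 2, 0, 0, 0]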
 