ierhon committed on
Commit
31dee74
1 Parent(s): c22db75

Add dropout slider

Browse files
Files changed (1) hide show
  1. chatbot_constructor.py +13 -8
chatbot_constructor.py CHANGED
@@ -29,7 +29,7 @@ def todset(text: str):
29
  def hash_str(data: str):
30
  return hashlib.md5(data.encode('utf-8')).hexdigest()
31
 
32
- def train(message: str = "", epochs: int = 16, learning_rate: float = 0.001, emb_size: int = 128, input_len: int = 16, kernels_count: int = 8, kernel_size: int = 8, data: str = ""):
33
  data_hash = None
34
  if "→" not in data or "\n" not in data:
35
  if data in os.listdir("cache"):
@@ -54,7 +54,8 @@ def train(message: str = "", epochs: int = 16, learning_rate: float = 0.001, emb
54
  else:
55
  input_layer = Input(shape=(inp_len,))
56
  emb_layer = Embedding(input_dim=vocab_size, output_dim=emb_size, input_length=inp_len)(input_layer)
57
- attn_layer = MultiHeadAttention(num_heads=4, key_dim=128)(emb_layer, emb_layer, emb_layer)
 
58
  noise_layer = GaussianNoise(0.1)(attn_layer)
59
  conv1_layer = Conv1D(kernels_count, kernel_size, padding='same', activation='relu', strides=1, input_shape=(64, 128))(noise_layer)
60
  conv2_layer = Conv1D(16, 4, padding='same', activation='relu', strides=1)(conv1_layer)
@@ -65,12 +66,15 @@ def train(message: str = "", epochs: int = 16, learning_rate: float = 0.001, emb
65
  conv2_flatten_layer = Flatten()(conv2_layer)
66
  conv3_flatten_layer = Flatten()(conv3_layer)
67
  concat1_layer = Concatenate()([flatten_layer, attn_flatten_layer, conv1_flatten_layer, conv2_flatten_layer, conv3_flatten_layer])
68
- dense1_layer = Dense(512, activation="linear")(concat1_layer)
 
69
  prelu1_layer = PReLU()(dense1_layer)
70
- dropout_layer = Dropout(0.3)(prelu1_layer)
71
- dense2_layer = Dense(256, activation="tanh")(dropout_layer)
72
- dense3_layer = Dense(256, activation="relu")(dense2_layer)
73
- dense4_layer = Dense(100, activation="tanh")(dense3_layer)
 
 
74
  concat2_layer = Concatenate()([dense4_layer, prelu1_layer, attn_flatten_layer, conv1_flatten_layer])
75
  dense4_layer = Dense(resps_len, activation="softmax")(concat2_layer)
76
  model = Model(inputs=input_layer, outputs=dense4_layer)
@@ -97,8 +101,9 @@ def train(message: str = "", epochs: int = 16, learning_rate: float = 0.001, emb
97
 
98
  if __name__ == "__main__":
99
  iface = gr.Interface(fn=train, inputs=["text",
100
- gr.inputs.Slider(1, 64, default=32, step=1, label="Epochs"),
101
  gr.inputs.Slider(0.00000001, 0.1, default=0.001, step=0.00000001, label="Learning rate"),
 
102
  gr.inputs.Slider(1, 256, default=100, step=1, label="Embedding size"),
103
  gr.inputs.Slider(1, 128, default=16, step=1, label="Input Length"),
104
  gr.inputs.Slider(1, 128, default=64, step=1, label="Convolution kernel count"),
 
29
  def hash_str(data: str):
30
  return hashlib.md5(data.encode('utf-8')).hexdigest()
31
 
32
+ def train(message: str = "", dropout: float = 0.5, learning_rate: float = 0.001, epochs: int = 16, emb_size: int = 128, input_len: int = 16, kernels_count: int = 8, kernel_size: int = 8, data: str = ""):
33
  data_hash = None
34
  if "→" not in data or "\n" not in data:
35
  if data in os.listdir("cache"):
 
54
  else:
55
  input_layer = Input(shape=(inp_len,))
56
  emb_layer = Embedding(input_dim=vocab_size, output_dim=emb_size, input_length=inp_len)(input_layer)
57
+ dropout1_layer = Dropout(0.25)(emb_layer)
58
+ attn_layer = MultiHeadAttention(num_heads=4, key_dim=128)(dropout1_layer, dropout1_layer, dropout1_layer)
59
  noise_layer = GaussianNoise(0.1)(attn_layer)
60
  conv1_layer = Conv1D(kernels_count, kernel_size, padding='same', activation='relu', strides=1, input_shape=(64, 128))(noise_layer)
61
  conv2_layer = Conv1D(16, 4, padding='same', activation='relu', strides=1)(conv1_layer)
 
66
  conv2_flatten_layer = Flatten()(conv2_layer)
67
  conv3_flatten_layer = Flatten()(conv3_layer)
68
  concat1_layer = Concatenate()([flatten_layer, attn_flatten_layer, conv1_flatten_layer, conv2_flatten_layer, conv3_flatten_layer])
69
+ dropout2_layer = Dropout(0.5)(concat1_layer)
70
+ dense1_layer = Dense(512, activation="linear")(dropout2_layer)
71
  prelu1_layer = PReLU()(dense1_layer)
72
+ dropout3_layer = Dropout(0.5)(prelu1_layer)
73
+ dense2_layer = Dense(256, activation="tanh")(dropout3_layer)
74
+ dropout4_layer = Dropout(0.5)(dense2_layer)
75
+ dense3_layer = Dense(256, activation="relu")(dropout4_layer)
76
+ dropout5_layer = Dropout(0.5)(dense3_layer)
77
+ dense4_layer = Dense(100, activation="tanh")(dropout5_layer)
78
  concat2_layer = Concatenate()([dense4_layer, prelu1_layer, attn_flatten_layer, conv1_flatten_layer])
79
  dense4_layer = Dense(resps_len, activation="softmax")(concat2_layer)
80
  model = Model(inputs=input_layer, outputs=dense4_layer)
 
101
 
102
  if __name__ == "__main__":
103
  iface = gr.Interface(fn=train, inputs=["text",
104
+ gr.inputs.Slider(0, 0.5, default=0.5, step=0.00000001, label="Dropout"),
105
  gr.inputs.Slider(0.00000001, 0.1, default=0.001, step=0.00000001, label="Learning rate"),
106
+ gr.inputs.Slider(1, 64, default=32, step=1, label="Epochs"),
107
  gr.inputs.Slider(1, 256, default=100, step=1, label="Embedding size"),
108
  gr.inputs.Slider(1, 128, default=16, step=1, label="Input Length"),
109
  gr.inputs.Slider(1, 128, default=64, step=1, label="Convolution kernel count"),