ierhon committed
Commit
5e22f32
1 Parent(s): 76b74a3

model improvement and caching

Files changed (1)
  1. app.py +52 -33
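
The caching half of this change keys the saved model on an MD5 hash of the raw dataset string, so resubmitting the same dataset reloads a previously trained model instead of retraining it. A minimal sketch of that pattern, kept separate from app.py; `CACHE_DIR`, `cache_path`, `get_model`, and the tiny placeholder network are illustrative names, not code from the commit:

import hashlib
import os

from keras.models import Model, load_model
from keras.layers import Input, Dense

CACHE_DIR = "cache"
os.makedirs(CACHE_DIR, exist_ok=True)

def hash_str(data: str) -> str:
    # Cache key: MD5 digest of the raw dataset text.
    return hashlib.md5(data.encode("utf-8")).hexdigest()

def cache_path(data: str) -> str:
    return os.path.join(CACHE_DIR, hash_str(data) + ".keras")

def get_model(data: str) -> Model:
    path = cache_path(data)
    if os.path.exists(path):
        # Identical dataset seen before: skip training and reload.
        return load_model(path)
    # Placeholder model; app.py builds and fits its real network at this point.
    inp = Input(shape=(4,))
    model = Model(inputs=inp, outputs=Dense(2, activation="softmax")(inp))
    model.compile(loss="categorical_crossentropy")
    model.save(path)
    return model

model = get_model("hello→hi\nbye→goodbye")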
app.py CHANGED
@@ -1,53 +1,72 @@
  import gradio as gr
  from todset import todset
  import numpy as np
- from keras.models import Sequential
- from keras.layers import Embedding, Dense, Dropout, Flatten, PReLU
+ from keras.models import Model
+ from keras.saving import load_model
+ from keras.layers import *
  from keras.preprocessing.text import Tokenizer
- from keras_self_attention import SeqSelfAttention, SeqWeightedAttention
+ import os
+ import hashlib
+ os.makedirs("cache", exist_ok=True)

  emb_size = 128
  inp_len = 16
  maxshift = 4

+ def hash_str(data: str):
+     return hashlib.md5(data.encode('utf-8')).hexdigest()
+
  def train(data: str, message: str):
      if "→" not in data or "\n" not in data:
-         return "Dataset should be like:\nquestion→answer\nquestion→answer\netc."
+         return "Dataset example:\nquestion→answer\nquestion→answer\netc."
      dset, responses = todset(data)
      resps_len = len(responses)
      tokenizer = Tokenizer()
      tokenizer.fit_on_texts(list(dset.keys()))

      vocab_size = len(tokenizer.word_index) + 1
-
-     model = Sequential()
-     model.add(Embedding(input_dim=vocab_size, output_dim=emb_size, input_length=inp_len))
-     model.add(SeqSelfAttention())
-     model.add(Flatten())
-     model.add(Dense(1024, activation="relu"))
-     model.add(Dropout(0.5))
-     model.add(Dense(512, activation="relu"))
-     model.add(Dense(512, activation="relu"))
-     model.add(Dense(256, activation="relu"))
-     model.add(Dense(resps_len, activation="softmax"))
-
-     X = []
-     y = []
-
-     for key in dset:
-         for p in range(maxshift):
-             tokens = tokenizer.texts_to_sequences([key,])[0]
-             X.append(np.array(([0,]*p+list(tokens)+[0,]*inp_len)[:inp_len]))
-             output_array = np.zeros(resps_len)
-             output_array[dset[key]] = 1
-             y.append(output_array)
-
-     X = np.array(X)
-     y = np.array(y)
-
-     model.compile(loss="categorical_crossentropy", metrics=["accuracy",])
-
-     model.fit(X, y, epochs=10, batch_size=8, workers=4, use_multiprocessing=True)
+     if hash_str(data)+".keras" in os.listdir("cache"):
+         model = load_model("cache/"+hash_str(data)+".keras")
+     else:
+         input_layer = Input(shape=(inp_len,))
+         emb_layer = Embedding(input_dim=vocab_size, output_dim=emb_size, input_length=inp_len)(input_layer)
+         attn_layer = MultiHeadAttention(num_heads=4, key_dim=128)(emb_layer, emb_layer, emb_layer)
+         noise_layer = GaussianNoise(0.1)(attn_layer)
+         conv1_layer = Conv1D(64, 8, padding='same', activation='relu', strides=1)(noise_layer)
+         conv2_layer = Conv1D(16, 4, padding='valid', activation='relu', strides=1)(conv1_layer)
+         conv3_layer = Conv1D(8, 2, padding='valid', activation='relu', strides=1)(conv2_layer)
+         flatten_layer = Flatten()(conv3_layer)
+         attn_flatten_layer = Flatten()(attn_layer)
+         conv1_flatten_layer = Flatten()(conv1_layer)
+         conv3_flatten_layer = Flatten()(conv3_layer)
+         concat1_layer = Concatenate()([flatten_layer, attn_flatten_layer, conv1_flatten_layer, Flatten()(conv2_layer), conv3_flatten_layer])
+         dense1_layer = Dense(512, activation="linear")(concat1_layer)
+         prelu1_layer = PReLU()(dense1_layer)
+         dropout_layer = Dropout(0.3)(prelu1_layer)
+         dense2_layer = Dense(256, activation="tanh")(dropout_layer)
+         dense3_layer = Dense(256, activation="relu")(dense2_layer)
+         dense4_layer = Dense(100, activation="tanh")(dense3_layer)
+         concat2_layer = Concatenate()([dense4_layer, prelu1_layer, attn_flatten_layer, conv1_flatten_layer])
+         output_layer = Dense(resps_len, activation="softmax")(concat2_layer)
+         model = Model(inputs=input_layer, outputs=output_layer)
+         X = []
+         y = []
+
+         for key in dset:
+             for p in range(maxshift):
+                 tokens = tokenizer.texts_to_sequences([key,])[0]
+                 X.append(np.array(([0,]*p+list(tokens)+[0,]*inp_len)[:inp_len]))
+                 output_array = np.zeros(resps_len)
+                 output_array[dset[key]] = 1
+                 y.append(output_array)
+
+         X = np.array(X)
+         y = np.array(y)
+
+         model.compile(loss="categorical_crossentropy", metrics=["accuracy",])
+
+         model.fit(X, y, epochs=10, batch_size=8, workers=4, use_multiprocessing=True)
+         model.save("cache/"+hash_str(data)+".keras")
      tokens = tokenizer.texts_to_sequences([message,])[0]
      prediction = model.predict(np.array([(list(tokens)+[0,]*inp_len)[:inp_len],]))[0]
      max_o = 0
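
The model-improvement half of the change drops the keras_self_attention package in favour of Keras's built-in MultiHeadAttention layer, which behaves as plain self-attention when the same tensor is passed as query, value, and key. A minimal sketch under that reading; the shapes follow app.py (inp_len=16, emb_size=128), while vocab_size=1000 and the 8-class output head are arbitrary stand-ins:

from keras.models import Model
from keras.layers import Input, Embedding, MultiHeadAttention, Flatten, Dense

inp_len, emb_size, vocab_size = 16, 128, 1000  # vocab_size is an arbitrary stand-in

inp = Input(shape=(inp_len,))
emb = Embedding(input_dim=vocab_size, output_dim=emb_size)(inp)
# Passing the same tensor as query, value and key turns MultiHeadAttention
# into self-attention over the token sequence, replacing SeqSelfAttention.
attn = MultiHeadAttention(num_heads=4, key_dim=128)(emb, emb, emb)
out = Dense(8, activation="softmax")(Flatten()(attn))  # 8 classes: arbitrary

model = Model(inputs=inp, outputs=out)
model.compile(loss="categorical_crossentropy", metrics=["accuracy"])
model.summary()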