ierhon committed on
Commit
ec8a8b1
1 Parent(s): 5158737

Add left-right padding checkbox

Browse files
Files changed (1) hide show
  1. chatbot_constructor.py +17 -10
chatbot_constructor.py CHANGED
@@ -30,7 +30,7 @@ def todset(text: str):
30
  def hash_str(data: str):
31
  return hashlib.md5(data.encode('utf-8')).hexdigest()
32
 
33
- def train(message: str = "", regularization: float = 0.0001, dropout: float = 0.1, learning_rate: float = 0.001, epochs: int = 16, emb_size: int = 128, input_len: int = 16, kernels_count: int = 8, kernel_size: int = 8, data: str = ""):
34
  data_hash = None
35
  if "→" not in data or "\n" not in data:
36
  if data in os.listdir("cache"):
@@ -82,12 +82,18 @@ def train(message: str = "", regularization: float = 0.0001, dropout: float = 0.
82
 
83
  X = []
84
  y = []
85
-
86
- for key in dset:
87
- tokens = tokenizer.texts_to_sequences([key,])[0]
88
- X.append(np.array((list(tokens)+[0,]*inp_len)[:inp_len]))
89
- y.append(dset[key])
90
-
 
 
 
 
 
 
91
  X = np.array(X)
92
  y = np.array(y)
93
 
@@ -102,14 +108,15 @@ def train(message: str = "", regularization: float = 0.0001, dropout: float = 0.
102
 
103
  if __name__ == "__main__":
104
  iface = gr.Interface(fn=train, inputs=["text",
105
- gr.inputs.Slider(0, 0.01, default=0.0001, step=0.00000001, label="Regularization L1"),
106
- gr.inputs.Slider(0, 0.5, default=0.1, step=0.00000001, label="Dropout"),
107
- gr.inputs.Slider(0.00000001, 0.01, default=0.001, step=0.00000001, label="Learning rate"),
108
  gr.inputs.Slider(1, 64, default=32, step=1, label="Epochs"),
109
  gr.inputs.Slider(1, 256, default=100, step=1, label="Embedding size"),
110
  gr.inputs.Slider(1, 128, default=16, step=1, label="Input Length"),
111
  gr.inputs.Slider(1, 128, default=64, step=1, label="Convolution kernel count"),
112
  gr.inputs.Slider(1, 16, default=8, step=1, label="Convolution kernel size"),
 
113
  "text"],
114
  outputs="text")
115
  iface.launch()
 
30
  def hash_str(data: str):
31
  return hashlib.md5(data.encode('utf-8')).hexdigest()
32
 
33
+ def train(message: str = "", regularization: float = 0.0001, dropout: float = 0.1, learning_rate: float = 0.001, epochs: int = 16, emb_size: int = 128, input_len: int = 16, kernels_count: int = 8, kernel_size: int = 8, left_padding: bool = True, data: str = ""):
34
  data_hash = None
35
  if "→" not in data or "\n" not in data:
36
  if data in os.listdir("cache"):
 
82
 
83
  X = []
84
  y = []
85
+ if left_padding:
86
+ for key in dset:
87
+ tokens = tokenizer.texts_to_sequences([key,])[0]
88
+ X.append(np.array(([0,]*inp_len+list(tokens))[-inp_len:]))
89
+ y.append(dset[key])
90
+
91
+ else:
92
+ for key in dset:
93
+ tokens = tokenizer.texts_to_sequences([key,])[0]
94
+ X.append(np.array(list(tokens)+[0,]*inp_len)[:inp_len]))
95
+ y.append(dset[key])
96
+
97
  X = np.array(X)
98
  y = np.array(y)
99
 
 
108
 
109
  if __name__ == "__main__":
110
  iface = gr.Interface(fn=train, inputs=["text",
111
+ gr.inputs.Slider(0, 0.01, default=0.0001, step=1e-8, label="Regularization L1"),
112
+ gr.inputs.Slider(0, 0.5, default=0.1, step=1e-8, label="Dropout"),
113
+ gr.inputs.Slider(1e-8, 0.01, default=0.001, step=1e-8, label="Learning rate"),
114
  gr.inputs.Slider(1, 64, default=32, step=1, label="Epochs"),
115
  gr.inputs.Slider(1, 256, default=100, step=1, label="Embedding size"),
116
  gr.inputs.Slider(1, 128, default=16, step=1, label="Input Length"),
117
  gr.inputs.Slider(1, 128, default=64, step=1, label="Convolution kernel count"),
118
  gr.inputs.Slider(1, 16, default=8, step=1, label="Convolution kernel size"),
119
+ gr.inputs.CheckBox(False, label="Use left padding", info="Mostly used to continue a sequence"),
120
  "text"],
121
  outputs="text")
122
  iface.launch()