Daniel Fried committed on
Commit
fb51e42
·
1 Parent(s): 5900055
Files changed (3) hide show
  1. .gitignore +3 -0
  2. modules/app.py +6 -4
  3. static/index.html +56 -50
.gitignore ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ use_normal_tokenizers
2
+ __pycache__
3
+ incoder-6B
modules/app.py CHANGED
@@ -5,12 +5,13 @@ import os
5
  # needs to be imported *before* transformers
6
  if os.path.exists('use_normal_tokenizers'):
7
  import tokenizers
 
8
  else:
9
  import tokenizers_patch
 
10
  from transformers import AutoModelForCausalLM, AutoTokenizer
11
  import json
12
 
13
-
14
  # from flask import Flask, request, render_template
15
  # from flask_cors import CORS
16
  # app = Flask(__name__, static_folder='static')
@@ -24,8 +25,6 @@ import json
24
  PORT = 7860
25
  VERBOSE = False
26
 
27
- BIG_MODEL = False
28
-
29
  if BIG_MODEL:
30
  CUDA = True
31
  model_name = "facebook/incoder-6B"
@@ -61,7 +60,10 @@ def generate(input, length_limit=None, temperature=None):
61
  input_ids = tokenizer(input, return_tensors="pt").input_ids
62
  if CUDA:
63
  input_ids = input_ids.cuda()
64
- output = model.generate(input_ids=input_ids, do_sample=True, top_p=0.95, temperature=temperature, max_length=length_limit)
 
 
 
65
  detok_hypo_str = tokenizer.decode(output.flatten())
66
  if detok_hypo_str.startswith(BOS):
67
  detok_hypo_str = detok_hypo_str[len(BOS):]
 
5
  # needs to be imported *before* transformers
6
  if os.path.exists('use_normal_tokenizers'):
7
  import tokenizers
8
+ BIG_MODEL = False
9
  else:
10
  import tokenizers_patch
11
+ BIG_MODEL = True
12
  from transformers import AutoModelForCausalLM, AutoTokenizer
13
  import json
14
 
 
15
  # from flask import Flask, request, render_template
16
  # from flask_cors import CORS
17
  # app = Flask(__name__, static_folder='static')
 
25
  PORT = 7860
26
  VERBOSE = False
27
 
 
 
28
  if BIG_MODEL:
29
  CUDA = True
30
  model_name = "facebook/incoder-6B"
 
60
  input_ids = tokenizer(input, return_tensors="pt").input_ids
61
  if CUDA:
62
  input_ids = input_ids.cuda()
63
+ max_length = length_limit + input_ids.flatten().size(0)
64
+ if max_length > 256:
65
+ max_length = 256
66
+ output = model.generate(input_ids=input_ids, do_sample=True, top_p=0.95, temperature=temperature, max_length=max_length)
67
  detok_hypo_str = tokenizer.decode(output.flatten())
68
  if detok_hypo_str.startswith(BOS):
69
  detok_hypo_str = detok_hypo_str[len(BOS):]
static/index.html CHANGED
@@ -124,41 +124,9 @@ label {
124
  <span><a href='javascript:select_example("metadata-prediction");'>Metadata Prediction</a></span>
125
  <span><a href='javascript:select_example("humaneval");'>Docstring->Code</a></span>
126
  </div>
127
- <div>
128
- Syntax:
129
- <select name="mode" id="mode">
130
- <option value="text">Text</option>
131
- <option value="c_cpp">C/C++</option>
132
- <option value="csharp">C#</option>
133
- <option value="clojure">Clojure</option>
134
- <option value="coffee">CoffeeScript</option>
135
- <option value="golang">Go</option>
136
- <option value="haskell">Haskell</option>
137
- <option value="java">Java</option>
138
- <option value="javascript">JavaScript</option>
139
- <option value="lua">Lua</option>
140
- <option value="objectivec">Objective C</option>
141
- <option value="perl">Perl</option>
142
- <option value="php">PHP</option>
143
- <option value="python">Python</option>
144
- <option value="ruby">Ruby</option>
145
- <option value="rust">Rust</option>
146
- <option value="scala">Scala</option>
147
- <option value="sh">Shell</option>
148
- <option value="swift">Swift</option>
149
- <option value="typescript">Typescript</option>
150
- </select>
151
  </div>
152
  <div class="request">
153
  <form id="generate-form">
154
- <div class="leftside">
155
- <!--
156
- <textarea name="prompt" rows="12" cols="100" id="textbox"></textarea>
157
- <textarea name="prefix" rows="12" cols="100" id="textbox"></textarea>
158
- <textarea name="suffix" rows="12" cols="100" id="textbox"></textarea>
159
- -->
160
- <div id="editor"></div>
161
- </div>
162
  <div class="rightside">
163
  <div>
164
  <label>Response Length:</label>
@@ -174,15 +142,7 @@ Syntax:
174
  >
175
  <output>0.6</output>
176
  </div>
177
- <!--
178
  <div>
179
- <label>Top-k:</label>
180
- <input type="range" value="2" min="1" max="8" step="1" class="slider"
181
- oninput="this.nextElementSibling.value = this.value" name="topk">
182
- <output>2</output>
183
- </div>
184
- -->
185
- <div class="submit-holder">
186
  <!-- <input type="submit" value="Extend" id="extend-form-button"/> -->
187
  <input type="button" value="Extend" id="extend-form-button"/>
188
  <span style='margin-left:1em'>
@@ -203,6 +163,39 @@ Syntax:
203
  <div>
204
  -->
205
  </div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
206
  <div id="error"></div>
207
  </div>
208
  </div>
@@ -212,13 +205,26 @@ Syntax:
212
  <div id="loader_holder">
213
  <div class="loader"></div>
214
  <div>
215
- Please be patient. Your generation may take <span id="eta">X</span> seconds.
216
  </div>
217
  </div>
218
  </div>
219
 
220
- <h3 id="debug-info">Debug info</h3>
221
  <p>
 
 
 
 
 
 
 
 
 
 
 
 
 
222
  <script type="text/javascript">
223
  // these constants are only used for providing user expectations.
224
  var OVERHEAD = 3;
@@ -231,7 +237,7 @@ var Range = require("ace/range").Range;
231
  var EXAMPLES = {
232
  "python": {
233
  "prompt": "<| file ext=.py |>\nclass Person:\n" + SPLIT_TOKEN + "\np = Person('Eren', 18, 'Male')",
234
- "length": 128,
235
  "mode": "python"
236
  },
237
  "python-infill2": {
@@ -241,7 +247,7 @@ def <infill>(file_name):
241
  """Count the number of occurrences of each word in the file."""
242
  <infill>
243
  `,
244
- "length": 128,
245
  "mode": "python"
246
  },
247
 
@@ -259,7 +265,7 @@ def <infill>(file_name):
259
  word_counts[word] = 1
260
  return word_counts
261
  `,
262
- "length": 64,
263
  "mode": "python"
264
  },
265
  "docstring": {
@@ -277,27 +283,27 @@ def <infill>(file_name):
277
  word_counts[word] = 1
278
  return word_counts
279
  `,
280
- "length": 128,
281
  "mode": "python"
282
  },
283
  "javascript": {
284
  "prompt": "<| file ext=.js |>\n // is something really happening here",
285
- "length": 128,
286
  "mode": "javascript"
287
  },
288
  "jupyter": {
289
  "prompt": "<| file ext=.ipynb:python |>\n<text>\nThis notebook demonstrates using scikit-learn to perform PCA.\n</text>\n<cell>",
290
- "length": 128,
291
  "mode": "python"
292
  },
293
  "stackoverflow": {
294
  "prompt": "<| q tags=regex,html |>\nParsing HTML with regular expressions\nHow do I do this? Is it a good idea?\n<|/ q dscore=3 |>\n<| a dscore=4 |>",
295
- "length": 128,
296
  "mode": "text"
297
  },
298
  "metadata-conditioning": {
299
  "prompt": "<| file ext=.py filename=train_model.py source=github dstars=4 |>\n",
300
- "length": 256,
301
  "mode": "python"
302
  },
303
  "metadata-prediction": {
 
124
  <span><a href='javascript:select_example("metadata-prediction");'>Metadata Prediction</a></span>
125
  <span><a href='javascript:select_example("humaneval");'>Docstring->Code</a></span>
126
  </div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
127
  </div>
128
  <div class="request">
129
  <form id="generate-form">
 
 
 
 
 
 
 
 
130
  <div class="rightside">
131
  <div>
132
  <label>Response Length:</label>
 
142
  >
143
  <output>0.6</output>
144
  </div>
 
145
  <div>
 
 
 
 
 
 
 
146
  <!-- <input type="submit" value="Extend" id="extend-form-button"/> -->
147
  <input type="button" value="Extend" id="extend-form-button"/>
148
  <span style='margin-left:1em'>
 
163
  <div>
164
  -->
165
  </div>
166
+ <br>
167
+ <div class="leftside">
168
+ <!--
169
+ <textarea name="prompt" rows="12" cols="100" id="textbox"></textarea>
170
+ <textarea name="prefix" rows="12" cols="100" id="textbox"></textarea>
171
+ <textarea name="suffix" rows="12" cols="100" id="textbox"></textarea>
172
+ -->
173
+ <div>
174
+ Syntax:
175
+ <select name="mode" id="mode">
176
+ <option value="text">Text</option>
177
+ <option value="c_cpp">C/C++</option>
178
+ <option value="csharp">C#</option>
179
+ <option value="clojure">Clojure</option>
180
+ <option value="coffee">CoffeeScript</option>
181
+ <option value="golang">Go</option>
182
+ <option value="haskell">Haskell</option>
183
+ <option value="java">Java</option>
184
+ <option value="javascript">JavaScript</option>
185
+ <option value="lua">Lua</option>
186
+ <option value="objectivec">Objective C</option>
187
+ <option value="perl">Perl</option>
188
+ <option value="php">PHP</option>
189
+ <option value="python">Python</option>
190
+ <option value="ruby">Ruby</option>
191
+ <option value="rust">Rust</option>
192
+ <option value="scala">Scala</option>
193
+ <option value="sh">Shell</option>
194
+ <option value="swift">Swift</option>
195
+ <option value="typescript">Typescript</option>
196
+ </select>
197
+ <div id="editor"></div>
198
+ </div>
199
  <div id="error"></div>
200
  </div>
201
  </div>
 
205
  <div id="loader_holder">
206
  <div class="loader"></div>
207
  <div>
208
+ Generation queued, please wait...
209
  </div>
210
  </div>
211
  </div>
212
 
213
+ <h3 id="debug-info">More Info</h3>
214
  <p>
215
+ This is a demo interface for <a href="https://github.com/dpfried/incoder/blob/main/paper/InCoder-4-12-22.pdf">InCoder: A Generative Model for Code In-Filling and Synthesis</a>.
216
+ </p>
217
+ <p>
218
+ See <a href="https://sites.google.com/view/incoder-code-models">our project site</a> for more information on these models, including a paper and examples.
219
+ </p>
220
+
221
+ <p>
222
+ For instructions on setting up and using the models yourself, see <a href="https://github.com/dpfried/incoder/blob/main/README.md">our readme</a>.
223
+ </p>
224
+
225
+ <h3 id="debug-info">Credits</h3>
226
+ <p>Model development: Daniel Fried*, Armen Aghajanyan*, Jessy Lin, Sida Wang, Eric Wallace, Freda Shi, Ruiqi Zhong, Wen-tau Yih, Luke Zettlemoyer, and Mike Lewis</p>
227
+ <p>Thanks to Naman Goyal and Stephen Roller for writing the code this was based on. Extensions by Daniel Fried and Sida Wang.</p>
228
  <script type="text/javascript">
229
  // these constants are only used for providing user expectations.
230
  var OVERHEAD = 3;
 
237
  var EXAMPLES = {
238
  "python": {
239
  "prompt": "<| file ext=.py |>\nclass Person:\n" + SPLIT_TOKEN + "\np = Person('Eren', 18, 'Male')",
240
+ "length": 64,
241
  "mode": "python"
242
  },
243
  "python-infill2": {
 
247
  """Count the number of occurrences of each word in the file."""
248
  <infill>
249
  `,
250
+ "length": 64,
251
  "mode": "python"
252
  },
253
 
 
265
  word_counts[word] = 1
266
  return word_counts
267
  `,
268
+ "length": 4,
269
  "mode": "python"
270
  },
271
  "docstring": {
 
283
  word_counts[word] = 1
284
  return word_counts
285
  `,
286
+ "length": 32,
287
  "mode": "python"
288
  },
289
  "javascript": {
290
  "prompt": "<| file ext=.js |>\n // is something really happening here",
291
+ "length": 64,
292
  "mode": "javascript"
293
  },
294
  "jupyter": {
295
  "prompt": "<| file ext=.ipynb:python |>\n<text>\nThis notebook demonstrates using scikit-learn to perform PCA.\n</text>\n<cell>",
296
+ "length": 64,
297
  "mode": "python"
298
  },
299
  "stackoverflow": {
300
  "prompt": "<| q tags=regex,html |>\nParsing HTML with regular expressions\nHow do I do this? Is it a good idea?\n<|/ q dscore=3 |>\n<| a dscore=4 |>",
301
+ "length": 64,
302
  "mode": "text"
303
  },
304
  "metadata-conditioning": {
305
  "prompt": "<| file ext=.py filename=train_model.py source=github dstars=4 |>\n",
306
+ "length": 64,
307
  "mode": "python"
308
  },
309
  "metadata-prediction": {