dpfried committed on
Commit
78c9772
·
0 Parent(s):

Duplicate from facebook/incoder-demo

Browse files
.gitattributes ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bin.* filter=lfs diff=lfs merge=lfs -text
5
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.model filter=lfs diff=lfs merge=lfs -text
12
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
13
+ *.onnx filter=lfs diff=lfs merge=lfs -text
14
+ *.ot filter=lfs diff=lfs merge=lfs -text
15
+ *.parquet filter=lfs diff=lfs merge=lfs -text
16
+ *.pb filter=lfs diff=lfs merge=lfs -text
17
+ *.pt filter=lfs diff=lfs merge=lfs -text
18
+ *.pth filter=lfs diff=lfs merge=lfs -text
19
+ *.rar filter=lfs diff=lfs merge=lfs -text
20
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
21
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
22
+ *.tflite filter=lfs diff=lfs merge=lfs -text
23
+ *.tgz filter=lfs diff=lfs merge=lfs -text
24
+ *.wasm filter=lfs diff=lfs merge=lfs -text
25
+ *.xz filter=lfs diff=lfs merge=lfs -text
26
+ *.zip filter=lfs diff=lfs merge=lfs -text
27
+ *.zstandard filter=lfs diff=lfs merge=lfs -text
28
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ debug
2
+ unlock
3
+ __pycache__
4
+ incoder-6B
README.md ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Incoder Demo
3
+ emoji: 💻
4
+ colorFrom: red
5
+ colorTo: purple
6
+ sdk: gradio
7
+ sdk_version: 2.9.1
8
+ python_version: 3.8.13
9
+ app_file: start.py
10
+ license: cc-by-nc-4.0
11
+ pinned: false
12
+ duplicated_from: facebook/incoder-demo
13
+ ---
14
+
15
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces#reference
index.html ADDED
@@ -0,0 +1 @@
 
 
1
+ demo is loading
modules/app.py ADDED
@@ -0,0 +1,240 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sys
2
+ from typing import List
3
+ import traceback
4
+ import os
5
+ import base64
6
+
7
+ import logging
8
+ logging.basicConfig(level=logging.INFO)
9
+ import modules.cloud_logging
10
+
11
+ import tokenizers
12
+ import torch
13
+ from transformers import AutoModelForCausalLM, AutoTokenizer
14
+ import json
15
+ import pprint
16
+
17
+ # needs to be imported *before* transformers
18
# Both flags are switched off together when a path named 'debug' exists in
# the current working directory, and switched on otherwise.
BIG_MODEL = CUDA = not os.path.exists('debug')
24
+
25
+ # from flask import Flask, request, render_template
26
+ # from flask_cors import CORS
27
+ # app = Flask(__name__, static_folder='static')
28
+ # app.config['TEMPLATES_AUTO_RELOAD'] = True
29
+ # CORS(app, resources= {
30
+ # r"/generate": {"origins": origins},
31
+ # r"/infill": {"origins": origins},
32
+ # })
33
+ # origins=[f"http://localhost:{PORT}", "https://huggingface.co", "https://hf.space"]
34
+
35
+ PORT = 7860
36
+ VERBOSE = False
37
+
38
# Cap on the total document length in tokens; a path named 'unlock' in the
# current working directory raises the cap.
MAX_LENGTH = 2048 if os.path.exists('unlock') else 256 + 64
TRUNCATION_MESSAGE = f'warning: This demo is limited to {MAX_LENGTH} tokens in the document for efficiency.'
43
+
44
+ if BIG_MODEL:
45
+ model_name = "facebook/incoder-6B"
46
+ kwargs = dict(
47
+ revision="float16",
48
+ torch_dtype=torch.float16,
49
+ low_cpu_mem_usage=True,
50
+ )
51
+ else:
52
+ model_name = "facebook/incoder-1B"
53
+ kwargs = dict()
54
+
55
+ from fastapi import FastAPI, Request
56
+ from fastapi.staticfiles import StaticFiles
57
+ from fastapi.responses import FileResponse, StreamingResponse
58
+ app = FastAPI(docs_url=None, redoc_url=None)
59
+ app.mount("/static", StaticFiles(directory="static"), name="static")
60
+
61
+
62
+ logging.info("loading model")
63
+ model = AutoModelForCausalLM.from_pretrained(model_name, **kwargs)
64
+ logging.info("loading tokenizer")
65
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
66
+ logging.info("loading complete")
67
+
68
+ if CUDA:
69
+ model = model.half().cuda()
70
+
71
+ BOS = "<|endoftext|>"
72
+ EOM = "<|endofmask|>"
73
+
74
def make_sentinel(i):
    """Return the mask sentinel token text for mask index ``i``."""
    return "<|mask:" + str(i) + "|>"
76
+
77
+ SPECIAL_TOKENS = [make_sentinel(i) for i in range(256)] + [EOM]
78
+
79
def generate(input, length_limit=None, temperature=None):
    """Sample a continuation of ``input`` from the loaded model.

    Args:
        input: Prompt text to extend.
        length_limit: Maximum number of new tokens to request. ``None`` means
            "as many as the ``MAX_LENGTH`` cap leaves room for".
        temperature: Sampling temperature, forwarded to ``model.generate``.

    Returns:
        A ``(text, truncated)`` tuple. ``text`` is the decoded output (prompt
        plus continuation) with a leading ``BOS`` marker removed. ``truncated``
        is True when the ``MAX_LENGTH`` cap reduced the requested length, or
        when there was no room to generate anything, in which case ``input``
        is returned unchanged.
    """
    input_ids = tokenizer(input, return_tensors="pt").input_ids
    if CUDA:
        input_ids = input_ids.cuda()
    current_length = input_ids.flatten().size(0)

    truncated = False
    if length_limit is None:
        # The declared default used to crash on ``None + int``.
        max_length = MAX_LENGTH
    else:
        max_length = length_limit + current_length
        if max_length > MAX_LENGTH:
            max_length = MAX_LENGTH
            truncated = True

    # ``<=`` rather than ``==``: a prompt that is already longer than the cap
    # must not be passed to ``model.generate`` with a max_length below its own
    # length.
    if max_length <= current_length:
        return input, True

    output = model.generate(
        input_ids=input_ids,
        do_sample=True,
        top_p=0.95,
        temperature=temperature,
        max_length=max_length,
    )
    detok_hypo_str = tokenizer.decode(output.flatten())
    if detok_hypo_str.startswith(BOS):
        detok_hypo_str = detok_hypo_str[len(BOS):]
    return detok_hypo_str, truncated
96
+
97
def infill(parts: List[str], length_limit=None, temperature=None, extra_sentinel=False, max_retries=1):
    """Fill the gaps between consecutive ``parts`` with generated text.

    The prompt is built from the parts with a mask sentinel after each one
    (after the last part only when ``extra_sentinel`` is set). Each gap is
    then generated in turn: its sentinel is appended to the prompt, a
    completion is sampled with ``generate``, and the completion is cut at the
    first ``EOM`` marker.

    Args:
        parts: Text segments surrounding the gaps; ``len(parts) - 1`` gaps are
            filled. Must be a non-empty list.
        length_limit: Forwarded to ``generate`` for every gap.
        temperature: Forwarded to ``generate`` for every gap.
        extra_sentinel: When True, a sentinel is also appended after the
            final part of the prompt.
        max_retries: Maximum number of complete attempts (at least 1). An
            attempt is repeated when some completion contained no ``EOM``.

    Returns:
        A dict with keys ``'text'`` (parts and infills stitched together),
        ``'parts'``, ``'infills'``, ``'retries_attempted'`` and
        ``'truncated'`` (True if any ``generate`` call in the last attempt
        reported truncation).

    Raises:
        TypeError: If ``parts`` is not a list.
        ValueError: If ``parts`` is empty or ``max_retries`` is below 1.
    """
    # Explicit checks instead of ``assert`` so validation survives ``python -O``.
    if not isinstance(parts, list):
        raise TypeError(f"parts must be a list, got {type(parts).__name__}")
    if not parts:
        raise ValueError("parts must contain at least one string")
    if max_retries < 1:
        # With zero attempts the loop below never runs and there is no result.
        raise ValueError("max_retries must be at least 1")

    retries_attempted = 0
    done = False

    while (not done) and (retries_attempted < max_retries):
        any_truncated = False
        retries_attempted += 1
        if VERBOSE:
            logging.info(f"retry {retries_attempted}")
        if len(parts) == 1:
            prompt = parts[0]
        else:
            prompt = ""
            # encode parts separated by sentinel
            for sentinel_ix, part in enumerate(parts):
                prompt += part
                if extra_sentinel or (sentinel_ix < len(parts) - 1):
                    prompt += make_sentinel(sentinel_ix)

        infills = []
        complete = []

        done = True

        for sentinel_ix, part in enumerate(parts[:-1]):
            complete.append(part)
            prompt += make_sentinel(sentinel_ix)
            completion, this_truncated = generate(prompt, length_limit, temperature)
            any_truncated |= this_truncated
            # keep only the newly generated text after the prompt
            completion = completion[len(prompt):]
            if EOM not in completion:
                if VERBOSE:
                    logging.info(f"warning: {EOM} not found")
                completion += EOM
                # TODO: break inner loop here
                done = False
            completion = completion[:completion.index(EOM) + len(EOM)]
            infilled = completion[:-len(EOM)]
            infills.append(infilled)
            complete.append(infilled)
            prompt += completion
        complete.append(parts[-1])
        text = ''.join(complete)

    if VERBOSE:
        # ``logging.info`` requires a message argument; log an empty string
        # where a blank separator line is wanted.
        logging.info("generated text:")
        logging.info(prompt)
        logging.info("")
        logging.info("parts:")
        logging.info(parts)
        logging.info("")
        logging.info("infills:")
        logging.info(infills)
        logging.info("")
        logging.info("restitched text:")
        logging.info(text)
        logging.info("")

    return {
        'text': text,
        'parts': parts,
        'infills': infills,
        'retries_attempted': retries_attempted,
        'truncated': any_truncated,
    }
166
+
167
+
168
@app.head("/")
@app.get("/")
def index() -> FileResponse:
    """Answer GET and HEAD requests on "/" with the static index page."""
    page = FileResponse(path="static/index.html", media_type="text/html")
    return page
172
+
173
@app.get('/generate')
async def generate_maybe(info: str):
    """Handle an "extend" request and return a JSON-serialisable result dict.

    Args:
        info: A url-safe base64 encoding of a JSON object with the keys
            ``'prompt'``, ``'length'`` and ``'temperature'``. (GET doesn't
            support a body, and POST leads to CORS issues.)

    Returns:
        On success, a dict with ``'result': 'success'``, the prompt, the
        generated text and a message that is non-empty when the output was
        truncated. On any failure, including a malformed ``info`` payload, a
        dict with ``'result': 'error'`` and an explanatory message.
    """
    # Stays empty if the request cannot be decoded far enough to read the prompt.
    prompt = ''
    try:
        # Restore stripped base64 padding, following
        # https://stackoverflow.com/a/9956217/1319683; ``-len % 4`` adds
        # nothing when the length is already a multiple of four.
        decoded = base64.urlsafe_b64decode(info + '=' * (-len(info) % 4)).decode('utf-8')
        form = json.loads(decoded)
        prompt = form['prompt']
        length_limit = int(form['length'])
        temperature = float(form['temperature'])
        logging.info(json.dumps({
            'length': length_limit,
            'temperature': temperature,
            'prompt': prompt,
        }))
        generation, truncated = generate(prompt, length_limit, temperature)
        message = TRUNCATION_MESSAGE if truncated else ''
        return {'result': 'success', 'type': 'generate', 'prompt': prompt, 'text': generation, 'message': message}
    except Exception as e:
        # Request boundary: report every failure to the client as JSON.
        traceback.print_exception(*sys.exc_info())
        logging.error(e)
        return {'result': 'error', 'type': 'generate', 'prompt': prompt, 'message': f'Error: {e}.'}
202
+
203
@app.get('/infill')
async def infill_maybe(info: str):
    """Handle an "infill" request and return a JSON-serialisable result dict.

    Args:
        info: A url-safe base64 encoding of a JSON object with the keys
            ``'parts'``, ``'length'`` and ``'temperature'``. (GET doesn't
            support a body, and POST leads to CORS issues.)

    Returns:
        On success, the dict produced by ``infill`` extended with
        ``'result'``, ``'type'`` and ``'message'``. When more than four parts
        are supplied, or on any failure including a malformed ``info``
        payload, a dict with ``'result': 'error'`` and an explanatory message.
    """
    max_retries = 1
    extra_sentinel = True
    try:
        # Restore stripped base64 padding, following
        # https://stackoverflow.com/a/9956217/1319683; ``-len % 4`` adds
        # nothing when the length is already a multiple of four.
        decoded = base64.urlsafe_b64decode(info + '=' * (-len(info) % 4)).decode('utf-8')
        form = json.loads(decoded)
        parts = form['parts']
        length_limit = int(form['length'])
        temperature = float(form['temperature'])
        logging.info(json.dumps({
            'length': length_limit,
            'temperature': temperature,
            'parts_joined': '<infill>'.join(parts),
        }))
        # Four parts correspond to three <infill> markers.
        if len(parts) > 4:
            return {'result': 'error', 'text': ''.join(parts), 'type': 'infill', 'message': "error: Can't use more than 3 <infill> tokens in this demo (for efficiency)."}
        generation = infill(parts, length_limit, temperature, extra_sentinel=extra_sentinel, max_retries=max_retries)
        generation['result'] = 'success'
        generation['type'] = 'infill'
        generation['message'] = TRUNCATION_MESSAGE if generation['truncated'] else ''
        return generation
    except Exception as e:
        # Request boundary: report every failure to the client as JSON.
        traceback.print_exception(*sys.exc_info())
        logging.error(e)
        return {'result': 'error', 'type': 'infill', 'message': f'Error: {e}.'}
237
+
238
+
239
if __name__ == "__main__":
    # FastAPI applications have no Flask-style ``run`` method, so serve the
    # app through uvicorn, which requirements.txt already pins.
    import uvicorn

    uvicorn.run(app, host='0.0.0.0', port=PORT)
modules/cloud_logging.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
def make_logging_client():
    """Create a Google Cloud Logging client and install it, if configured.

    Reads two environment variables:

    * ``GOOGLE_APPLICATION_CREDENTIALS``: path of the credentials file.
    * ``GOOGLE_APPLICATION_CREDENTIALS_STRING``: contents to write to that
      path when the file does not exist yet.

    Returns:
        The client after ``setup_logging()`` has been called on it, or
        ``None`` when no credentials path is configured, or when the file is
        missing and no credentials string is available to create it.
    """
    cred_filename = os.environ.get('GOOGLE_APPLICATION_CREDENTIALS')
    if not cred_filename:
        return None
    print("cred filename:", cred_filename)
    cred_string = os.environ.get('GOOGLE_APPLICATION_CREDENTIALS_STRING')
    # Only report whether the secret is present, never its value.
    print("cred string:", bool(cred_string))
    if not os.path.exists(cred_filename):
        if not cred_string:
            return None
        print(f"writing cred string to {cred_filename}")
        # Create the file readable and writable by the owner only: it holds
        # a secret and must not inherit the default permissive mode.
        fd = os.open(cred_filename, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
        with os.fdopen(fd, 'w') as f:
            f.write(cred_string)
    # Imported lazily so the dependency is only needed when credentials are configured.
    from google.cloud import logging
    logging_client = logging.Client()
    logging_client.setup_logging()
    return logging_client
20
+
21
+ logging_client = make_logging_client()
packages.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ rustc
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ fastapi==0.74.*
2
+ requests==2.27.*
3
+ torch==1.11.*
4
+ uvicorn[standard]==0.17.*
5
+ tokenizers==0.12.1
6
+ git+https://github.com/huggingface/transformers.git@b18dfd95e1f60ae65a959a7b255fc06522170d1b
7
+ google-cloud-logging
start.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ import subprocess
2
+
3
# Start the uvicorn server for modules.app:app. The command is passed as an
# argument list, so no shell is involved in parsing it.
subprocess.run(
    [
        "uvicorn",
        "modules.app:app",
        "--timeout-keep-alive", "300",
        "--host", "0.0.0.0",
        "--port", "7860",
    ]
)
static/frame.html ADDED
@@ -0,0 +1 @@
 
 
1
+ <iframe src="index.html"></iframe>
static/index.html ADDED
@@ -0,0 +1,633 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html>
3
+ <head>
4
+ <meta charset="utf-8"/>
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0" />
6
+ <title>InCoder</title>
7
+ <script src="https://ajax.googleapis.com/ajax/libs/jquery/3.4.1/jquery.min.js"></script>
8
+ <script src="https://cdn.jsdelivr.net/npm/js-base64@3.7.2/base64.min.js"></script>
9
+
10
+ <script src="https://cdnjs.cloudflare.com/ajax/libs/ace/1.4.14/ace.min.js"></script>
11
+ <script src="https://cdnjs.cloudflare.com/ajax/libs/ace/1.4.14/mode-plain_text.min.js"></script>
12
+ <script src="https://cdnjs.cloudflare.com/ajax/libs/ace/1.4.14/mode-c_cpp.min.js"></script>
13
+ <script src="https://cdnjs.cloudflare.com/ajax/libs/ace/1.4.14/mode-csharp.min.js"></script>
14
+ <script src="https://cdnjs.cloudflare.com/ajax/libs/ace/1.4.14/mode-clojure.min.js"></script>
15
+ <script src="https://cdnjs.cloudflare.com/ajax/libs/ace/1.4.14/mode-coffee.min.js"></script>
16
+ <script src="https://cdnjs.cloudflare.com/ajax/libs/ace/1.4.14/mode-golang.min.js"></script>
17
+ <script src="https://cdnjs.cloudflare.com/ajax/libs/ace/1.4.14/mode-haskell.min.js"></script>
18
+ <script src="https://cdnjs.cloudflare.com/ajax/libs/ace/1.4.14/mode-python.min.js"></script>
19
+ <script src="https://cdnjs.cloudflare.com/ajax/libs/ace/1.4.14/mode-java.min.js"></script>
20
+ <script src="https://cdnjs.cloudflare.com/ajax/libs/ace/1.4.14/mode-javascript.min.js"></script>
21
+ <script src="https://cdnjs.cloudflare.com/ajax/libs/ace/1.4.14/mode-lua.min.js"></script>
22
+ <script src="https://cdnjs.cloudflare.com/ajax/libs/ace/1.4.14/mode-objectivec.min.js"></script>
23
+ <script src="https://cdnjs.cloudflare.com/ajax/libs/ace/1.4.14/mode-perl.min.js"></script>
24
+ <script src="https://cdnjs.cloudflare.com/ajax/libs/ace/1.4.14/mode-php.min.js"></script>
25
+ <script src="https://cdnjs.cloudflare.com/ajax/libs/ace/1.4.14/mode-python.min.js"></script>
26
+ <script src="https://cdnjs.cloudflare.com/ajax/libs/ace/1.4.14/mode-ruby.min.js"></script>
27
+ <script src="https://cdnjs.cloudflare.com/ajax/libs/ace/1.4.14/mode-rust.min.js"></script>
28
+ <script src="https://cdnjs.cloudflare.com/ajax/libs/ace/1.4.14/mode-scala.min.js"></script>
29
+ <script src="https://cdnjs.cloudflare.com/ajax/libs/ace/1.4.14/mode-sh.min.js"></script>
30
+ <script src="https://cdnjs.cloudflare.com/ajax/libs/ace/1.4.14/mode-swift.min.js"></script>
31
+ <script src="https://cdnjs.cloudflare.com/ajax/libs/ace/1.4.14/mode-typescript.min.js"></script>
32
+ <link rel="stylesheet" href="static/style.css">
33
+ </head>
34
+ <style type="text/css">
35
+ /* body {
36
+ font-family: sans-serif;
37
+ } */
38
+ /* .leftside {
39
+ } */
40
+ main {
41
+ max-width: 80rem;
42
+ }
43
+ .rightside {
44
+ width: 30em;
45
+ }
46
+ .submit-holder {
47
+ margin-top: 2em;
48
+ }
49
+ .submit input {
50
+ font-size: 16pt;
51
+ }
52
+ .slider {
53
+ width: 20em;
54
+ }
55
+ #faq {
56
+ max-width: 60em;
57
+ }
58
+ #result {
59
+ font-family: monospace;
60
+ white-space: pre-wrap;
61
+ word-wrap: break-word;
62
+ font-size: 12pt;
63
+ clear: both;
64
+ margin-top: 1em;
65
+ border: 1px solid black;
66
+ padding: 1em;
67
+ width: 60em;
68
+ min-height: 12em;
69
+ }
70
+ #prompt {
71
+ font-weight: bold;
72
+ }
73
+ .loader {
74
+ border: 4px solid #f3f3f3;
75
+ border-radius: 50%;
76
+ border-top: 4px solid #3498db;
77
+ width: 30px;
78
+ height: 30px;
79
+ animation: spin 2s linear infinite;
80
+ margin-right: 1em;
81
+ }
82
+ @keyframes spin {
83
+ 0% { transform: rotate(0deg); }
84
+ 100% { transform: rotate(360deg); }
85
+ }
86
+ #loader_holder {
87
+ visibility: hidden;
88
+ display: flex;
89
+ align-items: center;
90
+ }
91
+
92
+ label {
93
+ margin-top: 1em;
94
+ display: inline-block;
95
+ width: 10em;
96
+ text-align: right;
97
+ font-size: 80%;
98
+ }
99
+ #loader_holder_super {
100
+ }
101
+ #error {
102
+ color: red;
103
+ width: 100%;
104
+ }
105
+ #warning {
106
+ color: darkorange;
107
+ width: 100%;
108
+ }
109
+ #examples span {
110
+ margin-right: 1em;
111
+ }
112
+ #editor {
113
+ position: relative;
114
+ width: 100%;
115
+ height: 400px;
116
+ }
117
+ #editor-holder {
118
+ position: relative;
119
+ width: 100%;
120
+ height: 400px;
121
+ }
122
+ .ace_infill {
123
+ color: red;
124
+ }
125
+ </style>
126
+ <body>
127
+ <main>
128
+ <div class="card" id="about">
129
+ <div class="header"> <h1>InCoder: A Generative Model for Code Infilling and Synthesis</h1> </div>
130
+ <p>Demo of the 6.7B parameter version of InCoder: a decoder-only Transformer model that can both extend and insert/infill code.</p>
131
+ <p>Select one of the examples below, or input your own code into the editor. You can type &lt;infill&gt; to mark a location you want the model to insert code at.</p>
132
+ <p>Click "Extend" to append text at the end of the editor. Click "Infill" to replace all &lt;infill&gt; masks. (Click "Add &lt;infill&gt; mask" to add a mask at the cursor or replace the current selection.) </p>
133
+ </div>
134
+ <div class="card" id="examples">
135
+ <div id="examples-infill">
136
+ <span class="softspan">Infill Examples:</span>
137
+ <br>
138
+ <span class="softspan"><a href='javascript:select_example("type-pred");'>Type prediction</a></span>
139
+ <span class="softspan"><a href='javascript:select_example("multi-region");'>Docstring to function</a></span>
140
+ <span class="softspan"><a href='javascript:select_example("docstring-2");'>Function to docstring</a></span>
141
+ <!--
142
+ <span class="softspan"><a href='javascript:select_example("python-infill2");'>Docstring to function</a></span>
143
+ -->
144
+ <span class="softspan"><a href='javascript:select_example("class");'>Class generation</a></span>
145
+ </div>
146
+ <div id="examples-extend">
147
+ <span class="softspan">Extend Examples:</span>
148
+ <br>
149
+ <span class="softspan"><a href='javascript:select_example("python");'>Python</a></span>
150
+ <span class="softspan"><a href='javascript:select_example("javascript");'>JavaScript</a></span>
151
+ <span class="softspan"><a href='javascript:select_example("jupyter");'>Jupyter</a></span>
152
+ <span class="softspan"><a href='javascript:select_example("stackoverflow");'>StackOverflow</a></span>
153
+ <span class="softspan"><a href='javascript:select_example("metadata-conditioning");'>Metadata Conditioning</a></span>
154
+ <span class="softspan"><a href='javascript:select_example("metadata-prediction");'>Metadata Prediction</a></span>
155
+ </div>
156
+ </div>
157
+ <div class="card" id="controls">
158
+ <div>
159
+ <label>Num Tokens:</label>
160
+ <input type="range" value="64" min="16" max="256" step="16" class="slider"
161
+ oninput="this.nextElementSibling.value = this.value" name="length" id='length_slider'>
162
+ <output class='a' id="length_slider_output">64</output>
163
+ </div>
164
+ <div>
165
+ <label>Temperature:</label>
166
+ <input type="range" value="0.6" min="0.1" max="1.0" step="0.10" class="slider"
167
+ oninput="this.nextElementSibling.value = this.value" name="temp" id='temp_slider'>
168
+ <output class='a' id="temp_slider_output">0.6</output>
169
+ </div>
170
+ <div id="buttons">
171
+ <br>
172
+ <input type="button" value="Extend" id="extend-form-button" />
173
+ <input type="button" value="Infill" id="infill-form-button" />
174
+ <br>
175
+ <br>
176
+ <input type="button" value="Add <infill> mask" id="insert-mask-button" title="add the infill marker at cursor or selection" />
177
+ </div>
178
+ </div>
179
+ <div id="edit-container" class="card">
180
+ <div id="syntax">
181
+ <span class="softspan">Syntax:</span>
182
+ <select name="mode" id="mode">
183
+ <option value="text">Text</option>
184
+ <option value="c_cpp">C/C++</option>
185
+ <option value="csharp">C#</option>
186
+ <option value="clojure">Clojure</option>
187
+ <option value="coffee">CoffeeScript</option>
188
+ <option value="golang">Go</option>
189
+ <option value="haskell">Haskell</option>
190
+ <option value="java">Java</option>
191
+ <option value="javascript">JavaScript</option>
192
+ <option value="lua">Lua</option>
193
+ <option value="objectivec">Objective C</option>
194
+ <option value="perl">Perl</option>
195
+ <option value="php">PHP</option>
196
+ <option value="python">Python</option>
197
+ <option value="ruby">Ruby</option>
198
+ <option value="rust">Rust</option>
199
+ <option value="scala">Scala</option>
200
+ <option value="sh">Shell</option>
201
+ <option value="swift">Swift</option>
202
+ <option value="typescript">Typescript</option>
203
+ </select>
204
+ </div>
205
+ <div id="editor"></div>
206
+ </div>
207
+ <div id="loader_holder_super" class="card">
208
+ <h1>Messages</h1>
209
+ <div id="error"></div>
210
+ <div id="warning"></div>
211
+ <div id="loader_holder">
212
+ <div class="loader"></div>
213
+ <div>
214
+ Generation queued, please wait...
215
+ </div>
216
+ </div>
217
+ </div>
218
+ <div id="info" class="card">
219
+ <h1 id="debug-info">More Info</h1>
220
+ <p>
221
+ See <a href="https://sites.google.com/view/incoder-code-models" target="_blank" rel="noopener noreferrer">our project site</a> for more information on
222
+ these models, including a paper and examples.
223
+ </p>
224
+
225
+ <p>
226
+ For instructions on setting up and using the models (via HuggingFace transformers), see
227
+ <a href="https://github.com/dpfried/incoder/blob/main/README.md" target="_blank" rel="noopener noreferrer">our readme</a>.
228
+ </p>
229
+
230
+ <h1 id="debug-info">Credits</h1>
231
+ <p>This model was developed at Facebook AI Research by Daniel Fried*, Armen Aghajanyan*, Jessy Lin, Sida Wang, Eric Wallace, Freda Shi, Ruiqi Zhong,
232
+ Wen-tau Yih, Luke Zettlemoyer, and Mike Lewis.</p>
233
+ <p>Thanks to Naman Goyal and Stephen Roller for writing the code this demo was based on. Extensions by Daniel Fried and
234
+ Sida Wang.</p>
235
+ </div>
236
+ </main>
237
+ <script type="text/javascript">
238
+ // these constants are only used for providing user expectations.
239
+ var OVERHEAD = 3;
240
+ var PER_TOKEN = 0.12;
241
+ var SPLIT_TOKEN = "<infill>"
242
+
243
+ var Range = require("ace/range").Range;
244
+
245
+ // examples for the user
246
+ var EXAMPLES = {
247
+ "python-infill2": {
248
+ "prompt":
249
+ `<| file ext=.py |>
250
+ from collections import Counter
251
+
252
+ def <infill>
253
+ """Count the number of occurrences of each word in the file."""
254
+ <infill>
255
+ `,
256
+ "length": 64,
257
+ "temperature": 0.2,
258
+ "mode": "python"
259
+ },
260
+ "multi-region": {
261
+ "prompt":
262
+ `<| file ext=.py |>
263
+ <infill>
264
+ """ Load the given gzip jsonl file. """
265
+ <infill>
266
+ `,
267
+ "length": 64,
268
+ "temperature": 0.2,
269
+ "mode": "python"
270
+ },
271
+ "type-pred": {
272
+ "prompt":
273
+ `def count_words(filename: str) -> <infill>
274
+ """Count the number of occurrences of each word in the file."""
275
+ with open(filename, 'r') as f:
276
+ word_counts = {}
277
+ for line in f:
278
+ for word in line.split():
279
+ if word in word_counts:
280
+ word_counts[word] = 1
281
+ else:
282
+ word_counts[word] = 1
283
+ return word_counts
284
+ `,
285
+ "length": 4,
286
+ "temperature": 0.2,
287
+ "mode": "python"
288
+ },
289
+ "docstring-2": {
290
+ "prompt":
291
+ `def _minimize_in_graph(build_loss_fn, num_steps=200, optimizer=None):
292
+ """
293
+ <infill>
294
+ """
295
+ optimizer = tf.compat.v1.train.AdamOptimizer(
296
+ 0.1) if optimizer is None else optimizer
297
+
298
+ def train_loop_body(step):
299
+ train_op = optimizer.minimize(
300
+ build_loss_fn if tf.executing_eagerly() else build_loss_fn())
301
+ return tf.tuple(tensors=[tf.add(step, 1)], control_inputs=[train_op])
302
+
303
+ minimize_op = tf.compat.v1.while_loop(
304
+ cond=lambda step: step < num_steps,
305
+ body=train_loop_body,
306
+ loop_vars=[tf.constant(0)],
307
+ return_same_structure=True)[0]
308
+ return minimize_op`,
309
+ "length": 64,
310
+ "temperature": 0.3,
311
+ "mode": "python",
312
+ },
313
+ "docstring": {
314
+ "prompt":
315
+ `<| file ext=.py |>
316
+
317
+ def count_words(filename: str) -> Dict[str, int]:
318
+ """<infill>
319
+ """
320
+ with open(filename, 'r') as f:
321
+ word_counts = {}
322
+ for line in f:
323
+ for word in line.split():
324
+ if word in word_counts:
325
+ word_counts[word] = 1
326
+ else:
327
+ word_counts[word] = 1
328
+ return word_counts
329
+ `,
330
+ "length": 32,
331
+ "temperature": 0.2,
332
+ "mode": "python"
333
+ },
334
+ "python": {
335
+ "prompt":
336
+ `<| file ext=.py |>
337
+ def count_words(filename):
338
+ """Count the number of occurrences of each word in the file"""`,
339
+ "length": 64,
340
+ "temperature": 0.6,
341
+ "mode": "python"
342
+ },
343
+ "class": {
344
+ "prompt": "<| file ext=.py |>\nclass Person:\n" + SPLIT_TOKEN + "\np = Person('Eren', 18, 'Male')",
345
+ "length": 64,
346
+ "temperature": 0.2,
347
+ "mode": "python"
348
+ },
349
+ "javascript": {
350
+ "prompt": "// fetch from the given URL and load the response contents into a new div",
351
+ "length": 64,
352
+ "temperature": 0.6,
353
+ "mode": "javascript"
354
+ },
355
+ "jupyter": {
356
+ "prompt": "<| file ext=.ipynb:python |>\n<text>\nThis notebook demonstrates using scikit-learn to perform PCA.\n</text>\n<cell>",
357
+ "length": 64,
358
+ "temperature": 0.6,
359
+ "mode": "python"
360
+ },
361
+ "stackoverflow": {
362
+ "prompt": "<| q tags=regex,html |>\nParsing HTML with regular expressions\nHow do I do this? Is it a good idea?\n<|/ q dscore=3 |>\n<| a dscore=4 |>",
363
+ "length": 64,
364
+ "temperature": 0.6,
365
+ "mode": "text"
366
+ },
367
+ "metadata-conditioning": {
368
+ "prompt": "<| file ext=.py filename=train_model.py source=github dstars=4 |>\n",
369
+ "length": 64,
370
+ "temperature": 0.6,
371
+ "mode": "python"
372
+ },
373
+ "metadata-prediction": {
374
+ "prompt": "<| file source=github ext=.py |>\nfrom setuptools import setup\nfrom setuptools_rust import Binding, RustExtension\n\nextras = {}\nextras[\"testing\"] = [\"pytest\", \"requests\", \"numpy\", \"datasets\"]\nextras[\"docs\"] = [\"sphinx\", \"sphinx_rtd_theme\", \"setuptools_rust\"]\n\nsetup(\n name=\"tokenizers\",\n version=\"0.11\",\n description=\"Fast and Customizable Tokenizers\",\n long_description=open(\"README.md\", \"r\", encoding=\"utf-8\").read(),\n)\n\n<|/ file filename=",
375
+ "length": 1,
376
+ "temperature": 0.2,
377
+ "mode": "python"
378
+ },
379
+ "humaneval": {
380
+ "prompt": "from typing import List, Optional\n\n\ndef longest(strings: List[str]) -> Optional[str]:\n \"\"\" Out of list of strings, return the longest one. Return the first one in case of multiple\n strings of the same length. Return None in case the input list is empty.\n >>> longest([])\n\n >>> longest(['a', 'b', 'c'])\n 'a'\n >>> longest(['a', 'bb', 'ccc'])\n 'ccc'\n \"\"\"\n",
381
+ "temperature": 0.6,
382
+ "length": 64,
383
+ "mode": "python"
384
+ },
385
+ };
386
+
387
+ var editor = ace.edit("editor");
388
+ editor.setOption("wrap", true);
389
+ //var editor = null;
390
+
391
// Switch the editor's syntax mode and, once the mode has loaded, prepend a
// highlight rule to every tokenizer state so that SPLIT_TOKEN is tagged with
// the 'infill' token type.
function set_editor_mode(mode) {
    // Declared with `var`: the bare assignment used to create a global.
    var session = editor.session;
    session.setMode("ace/mode/" + mode, function() {
        var rules = session.$mode.$highlightRules.getRules();
        for (var stateName in rules) {
            if (Object.prototype.hasOwnProperty.call(rules, stateName)) {
                rules[stateName].unshift({
                    token: 'infill',
                    regex: SPLIT_TOKEN
                });
            }
        }
        // force recreation of tokenizer
        session.$mode.$tokenizer = null;
        session.bgTokenizer.setTokenizer(session.$mode.getTokenizer());
        // force re-highlight whole document
        session.bgTokenizer.start(0);
    });
}
410
+
411
+ /*
412
+ var textarea = $('textarea[name="prompt"]').hide();
413
+ var prefix_textarea = $('textarea[name="prefix"]').hide();
414
+ var suffix_textarea = $('textarea[name="suffix"]').hide();
415
+ editor.getSession().on('change', function () {
416
+ textarea.val(editor.getSession().getValue());
417
+ });
418
+ */
419
+
420
// Replace the entire contents of the editor with `text`.
function set_text(text) {
    var session = editor.getSession();
    session.setValue(text);
}
424
+
425
// Select the text the server added to the document, then focus the editor.
//   - 'generate' responses: select from the end of the prompt to the end of
//     the document.
//   - 'infill' responses: select every infilled region (multi-selection when
//     there is more than one).
function set_selection(data) {
    var lines = editor.getSession().doc.$lines;
    if (data['type'] == 'generate') {
        // Declared with `var`: the bare assignment used to create a global.
        var doc_length = join_lines(lines).length;
        var start = convert_string_index_to_location(data['prompt'].length, lines);
        var end = convert_string_index_to_location(doc_length, lines);
        // reverse this so that we can shift select to shorten and delete extra stuff
        editor.selection.setRange(new Range(end.row, end.column, start.row, start.column));
    } else if (data['type'] == 'infill') {
        // Characters of parts and infills that precede the current part.
        var length_so_far = 0;
        for (var i = 0; i < data['infills'].length; i++) {
            var prefix = data['parts'][i];
            var infilled = data['infills'][i];
            var start = convert_string_index_to_location(length_so_far + prefix.length, lines);
            var end = convert_string_index_to_location(length_so_far + (prefix + infilled).length, lines);
            var range = null;
            if (data['infills'].length == 1) {
                // a single infill is selected end-to-start, as for 'generate'
                range = new Range(end.row, end.column, start.row, start.column)
            } else {
                range = new Range(start.row, start.column, end.row, end.column)
            }
            if (i == 0) {
                editor.selection.setRange(range);
            } else {
                editor.selection.addRange(range);
            }
            length_so_far += (prefix + infilled).length;
        }
    }
    editor.focus();
}
458
+
459
// Load the named entry of EXAMPLES into the UI: both sliders and their
// displayed values, the editor text, and the syntax mode.
function select_example(name) {
    var example = EXAMPLES[name];

    $("#length_slider").val(example["length"]);
    $("#length_slider_output").text(example["length"]);
    $("#temp_slider").val(example["temperature"]);
    $("#temp_slider_output").text(example["temperature"]);
    set_text(example["prompt"]);

    var mode = example["mode"];
    set_editor_mode(mode);
    $("#mode").val(mode).change();
}
470
+
471
// Return the newline sequence reported by the editor's document.
function newline_character() {
    var doc = editor.getSession().doc;
    return doc.getNewLineCharacter();
}
474
+
475
// Join an array of lines into one string, using the document's newline
// sequence as the separator.
function join_lines(lines) {
    var separator = newline_character();
    return lines.join(separator);
}
478
+
479
// Return the document text that precedes `location` ({row, column}), given
// the document's `lines`. Logs an error if `location` lacks either field.
function get_prefix(location, lines) {
    var has_row = location.hasOwnProperty('row');
    var has_column = location.hasOwnProperty('column');
    if (!has_row || !has_column) {
        console.error("invalid location " + location);
    }
    // Text on the target row, up to the column.
    var partial_line = lines[location.row].substring(0, location.column);
    if (location.row == 0) {
        return partial_line;
    }
    var earlier_lines = join_lines(lines.slice(0, location.row));
    return earlier_lines + newline_character() + partial_line;
}
489
+
490
/**
 * Convert a {row, column} location into an offset into the joined text.
 *
 * @param {{row: number, column: number}} location - position in `lines`.
 * @param {string[]} lines - the document, one entry per line.
 * @returns {number} length of the text that precedes the location.
 */
function convert_location_to_string_index(location, lines) {
    var preceding_text = get_prefix(location, lines);
    return preceding_text.length;
}
493
+
494
/**
 * Convert an offset into the joined text back into a {row, column} location.
 *
 * Each line is counted as its own length plus the length of the editor's
 * newline sequence; the first line whose span contains the offset wins.
 *
 * @param {number} string_index - offset into the joined text.
 * @param {string[]} lines - the document, one entry per line.
 * @returns {?{row: number, column: number}} the location, or null (after
 *     logging an error) when the offset lies beyond the last line.
 */
function convert_string_index_to_location(string_index, lines) {
    var separator_length = newline_character().length;
    // Offset at which the line currently being examined begins.
    var line_start = 0;

    for (var row = 0; row < lines.length; row++) {
        var next_line_start = line_start + lines[row].length + separator_length;
        if (string_index < next_line_start) {
            return {
                'row': row,
                'column': string_index - line_start,
            };
        }
        line_start = next_line_start;
    }

    console.error("did not find index " + string_index + " in lines " + lines);
    return null;
}
513
+
514
/**
 * Split the editor text on SPLIT_TOKEN.
 *
 * @param {boolean} warn_on_single - when true, show an alert if the text
 *     contains no SPLIT_TOKEN (i.e. the split yields a single part).
 * @returns {string[]} the pieces of text between consecutive tokens; a
 *     one-element array when no token is present.
 */
function get_infill_parts(warn_on_single) {
    var lines = editor.getSession().doc.$lines;
    var lines_flat = join_lines(lines);
    // Declared locally: the bare assignment used before leaked `parts` as
    // an implicit global (and is a ReferenceError in strict mode).
    var parts = lines_flat.split(SPLIT_TOKEN);
    if (warn_on_single && parts.length == 1) {
        window.alert('There are no infill masks, add some <infill> masks before requesting an infill');
    }
    return parts;
}
523
+
524
/**
 * Replace the current selection(s) in the editor with SPLIT_TOKEN.
 *
 * When the selection holds more than one range, every range is replaced
 * (and logged); otherwise the single range from getRange() is replaced.
 */
function insert_mask() {
    var selection = editor.selection;

    if (selection.ranges.length <= 1) {
        editor.session.replace(selection.getRange(), SPLIT_TOKEN);
        return;
    }

    // `selection.ranges` is read afresh on every pass rather than cached,
    // since each replace edits the document the ranges refer to.
    for (var idx = 0; idx < selection.ranges.length; idx++) {
        var target = selection.ranges[idx];
        console.log('range is', target);
        editor.session.replace(target, SPLIT_TOKEN);
    }
}
534
+
535
+
536
/**
 * Build the click handler for one of the generation buttons.
 *
 * The returned async handler gathers the current slider/checkbox settings
 * and editor contents, JSON-serialises them, encodes the result with
 * Base64.encodeURI and sends it as the `info` query parameter of a GET
 * request to `url`. While the request is in flight the loader is shown and
 * both generation buttons are disabled; afterwards the response text and
 * selection (or the error message) are shown in the page.
 *
 * @param {string} url - endpoint to request. When it equals "infill",
 *     get_infill_parts is asked to warn if the text has no mask.
 * @returns {function(Event): Promise<void>} the click listener.
 */
function make_generate_listener(url) {
    return async function(event) {
        // Generation parameters plus the text, both whole and split on masks.
        var send_data = {
            length: $("#length_slider").val(),
            temperature: $("#temp_slider").val(),
            extra_sentinel: $('#extra_sentinel_checkbox').is(":checked"),
            max_retries: $('#max_retries_slider').val(),
            parts: get_infill_parts(url == "infill"),
            prompt: editor.getSession().getValue(),
        };
        console.log("send_data:");
        console.log(send_data);

        // Lock the UI for the duration of the request.
        $("#loader_holder").css("visibility", "visible");
        $("#extend-form-button").prop("disabled", true);
        $("#infill-form-button").prop("disabled", true);
        $("#error").text("");

        // Unlock the UI again; called exactly once, from `finally` below.
        function complete() {
            $("#loader_holder").css("visibility", "hidden");
            $("#extend-form-button").prop("disabled", false);
            $("#infill-form-button").prop("disabled", false);
        }

        // Render a parsed JSON response from the server.
        function success(receive_data) {
            console.log("Response:");
            console.log(receive_data);
            if (receive_data["result"] == "success") {
                console.log("success");
                set_text(receive_data["text"]);
                set_selection(receive_data);
                $("#error").text("");
                if (receive_data["message"] != "") {
                    $("#warning").text(receive_data["message"]);
                } else {
                    $("#warning").text("");
                }
            } else {
                console.log("error");
                set_text(receive_data["text"]);
                $("#error").text(receive_data["message"]);
            }
        }

        // Report a transport, HTTP or rendering failure.
        function error(err) {
            console.log(err);
            $("#error").text(err);
        }

        try {
            var encoded_data = Base64.encodeURI(JSON.stringify(send_data));
            const response = await fetch(`${url}?info=${encoded_data}`);
            if (response.status >= 400) {
                console.log(response.status);
                // statusText may be empty, so fall back to the numeric code
                // rather than showing a blank error.
                error(response.statusText || ("HTTP " + response.status));
            } else {
                // Await the body here so that parse/render failures reach
                // the catch below and the UI stays locked until the result
                // has actually been displayed.
                success(await response.json());
            }
        } catch (e) {
            error(e);
        } finally {
            complete();
        }
    };
}
617
+
618
+ // actual logic
619
// Once the DOM is ready: bind the three buttons and the #mode element to
// their handlers, then load the "python" example as the initial state.
$(document).ready(function() {
    var on_extend = make_generate_listener("generate");
    var on_infill = make_generate_listener("infill");

    $("#insert-mask-button").click(insert_mask);
    $("#extend-form-button").click(on_extend);
    $("#infill-form-button").click(on_infill);

    // Keep the editor's mode in step with the #mode element's value.
    $("#mode").change(function (e) {
        set_editor_mode($("#mode").val());
    });

    select_example("python");
});
630
+ </script>
631
+ <script src="https://cdnjs.cloudflare.com/ajax/libs/iframe-resizer/4.3.2/iframeResizer.contentWindow.min.js"></script>
632
+ </body>
633
+ </html>
static/style.css ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ body {
2
+ padding: 2rem;
3
+ font-family: -apple-system, BlinkMacSystemFont, "Arial", sans-serif;
4
+ }
5
+
6
+ h1 {
7
+ font-size: 16px;
8
+ margin-top: 0;
9
+ }
10
+
11
+ p {
12
+ color: rgb(107, 114, 128);
13
+ font-size: 15px;
14
+ margin-bottom: 10px;
15
+ margin-top: 5px;
16
+ }
17
+
18
+ button {
19
+ font-size: 15px;
20
+ }
21
+
22
+ .softspan {
23
+ color: rgb(127, 134, 148);
24
+ font-size: 15px;
25
+ margin-bottom: 10px;
26
+ margin-top: 5px;
27
+ }
28
+
29
+ .card {
30
+ max-width: 800px;
31
+ margin: 0 auto;
32
+ padding: 16px;
33
+ border: 1px solid lightgray;
34
+ border-radius: 16px;
35
+ }
36
+
37
+ .card p:last-child {
38
+ margin-bottom: 0;
39
+ }
style.css ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ body {
2
+ padding: 2rem;
3
+ font-family: -apple-system, BlinkMacSystemFont, "Arial", sans-serif;
4
+ }
5
+
6
+ h1 {
7
+ font-size: 16px;
8
+ margin-top: 0;
9
+ }
10
+
11
+ p {
12
+ color: rgb(107, 114, 128);
13
+ font-size: 15px;
14
+ margin-bottom: 10px;
15
+ margin-top: 5px;
16
+ }
17
+
18
+ .card {
19
+ max-width: 620px;
20
+ margin: 0 auto;
21
+ padding: 16px;
22
+ border: 1px solid lightgray;
23
+ border-radius: 16px;
24
+ }
25
+
26
+ .card p:last-child {
27
+ margin-bottom: 0;
28
+ }
templates/index.html ADDED
@@ -0,0 +1 @@
 
 
1
+ ../static/index.html
tokenizers_patch.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import subprocess
4
+
5
+
6
# Install a Rust toolchain via rustup, then pip-install the `tokenizers`
# Python bindings from the upstream git repository and add the resulting
# source checkout to sys.path so the import that follows can find it.
print("Getting rustup")
subprocess.run(
    "curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y",
    shell=True,
)
print("Got rustup")
# Child processes get ~/.cargo/bin first on PATH so that they can find the
# freshly installed rustc (presumably where rustup puts it; the check below
# prints whether the binary is actually there).
myenv = os.environ.copy()
myenv["PATH"] = os.path.expanduser("~/.cargo/bin:") + myenv["PATH"]
print("RUSTC", os.path.isfile(os.path.expanduser("~/.cargo/bin/rustc")))
subprocess.run("rustc --version", shell=True, env=myenv)
subprocess.run(
    # `\\&` yields the same `\&` the shell saw before (an escaped ampersand,
    # so the shell does not treat it as the background operator) without
    # relying on Python's invalid `\&` escape sequence.
    "pip install -e git+https://github.com/huggingface/tokenizers/#egg=tokenizers\\&subdirectory=bindings/python",
    shell=True,
    env=myenv,
)
# NOTE(review): assumes pip places the editable checkout under ./src/tokenizers
# relative to the current working directory; confirm for the target pip.
sys.path.append(
    os.path.join(os.getcwd(), "src", "tokenizers", "bindings", "python", "py_src")
)
24
+
25
+
26
+ import tokenizers