Jhayase committed on
Commit
98c69b7
·
1 Parent(s): 7628468

Updates for SuperBPE website embed

Browse files
Files changed (3) hide show
  1. assets/index-DEbmRw68.js +39 -25
  2. assets/index-Dhl4q2CV.css +5 -23
  3. index.html +1 -1
assets/index-DEbmRw68.js CHANGED
@@ -7139,7 +7139,7 @@ function jd({
7139
  const Ql = Object.freeze({
7140
  "UW/OLMo2-8B-SuperBPE-t180k": "SuperBPE 200k (t=180k)",
7141
  "UW/OLMo2-8B-SuperBPE-t160k": "SuperBPE 200k (t=160k)",
7142
- "UW/OLMo2-8B-SuperBPE-t80k": "SuperBPE 200k (t=160k)",
7143
  "UW/OLMo2-8B-BPE": "BPE 200k (baseline)",
7144
  "Xenova/gpt-4o": "GPT-4o",
7145
  "Xenova/llama-3-tokenizer": "Llama 3",
@@ -7167,6 +7167,13 @@ function Md() {
7167
  const k = x => {
7168
  l(x.data.token_ids), u(x.data.decoded), s(x.data.margins)
7169
  };
 
 
 
 
 
 
 
7170
  return c.current.addEventListener("message", k), () => c.current.removeEventListener("message", k)
7171
  }, []);
7172
  const a = Z.useCallback(() => {
@@ -7194,19 +7201,17 @@ function Md() {
7194
  text: S.current.value
7195
  })
7196
  }, []);
 
7197
  return R.jsxs("div", {
7198
- className: "w-full max-w-[720px] flex flex-col gap-4 items-center",
7199
  children: [R.jsxs("div", {
7200
- children: [R.jsx("h1", {
7201
- className: "text-5xl font-bold mb-2",
7202
- children: "SuperBPE Playground"
7203
- }), R.jsxs("h2", {
7204
  className: "text-lg font-normal",
7205
- children: ["Experiment with different tokenizers (running", " ", R.jsx("a", {
7206
  className: "text-gray-900 underline",
7207
  href: "https://github.com/huggingface/transformers.js",
7208
  children: "locally"
7209
- }), " ", "in your browser)."]
7210
  })]
7211
  }), R.jsxs("div", {
7212
  children: [R.jsx("select", {
@@ -7214,7 +7219,7 @@ function Md() {
7214
  onChange: k => {
7215
  a(), w(""), v(k)
7216
  },
7217
- className: "bg-gray-50 border border-gray-300 text-gray-900 text-sm rounded-lg focus:ring-blue-500 focus:border-blue-500 block w-full p-2",
7218
  children: Object.entries(Ql).map(([k, x]) => R.jsx("option", {
7219
  value: k,
7220
  children: x
@@ -7226,34 +7231,43 @@ function Md() {
7226
  onChange: k => {
7227
  w(k.target.value), v(k)
7228
  },
7229
- className: "bg-white border border-gray-300 text-gray-900 text-sm rounded-lg focus:ring-blue-500 focus:border-blue-500 block w-full py-1 px-2 mt-1"
7230
  })]
7231
  }), R.jsx("textarea", {
7232
  ref: S,
7233
  onChange: d,
7234
  rows: "8",
7235
- className: "font-mono text-lg block w-full p-2.5 text-gray-900 bg-gray-50 rounded-lg border border-gray-200",
7236
  placeholder: "Enter some text",
7237
  defaultValue: n ?? ((E = S.current) == null ? void 0 : E.value) ?? ""
7238
  }), R.jsxs("div", {
7239
- className: "flex justify-center gap-5",
7240
- children: [R.jsxs("div", {
7241
- className: "flex flex-col",
7242
- children: [R.jsx("h2", {
 
 
 
7243
  className: "font-semibold uppercase leading-4",
7244
- children: "Tokens"
7245
- }), R.jsx("h3", {
7246
- className: "font-semibold text-3xl",
 
 
 
7247
  children: r.length.toLocaleString()
 
 
 
7248
  })]
7249
- }), R.jsxs("div", {
7250
- className: "flex flex-col",
7251
- children: [R.jsx("h2", {
 
 
 
7252
  className: "font-semibold uppercase leading-4",
7253
- children: "Characters"
7254
- }), R.jsx("h3", {
7255
- className: "font-semibold text-3xl",
7256
- children: (((N = S.current) == null ? void 0 : N.value.length) ?? 0).toLocaleString()
7257
  })]
7258
  })]
7259
  }), R.jsx("div", {
 
7139
  const Ql = Object.freeze({
7140
  "UW/OLMo2-8B-SuperBPE-t180k": "SuperBPE 200k (t=180k)",
7141
  "UW/OLMo2-8B-SuperBPE-t160k": "SuperBPE 200k (t=160k)",
7142
+ "UW/OLMo2-8B-SuperBPE-t80k": "SuperBPE 200k (t=80k)",
7143
  "UW/OLMo2-8B-BPE": "BPE 200k (baseline)",
7144
  "Xenova/gpt-4o": "GPT-4o",
7145
  "Xenova/llama-3-tokenizer": "Llama 3",
 
7167
  const k = x => {
7168
  l(x.data.token_ids), u(x.data.decoded), s(x.data.margins)
7169
  };
7170
+ if (S.current) {
7171
+ S.current.value = "By the way, I am a fan of the Milky Way!";
7172
+ }
7173
+ c.current.postMessage({
7174
+ model_id: m,
7175
+ text: S.current.value
7176
+ })
7177
  return c.current.addEventListener("message", k), () => c.current.removeEventListener("message", k)
7178
  }, []);
7179
  const a = Z.useCallback(() => {
 
7201
  text: S.current.value
7202
  })
7203
  }, []);
7204
+ const textEncoder = new TextEncoder();
7205
  return R.jsxs("div", {
7206
+ className: "w-full flex flex-col gap-4",
7207
  children: [R.jsxs("div", {
7208
+ children: [R.jsxs("h2", {
 
 
 
7209
  className: "text-lg font-normal",
7210
+ children: ["Experiment with our tokenizers (running", " ", R.jsx("a", {
7211
  className: "text-gray-900 underline",
7212
  href: "https://github.com/huggingface/transformers.js",
7213
  children: "locally"
7214
+ }), " ", "in your browser)!"]
7215
  })]
7216
  }), R.jsxs("div", {
7217
  children: [R.jsx("select", {
 
7219
  onChange: k => {
7220
  a(), w(""), v(k)
7221
  },
7222
+ className: "bg-gray-50 border border-gray-300 text-gray-900 text-sm rounded-lg focus:ring-blue-500 focus:border-blue-500 block p-2",
7223
  children: Object.entries(Ql).map(([k, x]) => R.jsx("option", {
7224
  value: k,
7225
  children: x
 
7231
  onChange: k => {
7232
  w(k.target.value), v(k)
7233
  },
7234
+ className: "bg-white border border-gray-300 text-gray-900 text-sm rounded-lg focus:ring-blue-500 focus:border-blue-500 block py-1 px-2 mt-1"
7235
  })]
7236
  }), R.jsx("textarea", {
7237
  ref: S,
7238
  onChange: d,
7239
  rows: "8",
7240
+ className: "font-mono text-lg block w-full p-2.5 h-[200px] text-gray-900 bg-gray-50 rounded-lg border border-gray-200",
7241
  placeholder: "Enter some text",
7242
  defaultValue: n ?? ((E = S.current) == null ? void 0 : E.value) ?? ""
7243
  }), R.jsxs("div", {
7244
+ className: "",
7245
+ children: [R.jsxs("span", {
7246
+ className: "",
7247
+ children: [R.jsx("span", {
7248
+ className: "font-semibold",
7249
+ children: (((N = S.current) == null ? void 0 : textEncoder.encode(N.value).length) ?? 0).toLocaleString()
7250
+ }), R.jsx("span", {
7251
  className: "font-semibold uppercase leading-4",
7252
+ children: " Bytes, "
7253
+ })]
7254
+ }), R.jsxs("span", {
7255
+ className: "",
7256
+ children: [R.jsx("span", {
7257
+ className: "font-semibold",
7258
  children: r.length.toLocaleString()
7259
+ }), R.jsx("span", {
7260
+ className: "font-semibold uppercase leading-4",
7261
+ children: " Tokens: "
7262
  })]
7263
+ }), R.jsxs("span", {
7264
+ className: "",
7265
+ children: [R.jsx("span", {
7266
+ className: "font-bold",
7267
+ children: ((((N = S.current) == null ? void 0 : textEncoder.encode(N.value).length) ?? 0) / r.length || 0).toLocaleString()
7268
+ }), R.jsx("span", {
7269
  className: "font-semibold uppercase leading-4",
7270
+ children: " Bytes/token"
 
 
 
7271
  })]
7272
  })]
7273
  }), R.jsx("div", {
assets/index-Dhl4q2CV.css CHANGED
@@ -2,8 +2,8 @@
2
  max-width: 1280px;
3
  width: 100%;
4
  margin: 0 auto;
5
- padding: 2rem;
6
- text-align: center;
7
  display: flex;
8
  justify-content: center;
9
  align-items: center;
@@ -351,7 +351,7 @@ video {
351
  height: 1rem;
352
  }
353
  .h-\[200px\] {
354
- height: 200px;
355
  }
356
  .w-4 {
357
  width: 1rem;
@@ -509,8 +509,8 @@ video {
509
  line-height: 1.5;
510
  font-weight: 400;
511
  color-scheme: light dark;
512
- color: #ffffffde;
513
- background-color: #242424;
514
  font-synthesis: none;
515
  text-rendering: optimizeLegibility;
516
  -webkit-font-smoothing: antialiased;
@@ -523,18 +523,6 @@ body {
523
  place-items: center;
524
  min-height: 100vh;
525
  }
526
- @media (prefers-color-scheme: light) {
527
- :root {
528
- color: #213547;
529
- background-color: #fff;
530
- }
531
- a:hover {
532
- color: #747bff;
533
- }
534
- button {
535
- background-color: #f9f9f9;
536
- }
537
- }
538
  .focus\:border-blue-500:focus {
539
  --tw-border-opacity: 1;
540
  border-color: rgb(59 130 246 / var(--tw-border-opacity));
@@ -543,9 +531,3 @@ body {
543
  --tw-ring-opacity: 1;
544
  --tw-ring-color: rgb(59 130 246 / var(--tw-ring-opacity));
545
  }
546
- @media (prefers-color-scheme: dark) {
547
- .dark\:text-gray-300 {
548
- --tw-text-opacity: 1;
549
- color: rgb(209 213 219 / var(--tw-text-opacity));
550
- }
551
- }
 
2
  max-width: 1280px;
3
  width: 100%;
4
  margin: 0 auto;
5
+ padding: 0;
6
+ text-align: left;
7
  display: flex;
8
  justify-content: center;
9
  align-items: center;
 
351
  height: 1rem;
352
  }
353
  .h-\[200px\] {
354
+ height: 150px;
355
  }
356
  .w-4 {
357
  width: 1rem;
 
509
  line-height: 1.5;
510
  font-weight: 400;
511
  color-scheme: light dark;
512
+ color: rgb(54, 54, 54);
513
+ background-color: white;
514
  font-synthesis: none;
515
  text-rendering: optimizeLegibility;
516
  -webkit-font-smoothing: antialiased;
 
523
  place-items: center;
524
  min-height: 100vh;
525
  }
 
 
 
 
 
 
 
 
 
 
 
 
526
  .focus\:border-blue-500:focus {
527
  --tw-border-opacity: 1;
528
  border-color: rgb(59 130 246 / var(--tw-border-opacity));
 
531
  --tw-ring-opacity: 1;
532
  --tw-ring-color: rgb(59 130 246 / var(--tw-ring-opacity));
533
  }
 
 
 
 
 
 
index.html CHANGED
@@ -3,7 +3,7 @@
3
  <head>
4
  <meta charset="UTF-8" />
5
  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
6
- <title>The Tokenizer Playground</title>
7
  <script type="module" crossorigin src="/assets/index-DEbmRw68.js"></script>
8
  <link rel="stylesheet" crossorigin href="/assets/index-Dhl4q2CV.css">
9
  <script
 
3
  <head>
4
  <meta charset="UTF-8" />
5
  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
6
+ <title>SuperBPE Playground</title>
7
  <script type="module" crossorigin src="/assets/index-DEbmRw68.js"></script>
8
  <link rel="stylesheet" crossorigin href="/assets/index-Dhl4q2CV.css">
9
  <script