Spaces:
Running
Running
Commit
•
878e432
1
Parent(s):
08f0bdc
track pending requests, improve ui, add qwen-2-0.5B
Browse files- src/routes/+page.svelte +46 -14
src/routes/+page.svelte
CHANGED
@@ -4,6 +4,8 @@
|
|
4 |
import * as webllm from "@mlc-ai/web-llm";
|
5 |
import { onMount } from 'svelte';
|
6 |
|
|
|
|
|
7 |
let engine: webllm.MLCEngineInterface;
|
8 |
let isLoading = false;
|
9 |
let loadingStatus = '';
|
@@ -12,8 +14,8 @@
|
|
12 |
let error = '';
|
13 |
let completionSpeed: number | null = null;
|
14 |
let tokensPerSecond: number | null = null;
|
15 |
-
let selectedModel = "SmolLM-360M-Instruct-q4f16_1-MLC";
|
16 |
let isGenerating = false;
|
|
|
17 |
|
18 |
async function loadWebLLM() {
|
19 |
isLoading = true;
|
@@ -24,11 +26,18 @@
|
|
24 |
|
25 |
const appConfig: webllm.AppConfig = {
|
26 |
model_list: [{
|
27 |
-
model: `https://huggingface.co/mlc-ai
|
28 |
-
model_id:
|
29 |
model_lib: `${webllm.modelLibURLPrefix}${webllm.modelVersion}/SmolLM-360M-Instruct-q4f16_1-ctx2k_cs1k-webgpu.wasm`,
|
30 |
overrides: { context_window_size: 2048 },
|
31 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
32 |
};
|
33 |
|
34 |
try {
|
@@ -44,18 +53,37 @@
|
|
44 |
}
|
45 |
}
|
46 |
|
47 |
-
async function generateCompletion() {
|
48 |
-
if (!engine ||
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
49 |
|
50 |
isGenerating = true;
|
51 |
const startTime = performance.now();
|
52 |
try {
|
|
|
53 |
const response = await engine.chat.completions.create({
|
54 |
-
messages: [
|
|
|
|
|
|
|
55 |
max_tokens: 10,
|
56 |
});
|
57 |
|
58 |
outputText = response.choices[0].message.content || "";
|
|
|
|
|
|
|
|
|
|
|
|
|
59 |
const endTime = performance.now();
|
60 |
const elapsedTimeInSeconds = (endTime - startTime) / 1000;
|
61 |
completionSpeed = Math.round(endTime - startTime);
|
@@ -68,6 +96,13 @@
|
|
68 |
error = `Error: ${(err as Error).message}`;
|
69 |
} finally {
|
70 |
isGenerating = false;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
71 |
}
|
72 |
}
|
73 |
|
@@ -79,16 +114,11 @@
|
|
79 |
<p class="text-center font-mono text-sm mb-4">Powered by {selectedModel}</p>
|
80 |
<Textarea
|
81 |
bind:value={inputText}
|
82 |
-
on:input={() =>
|
83 |
-
if (!isGenerating) {
|
84 |
-
generateCompletion();
|
85 |
-
}
|
86 |
-
}}
|
87 |
disabled={isLoading}
|
88 |
class="w-full"
|
89 |
placeholder="Say something..."
|
90 |
/>
|
91 |
-
<pre class="text-lg whitespace-pre-wrap">{outputText}</pre>
|
92 |
{#if isLoading}
|
93 |
<p class="text-sm text-slate-600 text-center">{loadingStatus}</p>
|
94 |
{:else if error}
|
@@ -101,7 +131,9 @@
|
|
101 |
{#if tokensPerSecond !== null}
|
102 |
<Badge>{tokensPerSecond} tok/s</Badge>
|
103 |
{/if}
|
104 |
-
<Badge
|
105 |
</div>
|
106 |
{/if}
|
|
|
|
|
107 |
</div>
|
|
|
4 |
import * as webllm from "@mlc-ai/web-llm";
|
5 |
import { onMount } from 'svelte';
|
6 |
|
7 |
+
let selectedModel = "SmolLM-360M-Instruct-q4f16_1-MLC";
|
8 |
+
|
9 |
let engine: webllm.MLCEngineInterface;
|
10 |
let isLoading = false;
|
11 |
let loadingStatus = '';
|
|
|
14 |
let error = '';
|
15 |
let completionSpeed: number | null = null;
|
16 |
let tokensPerSecond: number | null = null;
|
|
|
17 |
let isGenerating = false;
|
18 |
+
let pendingRequest: string | null = null;
|
19 |
|
20 |
async function loadWebLLM() {
|
21 |
isLoading = true;
|
|
|
26 |
|
27 |
const appConfig: webllm.AppConfig = {
|
28 |
model_list: [{
|
29 |
+
model: `https://huggingface.co/mlc-ai/SmolLM-360M-Instruct-q4f16_1-MLC`,
|
30 |
+
model_id: 'SmolLM-360M-Instruct-q4f16_1-MLC',
|
31 |
model_lib: `${webllm.modelLibURLPrefix}${webllm.modelVersion}/SmolLM-360M-Instruct-q4f16_1-ctx2k_cs1k-webgpu.wasm`,
|
32 |
overrides: { context_window_size: 2048 },
|
33 |
+
},
|
34 |
+
{
|
35 |
+
model: `https://huggingface.co/mlc-ai/Qwen2-0.5B-Instruct-q4f16_1-MLC`,
|
36 |
+
model_id: 'Qwen2-0.5B-Instruct-q4f16_1-MLC',
|
37 |
+
model_lib: `${webllm.modelLibURLPrefix}${webllm.modelVersion}/Qwen2-0.5B-Instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm`,
|
38 |
+
overrides: { context_window_size: 2048 },
|
39 |
+
}
|
40 |
+
],
|
41 |
};
|
42 |
|
43 |
try {
|
|
|
53 |
}
|
54 |
}
|
55 |
|
56 |
+
async function generateCompletion(content: string) {
|
57 |
+
if (!engine || isGenerating) {
|
58 |
+
/**
|
59 |
+
* This is used to store the most recent request from the user
|
60 |
+
* while the engine is not yet loaded or another request is still being processed.
|
61 |
+
*/
|
62 |
+
pendingRequest = content.trim();
|
63 |
+
return;
|
64 |
+
}
|
65 |
+
|
66 |
+
if (!content.trim()) return;
|
67 |
|
68 |
isGenerating = true;
|
69 |
const startTime = performance.now();
|
70 |
try {
|
71 |
+
console.log("Generating completion:", content);
|
72 |
const response = await engine.chat.completions.create({
|
73 |
+
messages: [
|
74 |
+
{role:"system", content: "You are a helpful AI agent helping users. Try your best to answer the users request."},
|
75 |
+
{role: "user", content: content}
|
76 |
+
],
|
77 |
max_tokens: 10,
|
78 |
});
|
79 |
|
80 |
outputText = response.choices[0].message.content || "";
|
81 |
+
|
82 |
+
// indicate that the response was cut short
|
83 |
+
if (response.choices[0].finish_reason === "length") {
|
84 |
+
outputText += "...";
|
85 |
+
}
|
86 |
+
|
87 |
const endTime = performance.now();
|
88 |
const elapsedTimeInSeconds = (endTime - startTime) / 1000;
|
89 |
completionSpeed = Math.round(endTime - startTime);
|
|
|
96 |
error = `Error: ${(err as Error).message}`;
|
97 |
} finally {
|
98 |
isGenerating = false;
|
99 |
+
|
100 |
+
// process the pending request, if one exists
|
101 |
+
if (pendingRequest && pendingRequest !== content) {
|
102 |
+
const nextRequest = pendingRequest;
|
103 |
+
pendingRequest = null;
|
104 |
+
await generateCompletion(nextRequest);
|
105 |
+
}
|
106 |
}
|
107 |
}
|
108 |
|
|
|
114 |
<p class="text-center font-mono text-sm mb-4">Powered by {selectedModel}</p>
|
115 |
<Textarea
|
116 |
bind:value={inputText}
|
117 |
+
on:input={() => generateCompletion(inputText)}
|
|
|
|
|
|
|
|
|
118 |
disabled={isLoading}
|
119 |
class="w-full"
|
120 |
placeholder="Say something..."
|
121 |
/>
|
|
|
122 |
{#if isLoading}
|
123 |
<p class="text-sm text-slate-600 text-center">{loadingStatus}</p>
|
124 |
{:else if error}
|
|
|
131 |
{#if tokensPerSecond !== null}
|
132 |
<Badge>{tokensPerSecond} tok/s</Badge>
|
133 |
{/if}
|
134 |
+
<Badge>{selectedModel}</Badge>
|
135 |
</div>
|
136 |
{/if}
|
137 |
+
<pre class="text-lg font-bold whitespace-pre-wrap">{outputText}</pre>
|
138 |
+
|
139 |
</div>
|