cfahlgren1 (HF staff) committed
Commit
878e432
1 Parent(s): 08f0bdc

track pending requests, improve ui, add qwen-2-0.5B

Files changed (1)
  1. src/routes/+page.svelte +46 -14
src/routes/+page.svelte CHANGED
@@ -4,6 +4,8 @@
   import * as webllm from "@mlc-ai/web-llm";
   import { onMount } from 'svelte';
 
+  let selectedModel = "SmolLM-360M-Instruct-q4f16_1-MLC";
+
   let engine: webllm.MLCEngineInterface;
   let isLoading = false;
   let loadingStatus = '';
@@ -12,8 +14,8 @@
   let error = '';
   let completionSpeed: number | null = null;
   let tokensPerSecond: number | null = null;
-  let selectedModel = "SmolLM-360M-Instruct-q4f16_1-MLC";
   let isGenerating = false;
+  let pendingRequest: string | null = null;
 
   async function loadWebLLM() {
     isLoading = true;
@@ -24,11 +26,18 @@
 
     const appConfig: webllm.AppConfig = {
       model_list: [{
-        model: `https://huggingface.co/mlc-ai/${selectedModel}`,
-        model_id: selectedModel,
+        model: `https://huggingface.co/mlc-ai/SmolLM-360M-Instruct-q4f16_1-MLC`,
+        model_id: 'SmolLM-360M-Instruct-q4f16_1-MLC',
         model_lib: `${webllm.modelLibURLPrefix}${webllm.modelVersion}/SmolLM-360M-Instruct-q4f16_1-ctx2k_cs1k-webgpu.wasm`,
         overrides: { context_window_size: 2048 },
-      }],
+      },
+      {
+        model: `https://huggingface.co/mlc-ai/Qwen2-0.5B-Instruct-q4f16_1-MLC`,
+        model_id: 'Qwen2-0.5B-Instruct-q4f16_1-MLC',
+        model_lib: `${webllm.modelLibURLPrefix}${webllm.modelVersion}/Qwen2-0.5B-Instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm`,
+        overrides: { context_window_size: 2048 },
+      }
+      ],
     };
 
     try {
@@ -44,18 +53,37 @@
     }
   }
 
-  async function generateCompletion() {
-    if (!engine || !inputText.trim() || isGenerating) return;
+  async function generateCompletion(content: string) {
+    if (!engine || isGenerating) {
+      /**
+       * This is used to store the most recent request from the user
+       * while the current request is being processed.
+       */
+      pendingRequest = content.trim();
+      return;
+    }
+
+    if (!content.trim()) return;
 
     isGenerating = true;
     const startTime = performance.now();
     try {
+      console.log("Generating completion:", content);
       const response = await engine.chat.completions.create({
-        messages: [{role:"system", content: "You are a helpful AI agent helping users. Try your best to answer the users request."},{ role: "user", content: inputText }],
+        messages: [
+          {role:"system", content: "You are a helpful AI agent helping users. Try your best to answer the users request."},
+          {role: "user", content: content}
+        ],
         max_tokens: 10,
       });
 
       outputText = response.choices[0].message.content || "";
+
+      // indicate that the response was cut short
+      if (response.choices[0].finish_reason === "length") {
+        outputText += "...";
+      }
+
       const endTime = performance.now();
       const elapsedTimeInSeconds = (endTime - startTime) / 1000;
       completionSpeed = Math.round(endTime - startTime);
@@ -68,6 +96,13 @@
       error = `Error: ${(err as Error).message}`;
     } finally {
       isGenerating = false;
+
+      // process pending request if exists
+      if (pendingRequest && pendingRequest !== content) {
+        const nextRequest = pendingRequest;
+        pendingRequest = null;
+        await generateCompletion(nextRequest);
+      }
     }
   }
 
@@ -79,16 +114,11 @@
   <p class="text-center font-mono text-sm mb-4">Powered by {selectedModel}</p>
   <Textarea
     bind:value={inputText}
-    on:input={() => {
-      if (!isGenerating) {
-        generateCompletion();
-      }
-    }}
+    on:input={() => generateCompletion(inputText)}
    disabled={isLoading}
    class="w-full"
    placeholder="Say something..."
   />
-  <pre class="text-lg whitespace-pre-wrap">{outputText}</pre>
   {#if isLoading}
     <p class="text-sm text-slate-600 text-center">{loadingStatus}</p>
   {:else if error}
@@ -101,7 +131,9 @@
     {#if tokensPerSecond !== null}
       <Badge>{tokensPerSecond} tok/s</Badge>
     {/if}
-    <Badge class="bg-green-700">{selectedModel}</Badge>
+    <Badge>{selectedModel}</Badge>
   </div>
   {/if}
+  <pre class="text-lg font-bold whitespace-pre-wrap">{outputText}</pre>
+
 </div>
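
With on:input now calling generateCompletion(inputText) on every keystroke, the new pendingRequest field acts as a one-slot queue: while a completion is in flight, only the latest input is remembered, and it is replayed once the active request settles. A minimal standalone sketch of this coalescing pattern, with an illustrative complete() stand-in for the actual engine.chat.completions.create call:

// One-slot request coalescing, as introduced in this commit:
// while a request is in flight, remember only the newest input
// and replay it when the active request settles.
let isGenerating = false;
let pendingRequest: string | null = null;

async function generate(content: string): Promise<void> {
  if (isGenerating) {
    pendingRequest = content.trim(); // overwrite: only the latest input survives
    return;
  }
  if (!content.trim()) return;

  isGenerating = true;
  try {
    await complete(content);
  } finally {
    isGenerating = false;
    // replay whatever arrived while we were busy
    if (pendingRequest && pendingRequest !== content) {
      const next = pendingRequest;
      pendingRequest = null;
      await generate(next);
    }
  }
}

// illustrative stand-in for engine.chat.completions.create
async function complete(content: string): Promise<void> {
  console.log("completing:", content);
}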
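
The commit also registers a second model_list entry (Qwen2-0.5B-Instruct), but the diff does not show how the try block inside loadWebLLM consumes the config. Assuming it uses WebLLM's standard engine factory, loading the newly added entry might look like the sketch below; the trimmed appConfig here simply mirrors the Qwen2 entry from the diff.

import * as webllm from "@mlc-ai/web-llm";

// Mirrors the Qwen2 entry added to model_list in this commit.
const appConfig: webllm.AppConfig = {
  model_list: [
    {
      model: "https://huggingface.co/mlc-ai/Qwen2-0.5B-Instruct-q4f16_1-MLC",
      model_id: "Qwen2-0.5B-Instruct-q4f16_1-MLC",
      model_lib: `${webllm.modelLibURLPrefix}${webllm.modelVersion}/Qwen2-0.5B-Instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm`,
      overrides: { context_window_size: 2048 },
    },
  ],
};

// CreateMLCEngine fetches the weights and compiles the WebGPU kernels,
// streaming progress through initProgressCallback.
const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine(
  "Qwen2-0.5B-Instruct-q4f16_1-MLC",
  { appConfig, initProgressCallback: (report) => console.log(report.text) },
);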