<!-- NOTE(review): removed web-viewer scraping artifacts that preceded the component
     (Space status lines, "File size: 6,769 Bytes", git blame hashes, and the
     line-number gutter) — none of it is part of the Svelte source. -->
<script lang="ts">
import Textarea from "@/lib/components/ui/textarea/textarea.svelte";
import Badge from "@/lib/components/ui/badge/badge.svelte";
import * as webllm from "@mlc-ai/web-llm";
import { onMount } from 'svelte';
// Model id to load; must match one of the `model_id` entries registered in loadWebLLM's appConfig.
let selectedModel = "smollm-360M-instruct-add-basics-q0f16-MLC";
// WebLLM engine handle; unset until loadWebLLM() completes successfully.
let engine: webllm.MLCEngineInterface;
// True while the model weights are downloading/initialising; gates the textarea and status UI.
let isLoading = false;
// Human-readable progress text from WebLLM's init callback, shown while loading.
let loadingStatus = '';
// Two-way bound to the prompt textarea.
let inputText = '';
// Latest completion text, rendered in the <pre> at the bottom of the template.
let outputText = '';
// Last load/generation error message; empty string means "no error".
let error = '';
// Wall-clock time of the last completion in ms, or null before the first run.
let completionSpeed: number | null = null;
// Throughput of the last completion, or null before the first run.
let tokensPerSecond: number | null = null;
// True while a chat completion is in flight; used to serialise requests.
let isGenerating = false;
// Most recent input received while busy; replayed when the current request finishes.
let pendingRequest: string | null = null;
// Completion length cap, user-adjustable via the range slider (15–75).
let maxTokens = 15;
// Example prompts offered as clickable badges when the input is empty.
const promptExamples = [
"Tell me a story about a cat.",
"What is refraction?",
"Explain thermal conductivity",
"What is Newton's first law of motion?",
"How do I make everything uppercase in Python?",
]
/**
 * Copies an example prompt into the input box and immediately generates a
 * completion for it.
 *
 * The generation is awaited so the returned promise settles only when the
 * completion (or its error handling) is done — the original fired the call
 * and forgot it, leaving a floating promise.
 *
 * @param prompt - example text chosen by the user.
 */
async function setPrompt(prompt: string) {
  inputText = prompt;
  await generateCompletion(prompt);
}
async function loadWebLLM() {
isLoading = true;
error = '';
const initProgressCallback = (report: webllm.InitProgressReport) => {
loadingStatus = report.text;
};
const appConfig: webllm.AppConfig = {
model_list: [{
model: `https://huggingface.co/reach-vb/smollm-360M-instruct-add-basics-q0f16-MLC`,
model_id: 'smollm-360M-instruct-add-basics-q0f16-MLC',
model_lib: `${webllm.modelLibURLPrefix}${webllm.modelVersion}/SmolLM-360M-Instruct-q0f16-ctx2k_cs1k-webgpu.wasm`,
overrides: { context_window_size: 2048 },
},
{
model: `https://huggingface.co/mlc-ai/Qwen2-0.5B-Instruct-q4f16_1-MLC`,
model_id: 'Qwen2-0.5B-Instruct-q4f16_1-MLC',
model_lib: `${webllm.modelLibURLPrefix}${webllm.modelVersion}/Qwen2-0.5B-Instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm`,
overrides: { context_window_size: 2048 },
}
],
};
try {
engine = await webllm.CreateMLCEngine(selectedModel, {
appConfig,
initProgressCallback,
logLevel: "INFO",
});
} catch (err) {
error = `Failed to load the model: ${(err as Error).message}`;
} finally {
isLoading = false;
}
}
/**
 * Sends `content` as a single-turn chat request to the loaded engine and
 * stores the reply in `outputText`, along with latency (`completionSpeed`, ms)
 * and throughput (`tokensPerSecond`) stats.
 *
 * Requests are serialised: if the engine is busy (or not yet loaded), only the
 * most recent input is remembered in `pendingRequest` and replayed once the
 * in-flight request completes.
 *
 * @param content - raw prompt text (typically the textarea value).
 */
async function generateCompletion(content: string) {
  if (!engine || isGenerating) {
    /**
     * This is used to store the most recent request from user
     * while the current request is being processed.
     */
    pendingRequest = content.trim();
    return;
  }
  // Ignore whitespace-only input.
  if (!content.trim()) return;
  isGenerating = true;
  const startTime = performance.now();
  try {
    const response = await engine.chat.completions.create({
      messages: [
        {role: "user", content: content}
      ],
      max_tokens: maxTokens,
    });
    outputText = response.choices[0].message.content || "";
    const endTime = performance.now();
    const elapsedTimeInSeconds = (endTime - startTime) / 1000;
    completionSpeed = Math.round(endTime - startTime);
    // usage may be absent; fall back to 0 so the stats still render.
    const generatedTokens = response.usage?.completion_tokens || 0;
    tokensPerSecond = Math.round(generatedTokens / elapsedTimeInSeconds);
    error = '';
  } catch (err) {
    error = `Error: ${(err as Error).message}`;
  } finally {
    isGenerating = false;
    // process pending request if exists
    // (skip if it is identical to what we just generated, to avoid a redundant rerun)
    if (pendingRequest && pendingRequest !== content) {
      const nextRequest = pendingRequest;
      pendingRequest = null;
      await generateCompletion(nextRequest);
    }
  }
}
// Start downloading the model as soon as the component mounts.
onMount(loadWebLLM);
</script>
<!-- Page layout: centred column holding logo, title, input, stats, slider, examples, and output. -->
<div class="flex my-12 flex-col items-center gap-6 max-w-xl mx-auto relative font-sans">
<!-- Decorative logo pinned to the top-right corner of the column. -->
<img
src="logo_smollm.png"
alt="logo"
class="absolute top-0 right-0 w-28 h-28 object-contain -mt-8 -mr-8 lg:-mr-16"
/>
<h1 class="text-center font-bold text-5xl text-gray-800 mb-2">Instant SmolLM</h1>
<p class="text-center text-sm text-gray-600">Powered by <a href="https://huggingface.co/mlc-ai" target="_blank" class="underline text-gray-800">MLC</a> WebLLM <a class="underline text-gray-800" href="https://huggingface.co/HuggingFaceTB/SmolLM-360M-Instruct" target="_blank">SmolLM-360M-Instruct</a></p>
<p class="text-center text-xs text-gray-600 mb-4 italic">This is a smol model, go easy on it. Check out <a href="https://huggingface.co/spaces/HuggingFaceTB/SmolLM-360M-Instruct-WebGPU" target="_blank" class="underline text-gray-800">this demo</a> for full conversations.</p>
<!-- Prompt input: regenerates a completion on every keystroke; disabled while the model loads. -->
<Textarea
bind:value={inputText}
on:input={() => generateCompletion(inputText)}
disabled={isLoading}
class="w-full text-lg"
placeholder="Say something..."
/>
<!-- Status area: loading progress, then error, then latency/throughput badges. -->
{#if isLoading}
<p class="text-sm text-slate-600 text-center">{loadingStatus}</p>
{:else if error}
<p class="text-sm text-red-600">{error}</p>
{:else}
<div class="flex gap-2">
{#if completionSpeed !== null}
<Badge>{completionSpeed}ms</Badge>
{/if}
{#if tokensPerSecond !== null}
<Badge>{tokensPerSecond} tok/s</Badge>
{/if}
</div>
{/if}
<!-- Slider controlling max_tokens for each completion (15–75). -->
<div class="w-full flex flex-col items-center gap-2">
<input
type="range"
id="max-tokens"
bind:value={maxTokens}
min="15"
max="75"
step="1"
class="w-full accent-black"
/>
<label for="max-tokens" class="text-xs italic text-slate-800">Max of {maxTokens} tokens</label>
</div>
<!-- Clickable example prompts, shown only when the input is empty and the model is ready. -->
<div class="flex flex-col items-center mb-4">
{#if inputText === '' && !isLoading}
<p class="text-sm mb-2">Try these examples:</p>
<div class="flex flex-wrap justify-center gap-2">
{#each promptExamples as prompt}
<button on:click={() => setPrompt(prompt)}>
<Badge
variant="outline"
class="cursor-pointer bg-orange-100 hover:bg-orange-200"
>
{prompt}
</Badge>
</button>
{/each}
</div>
{/if}
</div>
<!-- Completion output; whitespace-pre-wrap preserves the model's line breaks. -->
<pre class="text-xl font-bold whitespace-pre-wrap">{outputText}</pre>
</div>