<!-- Hugging Face Spaces page header (scrape residue): Space status "Running" -->
<script lang="ts">
// Instant-completion demo: every keystroke in the textarea is sent to an
// in-browser WebLLM engine (WebGPU) running a SmolLM checkpoint, and the
// reply is rendered below together with latency stats.
import Textarea from "@/lib/components/ui/textarea/textarea.svelte";
import Badge from "@/lib/components/ui/badge/badge.svelte";
import * as webllm from "@mlc-ai/web-llm";
import { onMount } from 'svelte';

// Must match one of the `model_id` entries registered in loadWebLLM().
let selectedModel = "smollm-360M-instruct-add-basics-q0f16-MLC";
let engine: webllm.MLCEngineInterface;      // assigned once loadWebLLM() succeeds
let isLoading = false;                      // true while the model downloads/initialises
let loadingStatus = '';                     // progress text from the WebLLM init callback
let inputText = '';                         // bound to the prompt <Textarea>
let outputText = '';                        // last completion, shown in the <pre>
let error = '';                             // user-visible load/generation error, '' when none
let completionSpeed: number | null = null;  // wall-clock ms of the last completion
let tokensPerSecond: number | null = null;  // completion tokens per elapsed second
let isGenerating = false;                   // serialises generations (one at a time)
let pendingRequest: string | null = null;   // newest input queued while a generation runs
let maxTokens = 15;                         // bound to the range slider (15–75)

// Example prompts offered while the textarea is empty.
const promptExamples = [
	"Tell me a story about a cat.",
	"What is refraction?",
	"Explain thermal conductivity",
	"What is Newton's first law of motion?",
	"How do I make everything uppercase in Python?",
]
async function setPrompt(prompt: string) { | |
inputText = prompt; | |
generateCompletion(prompt); | |
} | |
async function loadWebLLM() { | |
isLoading = true; | |
error = ''; | |
const initProgressCallback = (report: webllm.InitProgressReport) => { | |
loadingStatus = report.text; | |
}; | |
const appConfig: webllm.AppConfig = { | |
model_list: [{ | |
model: `https://huggingface.co/reach-vb/smollm-360M-instruct-add-basics-q0f16-MLC`, | |
model_id: 'smollm-360M-instruct-add-basics-q0f16-MLC', | |
model_lib: `${webllm.modelLibURLPrefix}${webllm.modelVersion}/SmolLM-360M-Instruct-q0f16-ctx2k_cs1k-webgpu.wasm`, | |
overrides: { context_window_size: 2048 }, | |
}, | |
{ | |
model: `https://huggingface.co/mlc-ai/Qwen2-0.5B-Instruct-q4f16_1-MLC`, | |
model_id: 'Qwen2-0.5B-Instruct-q4f16_1-MLC', | |
model_lib: `${webllm.modelLibURLPrefix}${webllm.modelVersion}/Qwen2-0.5B-Instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm`, | |
overrides: { context_window_size: 2048 }, | |
} | |
], | |
}; | |
try { | |
engine = await webllm.CreateMLCEngine(selectedModel, { | |
appConfig, | |
initProgressCallback, | |
logLevel: "INFO", | |
}); | |
} catch (err) { | |
error = `Failed to load the model: ${(err as Error).message}`; | |
} finally { | |
isLoading = false; | |
} | |
} | |
async function generateCompletion(content: string) { | |
if (!engine || isGenerating) { | |
/** | |
* This is used to store the most recent request from user | |
* while the current request is being processed. | |
*/ | |
pendingRequest = content.trim(); | |
return; | |
} | |
if (!content.trim()) return; | |
isGenerating = true; | |
const startTime = performance.now(); | |
try { | |
const response = await engine.chat.completions.create({ | |
messages: [ | |
{role: "user", content: content} | |
], | |
max_tokens: maxTokens, | |
}); | |
outputText = response.choices[0].message.content || ""; | |
const endTime = performance.now(); | |
const elapsedTimeInSeconds = (endTime - startTime) / 1000; | |
completionSpeed = Math.round(endTime - startTime); | |
const generatedTokens = response.usage?.completion_tokens || 0; | |
tokensPerSecond = Math.round(generatedTokens / elapsedTimeInSeconds); | |
error = ''; | |
} catch (err) { | |
error = `Error: ${(err as Error).message}`; | |
} finally { | |
isGenerating = false; | |
// process pending request if exists | |
if (pendingRequest && pendingRequest !== content) { | |
const nextRequest = pendingRequest; | |
pendingRequest = null; | |
await generateCompletion(nextRequest); | |
} | |
} | |
} | |
// Start fetching/compiling the model as soon as the component mounts.
onMount(loadWebLLM);
</script>
<!-- Page layout: centered column with logo, heading, prompt box, stats, controls, output. -->
<div class="flex my-12 flex-col items-center gap-6 max-w-xl mx-auto relative font-sans">
	<img
		src="logo_smollm.png"
		alt="logo"
		class="absolute top-0 right-0 w-28 h-28 object-contain -mt-8 -mr-8 lg:-mr-16"
	/>
	<h1 class="text-center font-bold text-5xl text-gray-800 mb-2">Instant SmolLM</h1>
	<p class="text-center text-sm text-gray-600">Powered by <a href="https://huggingface.co/mlc-ai" target="_blank" class="underline text-gray-800">MLC</a> WebLLM <a class="underline text-gray-800" href="https://huggingface.co/HuggingFaceTB/SmolLM-360M-Instruct" target="_blank">SmolLM-360M-Instruct</a></p>
	<p class="text-center text-xs text-gray-600 mb-4 italic">This is a smol model, go easy on it. Check out <a href="https://huggingface.co/spaces/HuggingFaceTB/SmolLM-360M-Instruct-WebGPU" target="_blank" class="underline text-gray-800">this demo</a> for full conversations.</p>
	<!-- Prompt input: every input event triggers a completion request
	     (generateCompletion serialises and de-bounces via pendingRequest). -->
	<Textarea
		bind:value={inputText}
		on:input={() => generateCompletion(inputText)}
		disabled={isLoading}
		class="w-full text-lg"
		placeholder="Say something..."
	/>
	<!-- Status line: load progress, then error, then latency badges. -->
	{#if isLoading}
		<p class="text-sm text-slate-600 text-center">{loadingStatus}</p>
	{:else if error}
		<p class="text-sm text-red-600">{error}</p>
	{:else}
		<div class="flex gap-2">
			{#if completionSpeed !== null}
				<Badge>{completionSpeed}ms</Badge>
			{/if}
			{#if tokensPerSecond !== null}
				<Badge>{tokensPerSecond} tok/s</Badge>
			{/if}
		</div>
	{/if}
	<!-- Slider controlling max_tokens for each completion. -->
	<div class="w-full flex flex-col items-center gap-2">
		<input
			type="range"
			id="max-tokens"
			bind:value={maxTokens}
			min="15"
			max="75"
			step="1"
			class="w-full accent-black"
		/>
		<label for="max-tokens" class="text-xs italic text-slate-800">Max of {maxTokens} tokens</label>
	</div>
	<!-- Clickable example prompts, shown only while the textarea is empty. -->
	<div class="flex flex-col items-center mb-4">
		{#if inputText === '' && !isLoading}
			<p class="text-sm mb-2">Try these examples:</p>
			<div class="flex flex-wrap justify-center gap-2">
				{#each promptExamples as prompt}
					<button on:click={() => setPrompt(prompt)}>
						<Badge
							variant="outline"
							class="cursor-pointer bg-orange-100 hover:bg-orange-200"
						>
							{prompt}
						</Badge>
					</button>
				{/each}
			</div>
		{/if}
	</div>
	<!-- Model output; pre-wrap preserves the model's line breaks. -->
	<pre class="text-xl font-bold whitespace-pre-wrap">{outputText}</pre>
</div>