<!-- instant-smollm/src/routes/+page.svelte -->
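<!--
	Instant SmolLM: an in-browser playground that generates a short completion
	on every keystroke, running entirely client-side via MLC WebLLM on WebGPU.
-->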
<script lang="ts">
	import Textarea from "@/lib/components/ui/textarea/textarea.svelte";
	import Badge from "@/lib/components/ui/badge/badge.svelte";
	import * as webllm from "@mlc-ai/web-llm";
	import { onMount } from "svelte";

	let selectedModel = "smollm-360M-instruct-add-basics-q0f16-MLC";
	let engine: webllm.MLCEngineInterface;
	let isLoading = false;
	let loadingStatus = "";
	let inputText = "";
	let outputText = "";
	let error = "";
	let completionSpeed: number | null = null;
	let tokensPerSecond: number | null = null;
	let isGenerating = false;
	let pendingRequest: string | null = null;
	let maxTokens = 15;

	const promptExamples = [
		"Tell me a story about a cat.",
		"What is refraction?",
		"Explain thermal conductivity",
		"What is Newton's first law of motion?",
		"How do I make everything uppercase in Python?",
	];
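	// Fired when the user clicks one of the example prompt chips below.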
	async function setPrompt(prompt: string) {
		inputText = prompt;
		await generateCompletion(prompt);
	}
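	// Initializes the WebLLM engine with an app config that registers the
	// SmolLM-360M and Qwen2-0.5B builds; only `selectedModel` is actually
	// downloaded and compiled.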
	async function loadWebLLM() {
		isLoading = true;
		error = "";

		// Surface download/compile progress in the UI while loading.
		const initProgressCallback = (report: webllm.InitProgressReport) => {
			loadingStatus = report.text;
		};

		const appConfig: webllm.AppConfig = {
			model_list: [
				{
					model: "https://huggingface.co/reach-vb/smollm-360M-instruct-add-basics-q0f16-MLC",
					model_id: "smollm-360M-instruct-add-basics-q0f16-MLC",
					model_lib: `${webllm.modelLibURLPrefix}${webllm.modelVersion}/SmolLM-360M-Instruct-q0f16-ctx2k_cs1k-webgpu.wasm`,
					overrides: { context_window_size: 2048 },
				},
				{
					model: "https://huggingface.co/mlc-ai/Qwen2-0.5B-Instruct-q4f16_1-MLC",
					model_id: "Qwen2-0.5B-Instruct-q4f16_1-MLC",
					model_lib: `${webllm.modelLibURLPrefix}${webllm.modelVersion}/Qwen2-0.5B-Instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm`,
					overrides: { context_window_size: 2048 },
				},
			],
		};

		try {
			engine = await webllm.CreateMLCEngine(selectedModel, {
				appConfig,
				initProgressCallback,
				logLevel: "INFO",
			});
		} catch (err) {
			error = `Failed to load the model: ${(err as Error).message}`;
		} finally {
			isLoading = false;
		}
	}
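	// Generates a completion for the given text. Because this runs on every
	// input event, only one request is allowed in flight at a time; the newest
	// input arriving mid-generation is queued in `pendingRequest` and replayed
	// once the current request settles.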
	async function generateCompletion(content: string) {
		if (!engine || isGenerating) {
			// Keep only the most recent request while the engine is busy
			// (or still loading); it is replayed in the finally block.
			pendingRequest = content.trim();
			return;
		}
		if (!content.trim()) return;

		isGenerating = true;
		const startTime = performance.now();
		try {
			const response = await engine.chat.completions.create({
				messages: [{ role: "user", content }],
				max_tokens: maxTokens,
			});
			outputText = response.choices[0].message.content || "";

			// Report wall-clock latency and decoding throughput.
			const endTime = performance.now();
			const elapsedTimeInSeconds = (endTime - startTime) / 1000;
			completionSpeed = Math.round(endTime - startTime);
			const generatedTokens = response.usage?.completion_tokens || 0;
			tokensPerSecond = Math.round(generatedTokens / elapsedTimeInSeconds);
			error = "";
		} catch (err) {
			error = `Error: ${(err as Error).message}`;
		} finally {
			isGenerating = false;
			// Replay the queued request if a newer one arrived mid-generation.
			if (pendingRequest && pendingRequest !== content) {
				const nextRequest = pendingRequest;
				pendingRequest = null;
				await generateCompletion(nextRequest);
			}
		}
	}
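	// Begin downloading and compiling the model as soon as the page mounts.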
	onMount(loadWebLLM);
</script>
<div class="flex my-12 flex-col items-center gap-6 max-w-xl mx-auto relative font-sans">
<img
src="logo_smollm.png"
alt="logo"
class="absolute top-0 right-0 w-28 h-28 object-contain -mt-8 -mr-8 lg:-mr-16"
/>
<h1 class="text-center font-bold text-5xl text-gray-800 mb-2">Instant SmolLM</h1>
<p class="text-center text-sm text-gray-600">Powered by <a href="https://huggingface.co/mlc-ai" target="_blank" class="underline text-gray-800">MLC</a> WebLLM <a class="underline text-gray-800" href="https://huggingface.co/HuggingFaceTB/SmolLM-360M-Instruct" target="_blank">SmolLM-360M-Instruct</a></p>
<p class="text-center text-xs text-gray-600 mb-4 italic">This is a smol model, go easy on it. Check out <a href="https://huggingface.co/spaces/HuggingFaceTB/SmolLM-360M-Instruct-WebGPU" target="_blank" class="underline text-gray-800">this demo</a> for full conversations.</p>
<Textarea
bind:value={inputText}
on:input={() => generateCompletion(inputText)}
disabled={isLoading}
class="w-full text-lg"
placeholder="Say something..."
/>
{#if isLoading}
<p class="text-sm text-slate-600 text-center">{loadingStatus}</p>
{:else if error}
<p class="text-sm text-red-600">{error}</p>
{:else}
<div class="flex gap-2">
{#if completionSpeed !== null}
<Badge>{completionSpeed}ms</Badge>
{/if}
{#if tokensPerSecond !== null}
<Badge>{tokensPerSecond} tok/s</Badge>
{/if}
</div>
{/if}
<div class="w-full flex flex-col items-center gap-2">
<input
type="range"
id="max-tokens"
bind:value={maxTokens}
min="15"
max="75"
step="1"
class="w-full accent-black"
/>
<label for="max-tokens" class="text-xs italic text-slate-800">Max of {maxTokens} tokens</label>
</div>
<div class="flex flex-col items-center mb-4">
{#if inputText === '' && !isLoading}
<p class="text-sm mb-2">Try these examples:</p>
<div class="flex flex-wrap justify-center gap-2">
{#each promptExamples as prompt}
<button on:click={() => setPrompt(prompt)}>
<Badge
variant="outline"
class="cursor-pointer bg-orange-100 hover:bg-orange-200"
>
{prompt}
</Badge>
</button>
{/each}
</div>
{/if}
</div>
<pre class="text-xl font-bold whitespace-pre-wrap">{outputText}</pre>
</div>