<script lang="ts">
	import Textarea from "@/lib/components/ui/textarea/textarea.svelte";
	import Badge from "@/lib/components/ui/badge/badge.svelte";
	import * as webllm from "@mlc-ai/web-llm";
	import { onMount } from 'svelte';
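
	// UI and engine state. `pendingRequest` buffers the latest prompt the user
	// typed while a completion is still in flight, so fast typing never drops input.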
	let selectedModel = "smollm-360M-instruct-add-basics-q0f16-MLC";
	let engine: webllm.MLCEngineInterface;
	let isLoading = false;
	let loadingStatus = '';
	let inputText = '';
	let outputText = '';
	let error = '';
	let completionSpeed: number | null = null;
	let tokensPerSecond: number | null = null;
	let isGenerating = false;
	let pendingRequest: string | null = null;
	const promptExamples = [
		"What is the capital of France?",
		"Tell me a story about a cat.",
		"Write a poem about the ocean.",
	];
	async function setPrompt(prompt: string) {
		inputText = prompt;
		await generateCompletion(prompt);
	}
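
	/**
	 * Download and initialize the selected model in the browser.
	 * WebLLM fetches the weights and compiles the WebGPU runtime,
	 * reporting progress through `initProgressCallback`.
	 */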
	async function loadWebLLM() {
		isLoading = true;
		error = '';
		const initProgressCallback = (report: webllm.InitProgressReport) => {
			loadingStatus = report.text;
		};
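
		// Register each model's weights (a Hugging Face repo) together with the
		// matching precompiled WebGPU wasm runtime and a context-window override.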
		const appConfig: webllm.AppConfig = {
			model_list: [{
				model: `https://huggingface.co/reach-vb/smollm-360M-instruct-add-basics-q0f16-MLC`,
				model_id: 'smollm-360M-instruct-add-basics-q0f16-MLC',
				model_lib: `${webllm.modelLibURLPrefix}${webllm.modelVersion}/SmolLM-360M-Instruct-q0f16-ctx2k_cs1k-webgpu.wasm`,
				overrides: { context_window_size: 2048 },
			},
			{
				model: `https://huggingface.co/mlc-ai/Qwen2-0.5B-Instruct-q4f16_1-MLC`,
				model_id: 'Qwen2-0.5B-Instruct-q4f16_1-MLC',
				model_lib: `${webllm.modelLibURLPrefix}${webllm.modelVersion}/Qwen2-0.5B-Instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm`,
				overrides: { context_window_size: 2048 },
			}],
		};
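
		// CreateMLCEngine downloads the weights and sets up the WebGPU inference
		// pipeline; the first visit can take a while.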
		try {
			engine = await webllm.CreateMLCEngine(selectedModel, {
				appConfig,
				initProgressCallback,
				logLevel: "INFO",
			});
		} catch (err) {
			error = `Failed to load the model: ${(err as Error).message}`;
		} finally {
			isLoading = false;
			// Flush any prompt the user typed while the model was still loading.
			if (pendingRequest) {
				const nextRequest = pendingRequest;
				pendingRequest = null;
				await generateCompletion(nextRequest);
			}
		}
	}
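
	/**
	 * Ask the model to complete `content` and update the UI, deriving
	 * latency and tokens/second from the response's usage stats.
	 */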
	async function generateCompletion(content: string) {
		if (!engine || isGenerating) {
			/**
			 * Buffer the most recent user request while the engine is busy
			 * (or still loading) so it can be processed afterwards.
			 */
			pendingRequest = content.trim();
			return;
		}
		if (!content.trim()) return;
		isGenerating = true;
		const startTime = performance.now();
		try {
			console.log("Generating completion:", content);
			const response = await engine.chat.completions.create({
				messages: [
					{ role: "user", content: content },
				],
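				// Keep the demo snappy: cap each completion at 15 tokens
				// (the "15 Max Tokens" note in the header refers to this).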
				max_tokens: 15,
			});
			outputText = response.choices[0].message.content || "";
			// Indicate that the response was cut short by the token limit.
			if (response.choices[0].finish_reason === "length") {
				outputText += "...";
			}
			const endTime = performance.now();
			const elapsedTimeInSeconds = (endTime - startTime) / 1000;
			completionSpeed = Math.round(endTime - startTime);
			const generatedTokens = response.usage?.completion_tokens || 0;
			tokensPerSecond = Math.round(generatedTokens / elapsedTimeInSeconds);
			error = '';
		} catch (err) {
			error = `Error: ${(err as Error).message}`;
		} finally {
			isGenerating = false;
			// Process the pending request, if one arrived while we were busy.
			if (pendingRequest && pendingRequest !== content) {
				const nextRequest = pendingRequest;
				pendingRequest = null;
				await generateCompletion(nextRequest);
			}
		}
	}
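
	// Start loading the model as soon as the component mounts.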
	onMount(loadWebLLM);
</script>

<div class="flex my-20 flex-col items-center gap-4 max-w-xl mx-auto">
	<h1 class="text-center font-mono font-bold text-4xl">SmolLM 🤗</h1>
	<p class="text-center font-mono text-sm mb-4">
		Powered by <a href="https://huggingface.co/mlc-ai" target="_blank" class="underline text-blue-500">MLC</a> WebLLM
		<a class="underline text-blue-500" href="https://huggingface.co/HuggingFaceTB/smollm-360M-instruct-add-basics" target="_blank">SmolLM-360M-Instruct-Add-Basics</a>
		<span class="text-xs italic">(15 Max Tokens)</span>
	</p>
	<Textarea
		bind:value={inputText}
		on:input={() => generateCompletion(inputText)}
		disabled={isLoading}
		class="w-full text-lg"
		placeholder="Say something..."
	/>
	<p class="text-center text-xs italic">This model doesn't work well with extremely creative prompts.</p>
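
	<!-- Status area: loading progress, an error message, or perf stats for the last completion. -->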
	{#if isLoading}
		<p class="text-sm text-slate-600 text-center">{loadingStatus}</p>
	{:else if error}
		<p class="text-sm text-red-600">{error}</p>
	{:else}
		<div class="flex gap-2">
			{#if completionSpeed !== null}
				<Badge>{completionSpeed}ms</Badge>
			{/if}
			{#if tokensPerSecond !== null}
				<Badge>{tokensPerSecond} tok/s</Badge>
			{/if}
		</div>
	{/if}

	<div class="flex flex-col items-center mb-4">
		{#if inputText === ''}
			<p class="text-sm mb-2">Try these examples:</p>
			<div class="flex flex-wrap justify-center gap-2">
				{#each promptExamples as prompt}
					<button on:click={() => setPrompt(prompt)}>
						<Badge
							variant="outline"
							class="cursor-pointer bg-orange-100 hover:bg-orange-200"
						>
							{prompt}
						</Badge>
					</button>
				{/each}
			</div>
		{/if}
	</div>

	<pre class="text-xl font-bold whitespace-pre-wrap">{outputText}</pre>
</div>