<!-- Hugging Face Spaces page header (scrape residue): Space status "Running" -->
<script lang="ts">
// Instant-completion demo: every keystroke in the textarea is sent to an
// in-browser WebLLM engine (WebGPU) running a SmolLM checkpoint, and the
// reply is rendered below together with latency stats.
import Textarea from "@/lib/components/ui/textarea/textarea.svelte";
import Badge from "@/lib/components/ui/badge/badge.svelte";
import * as webllm from "@mlc-ai/web-llm";
import { onMount } from 'svelte';

// Must match one of the `model_id` entries registered in loadWebLLM().
let selectedModel = "smollm-360M-instruct-add-basics-q0f16-MLC";
let engine: webllm.MLCEngineInterface;      // assigned once loadWebLLM() succeeds
let isLoading = false;                      // true while the model downloads/initialises
let loadingStatus = '';                     // progress text from the WebLLM init callback
let inputText = '';                         // bound to the prompt <Textarea>
let outputText = '';                        // last completion, shown in the <pre>
let error = '';                             // user-visible load/generation error, '' when none
let completionSpeed: number | null = null;  // wall-clock ms of the last completion
let tokensPerSecond: number | null = null;  // completion tokens per elapsed second
let isGenerating = false;                   // serialises generations (one at a time)
let pendingRequest: string | null = null;   // newest input queued while a generation runs
let maxTokens = 15;                         // bound to the range slider (15–75)

// Example prompts offered while the textarea is empty.
const promptExamples = [
	"Tell me a story about a cat.",
	"What is refraction?",
	"Explain thermal conductivity",
	"What is Newton's first law of motion?",
	"How do I make everything uppercase in Python?",
]
async function setPrompt(prompt: string) { | |
inputText = prompt; | |
generateCompletion(prompt); | |
} | |
async function loadWebLLM() { | |
isLoading = true; | |
error = ''; | |
const initProgressCallback = (report: webllm.InitProgressReport) => { | |
loadingStatus = report.text; | |
}; | |
const appConfig: webllm.AppConfig = { | |
model_list: [{ | |
model: `https://huggingface.co/reach-vb/smollm-360M-instruct-add-basics-q0f16-MLC`, | |
model_id: 'smollm-360M-instruct-add-basics-q0f16-MLC', | |
model_lib: `${webllm.modelLibURLPrefix}${webllm.modelVersion}/SmolLM-360M-Instruct-q0f16-ctx2k_cs1k-webgpu.wasm`, | |
overrides: { context_window_size: 2048 }, | |
}, | |
{ | |
model: `https://huggingface.co/mlc-ai/Qwen2-0.5B-Instruct-q4f16_1-MLC`, | |
model_id: 'Qwen2-0.5B-Instruct-q4f16_1-MLC', | |
model_lib: `${webllm.modelLibURLPrefix}${webllm.modelVersion}/Qwen2-0.5B-Instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm`, | |
overrides: { context_window_size: 2048 }, | |
} | |
], | |
}; | |
try { | |
engine = await webllm.CreateMLCEngine(selectedModel, { | |
appConfig, | |
initProgressCallback, | |
logLevel: "INFO", | |
}); | |
} catch (err) { | |
error = `Failed to load the model: ${(err as Error).message}`; | |
} finally { | |
isLoading = false; | |
} | |
} | |
async function generateCompletion(content: string) { | |
if (!engine || isGenerating) { | |
/** | |
* This is used to store the most recent request from user | |
* while the current request is being processed. | |
*/ | |
pendingRequest = content.trim(); | |
return; | |
} | |
if (!content.trim()) return; | |
isGenerating = true; | |
const startTime = performance.now(); | |
try { | |
const response = await engine.chat.completions.create({ | |
messages: [ | |
{role: "user", content: content} | |
], | |
max_tokens: maxTokens, | |
}); | |
outputText = response.choices[0].message.content || ""; | |
const endTime = performance.now(); | |
const elapsedTimeInSeconds = (endTime - startTime) / 1000; | |
completionSpeed = Math.round(endTime - startTime); | |
const generatedTokens = response.usage?.completion_tokens || 0; | |
tokensPerSecond = Math.round(generatedTokens / elapsedTimeInSeconds); | |
error = ''; | |
} catch (err) { | |
error = `Error: ${(err as Error).message}`; | |
} finally { | |
isGenerating = false; | |
// process pending request if exists | |
if (pendingRequest && pendingRequest !== content) { | |
const nextRequest = pendingRequest; | |
pendingRequest = null; | |
await generateCompletion(nextRequest); | |
} | |
} | |
} | |
// Start fetching/compiling the model as soon as the component mounts.
onMount(loadWebLLM);
</script>
<!-- Page layout: centered column with logo, heading, prompt box, stats, controls, output. -->
<div class="flex my-12 flex-col items-center gap-6 max-w-xl mx-auto relative font-sans">
	<img
		src="logo_smollm.png"
		alt="logo"
		class="absolute top-0 right-0 w-28 h-28 object-contain -mt-8 -mr-8 lg:-mr-16"
	/>
	<h1 class="text-center font-bold text-5xl text-gray-800 mb-2">Instant SmolLM</h1>
	<p class="text-center text-sm text-gray-600">Powered by <a href="https://huggingface.co/mlc-ai" target="_blank" class="underline text-gray-800">MLC</a> WebLLM <a class="underline text-gray-800" href="https://huggingface.co/HuggingFaceTB/SmolLM-360M-Instruct" target="_blank">SmolLM-360M-Instruct</a></p>
	<p class="text-center text-xs text-gray-600 mb-4 italic">This is a smol model, go easy on it. Check out <a href="https://huggingface.co/spaces/HuggingFaceTB/SmolLM-360M-Instruct-WebGPU" target="_blank" class="underline text-gray-800">this demo</a> for full conversations.</p>
	<!-- Prompt input: every input event triggers a completion request
	     (generateCompletion serialises and de-bounces via pendingRequest). -->
	<Textarea
		bind:value={inputText}
		on:input={() => generateCompletion(inputText)}
		disabled={isLoading}
		class="w-full text-lg"
		placeholder="Say something..."
	/>
	<!-- Status line: load progress, then error, then latency badges. -->
	{#if isLoading}
		<p class="text-sm text-slate-600 text-center">{loadingStatus}</p>
	{:else if error}
		<p class="text-sm text-red-600">{error}</p>
	{:else}
		<div class="flex gap-2">
			{#if completionSpeed !== null}
				<Badge>{completionSpeed}ms</Badge>
			{/if}
			{#if tokensPerSecond !== null}
				<Badge>{tokensPerSecond} tok/s</Badge>
			{/if}
		</div>
	{/if}
	<!-- Slider controlling max_tokens for each completion. -->
	<div class="w-full flex flex-col items-center gap-2">
		<input
			type="range"
			id="max-tokens"
			bind:value={maxTokens}
			min="15"
			max="75"
			step="1"
			class="w-full accent-black"
		/>
		<label for="max-tokens" class="text-xs italic text-slate-800">Max of {maxTokens} tokens</label>
	</div>
	<!-- Clickable example prompts, shown only while the textarea is empty. -->
	<div class="flex flex-col items-center mb-4">
		{#if inputText === '' && !isLoading}
			<p class="text-sm mb-2">Try these examples:</p>
			<div class="flex flex-wrap justify-center gap-2">
				{#each promptExamples as prompt}
					<button on:click={() => setPrompt(prompt)}>
						<Badge
							variant="outline"
							class="cursor-pointer bg-orange-100 hover:bg-orange-200"
						>
							{prompt}
						</Badge>
					</button>
				{/each}
			</div>
		{/if}
	</div>
	<!-- Model output; pre-wrap preserves the model's line breaks. -->
	<pre class="text-xl font-bold whitespace-pre-wrap">{outputText}</pre>
</div>