<script lang="ts">
  import Textarea from "@/lib/components/ui/textarea/textarea.svelte";
  import Badge from "@/lib/components/ui/badge/badge.svelte";
  import * as webllm from "@mlc-ai/web-llm";
  import { onMount } from "svelte";

  let selectedModel = "SmolLM-360M-Instruct-q4f16_1-MLC";
  let engine: webllm.MLCEngineInterface;
  let isLoading = false;
  let loadingStatus = "";
  let inputText = "";
  let outputText = "";
  let error = "";
  let completionSpeed: number | null = null;
  let tokensPerSecond: number | null = null;
  let isGenerating = false;
  let pendingRequest: string | null = null;
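
  // Download the model weights and initialize the WebGPU engine via web-llm,
  // streaming human-readable progress text into `loadingStatus` along the way.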
  async function loadWebLLM() {
    isLoading = true;
    error = "";
    const initProgressCallback = (report: webllm.InitProgressReport) => {
      loadingStatus = report.text;
    };

    // Both models resolve their weights from the Hugging Face Hub and their
    // compiled WebGPU kernels (wasm) from web-llm's prebuilt model-lib URL.
    const appConfig: webllm.AppConfig = {
      model_list: [
        {
          model: "https://huggingface.co/mlc-ai/SmolLM-360M-Instruct-q4f16_1-MLC",
          model_id: "SmolLM-360M-Instruct-q4f16_1-MLC",
          model_lib: `${webllm.modelLibURLPrefix}${webllm.modelVersion}/SmolLM-360M-Instruct-q4f16_1-ctx2k_cs1k-webgpu.wasm`,
          overrides: { context_window_size: 2048 },
        },
        {
          model: "https://huggingface.co/mlc-ai/Qwen2-0.5B-Instruct-q4f16_1-MLC",
          model_id: "Qwen2-0.5B-Instruct-q4f16_1-MLC",
          model_lib: `${webllm.modelLibURLPrefix}${webllm.modelVersion}/Qwen2-0.5B-Instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm`,
          overrides: { context_window_size: 2048 },
        },
      ],
    };

    try {
      engine = await webllm.CreateMLCEngine(selectedModel, {
        appConfig,
        initProgressCallback,
        logLevel: "INFO",
      });
    } catch (err) {
      error = `Failed to load the model: ${(err as Error).message}`;
    } finally {
      isLoading = false;
    }
  }
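
  // Request a short completion for the given input. Only one request runs at
  // a time: anything submitted while the engine is busy (or still loading) is
  // stashed in `pendingRequest` and replayed once the current call settles.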
  async function generateCompletion(content: string) {
    if (!engine || isGenerating) {
      // Keep only the most recent request made while the current one is
      // being processed; any earlier pending input is overwritten.
      pendingRequest = content.trim();
      return;
    }
    if (!content.trim()) return;

    isGenerating = true;
    const startTime = performance.now();
    try {
      console.log("Generating completion:", content);
      const response = await engine.chat.completions.create({
        messages: [
          {
            role: "system",
            content:
              "You are a helpful AI agent helping users. Try your best to answer the user's request.",
          },
          { role: "user", content },
        ],
        max_tokens: 10,
      });
      outputText = response.choices[0].message.content || "";
      // An ellipsis signals that the response was cut short by max_tokens.
      if (response.choices[0].finish_reason === "length") {
        outputText += "...";
      }

      // Derive latency and throughput for the stat badges.
      const endTime = performance.now();
      const elapsedTimeInSeconds = (endTime - startTime) / 1000;
      completionSpeed = Math.round(endTime - startTime);
      const generatedTokens = response.usage?.completion_tokens || 0;
      tokensPerSecond = Math.round(generatedTokens / elapsedTimeInSeconds);
      error = "";
    } catch (err) {
      error = `Error: ${(err as Error).message}`;
    } finally {
      isGenerating = false;
      // Replay the newest queued request, if it differs from the one just run.
      if (pendingRequest && pendingRequest !== content) {
        const nextRequest = pendingRequest;
        pendingRequest = null;
        await generateCompletion(nextRequest);
      }
    }
  }
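
  // Begin loading the model as soon as the component mounts.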
  onMount(loadWebLLM);
</script>

<div class="flex my-20 flex-col items-center gap-4 max-w-lg mx-auto">
  <h1 class="text-center font-mono font-bold text-4xl">SmolLM 🤗</h1>
  <p class="text-center font-mono text-sm mb-4">Powered by {selectedModel}</p>
  <Textarea
    bind:value={inputText}
    on:input={() => generateCompletion(inputText)}
    disabled={isLoading}
    class="w-full"
    placeholder="Say something..."
  />
  {#if isLoading}
    <p class="text-sm text-slate-600 text-center">{loadingStatus}</p>
  {:else if error}
    <p class="text-sm text-red-600">{error}</p>
  {:else}
    <div class="flex gap-2">
      {#if completionSpeed !== null}
        <Badge>{completionSpeed}ms</Badge>
      {/if}
      {#if tokensPerSecond !== null}
        <Badge>{tokensPerSecond} tok/s</Badge>
      {/if}
      <Badge>{selectedModel}</Badge>
    </div>
  {/if}
  <pre class="text-lg font-bold whitespace-pre-wrap">{outputText}</pre>
</div>