File size: 6,769 Bytes
b14d567
 
 
 
75ad4be
b14d567
1fd4bf3
878e432
b14d567
 
 
 
 
 
 
fb60bd2
b14d567
878e432
75ad4be
b14d567
7a08a75
 
62957ae
 
 
f6f47e4
7a08a75
 
 
 
 
 
 
b14d567
 
 
 
 
 
 
 
 
7a08a75
1fd4bf3
7a08a75
b14d567
878e432
 
 
 
 
 
 
 
b14d567
 
 
 
 
 
 
 
 
 
 
 
 
 
 
878e432
 
 
 
 
 
 
 
 
 
 
b14d567
 
 
 
fb60bd2
878e432
 
 
75ad4be
b14d567
 
fb60bd2
878e432
fb60bd2
 
 
 
 
 
 
b14d567
 
 
 
 
878e432
 
 
 
 
 
 
b14d567
 
 
 
 
 
734c928
75ad4be
734c928
 
 
 
 
e4c0241
 
75ad4be
7a08a75
b14d567
7a08a75
 
b14d567
7a08a75
08f0bdc
b14d567
75ad4be
b14d567
 
 
 
 
 
 
 
 
fb60bd2
 
 
b14d567
 
75ad4be
 
 
 
 
 
 
 
 
 
 
 
7a08a75
9126f16
7a08a75
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
878e432
b14d567
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
<script lang="ts">
    import Textarea from "@/lib/components/ui/textarea/textarea.svelte";
    import Badge from "@/lib/components/ui/badge/badge.svelte";
    import * as webllm from "@mlc-ai/web-llm";
    import { onMount } from 'svelte';

    // Model loaded at startup; must match a model_id in loadWebLLM's appConfig.
    let selectedModel = "smollm-360M-instruct-add-basics-q0f16-MLC";

    let engine: webllm.MLCEngineInterface; // assigned once loadWebLLM resolves
    let isLoading = false; // true while the model is downloading/initializing
    let loadingStatus = ''; // progress text streamed from the init callback
    let inputText = ''; // bound to the prompt textarea
    let outputText = ''; // last completion, rendered in the <pre> below
    let error = ''; // user-visible load/generation error message
    let completionSpeed: number | null = null; // wall-clock ms of the last completion
    let tokensPerSecond: number | null = null; // completion_tokens / elapsed seconds
    let isGenerating = false; // guards against concurrent completion requests
    let pendingRequest: string | null = null; // most recent input queued while busy
    let maxTokens = 15; // bound to the range slider (15-75)

    // Example prompts offered while the textarea is empty.
    const promptExamples = [
        "Tell me a story about a cat.",
        "What is refraction?",
        "Explain thermal conductivity",
        "What is Newton's first law of motion?",
        "How do I make everything uppercase in Python?",
    ]

    /**
     * Fill the textarea with an example prompt and immediately generate
     * a completion for it.
     *
     * The completion is awaited so any rejection propagates to the caller
     * instead of escaping as an unhandled promise rejection (the original
     * was `async` but fired `generateCompletion` without awaiting it).
     */
    async function setPrompt(prompt: string) {
        inputText = prompt;
        await generateCompletion(prompt);
    }

    /**
     * Download and initialize the WebLLM engine for `selectedModel`.
     *
     * Progress text is streamed into `loadingStatus`, failures surface via
     * `error`, and `isLoading` is true for the entire duration.
     */
    async function loadWebLLM() {
        isLoading = true;
        error = '';

        const onProgress = (report: webllm.InitProgressReport) => {
            loadingStatus = report.text;
        };

        // Custom model list: the fine-tuned SmolLM build plus Qwen2 as an
        // alternative entry.
        const modelList: webllm.AppConfig["model_list"] = [
            {
                model: `https://huggingface.co/reach-vb/smollm-360M-instruct-add-basics-q0f16-MLC`,
                model_id: 'smollm-360M-instruct-add-basics-q0f16-MLC',
                model_lib: `${webllm.modelLibURLPrefix}${webllm.modelVersion}/SmolLM-360M-Instruct-q0f16-ctx2k_cs1k-webgpu.wasm`,
                overrides: { context_window_size: 2048 },
            },
            {
                model: `https://huggingface.co/mlc-ai/Qwen2-0.5B-Instruct-q4f16_1-MLC`,
                model_id: 'Qwen2-0.5B-Instruct-q4f16_1-MLC',
                model_lib: `${webllm.modelLibURLPrefix}${webllm.modelVersion}/Qwen2-0.5B-Instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm`,
                overrides: { context_window_size: 2048 },
            },
        ];

        try {
            engine = await webllm.CreateMLCEngine(selectedModel, {
                appConfig: { model_list: modelList },
                initProgressCallback: onProgress,
                logLevel: "INFO",
            });
        } catch (err) {
            error = `Failed to load the model: ${(err as Error).message}`;
        } finally {
            isLoading = false;
        }
    }

    async function generateCompletion(content: string) {
        if (!engine || isGenerating) {
            /**
             * This is used to store the most recent request from user
             * while the current request is being processed.
             */
            pendingRequest = content.trim();
            return;
        }

        if (!content.trim()) return;

        isGenerating = true;
        const startTime = performance.now();
        try {
            const response = await engine.chat.completions.create({
                messages: [
                    {role: "user", content: content}
                ],
                max_tokens: maxTokens,
            });

            outputText = response.choices[0].message.content || "";

            const endTime = performance.now();
            const elapsedTimeInSeconds = (endTime - startTime) / 1000;
            completionSpeed = Math.round(endTime - startTime);
            
            const generatedTokens = response.usage?.completion_tokens || 0;
            tokensPerSecond = Math.round(generatedTokens / elapsedTimeInSeconds);
            
            error = '';
        } catch (err) {
            error = `Error: ${(err as Error).message}`;
        } finally {
            isGenerating = false;
            
            // process pending request if exists
            if (pendingRequest && pendingRequest !== content) {
                const nextRequest = pendingRequest;
                pendingRequest = null;
                await generateCompletion(nextRequest);
            }
        }
    }

    // Kick off the model download/initialization as soon as the component mounts.
    onMount(loadWebLLM);
</script>


<!-- Main layout: centered column with logo, title, prompt input, status, and output. -->
<div class="flex my-12 flex-col items-center gap-6 max-w-xl mx-auto relative font-sans">
    <img 
      src="logo_smollm.png" 
      alt="logo" 
    class="absolute top-0 right-0 w-28 h-28 object-contain -mt-8 -mr-8 lg:-mr-16"
    />
    <h1 class="text-center font-bold text-5xl text-gray-800 mb-2">Instant SmolLM</h1>
    <p class="text-center text-sm text-gray-600">Powered by <a href="https://huggingface.co/mlc-ai" target="_blank" class="underline text-gray-800">MLC</a> WebLLM <a class="underline text-gray-800" href="https://huggingface.co/HuggingFaceTB/SmolLM-360M-Instruct" target="_blank">SmolLM-360M-Instruct</a></p>
    <p class="text-center text-xs text-gray-600 mb-4 italic">This is a smol model, go easy on it. Check out <a href="https://huggingface.co/spaces/HuggingFaceTB/SmolLM-360M-Instruct-WebGPU" target="_blank" class="underline text-gray-800">this demo</a> for full conversations.</p>

    <!-- A completion is requested on every keystroke; generateCompletion's
         pendingRequest queue serializes overlapping requests. -->
    <Textarea 
        bind:value={inputText}
        on:input={() => generateCompletion(inputText)}
        disabled={isLoading}
        class="w-full text-lg" 
        placeholder="Say something..."
    />

    <!-- Status area: loading progress, then errors, otherwise timing badges. -->
    {#if isLoading}
        <p class="text-sm text-slate-600 text-center">{loadingStatus}</p>
    {:else if error}
        <p class="text-sm text-red-600">{error}</p>
    {:else}
        <div class="flex gap-2">
            {#if completionSpeed !== null}
                <Badge>{completionSpeed}ms</Badge>
            {/if}
            {#if tokensPerSecond !== null}
                <Badge>{tokensPerSecond} tok/s</Badge>
            {/if}
        </div>
    {/if}
    <!-- Slider bound to maxTokens (15-75), passed as max_tokens per request. -->
    <div class="w-full flex flex-col items-center gap-2">
        <input 
            type="range" 
            id="max-tokens" 
            bind:value={maxTokens} 
            min="15" 
            max="75" 
            step="1"
            class="w-full accent-black"
        />
        <label for="max-tokens" class="text-xs italic text-slate-800">Max of {maxTokens} tokens</label>
    </div>
    <!-- Example prompts, shown only while the textarea is empty and the model is loaded. -->
    <div class="flex flex-col items-center mb-4">
        {#if inputText === '' && !isLoading}
        <p class="text-sm mb-2">Try these examples:</p>
        <div class="flex flex-wrap justify-center gap-2">
            {#each promptExamples as prompt}
                <button on:click={() => setPrompt(prompt)}>
                    <Badge
                        variant="outline"
                        class="cursor-pointer bg-orange-100 hover:bg-orange-200"
                    >
                        {prompt}
                    </Badge>
                </button>
            {/each}
        </div>
        {/if}
    </div>
    <!-- Generated completion output. -->
    <pre class="text-xl font-bold whitespace-pre-wrap">{outputText}</pre>

</div>