cfahlgren1 HF staff committed on
Commit
0378a63
1 Parent(s): 436a794

use 15 max tokens

Browse files
Files changed (1) hide show
  1. src/routes/+page.svelte +5 -6
src/routes/+page.svelte CHANGED
@@ -4,7 +4,7 @@
4
  import * as webllm from "@mlc-ai/web-llm";
5
  import { onMount } from 'svelte';
6
 
7
- let selectedModel = "smollm-360M-instruct-add-basics-q4f16_1-MLC";
8
 
9
  let engine: webllm.MLCEngineInterface;
10
  let isLoading = false;
@@ -26,9 +26,9 @@
26
 
27
  const appConfig: webllm.AppConfig = {
28
  model_list: [{
29
- model: `https://huggingface.co/reach-vb/smollm-360M-instruct-add-basics-q4f16_1-MLC`,
30
- model_id: 'smollm-360M-instruct-add-basics-q4f16_1-MLC',
31
- model_lib: `${webllm.modelLibURLPrefix}${webllm.modelVersion}/SmolLM-360M-Instruct-q4f16_1-ctx2k_cs1k-webgpu.wasm`,
32
  overrides: { context_window_size: 2048 },
33
  },
34
  {
@@ -71,10 +71,9 @@
71
  console.log("Generating completion:", content);
72
  const response = await engine.chat.completions.create({
73
  messages: [
74
- {role:"system", content: "You are a helpful AI assistant. Try your best to answer the users request."},
75
  {role: "user", content: content}
76
  ],
77
- max_tokens: 10,
78
  });
79
 
80
  outputText = response.choices[0].message.content || "";
 
4
  import * as webllm from "@mlc-ai/web-llm";
5
  import { onMount } from 'svelte';
6
 
7
+ let selectedModel = "smollm-360M-instruct-add-basics-q0f32-MLC";
8
 
9
  let engine: webllm.MLCEngineInterface;
10
  let isLoading = false;
 
26
 
27
  const appConfig: webllm.AppConfig = {
28
  model_list: [{
29
+ model: `https://huggingface.co/reach-vb/smollm-360M-instruct-add-basics-q0f32-MLC`,
30
+ model_id: 'smollm-360M-instruct-add-basics-q0f32-MLC',
31
+ model_lib: `${webllm.modelLibURLPrefix}${webllm.modelVersion}/SmolLM-360M-Instruct-q0f32-ctx2k_cs1k-webgpu.wasm`,
32
  overrides: { context_window_size: 2048 },
33
  },
34
  {
 
71
  console.log("Generating completion:", content);
72
  const response = await engine.chat.completions.create({
73
  messages: [
 
74
  {role: "user", content: content}
75
  ],
76
+ max_tokens: 15,
77
  });
78
 
79
  outputText = response.choices[0].message.content || "";