Spaces:
Running
Running
import { | |
type LoadModelConfig, | |
type SamplingConfig, | |
Wllama, | |
type WllamaChatMessage, | |
type WllamaConfig, | |
} from "@wllama/wllama"; | |
import type { DownloadOptions } from "@wllama/wllama/esm/cache-manager"; | |
import multiThreadWllamaWasmUrl from "@wllama/wllama/esm/multi-thread/wllama.wasm?url"; | |
import singleThreadWllamaWasmUrl from "@wllama/wllama/esm/single-thread/wllama.wasm?url"; | |
import { addLogEntry } from "./logEntries"; | |
import { getSettings } from "./pubSub"; | |
import { getSystemPrompt } from "./systemPrompt"; | |
import { defaultContextSize } from "./textGenerationUtilities"; | |
/**
 * Optional settings accepted by `initializeWllama`.
 */
interface WllamaInitConfig {
  /** Passed through to the `Wllama` constructor. */
  wllama?: WllamaConfig;
  /** Spread into the options object given to `loadModelFromHF`. */
  model?: LoadModelConfig & DownloadOptions;
}
/**
 * Constructs a `Wllama` instance pointed at the imported single-thread and
 * multi-thread WASM URLs.
 *
 * @param config - Optional settings handed to the `Wllama` constructor as-is.
 * @returns The newly constructed instance.
 * @throws Rethrows whatever the constructor throws, after writing a log entry.
 */
async function createWllamaInstance(config?: WllamaConfig): Promise<Wllama> {
  const wasmAssetUrls = {
    "single-thread/wllama.wasm": singleThreadWllamaWasmUrl,
    "multi-thread/wllama.wasm": multiThreadWllamaWasmUrl,
  };

  try {
    return new Wllama(wasmAssetUrls, config);
  } catch (cause) {
    const reason = cause instanceof Error ? cause.message : "Unknown error";
    addLogEntry(`Failed to create Wllama instance: ${reason}`);
    throw cause;
  }
}
/**
 * Creates a `Wllama` instance, warms it up, and returns it with the requested
 * model loaded.
 *
 * The model is first loaded with `n_threads: 1` and asked for a single token
 * in reply to a random one-character prompt; the outcome of that warmup is
 * only logged. The instance is then exited and the model is loaded a second
 * time using just the caller-supplied options.
 *
 * @param hfRepoId - Repository id handed to `loadModelFromHF`.
 * @param hfFilePath - File path handed to `loadModelFromHF`.
 * @param config - Optional constructor and model-loading settings.
 * @returns The instance with the model loaded.
 * @throws Rethrows any failure after logging it. On failure the partially
 *   initialized instance is asked to exit so it is not left dangling.
 */
export async function initializeWllama(
  hfRepoId: string,
  hfFilePath: string,
  config?: WllamaInitConfig,
): Promise<Wllama> {
  addLogEntry("Initializing Wllama");

  // Declared outside the `try` so the failure path can release it.
  let wllama: Wllama | undefined;

  try {
    wllama = await createWllamaInstance(config?.wllama);

    // Warmup pass: same model, but forced onto a single thread.
    await wllama.loadModelFromHF(hfRepoId, hfFilePath, {
      ...config?.model,
      n_threads: 1,
    });

    const randomDigitOrLetter = Math.random().toString(36).charAt(2);

    const warmupResponse = await wllama.createChatCompletion(
      [
        {
          role: "user",
          content: randomDigitOrLetter,
        },
      ],
      {
        nPredict: 1,
      },
    );

    const hasWarmupSucceeded = warmupResponse.length > 0;

    addLogEntry(
      `Wllama warmup ${hasWarmupSucceeded ? "succeeded" : "failed"}.`,
    );

    // Drop the warmup session, then load again with the caller's own options.
    await wllama.exit();

    await wllama.loadModelFromHF(hfRepoId, hfFilePath, config?.model);

    addLogEntry("Wllama initialized successfully");

    return wllama;
  } catch (error) {
    addLogEntry(
      `Failed to initialize Wllama: ${
        error instanceof Error ? error.message : "Unknown error"
      }`,
    );

    // The caller never receives the instance on failure, so release it here.
    // Best-effort and deliberately not awaited: a failing or stalled cleanup
    // must neither mask nor delay the original error.
    void wllama?.exit().catch(() => undefined);

    throw error;
  }
}
/**
 * Clears the cache exposed by a freshly created `Wllama` instance's
 * `cacheManager`.
 *
 * @returns The instance that was created to perform the clearing.
 * @throws Rethrows any failure after writing a log entry.
 */
export async function clearWllamaCache(): Promise<Wllama> {
  addLogEntry("Clearing Wllama cache");

  try {
    const instance = await createWllamaInstance();
    await instance.cacheManager.clear();
    addLogEntry("Wllama cache cleared successfully");
    return instance;
  } catch (cause) {
    const reason = cause instanceof Error ? cause.message : "Unknown error";
    addLogEntry(`Failed to clear Wllama cache: ${reason}`);
    throw cause;
  }
}
/**
 * Describes one selectable model: where its file lives, how it should be
 * loaded, and how prompts and sampling parameters are built for it.
 */
export interface WllamaModel {
  /** Human-readable model name. */
  readonly label: string;
  /** Hugging Face repository id (entries in this file use `owner/repo`). */
  readonly hfRepoId: string;
  /**
   * File path within the repository. Entries in this file point at the first
   * shard of a sharded GGUF model.
   */
  readonly hfFilePath: string;
  /** Same type as `LoadModelConfig["cache_type_k"]`. */
  readonly cacheTypeK: LoadModelConfig["cache_type_k"];
  /** Same type as `LoadModelConfig["cache_type_v"]`. */
  readonly cacheTypeV: LoadModelConfig["cache_type_v"];
  /** Context size for the model (presumably in tokens; confirm with callers). */
  readonly contextSize: number;
  /** Size of the model files in megabytes, as the name states. */
  readonly fileSizeInMegabytes: number;
  /** Flag read by consumers outside this file; see usages for exact effect. */
  readonly shouldIncludeUrlsOnPrompt: boolean;
  /** Optional stop strings for generation. */
  readonly stopStrings?: string[];
  /** Optional stop token ids for generation. */
  readonly stopTokens?: number[];
  /** Returns the sampling configuration to use for this model. */
  getSampling: () => SamplingConfig;
  /** Builds the chat messages for a query and its search results. */
  getMessages: (query: string, searchResults: string) => WllamaChatMessage[];
}
/**
 * Produces the settings shared by the entries of `wllamaModels`: everything
 * in `WllamaModel` except the label, repository id, file path and file size.
 *
 * A fresh object is returned on each call.
 */
const createDefaultModelConfig = (): Omit<
  WllamaModel,
  "label" | "fileSizeInMegabytes" | "hfRepoId" | "hfFilePath"
> => {
  // The system prompt is sent as a first user turn, acknowledged by a canned
  // assistant reply, and only then followed by the actual query.
  const getMessages: WllamaModel["getMessages"] = (query, searchResults) => {
    const systemPrompt = getSystemPrompt(searchResults);
    return [
      { role: "user", content: systemPrompt },
      { role: "assistant", content: "Ok!" },
      { role: "user", content: query },
    ];
  };

  // Settings are read on every call rather than captured once.
  const getSampling: WllamaModel["getSampling"] = () => {
    const {
      inferenceTopP,
      inferenceTemperature,
      inferenceFrequencyPenalty,
      inferencePresencePenalty,
    } = getSettings();

    return {
      top_p: inferenceTopP,
      temp: inferenceTemperature,
      penalty_freq: inferenceFrequencyPenalty,
      penalty_present: inferencePresencePenalty,
      min_p: 0.1,
      top_k: 0,
      typical_p: 0.2,
      penalty_repeat: 1.05,
    };
  };

  return {
    getMessages,
    cacheTypeK: "f16",
    cacheTypeV: "f16",
    contextSize: defaultContextSize,
    shouldIncludeUrlsOnPrompt: true,
    getSampling,
  };
};
/**
 * Merges a model's specific fields over a fresh copy of the shared defaults.
 * Fields that also exist in the defaults (e.g. `contextSize`) override them.
 */
const defineWllamaModel = (
  specifics: Pick<
    WllamaModel,
    "label" | "hfRepoId" | "hfFilePath" | "fileSizeInMegabytes"
  > &
    Partial<WllamaModel>,
): WllamaModel => ({
  ...createDefaultModelConfig(),
  ...specifics,
});

/**
 * Registry of the available models, keyed by model id.
 *
 * Entries are listed in ascending order of `fileSizeInMegabytes`.
 */
export const wllamaModels: Readonly<Record<string, WllamaModel>> = {
  "smollm2-135m": defineWllamaModel({
    label: "SmolLM 2 135M",
    hfRepoId: "Felladrin/gguf-sharded-Q4_K_S-SmolLM2-135M-Instruct",
    hfFilePath: "model.shard-00001-of-00004.gguf",
    fileSizeInMegabytes: 102,
  }),
  "smollm2-360m": defineWllamaModel({
    label: "SmolLM 2 360M",
    hfRepoId: "Felladrin/gguf-sharded-Q4_K_S-SmolLM2-360M-Instruct",
    hfFilePath: "model.shard-00001-of-00005.gguf",
    fileSizeInMegabytes: 260,
  }),
  "qwen-2.5-0.5b": defineWllamaModel({
    label: "Qwen 2.5 0.5B",
    hfRepoId: "Felladrin/gguf-sharded-Q4_K_S-Qwen2.5-0.5B-Instruct",
    hfFilePath: "model.shard-00001-of-00003.gguf",
    fileSizeInMegabytes: 386,
  }),
  "amd-olmo-1b": defineWllamaModel({
    label: "AMD OLMo 1B",
    hfRepoId: "Felladrin/gguf-sharded-Q4_K_S-AMD-OLMo-1B-SFT-DPO",
    hfFilePath: "model.shard-00001-of-00009.gguf",
    fileSizeInMegabytes: 697,
  }),
  "granite-3.1-1b": defineWllamaModel({
    label: "Granite 3.1 1B [400M]",
    hfRepoId: "Felladrin/gguf-sharded-Q4_K_S-granite-3.1-1b-a400m-instruct",
    hfFilePath: "model.shard-00001-of-00020.gguf",
    fileSizeInMegabytes: 775,
  }),
  "llama-3.2-1b": defineWllamaModel({
    label: "Llama 3.2 1B",
    hfRepoId: "Felladrin/gguf-sharded-Q4_K_S-Llama-3.2-1B-Instruct",
    hfFilePath: "model.shard-00001-of-00004.gguf",
    fileSizeInMegabytes: 776,
  }),
  "gemma-3-1b": defineWllamaModel({
    label: "Gemma 3 1B",
    hfRepoId: "Felladrin/gguf-sharded-Q4_K_S-gemma-3-1b-it",
    hfFilePath: "model.shard-00001-of-00003.gguf",
    fileSizeInMegabytes: 781,
  }),
  "pints-1.5b": defineWllamaModel({
    label: "Pints 1.5B",
    hfRepoId: "Felladrin/gguf-sharded-Q4_K_S-1.5-Pints-16K-v0.1",
    hfFilePath: "model.shard-00001-of-00018.gguf",
    fileSizeInMegabytes: 905,
  }),
  "stablelm-2-zephyr-1.6b": defineWllamaModel({
    label: "StableLM 2 Zephyr 1.6B",
    hfRepoId: "Felladrin/gguf-sharded-Q4_K_S-stablelm-2-zephyr-1.6b",
    hfFilePath: "model.shard-00001-of-00006.gguf",
    fileSizeInMegabytes: 989,
  }),
  "smollm2-1.7b": defineWllamaModel({
    label: "SmolLM 2 1.7B",
    hfRepoId: "Felladrin/gguf-sharded-Q4_K_S-SmolLM2-1.7B-Instruct",
    hfFilePath: "model.shard-00001-of-00012.gguf",
    fileSizeInMegabytes: 999,
  }),
  "falcon-3-1b": defineWllamaModel({
    label: "Falcon 3 1B",
    hfRepoId: "Felladrin/gguf-sharded-Q4_K_S-Falcon3-1B-Instruct",
    hfFilePath: "model.shard-00001-of-00005.gguf",
    fileSizeInMegabytes: 1020,
  }),
  "aceinstruct-1.5b": defineWllamaModel({
    label: "AceInstruct 1.5B",
    hfRepoId: "Felladrin/gguf-sharded-Q4_K_S-AceInstruct-1.5B",
    hfFilePath: "model.shard-00001-of-00006.gguf",
    fileSizeInMegabytes: 1070,
  }),
  "deepseek-r1-distill-qwen-1.5b": defineWllamaModel({
    label: "DeepSeek R1 Distill Qwen 1.5B",
    hfRepoId: "Felladrin/gguf-sharded-Q4_K_S-DeepSeek-R1-Distill-Qwen-1.5B",
    hfFilePath: "model.shard-00001-of-00006.gguf",
    fileSizeInMegabytes: 1070,
  }),
  "internlm-2.5-1.8b": defineWllamaModel({
    label: "InternLM 2.5 1.8B",
    hfRepoId: "Felladrin/gguf-sharded-Q4_K_S-internlm2_5-1_8b-chat",
    hfFilePath: "model.shard-00001-of-00008.gguf",
    fileSizeInMegabytes: 1120,
  }),
  "granite-3.1-2b": defineWllamaModel({
    label: "Granite 3.1 2B",
    hfRepoId: "Felladrin/gguf-sharded-Q4_K_S-granite-3.1-2b-instruct",
    hfFilePath: "model.shard-00001-of-00019.gguf",
    fileSizeInMegabytes: 1460,
  }),
  "exaone-3.5-2.4b": defineWllamaModel({
    label: "EXAONE 3.5 2.4B",
    hfRepoId: "Felladrin/gguf-sharded-Q4_K_S-EXAONE-3.5-2.4B-Instruct",
    hfFilePath: "model.shard-00001-of-00008.gguf",
    fileSizeInMegabytes: 1580,
  }),
  "gemma-2-2b": defineWllamaModel({
    label: "Gemma 2 2B",
    hfRepoId: "Felladrin/gguf-sharded-Q4_K_S-gemma-2-2b-it",
    hfFilePath: "model.shard-00001-of-00004.gguf",
    fileSizeInMegabytes: 1640,
  }),
  "megrez-3b": defineWllamaModel({
    label: "Megrez 3B",
    hfRepoId: "Felladrin/gguf-sharded-Q4_K_S-Megrez-3B-Instruct",
    hfFilePath: "model.shard-00001-of-00007.gguf",
    fileSizeInMegabytes: 1740,
  }),
  "granite-3.1-3b": defineWllamaModel({
    label: "Granite 3.1 3B [800M]",
    hfRepoId: "Felladrin/gguf-sharded-Q4_K_S-granite-3.1-3b-a800m-instruct",
    hfFilePath: "model.shard-00001-of-00033.gguf",
    fileSizeInMegabytes: 1900,
  }),
  "falcon-3-3b": defineWllamaModel({
    label: "Falcon 3 3B",
    hfRepoId: "Felladrin/gguf-sharded-Q4_K_S-Falcon3-3B-Instruct",
    hfFilePath: "model.shard-00001-of-00006.gguf",
    fileSizeInMegabytes: 1930,
  }),
  "llama-3.2-3b": defineWllamaModel({
    label: "Llama 3.2 3B",
    hfRepoId: "Felladrin/gguf-sharded-Q4_K_S-Llama-3.2-3B-Instruct",
    hfFilePath: "model.shard-00001-of-00007.gguf",
    fileSizeInMegabytes: 1930,
  }),
  "smallthinker-3b": defineWllamaModel({
    label: "SmallThinker 3B",
    hfRepoId: "Felladrin/gguf-sharded-Q4_K_S-SmallThinker-3B-Preview",
    hfFilePath: "model.shard-00001-of-00008.gguf",
    fileSizeInMegabytes: 2010,
  }),
  "phi-3.5-mini-3.8b": defineWllamaModel({
    label: "Phi 3.5 Mini 3.8B",
    hfRepoId: "Felladrin/gguf-sharded-Q4_K_S-Phi-3.5-mini-instruct",
    hfFilePath: "model.shard-00001-of-00034.gguf",
    fileSizeInMegabytes: 2190,
  }),
  // The only entry that overrides the shared context size and K-cache type.
  "minicpm3-4b": defineWllamaModel({
    label: "MiniCPM 3 4B",
    hfRepoId: "Felladrin/gguf-sharded-Q4_K_S-MiniCPM3-4B",
    hfFilePath: "model.shard-00001-of-00016.gguf",
    fileSizeInMegabytes: 2360,
    contextSize: 2560,
    cacheTypeK: "q8_0",
  }),
  "gemma-3-4b": defineWllamaModel({
    label: "Gemma 3 4B",
    hfRepoId: "Felladrin/gguf-sharded-Q4_K_S-gemma-3-4b-it",
    hfFilePath: "model.shard-00001-of-00005.gguf",
    fileSizeInMegabytes: 2380,
  }),
  // Unlike the other entries, this repository is a Q3_K_M quantization.
  "olmoe-1b-7b": defineWllamaModel({
    label: "OLMoE 7B [1B]",
    hfRepoId: "Felladrin/gguf-sharded-Q3_K_M-OLMoE-1B-7B-0125-Instruct",
    hfFilePath: "model.shard-00001-of-00050.gguf",
    fileSizeInMegabytes: 3340,
  }),
};