import express from "express"

import { LLM } from "llama-node"
import { LLamaCpp } from "llama-node/dist/llm/llama-cpp.js"

import { daisy } from "./daisy.mts"
import { alpine } from "./alpine.mts"

const llama = new LLM(LLamaCpp)

await llama.load({
  // if you plan to use a different model, you also need to edit line 26 in the Dockerfile
  modelPath: "./models/airoboros-13b-gpt4.ggmlv3.q4_0.bin",
  enableLogging: false,
  nCtx: 1024,
  seed: 0,
  f16Kv: false,
  logitsAll: false,
  vocabOnly: false,
  useMlock: false,
  embedding: false,
  useMmap: true,
  nGpuLayers: 0
})

// define the CSS and JS dependencies (served as static files from public/)
const css = [
  "/css/daisyui@2.6.0.css",
].map(item => `<link href="${item}" rel="stylesheet" type="text/css"/>`)
  .join("")

const script = [
  "/js/alpinejs@3.12.2.js",
  "/js/tailwindcss@3.3.2.js"
].map(item => `<script src="${item}"></script>`)
  .join("")

const app = express()
const port = 7860

const minPromptSize = 16 // if you change this, you also need to change it in public/index.html
const timeoutInSec = 15 * 60

app.use(express.static("public"))

const maxParallelRequests = 1

// bookkeeping for in-flight requests
const pending: {
  total: number;
  queue: string[];
  aborts: Record<string, AbortController>;
} = {
  total: 0,
  queue: [],
  aborts: {},
}

// abort the LLM generation for a request and remove it from the queue
const endRequest = (id: string, reason: string) => {
  if (!id || !pending.queue.includes(id)) {
    return
  }

  // politely ask the LLM to stop
  try {
    pending.aborts[id].abort()
  } catch (err) {
    console.log(`could not abort request ${id} (${err})`)
  }

  // remove the request from everywhere
  try {
    pending.queue = pending.queue.filter(i => i !== id)
    delete pending.aborts[id]
    console.log(`cleaned up request ${id}`)
  } catch (err) {
    console.log(`failed to properly clean up request ${id}`)
  }

  console.log(`request ${id} ended (${reason})`)
}

// expose the current queue state, for debugging
app.get("/debug", (req, res) => {
  res.write(JSON.stringify({
    nbTotal: pending.total,
    nbPending: pending.queue.length,
    queue: pending.queue,
  }))
  res.end()
})

// main endpoint: stream an LLM-generated web page for ?prompt=...
app.get("/app", async (req, res) => {
  if (`${req.query.prompt}`.length < minPromptSize) {
    res.write(`prompt too short, please enter at least ${minPromptSize} characters`)
    res.end()
    return
  }

  // naive implementation: we say we are out of capacity
  if (pending.queue.length >= maxParallelRequests) {
    res.write('Sorry, max nb of parallel requests reached. A new slot should be available in < 15 min.')
    res.end()
    return
  }

  // alternative approach: kill old queries
  // while (pending.queue.length > maxParallelRequests) {
  //   endRequest(pending.queue[0], 'max nb of parallel request reached')
  // }

  const id = `${pending.total++}`
  console.log(`new request ${id}`)

  pending.queue.push(id)
  pending.aborts[id] = new AbortController()

  // stream the hardcoded beginning of the HTML page right away;
  // the LLM completes the rest
  const prefix = `<html><head>${css}${script}`
  res.write(prefix)

  req.on("close", function() {
    endRequest(id, "browser ended the connection")
  })

  // for testing we kill the request after some delay
  setTimeout(() => {
    endRequest(id, `timed out after ${timeoutInSec}s`)
  }, timeoutInSec * 1000)

  const finalPrompt = `# Context
Generate this webapp: ${req.query.prompt}.
# Documentation
${daisy}
# Guidelines
- Never repeat the instruction, instead directly write the final code within a script tag
- Use a color scheme consistent with the brief and theme
- You need to use Tailwind CSS and DaisyUI for the UI. Do not use JS for simple pages (eg. blogs or articles).
- All the JS code will be written directly inside the page, using <script type="text/javascript"></script>
- You MUST use English, not Latin! I repeat: do NOT write lorem ipsum!
- No need to write code comments, and try to make the code compact (short function names etc)
- Use a central layout by wrapping everything in a \`<div class="flex flex-col items-center">\`
# HTML Code
${prefix}`
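
  // sampling settings for the completion call below: the low topP (0.1) and
  // temp (0.3) keep the sampling conservative and close to greedy decoding,
  // and repeatPenalty is left at 1, i.e. no repetition penalty is applied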
  const options = {
    prompt: finalPrompt,
    nThreads: 6, // try to make the most of our vCPUs
    nTokPredict: 1024,
    topK: 40,
    topP: 0.1,
    temp: 0.3,
    repeatPenalty: 1,
  }

  try {
    await llama.createCompletion(options, (response) => {
      try {
        res.write(response.token)
      } catch (err) {
        console.log(`couldn't write the LLM response to the HTTP stream ${err}`)
      }
    }, pending.aborts[id].signal)
    endRequest(id, `normal end of the llama stream for request ${id}`)
  } catch (e) {
    endRequest(id, `premature end of the llama stream for request ${id} (${e})`)
  }

  try {
    res.end()
  } catch (err) {
    console.log(`couldn't end the HTTP stream for request ${id} (${err})`)
  }
})

app.listen(port, () => {
  console.log(`Open http://localhost:${port}/?prompt=a%20webpage%20recipe%20for%20making%20chocolate%20chip%20cookies`)
})
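
// Usage sketch (not part of the original file; the endpoint shapes are taken
// from the routes above, and the prompt value is only an example):
//
//   curl "http://localhost:7860/app?prompt=a%20simple%20pomodoro%20timer"
//
// streams the raw HTML back token by token as it is generated, and
//
//   curl "http://localhost:7860/debug"
//
// returns the current request queue state as JSON.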