,
} = {
total: 0,
queue: [],
aborts: {},
}
/**
 * Ends a tracked request: aborts its controller, then removes it from the
 * `pending` bookkeeping (queue and abort-controller map).
 *
 * Does nothing when `id` is empty or is not currently in `pending.queue`,
 * so it is safe to call several times for the same request.
 *
 * @param id - identifier of the request to end
 * @param reason - human-readable explanation, only used for logging
 */
const endRequest = (id: string, reason: string) => {
  const isTracked = Boolean(id) && pending.queue.includes(id)
  if (!isTracked) {
    return
  }

  // step 1: signal the LLM to stop generating (best effort)
  try {
    const controller = pending.aborts[id]
    controller.abort()
  } catch (err) {
    console.log(`could not abort request ${id} (${err})`)
  }

  // step 2: forget the request in both the queue and the controller map
  try {
    const remaining = pending.queue.filter(queuedId => queuedId !== id)
    pending.queue = remaining
    delete pending.aborts[id]
    console.log(`cleaned up request ${id}`)
  } catch (err) {
    console.log(`failed to properly clean up request ${id}`)
  }

  console.log(`request ${id} ended (${reason})`)
}
/**
 * GET /debug — reports the current request bookkeeping as JSON:
 * `nbTotal` (value of `pending.total`), `nbPending` (queue length) and
 * `queue` (ids currently in `pending.queue`).
 */
app.get("/debug", (req, res) => {
  const snapshot = {
    nbTotal: pending.total,
    nbPending: pending.queue.length,
    queue: pending.queue,
  }
  // Declare the payload type explicitly so clients do not have to sniff the body.
  res.setHeader("Content-Type", "application/json; charset=utf-8")
  res.write(JSON.stringify(snapshot))
  res.end()
})
/**
 * GET /app — streams an LLM-generated web page for the `prompt` query parameter.
 *
 * Flow:
 * 1. Rejects prompts shorter than `minPromptSize` (a missing prompt counts as empty).
 * 2. Refuses new work once `maxParallelRequests` requests are pending.
 * 3. Registers the request in `pending`, writes the CSS/script prefix, then
 *    forwards each generated token to the response.
 *
 * The generation is ended through `endRequest` when the llama stream finishes
 * or fails, when the request emits `close`, or when the `timeoutInSec` timer fires.
 */
app.get("/app", async (req, res) => {
  // A missing parameter must count as empty: interpolating `undefined` directly
  // would produce the 9-character string "undefined" and could pass the length check.
  const rawPrompt = req.query.prompt
  const prompt = rawPrompt == null ? "" : `${rawPrompt}`

  if (prompt.length < minPromptSize) {
    res.write(`prompt too short, please enter at least ${minPromptSize} characters`)
    res.end()
    return
  }

  // naive implementation: we say we are out of capacity
  if (pending.queue.length >= maxParallelRequests) {
    res.write('Sorry, max nb of parallel requests reached. A new slot should be available in < 15 min.')
    res.end()
    return
  }
  // alternative approach: kill old queries
  // while (pending.queue.length > maxParallelRequests) {
  //   endRequest(pending.queue[0], 'max nb of parallel request reached')
  // }

  // register the request so it can be counted and aborted later
  const id = `${pending.total++}`
  console.log(`new request ${id}`)
  pending.queue.push(id)
  pending.aborts[id] = new AbortController()

  // the prefix is sent to the browser right away and also seeds the LLM prompt
  const prefix = `${css}${script}`
  res.write(prefix)

  req.on("close", function() {
    endRequest(id, "browser ended the connection")
  })

  // for testing we kill after some delay
  // (the handle is kept so the timer can be cancelled once the request is over)
  const timeoutTimer = setTimeout(() => {
    endRequest(id, `timed out after ${timeoutInSec}s`)
  }, timeoutInSec * 1000)

  const finalPrompt = `# Context
Generate this webapp: ${prompt}.
# Documentation
${daisy}
# Guidelines
- Never repeat the instruction, instead directly write the final code within a script tag
- Use a color scheme consistent with the brief and theme
- You need to use Tailwind CSS and DaisyUI for the UI. Do not use JS for simple pages (eg. blogs or articles).
- All the JS code will be written directly inside the page, using
- You MUST use English not Latin! I repeat: do NOT write lorem ipsum!
- No need to write code comments, and try to make the code compact (short function names etc)
- Use a central layout by wrapping everything in a \`\`
# HTML Code
${prefix}`

  const options = {
    prompt: finalPrompt,
    nThreads: 6, // try to use the most of our vCPUs
    nTokPredict: 1024,
    topK: 40,
    topP: 0.1,
    temp: 0.3,
    repeatPenalty: 1,
  }

  try {
    await llama.createCompletion(options, (response) => {
      // forward each token as soon as it is produced
      try {
        res.write(response.token)
      } catch (err) {
        console.log(`coudln't write the LLM response to the HTTP stream ${err}`)
      }
    }, pending.aborts[id].signal)
    endRequest(id, `normal end of the llama stream for request ${id}`)
  } catch (e) {
    endRequest(id, `premature end of the llama stream for request ${id} (${e})`)
  } finally {
    // the request is over either way: do not leave the timeout timer armed
    clearTimeout(timeoutTimer)
  }

  try {
    res.end()
  } catch (err) {
    console.log(`couldn't end the HTTP stream for request ${id} (${err})`)
  }
})
// Start the HTTP server on `port` and log a ready-to-open example URL once it is listening.
// NOTE(review): the example URL targets "/" while the generation route visible here is "/app";
// confirm that "/" is served elsewhere and forwards the prompt.
app.listen(port, () => {
  console.log(`Open http://localhost:${port}/?prompt=a%20webpage%20recipe%20for%20making%20chocolate%20chip%20cookies`)
})