benjamin-paine's picture
initial commit
5a0ba44
raw
history blame
3.3 kB
/**
* A class that helps with chunking streaming responing
* from an LLM into whole sentences (or as close as we can get).
*/
export class SentenceChunker {
/**
* @param {Object} options
* @param {number} options.chunkLength - The maximum length of a chunk (default: 96)
* @param {boolean} options.emitParagraphs - Whether to emit paragraphs as chunks (default: true)
*/
constructor(options = {}) {
this.buffer = "";
this.chunkLength = options.chunkLength || 128;
this.emitParagraphs = options.emitParagraphs !== false;
this.callbacks = [];
}
/**
* Emit a chunk of text
* @param {string} output - The chunk of text to emit
*/
emit(output) {
this.callbacks.forEach(cb => cb(output));
}
/**
* Register a callback to be called when a chunk is emitted
* @param {Function} callback - The callback to call
*/
onChunk(callback) {
this.callbacks.push(callback);
}
/**
* Push new data into the chunker
* @param {string} data - The new data to push
*/
push(data) {
let paragraphs = data.split(/(\n+)/);
let numParagraphs = paragraphs.length;
for (let i = 0; i < numParagraphs; i++) {
let paragraph = paragraphs[i];
if (!paragraph) {
continue;
}
let sentences = paragraph.split(/(?<=[;:,.!?]\s+)|(?<=[;:,。!?])/);
let bufferLength = this.buffer.length;
for (let sentence of sentences) {
let sentenceLength = sentence.length;
if (sentenceLength === 0) {
continue;
}
if (bufferLength + sentenceLength <= this.chunkLength) {
this.buffer += sentence;
bufferLength += sentenceLength;
} else {
if (bufferLength > 0) {
this.emit(this.buffer);
}
this.buffer = sentence;
bufferLength = sentenceLength;
}
}
if (this.emitParagraphs && numParagraphs > 1 && i < numParagraphs - 1) {
this.emit(this.buffer);
this.buffer = "";
}
}
}
/**
* Flush the buffer, emitting any remaining text
*/
flush() {
if (this.buffer.length > 0) {
this.emit(this.buffer);
this.buffer = "";
}
}
}
/**
* A SentenceChunker that can handle streaming responses that grow over time
* (e.g. when the LLM is still generating a response and concatenating it to the previous response)
*/
export class GrowingSentenceChunker extends SentenceChunker {
constructor(options = {}) {
super(options);
this.partialSentence = "";
}
/**
* Push new data into the chunker
* @param {string} data - The new data to push
*/
push(data) {
const newData = data.substring(this.partialSentence.length);
this.partialSentence += newData;
super.push(newData);
}
/**
* Flush the buffer, emitting any remaining text
*/
flush() {
super.flush();
this.partialSentence = "";
}
}