|
const crypto = require('crypto'); |
|
const Keyv = require('keyv'); |
|
const { |
|
encoding_for_model: encodingForModel, |
|
get_encoding: getEncoding, |
|
} = require('@dqbd/tiktoken'); |
|
const { fetchEventSource } = require('@waylaidwanderer/fetch-event-source'); |
|
const { Agent, ProxyAgent } = require('undici'); |
|
const BaseClient = require('./BaseClient'); |
|
|
|
// Default model used when `modelOptions.model` is not provided.
const CHATGPT_MODEL = 'gpt-3.5-turbo';

// Process-wide cache of tiktoken tokenizers (see `getTokenizer`) so encoders
// are only constructed once rather than per client instance.
const tokenizersCache = {};
|
|
|
class ChatGPTClient extends BaseClient { |
|
constructor(apiKey, options = {}, cacheOptions = {}) { |
|
super(apiKey, options, cacheOptions); |
|
|
|
cacheOptions.namespace = cacheOptions.namespace || 'chatgpt'; |
|
this.conversationsCache = new Keyv(cacheOptions); |
|
this.setOptions(options); |
|
} |
|
|
|
setOptions(options) { |
|
if (this.options && !this.options.replaceOptions) { |
|
|
|
this.options.modelOptions = { |
|
...this.options.modelOptions, |
|
...options.modelOptions, |
|
}; |
|
delete options.modelOptions; |
|
|
|
this.options = { |
|
...this.options, |
|
...options, |
|
}; |
|
} else { |
|
this.options = options; |
|
} |
|
|
|
if (this.options.openaiApiKey) { |
|
this.apiKey = this.options.openaiApiKey; |
|
} |
|
|
|
const modelOptions = this.options.modelOptions || {}; |
|
this.modelOptions = { |
|
...modelOptions, |
|
|
|
model: modelOptions.model || CHATGPT_MODEL, |
|
temperature: typeof modelOptions.temperature === 'undefined' ? 0.8 : modelOptions.temperature, |
|
top_p: typeof modelOptions.top_p === 'undefined' ? 1 : modelOptions.top_p, |
|
presence_penalty: |
|
typeof modelOptions.presence_penalty === 'undefined' ? 1 : modelOptions.presence_penalty, |
|
stop: modelOptions.stop, |
|
}; |
|
|
|
this.isChatGptModel = this.modelOptions.model.startsWith('gpt-'); |
|
const { isChatGptModel } = this; |
|
this.isUnofficialChatGptModel = |
|
this.modelOptions.model.startsWith('text-chat') || |
|
this.modelOptions.model.startsWith('text-davinci-002-render'); |
|
const { isUnofficialChatGptModel } = this; |
|
|
|
|
|
this.maxContextTokens = this.options.maxContextTokens || (isChatGptModel ? 4095 : 4097); |
|
|
|
|
|
|
|
this.maxResponseTokens = this.modelOptions.max_tokens || 1024; |
|
this.maxPromptTokens = |
|
this.options.maxPromptTokens || this.maxContextTokens - this.maxResponseTokens; |
|
|
|
if (this.maxPromptTokens + this.maxResponseTokens > this.maxContextTokens) { |
|
throw new Error( |
|
`maxPromptTokens + max_tokens (${this.maxPromptTokens} + ${this.maxResponseTokens} = ${ |
|
this.maxPromptTokens + this.maxResponseTokens |
|
}) must be less than or equal to maxContextTokens (${this.maxContextTokens})`, |
|
); |
|
} |
|
|
|
this.userLabel = this.options.userLabel || 'User'; |
|
this.chatGptLabel = this.options.chatGptLabel || 'ChatGPT'; |
|
|
|
if (isChatGptModel) { |
|
|
|
|
|
|
|
this.startToken = '||>'; |
|
this.endToken = ''; |
|
this.gptEncoder = this.constructor.getTokenizer('cl100k_base'); |
|
} else if (isUnofficialChatGptModel) { |
|
this.startToken = '<|im_start|>'; |
|
this.endToken = '<|im_end|>'; |
|
this.gptEncoder = this.constructor.getTokenizer('text-davinci-003', true, { |
|
'<|im_start|>': 100264, |
|
'<|im_end|>': 100265, |
|
}); |
|
} else { |
|
|
|
|
|
|
|
this.startToken = '||>'; |
|
this.endToken = ''; |
|
try { |
|
this.gptEncoder = this.constructor.getTokenizer(this.modelOptions.model, true); |
|
} catch { |
|
this.gptEncoder = this.constructor.getTokenizer('text-davinci-003', true); |
|
} |
|
} |
|
|
|
if (!this.modelOptions.stop) { |
|
const stopTokens = [this.startToken]; |
|
if (this.endToken && this.endToken !== this.startToken) { |
|
stopTokens.push(this.endToken); |
|
} |
|
stopTokens.push(`\n${this.userLabel}:`); |
|
stopTokens.push('<|diff_marker|>'); |
|
|
|
this.modelOptions.stop = stopTokens; |
|
} |
|
|
|
if (this.options.reverseProxyUrl) { |
|
this.completionsUrl = this.options.reverseProxyUrl; |
|
} else if (isChatGptModel) { |
|
this.completionsUrl = 'https://api.openai.com/v1/chat/completions'; |
|
} else { |
|
this.completionsUrl = 'https://api.openai.com/v1/completions'; |
|
} |
|
|
|
return this; |
|
} |
|
|
|
static getTokenizer(encoding, isModelName = false, extendSpecialTokens = {}) { |
|
if (tokenizersCache[encoding]) { |
|
return tokenizersCache[encoding]; |
|
} |
|
let tokenizer; |
|
if (isModelName) { |
|
tokenizer = encodingForModel(encoding, extendSpecialTokens); |
|
} else { |
|
tokenizer = getEncoding(encoding, extendSpecialTokens); |
|
} |
|
tokenizersCache[encoding] = tokenizer; |
|
return tokenizer; |
|
} |
|
|
|
async getCompletion(input, onProgress, abortController = null) { |
|
if (!abortController) { |
|
abortController = new AbortController(); |
|
} |
|
const modelOptions = { ...this.modelOptions }; |
|
if (typeof onProgress === 'function') { |
|
modelOptions.stream = true; |
|
} |
|
if (this.isChatGptModel) { |
|
modelOptions.messages = input; |
|
} else { |
|
modelOptions.prompt = input; |
|
} |
|
const { debug } = this.options; |
|
const url = this.completionsUrl; |
|
if (debug) { |
|
console.debug(); |
|
console.debug(url); |
|
console.debug(modelOptions); |
|
console.debug(); |
|
} |
|
const opts = { |
|
method: 'POST', |
|
headers: { |
|
'Content-Type': 'application/json', |
|
}, |
|
body: JSON.stringify(modelOptions), |
|
dispatcher: new Agent({ |
|
bodyTimeout: 0, |
|
headersTimeout: 0, |
|
}), |
|
}; |
|
|
|
if (this.apiKey && this.options.azure) { |
|
opts.headers['api-key'] = this.apiKey; |
|
} else if (this.apiKey) { |
|
opts.headers.Authorization = `Bearer ${this.apiKey}`; |
|
} |
|
|
|
if (this.options.headers) { |
|
opts.headers = { ...opts.headers, ...this.options.headers }; |
|
} |
|
|
|
if (this.options.proxy) { |
|
opts.dispatcher = new ProxyAgent(this.options.proxy); |
|
} |
|
|
|
if (modelOptions.stream) { |
|
|
|
return new Promise(async (resolve, reject) => { |
|
try { |
|
let done = false; |
|
await fetchEventSource(url, { |
|
...opts, |
|
signal: abortController.signal, |
|
async onopen(response) { |
|
if (response.status === 200) { |
|
return; |
|
} |
|
if (debug) { |
|
console.debug(response); |
|
} |
|
let error; |
|
try { |
|
const body = await response.text(); |
|
error = new Error(`Failed to send message. HTTP ${response.status} - ${body}`); |
|
error.status = response.status; |
|
error.json = JSON.parse(body); |
|
} catch { |
|
error = error || new Error(`Failed to send message. HTTP ${response.status}`); |
|
} |
|
throw error; |
|
}, |
|
onclose() { |
|
if (debug) { |
|
console.debug('Server closed the connection unexpectedly, returning...'); |
|
} |
|
|
|
if (!done) { |
|
onProgress('[DONE]'); |
|
abortController.abort(); |
|
resolve(); |
|
} |
|
}, |
|
onerror(err) { |
|
if (debug) { |
|
console.debug(err); |
|
} |
|
|
|
throw err; |
|
}, |
|
onmessage(message) { |
|
if (debug) { |
|
|
|
} |
|
if (!message.data || message.event === 'ping') { |
|
return; |
|
} |
|
if (message.data === '[DONE]') { |
|
onProgress('[DONE]'); |
|
abortController.abort(); |
|
resolve(); |
|
done = true; |
|
return; |
|
} |
|
onProgress(JSON.parse(message.data)); |
|
}, |
|
}); |
|
} catch (err) { |
|
reject(err); |
|
} |
|
}); |
|
} |
|
const response = await fetch(url, { |
|
...opts, |
|
signal: abortController.signal, |
|
}); |
|
if (response.status !== 200) { |
|
const body = await response.text(); |
|
const error = new Error(`Failed to send message. HTTP ${response.status} - ${body}`); |
|
error.status = response.status; |
|
try { |
|
error.json = JSON.parse(body); |
|
} catch { |
|
error.body = body; |
|
} |
|
throw error; |
|
} |
|
return response.json(); |
|
} |
|
|
|
async generateTitle(userMessage, botMessage) { |
|
const instructionsPayload = { |
|
role: 'system', |
|
content: `Write an extremely concise subtitle for this conversation with no more than a few words. All words should be capitalized. Exclude punctuation. |
|
|
|
||>Message: |
|
${userMessage.message} |
|
||>Response: |
|
${botMessage.message} |
|
|
|
||>Title:`, |
|
}; |
|
|
|
const titleGenClientOptions = JSON.parse(JSON.stringify(this.options)); |
|
titleGenClientOptions.modelOptions = { |
|
model: 'gpt-3.5-turbo', |
|
temperature: 0, |
|
presence_penalty: 0, |
|
frequency_penalty: 0, |
|
}; |
|
const titleGenClient = new ChatGPTClient(this.apiKey, titleGenClientOptions); |
|
const result = await titleGenClient.getCompletion([instructionsPayload], null); |
|
|
|
return result.choices[0].message.content |
|
.replace(/[^a-zA-Z0-9' ]/g, '') |
|
.replace(/\s+/g, ' ') |
|
.trim(); |
|
} |
|
|
|
  /**
   * Sends a user message within a (possibly new) conversation, obtains the
   * model's reply (streamed or not), and persists both messages to the
   * conversations cache.
   *
   * @param {string} message - the user's message text.
   * @param {Object} [opts] - per-call options: `conversationId`,
   *   `parentMessageId`, `conversation` (pre-loaded conversation object),
   *   `clientOptions`, `promptPrefix`, `onProgress` (enables streaming),
   *   `abortController`, `shouldGenerateTitle`.
   * @returns {Promise<Object>} `{ response, conversationId, parentMessageId,
   *   messageId, details, [title], [conversation] }`.
   */
  async sendMessage(message, opts = {}) {
    // Per-call option overrides (merged per setOptions' rules).
    if (opts.clientOptions && typeof opts.clientOptions === 'object') {
      this.setOptions(opts.clientOptions);
    }

    const conversationId = opts.conversationId || crypto.randomUUID();
    const parentMessageId = opts.parentMessageId || crypto.randomUUID();

    // Prefer a caller-supplied conversation object over the cache.
    let conversation =
      typeof opts.conversation === 'object'
        ? opts.conversation
        : await this.conversationsCache.get(conversationId);

    let isNewConversation = false;
    if (!conversation) {
      conversation = {
        messages: [],
        createdAt: Date.now(),
      };
      isNewConversation = true;
    }

    // Titles are only generated for brand-new conversations.
    const shouldGenerateTitle = opts.shouldGenerateTitle && isNewConversation;

    const userMessage = {
      id: crypto.randomUUID(),
      parentMessageId,
      role: 'User',
      message,
    };
    conversation.messages.push(userMessage);

    // Build the request payload (chat messages or flat prompt) from as much
    // history as fits the token budget; `context` is the subset actually used.
    const { prompt: payload, context } = await this.buildPrompt(
      conversation.messages,
      userMessage.id,
      {
        isChatGptModel: this.isChatGptModel,
        promptPrefix: opts.promptPrefix,
      },
    );

    // Optionally trim persisted history to the messages that fit the prompt.
    if (this.options.keepNecessaryMessagesOnly) {
      conversation.messages = context;
    }

    let reply = '';
    let result = null;
    if (typeof opts.onProgress === 'function') {
      // Streaming: accumulate tokens and forward each one to the caller.
      await this.getCompletion(
        payload,
        (progressMessage) => {
          if (progressMessage === '[DONE]') {
            return;
          }
          const token = this.isChatGptModel
            ? progressMessage.choices[0].delta.content
            : progressMessage.choices[0].text;
          // Some events carry no text (e.g. a delta with only a role); skip.
          if (!token) {
            return;
          }
          if (this.options.debug) {
            console.debug(token);
          }
          if (token === this.endToken) {
            return;
          }
          opts.onProgress(token);
          reply += token;
        },
        opts.abortController || new AbortController(),
      );
    } else {
      // Non-streaming: keep the full API response for `returnData.details`.
      result = await this.getCompletion(
        payload,
        null,
        opts.abortController || new AbortController(),
      );
      if (this.options.debug) {
        console.debug(JSON.stringify(result));
      }
      if (this.isChatGptModel) {
        reply = result.choices[0].message.content;
      } else {
        reply = result.choices[0].text.replace(this.endToken, '');
      }
    }

    if (this.options.debug) {
      console.debug();
    }

    reply = reply.trim();

    const replyMessage = {
      id: crypto.randomUUID(),
      parentMessageId: userMessage.id,
      role: 'ChatGPT',
      message: reply,
    };
    conversation.messages.push(replyMessage);

    const returnData = {
      response: replyMessage.message,
      conversationId,
      parentMessageId: replyMessage.parentMessageId,
      messageId: replyMessage.id,
      details: result || {},
    };

    if (shouldGenerateTitle) {
      conversation.title = await this.generateTitle(userMessage, replyMessage);
      returnData.title = conversation.title;
    }

    // Persist the updated conversation (including the new reply).
    await this.conversationsCache.set(conversationId, conversation);

    if (this.options.returnConversation) {
      returnData.conversation = conversation;
    }

    return returnData;
  }
|
|
|
  /**
   * Builds the model payload from conversation history, fitting as many
   * messages as possible (newest first) within `maxPromptTokens`.
   *
   * Also sets `this.modelOptions.max_tokens` to the response budget left
   * after the prompt — note this side effect.
   *
   * @param {Array} messages - full conversation message list.
   * @param {string} parentMessageId - id of the newest message to include;
   *   history is walked backwards from it.
   * @param {Object} options
   * @param {boolean} [options.isChatGptModel=false] - chat-format payload vs
   *   flat prompt string.
   * @param {string} [options.promptPrefix=null] - custom instructions text.
   * @returns {Promise<{prompt: (Array|string), context: Array}>} the payload
   *   (two chat messages, or a prompt string) and the history subset used.
   * @throws {Error} when even a single message exceeds the token budget.
   */
  async buildPrompt(messages, parentMessageId, { isChatGptModel = false, promptPrefix = null }) {
    // Walk the parent-id chain to get the linear thread for this branch.
    const orderedMessages = this.constructor.getMessagesForConversation(messages, parentMessageId);

    promptPrefix = (promptPrefix || this.options.promptPrefix || '').trim();
    if (promptPrefix) {
      // Ensure the prefix ends with the end token (followed by a separator).
      if (!promptPrefix.endsWith(`${this.endToken}`)) {
        promptPrefix = `${promptPrefix.trim()}${this.endToken}\n\n`;
      }
      promptPrefix = `${this.startToken}Instructions:\n${promptPrefix}`;
    } else {
      // Default instructions, including the current date.
      const currentDateString = new Date().toLocaleDateString('en-us', {
        year: 'numeric',
        month: 'long',
        day: 'numeric',
      });
      promptPrefix = `${this.startToken}Instructions:\nYou are ChatGPT, a large language model trained by OpenAI. Respond conversationally.\nCurrent date: ${currentDateString}${this.endToken}\n\n`;
    }

    // Suffix primes the model to answer as the assistant label.
    const promptSuffix = `${this.startToken}${this.chatGptLabel}:\n`;

    const instructionsPayload = {
      role: 'system',
      name: 'instructions',
      content: promptPrefix,
    };

    const messagePayload = {
      role: 'system',
      content: promptSuffix,
    };

    // Seed the running token count with the fixed prefix/suffix cost.
    let currentTokenCount;
    if (isChatGptModel) {
      currentTokenCount =
        this.getTokenCountForMessage(instructionsPayload) +
        this.getTokenCountForMessage(messagePayload);
    } else {
      currentTokenCount = this.getTokenCount(`${promptPrefix}${promptSuffix}`);
    }
    let promptBody = '';
    const maxTokenCount = this.maxPromptTokens;

    // History messages that actually made it into the prompt, oldest first.
    const context = [];

    // Prepend messages newest-to-oldest until the budget is exhausted.
    // Recursive with an awaited setImmediate between iterations so large
    // histories don't block the event loop.
    const buildPromptBody = async () => {
      if (currentTokenCount < maxTokenCount && orderedMessages.length > 0) {
        const message = orderedMessages.pop();
        const roleLabel =
          message?.isCreatedByUser || message?.role?.toLowerCase() === 'user'
            ? this.userLabel
            : this.chatGptLabel;
        const messageString = `${this.startToken}${roleLabel}:\n${
          message?.text ?? message?.message
        }${this.endToken}\n`;
        let newPromptBody;
        if (promptBody || isChatGptModel) {
          newPromptBody = `${messageString}${promptBody}`;
        } else {
          // Non-chat models inline the instructions ahead of the oldest
          // included message.
          newPromptBody = `${promptPrefix}${messageString}${promptBody}`;
        }

        // NOTE(review): the message is added to `context` before the budget
        // check below, so a message rejected for size still appears in
        // `context` — confirm this is intended by keepNecessaryMessagesOnly
        // consumers.
        context.unshift(message);

        const tokenCountForMessage = this.getTokenCount(messageString);
        const newTokenCount = currentTokenCount + tokenCountForMessage;
        if (newTokenCount > maxTokenCount) {
          if (promptBody) {
            // We already fit at least one message; stop adding older ones.
            return false;
          }
          // Not even the newest message fits: hard error.
          throw new Error(
            `Prompt is too long. Max token count is ${maxTokenCount}, but prompt is ${newTokenCount} tokens long.`,
          );
        }
        promptBody = newPromptBody;
        currentTokenCount = newTokenCount;

        // Yield to the event loop between iterations.
        await new Promise((resolve) => setImmediate(resolve));
        return buildPromptBody();
      }
      return true;
    };

    await buildPromptBody();

    const prompt = `${promptBody}${promptSuffix}`;
    if (isChatGptModel) {
      messagePayload.content = prompt;
      // +2 presumably accounts for reply-priming overhead in chat format —
      // TODO confirm against OpenAI's token-counting guidance.
      currentTokenCount += 2;
    }

    // Give the response whatever budget remains, capped at maxResponseTokens.
    this.modelOptions.max_tokens = Math.min(
      this.maxContextTokens - currentTokenCount,
      this.maxResponseTokens,
    );

    if (this.options.debug) {
      console.debug(`Prompt : ${prompt}`);
    }

    if (isChatGptModel) {
      return { prompt: [instructionsPayload, messagePayload], context };
    }
    return { prompt, context };
  }
|
|
|
getTokenCount(text) { |
|
return this.gptEncoder.encode(text, 'all').length; |
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
getTokenCountForMessage(message) { |
|
let tokensPerMessage; |
|
let nameAdjustment; |
|
if (this.modelOptions.model.startsWith('gpt-4')) { |
|
tokensPerMessage = 3; |
|
nameAdjustment = 1; |
|
} else { |
|
tokensPerMessage = 4; |
|
nameAdjustment = -1; |
|
} |
|
|
|
|
|
const propertyTokenCounts = Object.entries(message).map(([key, value]) => { |
|
|
|
const numTokens = this.getTokenCount(value); |
|
|
|
|
|
const adjustment = key === 'name' ? nameAdjustment : 0; |
|
return numTokens + adjustment; |
|
}); |
|
|
|
|
|
return propertyTokenCounts.reduce((a, b) => a + b, tokensPerMessage); |
|
} |
|
} |
|
|
|
// Expose the client class to CommonJS consumers.
module.exports = ChatGPTClient;
|
|