From 29e4a5413053d8f6e13cdae827cc6bfbdb40b113 Mon Sep 17 00:00:00 2001
From: San
Date: Thu, 20 Apr 2023 16:11:27 +0800
Subject: [PATCH] try to fix prompt token count

---
 src/chatgpt-api.ts | 32 +++++++++++++++++++++++++++++++-
 1 file changed, 31 insertions(+), 1 deletion(-)

diff --git a/src/chatgpt-api.ts b/src/chatgpt-api.ts
index 2165956..b055b97 100644
--- a/src/chatgpt-api.ts
+++ b/src/chatgpt-api.ts
@@ -361,6 +361,7 @@ export class ChatGPTAPI {
   protected async _buildMessages(text: string, opts: types.SendMessageOptions) {
     const { systemMessage = this._systemMessage } = opts
     let { parentMessageId } = opts
+    const model = opts.completionParams?.model ?? this._completionParams.model
 
     const userLabel = USER_LABEL_DEFAULT
     const assistantLabel = ASSISTANT_LABEL_DEFAULT
@@ -401,7 +402,10 @@ export class ChatGPTAPI {
         }, [] as string[])
         .join('\n\n')
 
-      const nextNumTokensEstimate = await this._getTokenCount(prompt)
+      const nextNumTokensEstimate = await this._getMessagesTokenCount(
+        nextMessages,
+        model
+      )
       const isValidPrompt = nextNumTokensEstimate <= maxNumTokens
 
       if (prompt && !isValidPrompt) {
@@ -455,6 +459,32 @@ export class ChatGPTAPI {
     return tokenizer.encode(text).length
   }
 
+  protected async _getMessagesTokenCount(
+    messages: types.openai.ChatCompletionRequestMessage[],
+    model: string
+  ) {
+    // https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb#6.-Counting-tokens-for-chat-API-calls
+    let tokensPerMessage = 4 // fallback for models not matched below
+    switch (model) {
+      case 'gpt-3.5-turbo':
+      case 'gpt-3.5-turbo-0301':
+        // every message follows <|start|>{role/name}\n{content}<|end|>\n (4
+        // tokens), and the role string itself encodes to one more token
+        tokensPerMessage = 4 + 1
+        break
+      case 'gpt-4':
+      case 'gpt-4-0314':
+        tokensPerMessage = 3 + 1
+        break
+    }
+
+    return (
+      messages.reduce((sum, message) => {
+        return sum + tokensPerMessage + tokenizer.encode(message.content).length
+      }, 0) + 3 // every reply is primed with <|start|>assistant<|message|>
+    )
+  }
+
   protected async _defaultGetMessageById(
     id: string
   ): Promise<types.ChatMessage> {
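
For reference, the heuristic this patch implements can be sanity-checked outside the class. Per the linked cookbook notebook, each gpt-3.5-turbo-0301 message carries 4 framing tokens plus its encoded content (the role string adds roughly one more token), gpt-4 messages carry 3, and every reply is primed with 3 extra tokens. The following is a minimal standalone sketch of the same arithmetic, not part of the patch: it assumes the gpt-tokenizer package for encoding, and ChatMessage here is a hypothetical stand-in for types.openai.ChatCompletionRequestMessage.

import { encode } from 'gpt-tokenizer'

// Hypothetical stand-in for types.openai.ChatCompletionRequestMessage
interface ChatMessage {
  role: 'system' | 'user' | 'assistant'
  content: string
}

function countChatTokens(messages: ChatMessage[], model: string): number {
  // 4 framing tokens (<|start|>{role/name}\n{content}<|end|>\n) per message
  // for gpt-3.5-turbo models, 3 for gpt-4; plus one token for the role string
  const tokensPerMessage = model.startsWith('gpt-3.5-turbo') ? 4 + 1 : 3 + 1

  const total = messages.reduce(
    (sum, m) => sum + tokensPerMessage + encode(m.content).length,
    0
  )
  return total + 3 // every reply is primed with <|start|>assistant<|message|>
}

// Example: two short messages under gpt-3.5-turbo, i.e.
// 2 * 5 framing/role tokens + encoded content + 3 priming tokens
console.log(
  countChatTokens(
    [
      { role: 'system', content: 'You are a helpful assistant.' },
      { role: 'user', content: 'Hello!' }
    ],
    'gpt-3.5-turbo'
  )
)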