From 95176fa0f2af7bd1b13733cad254665c11c0d3fd Mon Sep 17 00:00:00 2001
From: Travis Fischer
Date: Thu, 2 Mar 2023 17:13:21 -0600
Subject: [PATCH] feat: switch tokenizer to use cl100k_base encoding for gpt-3.5-turbo model

---
 legacy/src/tokenizer.ts | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/legacy/src/tokenizer.ts b/legacy/src/tokenizer.ts
index 3bfc0692..dcf2e221 100644
--- a/legacy/src/tokenizer.ts
+++ b/legacy/src/tokenizer.ts
@@ -1,7 +1,7 @@
-import { encoding_for_model } from '@dqbd/tiktoken'
+import { get_encoding } from '@dqbd/tiktoken'
 
 // TODO: make this configurable
-const tokenizer = encoding_for_model('text-davinci-003')
+const tokenizer = get_encoding('cl100k_base')
 
 export function encode(input: string): Uint32Array {
   return tokenizer.encode(input)