diff --git a/legacy/test/tokenizer.test.ts b/legacy/test/tokenizer.test.ts new file mode 100644 index 00000000..9ac8339f --- /dev/null +++ b/legacy/test/tokenizer.test.ts @@ -0,0 +1,29 @@ +import test from 'ava' + +import * as tokenizers from '@/tokenizer' + +import './_utils' + +const models = [ + 'gpt-3.5-turbo', + 'gpt-4', + 'text-davinci-003', + 'code-davinci-002' +] + +for (const model of models) { + test(`getTokenizerForModel ${model}`, async (t) => { + const tokenizer = await tokenizers.getTokenizerForModel(model) + t.truthy(tokenizer) + + const texts = ['Hello World!', 'foo\n\nbar. 123 and also -- 456'] + + for (const text of texts) { + const encoded = tokenizer.encode(text) + t.true(encoded.length > 0) + + const decoded = tokenizer.decode(encoded) + t.is(decoded, text) + } + }) +}