From ea099a7baff808938f8ba062b1a6585811ada645 Mon Sep 17 00:00:00 2001
From: Travis Fischer
Date: Tue, 13 Jun 2023 02:25:03 -0700
Subject: [PATCH] feat: add tokenizer unit tests

---
Notes (reviewer commentary; git am ignores text between the three-dash
line and the diff, so this changes neither the commit message nor the
payload):

  * Adds an AVA test file that registers one test per model name in a
    fixed list: gpt-3.5-turbo, gpt-4, text-davinci-003, code-davinci-002.
  * Each test awaits tokenizers.getTokenizerForModel(model), asserts the
    result is truthy, then for two sample strings asserts that encode()
    returns something with length > 0 and that decode(encode(text)) is
    identical to the original text.
  * './_utils' is imported purely for its side effects; what it sets up
    is not visible in this patch (TODO: confirm).
  * The line layout of this patch was restored from a copy whose newlines
    had been collapsed. Payload indentation assumes the 2-space style;
    verify against blob 9ac8339f before applying.

 legacy/test/tokenizer.test.ts | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)
 create mode 100644 legacy/test/tokenizer.test.ts

diff --git a/legacy/test/tokenizer.test.ts b/legacy/test/tokenizer.test.ts
new file mode 100644
index 00000000..9ac8339f
--- /dev/null
+++ b/legacy/test/tokenizer.test.ts
@@ -0,0 +1,29 @@
+import test from 'ava'
+
+import * as tokenizers from '@/tokenizer'
+
+import './_utils'
+
+const models = [
+  'gpt-3.5-turbo',
+  'gpt-4',
+  'text-davinci-003',
+  'code-davinci-002'
+]
+
+for (const model of models) {
+  test(`getTokenizerForModel ${model}`, async (t) => {
+    const tokenizer = await tokenizers.getTokenizerForModel(model)
+    t.truthy(tokenizer)
+
+    const texts = ['Hello World!', 'foo\n\nbar. 123 and also -- 456']
+
+    for (const text of texts) {
+      const encoded = tokenizer.encode(text)
+      t.true(encoded.length > 0)
+
+      const decoded = tokenizer.decode(encoded)
+      t.is(decoded, text)
+    }
+  })
+}