feat: add tokenizer unit tests

2023-06-13 02:25:03 -07:00 · 2023-06-13 02:25:03 -07:00 · b17fa6652a
commit b17fa6652a
--- a/test/tokenizer.test.ts
+++ b/test/tokenizer.test.ts
@ -0,0 +1,29 @@
 import test from 'ava'
 import * as tokenizers from '@/tokenizer'
 import './_utils'
 const models = [
  'gpt-3.5-turbo',
  'gpt-4',
  'text-davinci-003',
  'code-davinci-002'
 ]
 for (const model of models) {
  test(`getTokenizerForModel ${model}`, async (t) => {
    const tokenizer = await tokenizers.getTokenizerForModel(model)
    t.truthy(tokenizer)
    const texts = ['Hello World!', 'foo\n\nbar. 123 and also -- 456']
    for (const text of texts) {
      const encoded = tokenizer.encode(text)
      t.true(encoded.length > 0)
      const decoded = tokenizer.decode(encoded)
      t.is(decoded, text)
    }
  })
 }