mirror of https://github.com/transitive-bullshit/chatgpt-api
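// Tokenizer test suite: for each model name below, getTokenizerForModel should
// return a working tokenizer whose encode/decode round-trips the sample texts.
// Duplicate model entries intentionally exercise the caching and idempotency of
// tokenizer loading.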
import test from 'ava'

import * as tokenizers from '@/tokenizer'

const models = [
  'gpt-3.5-turbo',
  'gpt-4',
  // duplicates are included on purpose to test the caching and idempotency
  // of the tokenizer loading
  'gpt-4',
  'gpt-4',
  'gpt-4-0613',
  'text-davinci-003',
  'code-davinci-002',
  'gpt-4',
  'gpt-4',
  'gpt-4',
  'gpt-4',
  'gpt-4'
]

for (let i = 0; i < models.length; ++i) {
  const model = models[i]

  test(`getTokenizerForModel ${model} (${i})`, async (t) => {
    t.timeout(10_000)

    const tokenizer = await tokenizers.getTokenizerForModel(model)
    t.truthy(tokenizer)

    const texts = ['Hello World!', 'foo\n\nbar. 123 and also -- 456']

    for (const text of texts) {
      const encoded = tokenizer.encode(text)
      t.true(encoded.length > 0)

      const decoded = tokenizer.decode(encoded)
      t.is(decoded, text)
    }
  })
}