kopia lustrzana https://github.com/transitive-bullshit/chatgpt-api
feat: switch from gpt-3-encoder to gpt3-tokenizer
rodzic
fc9869abf5
commit
d8eeb1a736
|
@ -36,7 +36,7 @@
|
|||
},
|
||||
"dependencies": {
|
||||
"eventsource-parser": "^0.0.5",
|
||||
"gpt-3-encoder": "^1.1.4",
|
||||
"gpt3-tokenizer": "^1.1.5",
|
||||
"keyv": "^4.5.2",
|
||||
"p-timeout": "^6.0.0",
|
||||
"quick-lru": "^6.1.1",
|
||||
|
|
|
@ -8,7 +8,7 @@ specifiers:
|
|||
del-cli: ^5.0.0
|
||||
dotenv-safe: ^8.2.0
|
||||
eventsource-parser: ^0.0.5
|
||||
gpt-3-encoder: ^1.1.4
|
||||
gpt3-tokenizer: ^1.1.5
|
||||
husky: ^8.0.2
|
||||
keyv: ^4.5.2
|
||||
lint-staged: ^13.0.3
|
||||
|
@ -26,7 +26,7 @@ specifiers:
|
|||
|
||||
dependencies:
|
||||
eventsource-parser: 0.0.5
|
||||
gpt-3-encoder: 1.1.4
|
||||
gpt3-tokenizer: 1.1.5
|
||||
keyv: 4.5.2
|
||||
p-timeout: 6.1.0
|
||||
quick-lru: 6.1.1
|
||||
|
@ -506,6 +506,10 @@ packages:
|
|||
picomatch: 2.3.1
|
||||
dev: true
|
||||
|
||||
/array-keyed-map/2.1.3:
|
||||
resolution: {integrity: sha512-JIUwuFakO+jHjxyp4YgSiKXSZeC0U+R1jR94bXWBcVlFRBycqXlb+kH9JHxBGcxnVuSqx5bnn0Qz9xtSeKOjiA==}
|
||||
dev: false
|
||||
|
||||
/array-union/2.1.0:
|
||||
resolution: {integrity: sha512-HGyxoOTYUyCM6stUe6EJgnd4EoewAI7zMdfqO+kGjnlZmBDz/cR5pf8r/cR4Wq60sL/p0IkcjUEEPwS3GFrIyw==}
|
||||
engines: {node: '>=8'}
|
||||
|
@ -1376,8 +1380,11 @@ packages:
|
|||
get-intrinsic: 1.2.0
|
||||
dev: true
|
||||
|
||||
/gpt-3-encoder/1.1.4:
|
||||
resolution: {integrity: sha512-fSQRePV+HUAhCn7+7HL7lNIXNm6eaFWFbNLOOGtmSJ0qJycyQvj60OvRlH7mee8xAMjBDNRdMXlMwjAbMTDjkg==}
|
||||
/gpt3-tokenizer/1.1.5:
|
||||
resolution: {integrity: sha512-O9iCL8MqGR0Oe9wTh0YftzIbysypNQmS5a5JG3cB3M4LMYjlAVvNnf8LUzVY9MrI7tj+YLY356uHtO2lLX2HpA==}
|
||||
engines: {node: '>=12'}
|
||||
dependencies:
|
||||
array-keyed-map: 2.1.3
|
||||
dev: false
|
||||
|
||||
/graceful-fs/4.2.10:
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
import { encode as gptEncode } from 'gpt-3-encoder'
|
||||
import Keyv from 'keyv'
|
||||
import pTimeout from 'p-timeout'
|
||||
import QuickLRU from 'quick-lru'
|
||||
import { v4 as uuidv4 } from 'uuid'
|
||||
|
||||
import * as tokenizer from './tokenizer'
|
||||
import * as types from './types'
|
||||
import { fetch as globalFetch } from './fetch'
|
||||
import { fetchSSE } from './fetch-sse'
|
||||
|
@ -438,7 +438,7 @@ Current date: ${currentDate}${this._sepToken}\n\n`
|
|||
text = text.replace(/<\|im_sep\|>/g, '<|endoftext|>')
|
||||
}
|
||||
|
||||
return gptEncode(text).length
|
||||
return tokenizer.encode(text).length
|
||||
}
|
||||
|
||||
protected get _isChatGPTModel() {
|
||||
|
|
|
@ -0,0 +1,12 @@
|
|||
import GPT3TokenizerImport from 'gpt3-tokenizer'
|
||||
|
||||
// The default import is used directly when it is already a function
// (i.e. the constructor); otherwise the constructor is read from its
// `default` property. Presumably this covers differing ESM/CJS interop
// behavior between bundlers and runtimes — confirm before simplifying.
const importIsConstructor = typeof GPT3TokenizerImport === 'function'

const GPT3Tokenizer: typeof GPT3TokenizerImport = importIsConstructor
  ? GPT3TokenizerImport
  : (GPT3TokenizerImport as any).default

/** Module-level tokenizer instance, constructed once with `type: 'gpt3'`. */
export const tokenizer = new GPT3Tokenizer({ type: 'gpt3' })
|
||||
|
||||
/**
 * Encodes `input` with the module-level `tokenizer` and returns the `bpe`
 * field of the result.
 *
 * @param input - Text to encode.
 * @returns The `bpe` array produced by the tokenizer (presumably the BPE
 * token ids; callers can use its length as a token count).
 */
export function encode(input: string): number[] {
  const { bpe } = tokenizer.encode(input)
  return bpe
}
|
Ładowanie…
Reference in New Issue