kopia lustrzana https://github.com/transitive-bullshit/chatgpt-api
Merge pull request #390 from transitive-bullshit/feature/rust-wasm-tokenizer
commit
aaa482b5f0
|
@ -37,10 +37,10 @@
|
||||||
"test:prettier": "prettier '**/*.{js,jsx,ts,tsx}' --check"
|
"test:prettier": "prettier '**/*.{js,jsx,ts,tsx}' --check"
|
||||||
},
|
},
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
|
"@dqbd/tiktoken": "^0.2.1",
|
||||||
"cac": "^6.7.14",
|
"cac": "^6.7.14",
|
||||||
"conf": "^11.0.1",
|
"conf": "^11.0.1",
|
||||||
"eventsource-parser": "^0.0.5",
|
"eventsource-parser": "^0.0.5",
|
||||||
"gpt3-tokenizer": "^1.1.5",
|
|
||||||
"keyv": "^4.5.2",
|
"keyv": "^4.5.2",
|
||||||
"p-timeout": "^6.0.0",
|
"p-timeout": "^6.0.0",
|
||||||
"quick-lru": "^6.1.1",
|
"quick-lru": "^6.1.1",
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
lockfileVersion: 5.4
|
lockfileVersion: 5.4
|
||||||
|
|
||||||
specifiers:
|
specifiers:
|
||||||
|
'@dqbd/tiktoken': ^0.2.1
|
||||||
'@keyv/redis': ^2.5.4
|
'@keyv/redis': ^2.5.4
|
||||||
'@trivago/prettier-plugin-sort-imports': ^4.0.0
|
'@trivago/prettier-plugin-sort-imports': ^4.0.0
|
||||||
'@types/node': ^18.11.9
|
'@types/node': ^18.11.9
|
||||||
|
@ -10,7 +11,6 @@ specifiers:
|
||||||
del-cli: ^5.0.0
|
del-cli: ^5.0.0
|
||||||
dotenv-safe: ^8.2.0
|
dotenv-safe: ^8.2.0
|
||||||
eventsource-parser: ^0.0.5
|
eventsource-parser: ^0.0.5
|
||||||
gpt3-tokenizer: ^1.1.5
|
|
||||||
husky: ^8.0.2
|
husky: ^8.0.2
|
||||||
keyv: ^4.5.2
|
keyv: ^4.5.2
|
||||||
lint-staged: ^13.0.3
|
lint-staged: ^13.0.3
|
||||||
|
@ -28,10 +28,10 @@ specifiers:
|
||||||
uuid: ^9.0.0
|
uuid: ^9.0.0
|
||||||
|
|
||||||
dependencies:
|
dependencies:
|
||||||
|
'@dqbd/tiktoken': 0.2.1
|
||||||
cac: 6.7.14
|
cac: 6.7.14
|
||||||
conf: 11.0.1
|
conf: 11.0.1
|
||||||
eventsource-parser: 0.0.5
|
eventsource-parser: 0.0.5
|
||||||
gpt3-tokenizer: 1.1.5
|
|
||||||
keyv: 4.5.2
|
keyv: 4.5.2
|
||||||
p-timeout: 6.1.0
|
p-timeout: 6.1.0
|
||||||
quick-lru: 6.1.1
|
quick-lru: 6.1.1
|
||||||
|
@ -300,6 +300,10 @@ packages:
|
||||||
to-fast-properties: 2.0.0
|
to-fast-properties: 2.0.0
|
||||||
dev: true
|
dev: true
|
||||||
|
|
||||||
|
/@dqbd/tiktoken/0.2.1:
|
||||||
|
resolution: {integrity: sha512-Nw9Swn37xZLAvz64qA3tTxy4yJLMhYDj7dWS6uSoHkUJxTn+BcYA+r06O36Q3Jya52b3SvK/LDXzl1dVeHqrew==}
|
||||||
|
dev: false
|
||||||
|
|
||||||
/@esbuild-kit/cjs-loader/2.4.1:
|
/@esbuild-kit/cjs-loader/2.4.1:
|
||||||
resolution: {integrity: sha512-lhc/XLith28QdW0HpHZvZKkorWgmCNT7sVelMHDj3HFdTfdqkwEKvT+aXVQtNAmCC39VJhunDkWhONWB7335mg==}
|
resolution: {integrity: sha512-lhc/XLith28QdW0HpHZvZKkorWgmCNT7sVelMHDj3HFdTfdqkwEKvT+aXVQtNAmCC39VJhunDkWhONWB7335mg==}
|
||||||
dependencies:
|
dependencies:
|
||||||
|
@ -525,10 +529,6 @@ packages:
|
||||||
picomatch: 2.3.1
|
picomatch: 2.3.1
|
||||||
dev: true
|
dev: true
|
||||||
|
|
||||||
/array-keyed-map/2.1.3:
|
|
||||||
resolution: {integrity: sha512-JIUwuFakO+jHjxyp4YgSiKXSZeC0U+R1jR94bXWBcVlFRBycqXlb+kH9JHxBGcxnVuSqx5bnn0Qz9xtSeKOjiA==}
|
|
||||||
dev: false
|
|
||||||
|
|
||||||
/array-union/2.1.0:
|
/array-union/2.1.0:
|
||||||
resolution: {integrity: sha512-HGyxoOTYUyCM6stUe6EJgnd4EoewAI7zMdfqO+kGjnlZmBDz/cR5pf8r/cR4Wq60sL/p0IkcjUEEPwS3GFrIyw==}
|
resolution: {integrity: sha512-HGyxoOTYUyCM6stUe6EJgnd4EoewAI7zMdfqO+kGjnlZmBDz/cR5pf8r/cR4Wq60sL/p0IkcjUEEPwS3GFrIyw==}
|
||||||
engines: {node: '>=8'}
|
engines: {node: '>=8'}
|
||||||
|
@ -1444,13 +1444,6 @@ packages:
|
||||||
get-intrinsic: 1.2.0
|
get-intrinsic: 1.2.0
|
||||||
dev: true
|
dev: true
|
||||||
|
|
||||||
/gpt3-tokenizer/1.1.5:
|
|
||||||
resolution: {integrity: sha512-O9iCL8MqGR0Oe9wTh0YftzIbysypNQmS5a5JG3cB3M4LMYjlAVvNnf8LUzVY9MrI7tj+YLY356uHtO2lLX2HpA==}
|
|
||||||
engines: {node: '>=12'}
|
|
||||||
dependencies:
|
|
||||||
array-keyed-map: 2.1.3
|
|
||||||
dev: false
|
|
||||||
|
|
||||||
/graceful-fs/4.2.10:
|
/graceful-fs/4.2.10:
|
||||||
resolution: {integrity: sha512-9ByhssR2fPVsNZj478qUUbKfmL0+t5BDVyjShtyZZLiK7ZDAArFFfopyOTj0M05wE2tJPisA4iTnnXl2YoPvOA==}
|
resolution: {integrity: sha512-9ByhssR2fPVsNZj478qUUbKfmL0+t5BDVyjShtyZZLiK7ZDAArFFfopyOTj0M05wE2tJPisA4iTnnXl2YoPvOA==}
|
||||||
dev: true
|
dev: true
|
||||||
|
|
|
@ -1,12 +1,8 @@
|
||||||
import GPT3TokenizerImport from 'gpt3-tokenizer'
|
import { encoding_for_model } from '@dqbd/tiktoken'
|
||||||
|
|
||||||
const GPT3Tokenizer: typeof GPT3TokenizerImport =
|
// TODO: make this configurable
|
||||||
typeof GPT3TokenizerImport === 'function'
|
const tokenizer = encoding_for_model('text-davinci-003')
|
||||||
? GPT3TokenizerImport
|
|
||||||
: (GPT3TokenizerImport as any).default
|
|
||||||
|
|
||||||
export const tokenizer = new GPT3Tokenizer({ type: 'gpt3' })
|
export function encode(input: string): Uint32Array {
|
||||||
|
return tokenizer.encode(input)
|
||||||
export function encode(input: string): number[] {
|
|
||||||
return tokenizer.encode(input).bpe
|
|
||||||
}
|
}
|
||||||
|
|
Ładowanie…
Reference in New Issue