Switch the tokenizer implementation to js-tiktoken, which is pure JS and more compatible

pull/571/head
Claudio Poli 2023-05-17 04:52:37 +02:00
rodzic f39279cef9
commit 3ec62b89b2
3 zmienionych plików z 13 dodań i 12 usunięć

Wyświetl plik

@ -37,10 +37,10 @@
"test:prettier": "prettier '**/*.{js,jsx,ts,tsx}' --check" "test:prettier": "prettier '**/*.{js,jsx,ts,tsx}' --check"
}, },
"dependencies": { "dependencies": {
"@dqbd/tiktoken": "^1.0.7",
"cac": "^6.7.14", "cac": "^6.7.14",
"conf": "^11.0.1", "conf": "^11.0.1",
"eventsource-parser": "^1.0.0", "eventsource-parser": "^1.0.0",
"js-tiktoken": "^1.0.5",
"keyv": "^4.5.2", "keyv": "^4.5.2",
"p-timeout": "^6.1.1", "p-timeout": "^6.1.1",
"quick-lru": "^6.1.1", "quick-lru": "^6.1.1",

Wyświetl plik

@ -1,9 +1,6 @@
lockfileVersion: '6.0' lockfileVersion: '6.0'
dependencies: dependencies:
'@dqbd/tiktoken':
specifier: ^1.0.7
version: 1.0.7
cac: cac:
specifier: ^6.7.14 specifier: ^6.7.14
version: 6.7.14 version: 6.7.14
@ -13,6 +10,9 @@ dependencies:
eventsource-parser: eventsource-parser:
specifier: ^1.0.0 specifier: ^1.0.0
version: 1.0.0 version: 1.0.0
js-tiktoken:
specifier: ^1.0.5
version: 1.0.5
keyv: keyv:
specifier: ^4.5.2 specifier: ^4.5.2
version: 4.5.2 version: 4.5.2
@ -192,10 +192,6 @@ packages:
to-fast-properties: 2.0.0 to-fast-properties: 2.0.0
dev: true dev: true
/@dqbd/tiktoken@1.0.7:
resolution: {integrity: sha512-bhR5k5W+8GLzysjk8zTMVygQZsgvf7W1F0IlL4ZQ5ugjo5rCyiwGM5d8DYriXspytfu98tv59niang3/T+FoDw==}
dev: false
/@esbuild-kit/cjs-loader@2.4.2: /@esbuild-kit/cjs-loader@2.4.2:
resolution: {integrity: sha512-BDXFbYOJzT/NBEtp71cvsrGPwGAMGRB/349rwKuoxNSiKjPraNNnlK6MIIabViCjqZugu6j+xeMDlEkWdHHJSg==} resolution: {integrity: sha512-BDXFbYOJzT/NBEtp71cvsrGPwGAMGRB/349rwKuoxNSiKjPraNNnlK6MIIabViCjqZugu6j+xeMDlEkWdHHJSg==}
dependencies: dependencies:
@ -809,7 +805,6 @@ packages:
/base64-js@1.5.1: /base64-js@1.5.1:
resolution: {integrity: sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA==} resolution: {integrity: sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA==}
dev: true
/binary-extensions@2.2.0: /binary-extensions@2.2.0:
resolution: {integrity: sha512-jDctJ/IVQbZoJykoeHbhXpOlNBqGNcwXJKJog42E5HDPUwQTSdjCHdihjj0DlnheQ7blbT6dHOafNAiS8ooQKA==} resolution: {integrity: sha512-jDctJ/IVQbZoJykoeHbhXpOlNBqGNcwXJKJog42E5HDPUwQTSdjCHdihjj0DlnheQ7blbT6dHOafNAiS8ooQKA==}
@ -1831,6 +1826,12 @@ packages:
engines: {node: '>=10'} engines: {node: '>=10'}
dev: true dev: true
/js-tiktoken@1.0.5:
resolution: {integrity: sha512-RYXe54ntls/uQmAxUua2J1+g+EiwWHGn1CxfioYxrP1iVDmksfZsyJt0VySyMNbreJyyreDtyBuBxeXy7HYqjQ==}
dependencies:
base64-js: 1.5.1
dev: false
/js-tokens@4.0.0: /js-tokens@4.0.0:
resolution: {integrity: sha512-RdJUflcE3cUzKiMqQgsCu06FPu9UdIJO0beYbPhHN4k6apgJtifcoCtT9bcxOpYBtpD2kCM6Sbzg4CausW/PKQ==} resolution: {integrity: sha512-RdJUflcE3cUzKiMqQgsCu06FPu9UdIJO0beYbPhHN4k6apgJtifcoCtT9bcxOpYBtpD2kCM6Sbzg4CausW/PKQ==}

Wyświetl plik

@ -1,8 +1,8 @@
import { get_encoding } from '@dqbd/tiktoken' import { getEncoding } from 'js-tiktoken'
// TODO: make this configurable // TODO: make this configurable
const tokenizer = get_encoding('cl100k_base') const tokenizer = getEncoding('cl100k_base')
export function encode(input: string): Uint32Array { export function encode(input: string): Uint32Array {
return tokenizer.encode(input) return new Uint32Array(tokenizer.encode(input))
} }