feat: WIP add tokenizers and token counting support

Travis Fischer 2023-06-06 23:43:15 -07:00
rodzic 81eb5f4594
commit bc1d53ba48
7 zmienionych plików z 386 dodań i 115 usunięć

12
examples/sentiment.ts vendored
Wyświetl plik

@ -13,12 +13,12 @@ export async function main() {
)
.input(z.object({ texts: z.string().array() }))
.output(z.array(z.object({ text: z.string(), label: z.string() })))
// .examples([
// { input: 'The food was digusting', output: 'negative' },
// { input: 'We had a fantastic night', output: 'positive' },
// { input: 'Recommended', output: 'positive' },
// { input: 'The waiter was rude', output: 'negative' }
// ])
.examples([
{ input: 'The food was digusting', output: 'negative' },
{ input: 'We had a fantastic night', output: 'positive' },
{ input: 'Recommended', output: 'positive' },
{ input: 'The waiter was rude', output: 'negative' }
])
.call({
texts: [
'I went to this place and it was just so awful.',

Wyświetl plik

@ -38,16 +38,16 @@
"test:eslint": "eslint \"**/*.ts\""
},
"dependencies": {
"@anthropic-ai/sdk": "^0.4.3",
"@inquirer/checkbox": "^1.3.0",
"@inquirer/editor": "^1.1.0",
"@inquirer/select": "^1.2.0",
"@anthropic-ai/sdk": "^0.4.4",
"@inquirer/checkbox": "^1.3.1",
"@inquirer/editor": "^1.2.0",
"@inquirer/select": "^1.2.1",
"debug": "^4.3.4",
"handlebars": "^4.7.7",
"js-tiktoken": "^1.0.6",
"jsonrepair": "^3.1.0",
"ky": "^0.33.3",
"openai-fetch": "^1.5.0",
"openai-fetch": "^1.5.1",
"p-map": "^6.0.0",
"p-retry": "^5.1.2",
"p-timeout": "^6.1.1",
@ -59,14 +59,14 @@
"zod-validation-error": "^1.3.0"
},
"devDependencies": {
"@keyv/redis": "^2.6.0",
"@keyv/redis": "^2.6.1",
"@trivago/prettier-plugin-sort-imports": "^4.1.1",
"@types/debug": "^4.1.8",
"@types/node": "^20.2.5",
"@types/sinon": "^10.0.15",
"@types/uuid": "^9.0.1",
"@typescript-eslint/eslint-plugin": "^5.59.8",
"@typescript-eslint/parser": "^5.59.8",
"@typescript-eslint/eslint-plugin": "^5.59.9",
"@typescript-eslint/parser": "^5.59.9",
"ava": "^5.3.0",
"del-cli": "^5.0.0",
"dotenv": "^16.1.4",

Wyświetl plik

@ -6,17 +6,17 @@ settings:
dependencies:
'@anthropic-ai/sdk':
specifier: ^0.4.3
version: 0.4.3
specifier: ^0.4.4
version: 0.4.4
'@inquirer/checkbox':
specifier: ^1.3.0
version: 1.3.0
specifier: ^1.3.1
version: 1.3.1
'@inquirer/editor':
specifier: ^1.1.0
version: 1.1.0
'@inquirer/select':
specifier: ^1.2.0
version: 1.2.0
'@inquirer/select':
specifier: ^1.2.1
version: 1.2.1
debug:
specifier: ^4.3.4
version: 4.3.4
@ -33,8 +33,8 @@ dependencies:
specifier: ^0.33.3
version: 0.33.3
openai-fetch:
specifier: ^1.5.0
version: 1.5.0
specifier: ^1.5.1
version: 1.5.1
p-map:
specifier: ^6.0.0
version: 6.0.0
@ -65,8 +65,8 @@ dependencies:
devDependencies:
'@keyv/redis':
specifier: ^2.6.0
version: 2.6.0
specifier: ^2.6.1
version: 2.6.1
'@trivago/prettier-plugin-sort-imports':
specifier: ^4.1.1
version: 4.1.1(prettier@2.8.8)
@ -83,11 +83,11 @@ devDependencies:
specifier: ^9.0.1
version: 9.0.1
'@typescript-eslint/eslint-plugin':
specifier: ^5.59.8
version: 5.59.8(@typescript-eslint/parser@5.59.8)(eslint@8.42.0)(typescript@5.1.3)
specifier: ^5.59.9
version: 5.59.9(@typescript-eslint/parser@5.59.9)(eslint@8.42.0)(typescript@5.1.3)
'@typescript-eslint/parser':
specifier: ^5.59.8
version: 5.59.8(eslint@8.42.0)(typescript@5.1.3)
specifier: ^5.59.9
version: 5.59.9(eslint@8.42.0)(typescript@5.1.3)
ava:
specifier: ^5.3.0
version: 5.3.0
@ -151,8 +151,8 @@ devDependencies:
packages:
/@anthropic-ai/sdk@0.4.3:
resolution: {integrity: sha512-SZrlXvjUUYT9rPmSzlTtmVk1OjVNpkCzILRluhiYwNcxXfQyvPJDi0CI6PyymygcgtqEF5EVqhKmC/PtPsNEIw==}
/@anthropic-ai/sdk@0.4.4:
resolution: {integrity: sha512-Z/39nQi1sSUCeLII3lsAbL1u+0JF6cR2XmUEX9sLH0VtxmIjY6cjOUYjCkYh4oapTxOkhAFnVSAFJ6cxml2qXg==}
dependencies:
'@fortaine/fetch-event-source': 3.0.6
cross-fetch: 3.1.6
@ -186,21 +186,21 @@ packages:
engines: {node: '>=6.9.0'}
dependencies:
'@babel/template': 7.21.9
'@babel/types': 7.22.0
'@babel/types': 7.22.4
dev: true
/@babel/helper-hoist-variables@7.18.6:
resolution: {integrity: sha512-UlJQPkFqFULIcyW5sbzgbkxn2FKRgwWiRexcuaR8RNJRy8+LLveqPjwZV/bwrLZCN0eUHD/x8D0heK1ozuoo6Q==}
engines: {node: '>=6.9.0'}
dependencies:
'@babel/types': 7.22.0
'@babel/types': 7.22.4
dev: true
/@babel/helper-split-export-declaration@7.18.6:
resolution: {integrity: sha512-bde1etTx6ZyTmobl9LLMMQsaizFVZrquTEHOqKeQESMKo4PlObf+8+JA25ZsIpZhT/WEd39+vOdLXAFG/nELpA==}
engines: {node: '>=6.9.0'}
dependencies:
'@babel/types': 7.22.0
'@babel/types': 7.22.4
dev: true
/@babel/helper-string-parser@7.21.5:
@ -222,8 +222,8 @@ packages:
js-tokens: 4.0.0
dev: true
/@babel/parser@7.22.0:
resolution: {integrity: sha512-DA65VCJRetcFmJnt9/hEmRvXNCwk0V86dxG6p6N13hzDazaLRjGdTGPGgjxZOtLuFgWzOSRX4grybmRXwQ9bSg==}
/@babel/parser@7.22.4:
resolution: {integrity: sha512-VLLsx06XkEYqBtE5YGPwfSGwfrjnyPP5oiGty3S8pQLFDFLaS8VwWSIxkTXpcvr5zeYLE6+MBNl2npl/YnfofA==}
engines: {node: '>=6.0.0'}
hasBin: true
dependencies:
@ -235,8 +235,8 @@ packages:
engines: {node: '>=6.9.0'}
dependencies:
'@babel/code-frame': 7.21.4
'@babel/parser': 7.22.0
'@babel/types': 7.22.0
'@babel/parser': 7.22.4
'@babel/types': 7.22.4
dev: true
/@babel/traverse@7.17.3:
@ -249,7 +249,7 @@ packages:
'@babel/helper-function-name': 7.21.0
'@babel/helper-hoist-variables': 7.18.6
'@babel/helper-split-export-declaration': 7.18.6
'@babel/parser': 7.22.0
'@babel/parser': 7.22.4
'@babel/types': 7.17.0
debug: 4.3.4
globals: 11.12.0
@ -265,8 +265,8 @@ packages:
to-fast-properties: 2.0.0
dev: true
/@babel/types@7.22.0:
resolution: {integrity: sha512-NtXlm3f6cNWIv003cETdlz9sss0VMNtplyatFohxWPz90AbwuhCbHbQopkGis6bG1vOunDLN0FF/4Uv5i8LFZQ==}
/@babel/types@7.22.4:
resolution: {integrity: sha512-Tx9x3UBHTTsMSW85WB2kphxYQVvrZ/t1FxD88IpSgIjiUJlCm9z+xWIDwyo1vffTwSqteqyznB8ZE9vYYk16zA==}
engines: {node: '>=6.9.0'}
dependencies:
'@babel/helper-string-parser': 7.21.5
@ -278,7 +278,7 @@ packages:
resolution: {integrity: sha512-BDXFbYOJzT/NBEtp71cvsrGPwGAMGRB/349rwKuoxNSiKjPraNNnlK6MIIabViCjqZugu6j+xeMDlEkWdHHJSg==}
dependencies:
'@esbuild-kit/core-utils': 3.1.0
get-tsconfig: 4.5.0
get-tsconfig: 4.6.0
dev: true
/@esbuild-kit/core-utils@3.1.0:
@ -292,7 +292,7 @@ packages:
resolution: {integrity: sha512-Qwfvj/qoPbClxCRNuac1Du01r9gvNOT+pMYtJDapfB1eoGN1YlJ1BixLyL9WVENRx5RXgNLdfYdx/CuswlGhMw==}
dependencies:
'@esbuild-kit/core-utils': 3.1.0
get-tsconfig: 4.5.0
get-tsconfig: 4.6.0
dev: true
/@esbuild/android-arm64@0.17.19:
@ -555,19 +555,19 @@ packages:
resolution: {integrity: sha512-ZnQMnLV4e7hDlUvw8H+U8ASL02SS2Gn6+9Ac3wGGLIe7+je2AeAOxPY+izIPJDfFDb7eDjev0Us8MO1iFRN8hA==}
dev: true
/@inquirer/checkbox@1.3.0:
resolution: {integrity: sha512-kfYE5BH7vO0j2IwSgxzDmKPzQm/OpLnIZEEbOetYM+k4+YKTbSqeqCu7VZl3d8/rtotgJQc7gb8u2pIVeJh3Mg==}
/@inquirer/checkbox@1.3.1:
resolution: {integrity: sha512-3l3aC6gYOPGaVOa9cNe4dZ8t96e3CFifC3Hee1MD+F7qaRxGAuXnhCQiUr4ngj2P7xd9U3DCDbLXNsLKQoHYCg==}
engines: {node: '>=14.18.0'}
dependencies:
'@inquirer/core': 2.0.0
'@inquirer/core': 2.1.0
'@inquirer/type': 1.1.0
ansi-escapes: 4.3.2
chalk: 4.1.2
figures: 3.2.0
dev: false
/@inquirer/core@2.0.0:
resolution: {integrity: sha512-NnLGihYWEFVdFIoEDPwGO0jB5phuNcxTUHSNq5geyiOVQOnWNuX9x2rhPPeiikE/5fNXIBmqojD0+PiD9whtXw==}
/@inquirer/core@2.1.0:
resolution: {integrity: sha512-Hq9hZ5G/VUaeWkSs283HZwwMbe79lcOI5HWwW1GIM1ohouy2/x489Qf/A1BJYvMUj+QG4LSB5LtVMjn9P3Ge6Q==}
engines: {node: '>=14.18.0'}
dependencies:
'@inquirer/type': 1.1.0
@ -583,21 +583,21 @@ packages:
wrap-ansi: 6.2.0
dev: false
/@inquirer/editor@1.1.0:
resolution: {integrity: sha512-ll6NfzutAuzRwNELERhECZCnAIzb2DdnRaNwtJ3Gfy6MrQBdYpFzGLwDyxB8+yf2iJoMOmsKWnzGbmvWNvSUhw==}
/@inquirer/editor@1.2.0:
resolution: {integrity: sha512-NMXLLNadvqIR6TD6mNZRa/PKHTvdaa4ndGGeXl+DwybQ4K7cVSJNRrztixpM1KDEoG8Ape5ightNwq25cyugTg==}
engines: {node: '>=14.18.0'}
dependencies:
'@inquirer/core': 2.0.0
'@inquirer/core': 2.1.0
'@inquirer/type': 1.1.0
chalk: 4.1.2
external-editor: 3.1.0
dev: false
/@inquirer/select@1.2.0:
resolution: {integrity: sha512-2CqhtE40GFmRXDFzJeMvSowKcO2/yvIzgSpL44+Hl/SAO/1FJgmHNAFGBuqX0RbohYPnSpF8eftgiy16fA3RJw==}
/@inquirer/select@1.2.1:
resolution: {integrity: sha512-13JDLtlwFoqQUYRdMzz5wP3a4DWccJfNA/8M8MDUhhZ8HeKZ3MPaTMlpxwY+Q0Jgbmt56nf7xUuck0XXPce8Xw==}
engines: {node: '>=14.18.0'}
dependencies:
'@inquirer/core': 2.0.0
'@inquirer/core': 2.1.0
'@inquirer/type': 1.1.0
ansi-escapes: 4.3.2
chalk: 4.1.2
@ -647,8 +647,8 @@ packages:
'@jridgewell/sourcemap-codec': 1.4.14
dev: true
/@keyv/redis@2.6.0:
resolution: {integrity: sha512-fu2mgLp6lwo0l+fT1dFHm+t16EDrD/WECho84HNapsqnelQImv81SCBdi+ky2Hw1BuH2ncku7uNbiZvB631/TA==}
/@keyv/redis@2.6.1:
resolution: {integrity: sha512-s0L2DRZoa3eahIf/I/mhG6bpb90JFKVnpd1iVlhBOr6PxsqvCyPU99BhZKRAkMThnoJZRewI7sO9G/bYjQ+B6Q==}
engines: {node: '>= 14'}
dependencies:
ioredis: 5.3.2
@ -717,7 +717,7 @@ packages:
optional: true
dependencies:
'@babel/generator': 7.17.7
'@babel/parser': 7.22.0
'@babel/parser': 7.22.4
'@babel/traverse': 7.17.3
'@babel/types': 7.17.0
javascript-natural-sort: 0.7.1
@ -775,8 +775,8 @@ packages:
resolution: {integrity: sha512-rFT3ak0/2trgvp4yYZo5iKFEPsET7vKydKF+VRCxlQ9bpheehyAJH89dAkaLEq/j/RZXJIqcgsmPJKUP1Z28HA==}
dev: true
/@typescript-eslint/eslint-plugin@5.59.8(@typescript-eslint/parser@5.59.8)(eslint@8.42.0)(typescript@5.1.3):
resolution: {integrity: sha512-JDMOmhXteJ4WVKOiHXGCoB96ADWg9q7efPWHRViT/f09bA8XOMLAVHHju3l0MkZnG1izaWXYmgvQcUjTRcpShQ==}
/@typescript-eslint/eslint-plugin@5.59.9(@typescript-eslint/parser@5.59.9)(eslint@8.42.0)(typescript@5.1.3):
resolution: {integrity: sha512-4uQIBq1ffXd2YvF7MAvehWKW3zVv/w+mSfRAu+8cKbfj3nwzyqJLNcZJpQ/WZ1HLbJDiowwmQ6NO+63nCA+fqA==}
engines: {node: ^12.22.0 || ^14.17.0 || >=16.0.0}
peerDependencies:
'@typescript-eslint/parser': ^5.0.0
@ -787,10 +787,10 @@ packages:
optional: true
dependencies:
'@eslint-community/regexpp': 4.5.1
'@typescript-eslint/parser': 5.59.8(eslint@8.42.0)(typescript@5.1.3)
'@typescript-eslint/scope-manager': 5.59.8
'@typescript-eslint/type-utils': 5.59.8(eslint@8.42.0)(typescript@5.1.3)
'@typescript-eslint/utils': 5.59.8(eslint@8.42.0)(typescript@5.1.3)
'@typescript-eslint/parser': 5.59.9(eslint@8.42.0)(typescript@5.1.3)
'@typescript-eslint/scope-manager': 5.59.9
'@typescript-eslint/type-utils': 5.59.9(eslint@8.42.0)(typescript@5.1.3)
'@typescript-eslint/utils': 5.59.9(eslint@8.42.0)(typescript@5.1.3)
debug: 4.3.4
eslint: 8.42.0
grapheme-splitter: 1.0.4
@ -803,8 +803,8 @@ packages:
- supports-color
dev: true
/@typescript-eslint/parser@5.59.8(eslint@8.42.0)(typescript@5.1.3):
resolution: {integrity: sha512-AnR19RjJcpjoeGojmwZtCwBX/RidqDZtzcbG3xHrmz0aHHoOcbWnpDllenRDmDvsV0RQ6+tbb09/kyc+UT9Orw==}
/@typescript-eslint/parser@5.59.9(eslint@8.42.0)(typescript@5.1.3):
resolution: {integrity: sha512-FsPkRvBtcLQ/eVK1ivDiNYBjn3TGJdXy2fhXX+rc7czWl4ARwnpArwbihSOHI2Peg9WbtGHrbThfBUkZZGTtvQ==}
engines: {node: ^12.22.0 || ^14.17.0 || >=16.0.0}
peerDependencies:
eslint: ^6.0.0 || ^7.0.0 || ^8.0.0
@ -813,9 +813,9 @@ packages:
typescript:
optional: true
dependencies:
'@typescript-eslint/scope-manager': 5.59.8
'@typescript-eslint/types': 5.59.8
'@typescript-eslint/typescript-estree': 5.59.8(typescript@5.1.3)
'@typescript-eslint/scope-manager': 5.59.9
'@typescript-eslint/types': 5.59.9
'@typescript-eslint/typescript-estree': 5.59.9(typescript@5.1.3)
debug: 4.3.4
eslint: 8.42.0
typescript: 5.1.3
@ -823,16 +823,16 @@ packages:
- supports-color
dev: true
/@typescript-eslint/scope-manager@5.59.8:
resolution: {integrity: sha512-/w08ndCYI8gxGf+9zKf1vtx/16y8MHrZs5/tnjHhMLNSixuNcJavSX4wAiPf4aS5x41Es9YPCn44MIe4cxIlig==}
/@typescript-eslint/scope-manager@5.59.9:
resolution: {integrity: sha512-8RA+E+w78z1+2dzvK/tGZ2cpGigBZ58VMEHDZtpE1v+LLjzrYGc8mMaTONSxKyEkz3IuXFM0IqYiGHlCsmlZxQ==}
engines: {node: ^12.22.0 || ^14.17.0 || >=16.0.0}
dependencies:
'@typescript-eslint/types': 5.59.8
'@typescript-eslint/visitor-keys': 5.59.8
'@typescript-eslint/types': 5.59.9
'@typescript-eslint/visitor-keys': 5.59.9
dev: true
/@typescript-eslint/type-utils@5.59.8(eslint@8.42.0)(typescript@5.1.3):
resolution: {integrity: sha512-+5M518uEIHFBy3FnyqZUF3BMP+AXnYn4oyH8RF012+e7/msMY98FhGL5SrN29NQ9xDgvqCgYnsOiKp1VjZ/fpA==}
/@typescript-eslint/type-utils@5.59.9(eslint@8.42.0)(typescript@5.1.3):
resolution: {integrity: sha512-ksEsT0/mEHg9e3qZu98AlSrONAQtrSTljL3ow9CGej8eRo7pe+yaC/mvTjptp23Xo/xIf2mLZKC6KPv4Sji26Q==}
engines: {node: ^12.22.0 || ^14.17.0 || >=16.0.0}
peerDependencies:
eslint: '*'
@ -841,8 +841,8 @@ packages:
typescript:
optional: true
dependencies:
'@typescript-eslint/typescript-estree': 5.59.8(typescript@5.1.3)
'@typescript-eslint/utils': 5.59.8(eslint@8.42.0)(typescript@5.1.3)
'@typescript-eslint/typescript-estree': 5.59.9(typescript@5.1.3)
'@typescript-eslint/utils': 5.59.9(eslint@8.42.0)(typescript@5.1.3)
debug: 4.3.4
eslint: 8.42.0
tsutils: 3.21.0(typescript@5.1.3)
@ -851,13 +851,13 @@ packages:
- supports-color
dev: true
/@typescript-eslint/types@5.59.8:
resolution: {integrity: sha512-+uWuOhBTj/L6awoWIg0BlWy0u9TyFpCHrAuQ5bNfxDaZ1Ppb3mx6tUigc74LHcbHpOHuOTOJrBoAnhdHdaea1w==}
/@typescript-eslint/types@5.59.9:
resolution: {integrity: sha512-uW8H5NRgTVneSVTfiCVffBb8AbwWSKg7qcA4Ot3JI3MPCJGsB4Db4BhvAODIIYE5mNj7Q+VJkK7JxmRhk2Lyjw==}
engines: {node: ^12.22.0 || ^14.17.0 || >=16.0.0}
dev: true
/@typescript-eslint/typescript-estree@5.59.8(typescript@5.1.3):
resolution: {integrity: sha512-Jy/lPSDJGNow14vYu6IrW790p7HIf/SOV1Bb6lZ7NUkLc2iB2Z9elESmsaUtLw8kVqogSbtLH9tut5GCX1RLDg==}
/@typescript-eslint/typescript-estree@5.59.9(typescript@5.1.3):
resolution: {integrity: sha512-pmM0/VQ7kUhd1QyIxgS+aRvMgw+ZljB3eDb+jYyp6d2bC0mQWLzUDF+DLwCTkQ3tlNyVsvZRXjFyV0LkU/aXjA==}
engines: {node: ^12.22.0 || ^14.17.0 || >=16.0.0}
peerDependencies:
typescript: '*'
@ -865,8 +865,8 @@ packages:
typescript:
optional: true
dependencies:
'@typescript-eslint/types': 5.59.8
'@typescript-eslint/visitor-keys': 5.59.8
'@typescript-eslint/types': 5.59.9
'@typescript-eslint/visitor-keys': 5.59.9
debug: 4.3.4
globby: 11.1.0
is-glob: 4.0.3
@ -877,8 +877,8 @@ packages:
- supports-color
dev: true
/@typescript-eslint/utils@5.59.8(eslint@8.42.0)(typescript@5.1.3):
resolution: {integrity: sha512-Tr65630KysnNn9f9G7ROF3w1b5/7f6QVCJ+WK9nhIocWmx9F+TmCAcglF26Vm7z8KCTwoKcNEBZrhlklla3CKg==}
/@typescript-eslint/utils@5.59.9(eslint@8.42.0)(typescript@5.1.3):
resolution: {integrity: sha512-1PuMYsju/38I5Ggblaeb98TOoUvjhRvLpLa1DoTOFaLWqaXl/1iQ1eGurTXgBY58NUdtfTXKP5xBq7q9NDaLKg==}
engines: {node: ^12.22.0 || ^14.17.0 || >=16.0.0}
peerDependencies:
eslint: ^6.0.0 || ^7.0.0 || ^8.0.0
@ -886,9 +886,9 @@ packages:
'@eslint-community/eslint-utils': 4.4.0(eslint@8.42.0)
'@types/json-schema': 7.0.12
'@types/semver': 7.5.0
'@typescript-eslint/scope-manager': 5.59.8
'@typescript-eslint/types': 5.59.8
'@typescript-eslint/typescript-estree': 5.59.8(typescript@5.1.3)
'@typescript-eslint/scope-manager': 5.59.9
'@typescript-eslint/types': 5.59.9
'@typescript-eslint/typescript-estree': 5.59.9(typescript@5.1.3)
eslint: 8.42.0
eslint-scope: 5.1.1
semver: 7.5.1
@ -897,11 +897,11 @@ packages:
- typescript
dev: true
/@typescript-eslint/visitor-keys@5.59.8:
resolution: {integrity: sha512-pJhi2ms0x0xgloT7xYabil3SGGlojNNKjK/q6dB3Ey0uJLMjK2UDGJvHieiyJVW/7C3KI+Z4Q3pEHkm4ejA+xQ==}
/@typescript-eslint/visitor-keys@5.59.9:
resolution: {integrity: sha512-bT7s0td97KMaLwpEBckbzj/YohnvXtqbe2XgqNvTl6RJVakY5mvENOTPvw5u66nljfZxthESpDozs86U+oLY8Q==}
engines: {node: ^12.22.0 || ^14.17.0 || >=16.0.0}
dependencies:
'@typescript-eslint/types': 5.59.8
'@typescript-eslint/types': 5.59.9
eslint-visitor-keys: 3.4.1
dev: true
@ -1089,7 +1089,7 @@ packages:
pretty-ms: 8.0.0
resolve-cwd: 3.0.0
stack-utils: 2.0.6
strip-ansi: 7.0.1
strip-ansi: 7.1.0
supertap: 3.0.1
temp-dir: 3.0.0
write-file-atomic: 5.0.1
@ -1962,8 +1962,10 @@ packages:
get-intrinsic: 1.2.1
dev: true
/get-tsconfig@4.5.0:
resolution: {integrity: sha512-MjhiaIWCJ1sAU4pIQ5i5OfOuHHxVo1oYeNsWTON7jxYkod8pHocXeh+SSbmu5OZZZK73B6cbJ2XADzXehLyovQ==}
/get-tsconfig@4.6.0:
resolution: {integrity: sha512-lgbo68hHTQnFddybKbbs/RDRJnJT5YyGy2kQzVwbq+g67X73i+5MVTval34QxGkOe9X5Ujf1UYpCaphLyltjEg==}
dependencies:
resolve-pkg-maps: 1.0.0
dev: true
/glob-parent@5.1.2:
@ -2946,8 +2948,8 @@ packages:
mimic-fn: 4.0.0
dev: true
/openai-fetch@1.5.0:
resolution: {integrity: sha512-GSCt5JPNWTADNeip9DUXlkZMwwxczKBRokKLInTQbbX0t81VqTBBUYis36XZeWHQP4eWeWMLe9nGiTtciqZ1HA==}
/openai-fetch@1.5.1:
resolution: {integrity: sha512-LDSsXTFa2ssjYTZY51+B/69wXg8/UteqKyPtuFa+bMFRav7ACQXi3AJl+gieh3BF8La95NHCE0FS8t0F8fRHwA==}
dependencies:
ky: 0.33.3
zod: 3.21.4
@ -3325,6 +3327,10 @@ packages:
engines: {node: '>=8'}
dev: true
/resolve-pkg-maps@1.0.0:
resolution: {integrity: sha512-seS2Tj26TBVOC2NIc2rOe2y2ZO7efxITtLZcGSOnHHNOQ7CkiUBfw0Iw2ck6xkIhPwLhKNLS8BO+hEpngQlqzw==}
dev: true
/resolve@1.22.2:
resolution: {integrity: sha512-Sb+mjNHOULsBv818T40qSPeRiuWLyaGMa5ewydRLFimneixmVy2zdivRl+AF6jaYPC8ERxGDmFSiqui6SfPd+g==}
hasBin: true
@ -3363,8 +3369,8 @@ packages:
glob: 7.2.3
dev: true
/rollup@3.23.0:
resolution: {integrity: sha512-h31UlwEi7FHihLe1zbk+3Q7z1k/84rb9BSwmBSr/XjOCEaBJ2YyedQDuM0t/kfOS0IxM+vk1/zI9XxYj9V+NJQ==}
/rollup@3.23.1:
resolution: {integrity: sha512-ybRdFVHOoljGEFILHLd2g/qateqUdjE6YS41WXq4p3C/WwD3xtWxV4FYWETA1u9TeXQc5K8L8zHE5d/scOvrOQ==}
engines: {node: '>=14.18.0', npm: '>=8.0.0'}
hasBin: true
optionalDependencies:
@ -3385,7 +3391,7 @@ packages:
/rxjs@7.8.1:
resolution: {integrity: sha512-AA3TVj+0A2iuIoQkWEK/tqFjBq2j+6PO6Y0zJcvzLAFhEFIO3HL0vls9hWLncZbAAbK0mar7oZ4V079I/qPMxg==}
dependencies:
tslib: 2.5.2
tslib: 2.5.3
dev: true
/safe-regex-test@1.0.0:
@ -3609,7 +3615,7 @@ packages:
dependencies:
eastasianwidth: 0.2.0
emoji-regex: 9.2.2
strip-ansi: 7.0.1
strip-ansi: 7.1.0
dev: true
/string.prototype.padend@3.1.4:
@ -3652,8 +3658,8 @@ packages:
dependencies:
ansi-regex: 5.0.1
/strip-ansi@7.0.1:
resolution: {integrity: sha512-cXNxvT8dFNRVfhVME3JAe98mkXDYN2O1l7jmcwMnOslDeESg1rF/OZMtK0nRAhiari1unG5cD4jG3rapUAkLbw==}
/strip-ansi@7.1.0:
resolution: {integrity: sha512-iq6eVVI64nQQTRYq2KtEg2d2uU7LElhTJwsH4YzIHZshxlgZms/wIc4VoDQTlG/IvVIrBKG06CrZnp0qv7hkcQ==}
engines: {node: '>=12'}
dependencies:
ansi-regex: 6.0.1
@ -3707,7 +3713,7 @@ packages:
indent-string: 5.0.0
js-yaml: 3.14.1
serialize-error: 7.0.1
strip-ansi: 7.0.1
strip-ansi: 7.1.0
dev: true
/supports-color@5.5.0:
@ -3811,8 +3817,8 @@ packages:
resolution: {integrity: sha512-Xni35NKzjgMrwevysHTCArtLDpPvye8zV/0E4EyYn43P7/7qvQwPh9BGkHewbMulVntbigmcT7rdX3BNo9wRJg==}
dev: true
/tslib@2.5.2:
resolution: {integrity: sha512-5svOrSA2w3iGFDs1HibEVBGbDrAY82bFQ3HZ3ixB+88nsbsWQoKqDRb5UBYAUPEzbBn6dAp5gRNXglySbx1MlA==}
/tslib@2.5.3:
resolution: {integrity: sha512-mSxlJJwl3BMEQCUNnxXBU9jP4JBktcEGhURcPR6VQVlnP0FdDEsIaz0C35dXNGLyRfrATNofF0F5p2KPxQgB+w==}
dev: true
/tsup@6.7.0(typescript@5.1.3):
@ -3841,7 +3847,7 @@ packages:
joycon: 3.1.1
postcss-load-config: 3.1.4
resolve-from: 5.0.0
rollup: 3.23.0
rollup: 3.23.1
source-map: 0.8.0-beta.0
sucrase: 3.32.0
tree-kill: 1.2.2

Wyświetl plik

@ -1,4 +1,5 @@
import { jsonrepair } from 'jsonrepair'
import pMap from 'p-map'
import { dedent } from 'ts-dedent'
import { type SetRequired } from 'type-fest'
import { ZodRawShape, ZodTypeAny, z } from 'zod'
@ -7,6 +8,11 @@ import { printNode, zodToTs } from 'zod-to-ts'
import * as types from './types'
import { BaseTask } from './task'
import { getCompiledTemplate } from './template'
import {
Tokenizer,
getModelNameForTiktoken,
getTokenizerForModel
} from './tokenizer'
import {
extractJSONArrayFromString,
extractJSONObjectFromString
@ -24,6 +30,7 @@ export abstract class BaseLLM<
protected _model: string
protected _modelParams: TModelParams | undefined
protected _examples: types.LLMExample[] | undefined
protected _tokenizer?: Tokenizer | null
constructor(
options: SetRequired<
@ -86,6 +93,30 @@ export abstract class BaseLLM<
this._modelParams = { ...this._modelParams, ...params } as TModelParams
return this
}
/**
 * Counts the number of tokens in `text` for this instance's model.
 *
 * The tokenizer is resolved on first use and stored on the instance:
 * `undefined` means "not attempted yet", `null` means "attempted and failed".
 * Once initialization has failed, every call uses a rough estimate of one
 * token per four characters instead of retrying.
 *
 * @param text - The text to measure.
 * @returns The exact token count when a tokenizer is available, otherwise
 *   the approximate count.
 */
async getNumTokens(text: string): Promise<number> {
  if (this._tokenizer === undefined) {
    const model = this._model || 'gpt2'
    let loaded: Tokenizer | null = null

    try {
      loaded = await getTokenizerForModel(model)
    } catch (error) {
      console.warn(
        `Failed to initialize tokenizer for model "${model}", falling back to approximate count`,
        error
      )
    }

    // Stays `null` when loading failed, so the attempt is not repeated.
    this._tokenizer = loaded
  }

  const tokenizer = this._tokenizer

  // fallback to approximate calculation if tokenizer is not available
  return tokenizer
    ? tokenizer.encode(text).length
    : Math.ceil(text.length / 4)
}
}
export abstract class BaseChatModel<
@ -111,9 +142,7 @@ export abstract class BaseChatModel<
messages: types.ChatMessage[]
): Promise<types.BaseChatCompletionResponse<TChatCompletionResponse>>
protected override async _call(
input?: types.ParsedData<TInput>
): Promise<types.TaskResponse<TOutput>> {
public async buildMessages(input?: types.ParsedData<TInput>) {
if (this._inputSchema) {
const inputSchema =
this._inputSchema instanceof z.ZodType
@ -183,6 +212,14 @@ export abstract class BaseChatModel<
// TODO: filter/compress messages based on token counts
return messages
}
protected override async _call(
input?: types.ParsedData<TInput>
): Promise<types.TaskResponse<TOutput>> {
const messages = await this.buildMessages(input)
console.log('>>>')
console.log(messages)
@ -244,13 +281,69 @@ export abstract class BaseChatModel<
return {
result: outputSchema.parse(output),
metadata: {}
metadata: {
input,
messages,
completion
}
}
} else {
return {
result: output,
metadata: {}
metadata: {
input,
messages,
completion
}
}
}
}
/**
 * Estimates how many tokens a list of chat messages will consume.
 *
 * Each message costs a fixed per-message overhead plus the tokens of its
 * `content` and `role`; when a `name` is present, its tokens plus a per-name
 * adjustment are added as well. The overheads depend on the normalized model
 * name and are zero for models that are not recognized here.
 *
 * @param messages - The chat messages to measure.
 * @returns `numTokensPerMessage` holds one count per input message, in input
 *   order; `numTokensTotal` is their sum plus 3 tokens for the reply primer.
 */
async getNumTokensForMessages(messages: types.ChatMessage[]): Promise<{
  numTokensTotal: number
  numTokensPerMessage: number[]
}> {
  const modelName = getModelNameForTiktoken(this._model)

  // [tokensPerMessage, tokensPerName]
  // TODO: overheads for other models (currently both zero)
  const [tokensPerMessage, tokensPerName] =
    modelName === 'gpt-3.5-turbo'
      ? [4, -1]
      : modelName.startsWith('gpt-4')
      ? [3, 1]
      : [0, 0]

  // A missing or empty name contributes nothing, not even the adjustment.
  const countNameTokens = async (name?: string): Promise<number> => {
    if (!name) {
      return 0
    }

    return (await this.getNumTokens(name)) + tokensPerName
  }

  const numTokensPerMessage = await pMap(
    messages,
    async ({ content, role, name }) => {
      const parts = await Promise.all([
        this.getNumTokens(content),
        this.getNumTokens(role),
        countNameTokens(name)
      ])

      return parts.reduce((sum, count) => sum + count, tokensPerMessage)
    },
    {
      concurrency: 8
    }
  )

  // every reply is primed with <|start|>assistant<|message|>
  const numTokensTotal = numTokensPerMessage.reduce(
    (sum, count) => sum + count,
    3
  )

  return { numTokensTotal, numTokensPerMessage }
}
}

Wyświetl plik

@ -53,7 +53,8 @@ export abstract class BaseTask<
/**
 * Runs this task on `input` and resolves with only the `result` field of the
 * task response.
 *
 * NOTE(review): as shown here, the statements after the first `return` are
 * unreachable. This looks like a removed line and its replacement rendered
 * together; confirm which variant (direct `_call` vs. `callWithMetadata`) is
 * the intended one.
 *
 * @param input - Optional task input.
 * @returns The `result` of the task response.
 */
public async call(
input?: types.ParsedData<TInput>
): Promise<types.ParsedData<TOutput>> {
return this._call(input).then((response) => response.result)
const res = await this.callWithMetadata(input)
return res.result
}
public async callWithMetadata(

Wyświetl plik

@ -1,6 +1,176 @@
import { getEncoding, getEncodingNameForModel } from 'js-tiktoken'
import {
Tiktoken,
TiktokenBPE,
TiktokenEncoding,
TiktokenModel,
getEncodingNameForModel
} from 'js-tiktoken/lite'
import ky from 'ky'
import pMemoize from 'p-memoize'
export function getTokenizerForModel(model: string) {
const encodingName = getEncodingNameForModel(model as any)
return getEncoding(encodingName)
/**
 * Minimal tokenizer contract: converts text to token ids and back.
 */
export interface Tokenizer {
  /**
   * Converts `text` into a list of token ids.
   *
   * @param text - The text to tokenize.
   * @param options - Optional lists of special tokens to allow or disallow;
   *   either list may be replaced by the string `'all'`.
   * @returns The token ids for `text`.
   */
  encode(
    text: string,
    options?: {
      allowedSpecial?: string[] | 'all'
      disallowedSpecial?: string[] | 'all'
    }
  ): number[]

  /**
   * Converts a list of token ids back into text.
   *
   * @param tokens - The token ids to decode.
   * @returns The decoded string.
   */
  decode(tokens: number[]): string
}
/**
 * `Tokenizer` implementation that delegates to a `Tiktoken` instance.
 */
export class TiktokenTokenizer implements Tokenizer {
  /**
   * @param _tiktoken - The underlying encoder that every call is forwarded to.
   */
  constructor(protected _tiktoken: Tiktoken) {}

  /**
   * Encodes `text` with the wrapped encoder.
   *
   * @param text - The text to tokenize.
   * @param options - Optional special-token settings, forwarded positionally
   *   as `(allowedSpecial, disallowedSpecial)`.
   * @returns The token ids produced by the wrapped encoder.
   */
  encode(
    text: string,
    options?: {
      allowedSpecial?: Array<string> | 'all'
      disallowedSpecial?: Array<string> | 'all'
    }
  ): number[] {
    const { allowedSpecial, disallowedSpecial } = options ?? {}

    return this._tiktoken.encode(text, allowedSpecial, disallowedSpecial)
  }

  /**
   * Decodes token ids with the wrapped encoder.
   *
   * @param tokens - The token ids to decode.
   * @returns The decoded string.
   */
  decode(tokens: number[]): string {
    const decoded = this._tiktoken.decode(tokens)
    return decoded
  }
}
/**
 * Downloads the BPE rank data for `encoding` from `tiktoken.pages.dev`.
 *
 * @param encoding - Name of the encoding to download.
 * @param options - Optional abort `signal` and request timeout in
 *   milliseconds (defaults to 30000).
 * @returns The parsed JSON body of the response.
 */
async function getTiktokenBPEImpl(
  encoding: TiktokenEncoding,
  options: {
    signal?: AbortSignal
    timeoutMs?: number
  } = {}
) {
  const { signal, timeoutMs = 30000 } = options
  const url = `https://tiktoken.pages.dev/js/${encoding}.json`
  const response = ky(url, { signal, timeout: timeoutMs })

  return response.json<TiktokenBPE>()
}

/**
 * Memoized variant of the BPE download.
 *
 * NOTE(review): p-memoize presumably keys its cache on the first argument
 * only, so `signal` / `timeoutMs` passed on later calls for an
 * already-cached encoding would be ignored — confirm against the p-memoize
 * docs.
 */
export const getTiktokenBPE = pMemoize(getTiktokenBPEImpl)
/**
 * Builds a tokenizer for a named encoding.
 *
 * @param encoding - The encoding whose rank data should be loaded.
 * @param options - Passed through to the rank-data loader;
 *   `extendedSpecialTokens` is additionally handed to the `Tiktoken`
 *   constructor.
 * @returns A `TiktokenTokenizer` wrapping the newly created encoder.
 */
export async function getTokenizerForEncoding(
  encoding: TiktokenEncoding,
  options?: {
    signal?: AbortSignal
    extendedSpecialTokens?: Record<string, number>
    timeoutMs?: number
  }
): Promise<TiktokenTokenizer> {
  const ranks = await getTiktokenBPE(encoding, options)

  return new TiktokenTokenizer(
    new Tiktoken(ranks, options?.extendedSpecialTokens)
  )
}
/**
 * Builds a tokenizer for a model name.
 *
 * The name is first normalized with `getModelNameForTiktoken`, then mapped to
 * its encoding, and finally resolved through `getTokenizerForEncoding`.
 *
 * @param model - Model name, possibly carrying a version suffix.
 * @param options - Forwarded unchanged to `getTokenizerForEncoding`.
 * @returns The tokenizer for the model's encoding.
 */
export async function getTokenizerForModel(
  model: string,
  options?: {
    signal?: AbortSignal
    extendedSpecialTokens?: Record<string, number>
    timeoutMs?: number
  }
) {
  const encodingName = getEncodingNameForModel(getModelNameForTiktoken(model))

  return getTokenizerForEncoding(encodingName, options)
}
/**
 * Normalizes a model name to the base name used for encoding and
 * context-size lookups.
 *
 * Versioned names such as `gpt-4-0314` collapse to their family name. The
 * 32k check must run before the generic `gpt-4-` check and must also match
 * the bare name `gpt-4-32k`; otherwise that name would fall through to the
 * `gpt-4-` prefix and be reported as plain `gpt-4`.
 *
 * @param modelName - Model name, possibly carrying a version suffix.
 * @returns The normalized name, or `modelName` unchanged when no rule
 *   applies.
 */
export function getModelNameForTiktoken(modelName: string): TiktokenModel {
  if (modelName.startsWith('gpt-3.5-turbo-')) {
    return 'gpt-3.5-turbo'
  }

  // Covers both `gpt-4-32k` and versioned `gpt-4-32k-*` names.
  if (modelName.startsWith('gpt-4-32k')) {
    return 'gpt-4-32k'
  }

  if (modelName.startsWith('gpt-4-')) {
    return 'gpt-4'
  }

  return modelName as TiktokenModel
}
/**
 * Returns the context size used for an embedding model.
 *
 * @param modelName - Embedding model name; may be omitted.
 * @returns 8191 for `text-embedding-ada-002`, 2046 for anything else
 *   (including an omitted name).
 */
export function getContextSizeForEmbedding(modelName?: string): number {
  return modelName === 'text-embedding-ada-002' ? 8191 : 2046
}
// Context sizes keyed by normalized model name.
const contextSizeByModelName = new Map<string, number>([
  ['gpt-3.5-turbo', 4096],
  ['gpt-4-32k', 32768],
  ['gpt-4', 8192],
  ['text-davinci-003', 4097],
  ['text-curie-001', 2048],
  ['text-babbage-001', 2048],
  ['text-ada-001', 2048],
  ['code-davinci-002', 8000],
  ['code-cushman-001', 2048]
])

/**
 * Returns the context size (in tokens) for a model.
 *
 * The name is normalized with `getModelNameForTiktoken` before the lookup.
 *
 * @param model - Model name, possibly carrying a version suffix.
 * @returns The context size for the model, or 4097 when the normalized name
 *   is not in the table.
 */
export function getContextSizeForModel(model: string): number {
  const modelName = getModelNameForTiktoken(model)

  return contextSizeByModelName.get(modelName) ?? 4097
}
/**
 * Computes how many tokens remain in a model's context window after
 * `prompt` has been accounted for.
 *
 * The prompt is measured with the model's tokenizer. If the tokenizer cannot
 * be loaded or fails while encoding, a warning is logged and the prompt is
 * estimated at one token per four characters instead.
 *
 * @param prompt - The prompt text to measure.
 * @param modelName - The model whose tokenizer and context size are used.
 * @returns The model's context size minus the prompt's token count. The
 *   value is not clamped, so it is negative when the prompt is larger than
 *   the context window.
 */
export const calculateMaxTokens = async ({
  prompt,
  modelName
}: {
  prompt: string
  modelName: string
}): Promise<number> => {
  let numTokens: number

  try {
    const tokenizer = await getTokenizerForModel(modelName)
    numTokens = tokenizer.encode(prompt).length
  } catch (err: unknown) {
    // `String(err)` is safe for any thrown value (including null/undefined),
    // so logging can never throw and defeat the fallback below.
    console.warn(
      `calculateMaxTokens error for model "${modelName}", falling back to approximate count`,
      String(err)
    )

    // fallback to approximate calculation if tiktoken is not available
    numTokens = Math.ceil(prompt.length / 4)
  }

  const maxTokens = getContextSizeForModel(modelName)
  return maxTokens - numTokens
}

Wyświetl plik

@ -75,6 +75,7 @@ export type ChatMessageRole = z.infer<typeof ChatMessageRoleSchema>
export interface ChatMessage {
role: ChatMessageRole
content: string
name?: string
}
export interface ChatModelOptions<