From 16736075fa90b449b84ebd67937649791aa01853 Mon Sep 17 00:00:00 2001 From: Philipp Burckhardt Date: Mon, 19 Jun 2023 20:23:27 -0400 Subject: [PATCH 1/5] feat: switch zod-to-ts to zod-to-json-schema --- legacy/package.json | 1 - legacy/pnpm-lock.yaml | 14 +------------- legacy/src/llms/chat.ts | 30 ++++++++++-------------------- legacy/src/utils.ts | 14 +++++++++++--- 4 files changed, 22 insertions(+), 37 deletions(-) diff --git a/legacy/package.json b/legacy/package.json index c26ffeda..1f3b415d 100644 --- a/legacy/package.json +++ b/legacy/package.json @@ -65,7 +65,6 @@ "uuid": "^9.0.0", "zod": "^3.21.4", "zod-to-json-schema": "^3.21.2", - "zod-to-ts": "^1.1.4", "zod-validation-error": "^1.3.0" }, "devDependencies": { diff --git a/legacy/pnpm-lock.yaml b/legacy/pnpm-lock.yaml index f3c94b7d..755d437b 100644 --- a/legacy/pnpm-lock.yaml +++ b/legacy/pnpm-lock.yaml @@ -86,9 +86,6 @@ dependencies: zod-to-json-schema: specifier: ^3.21.2 version: 3.21.2(zod@3.21.4) - zod-to-ts: - specifier: ^1.1.4 - version: 1.1.4(typescript@5.1.3)(zod@3.21.4) zod-validation-error: specifier: ^1.3.0 version: 1.3.0(zod@3.21.4) @@ -4448,6 +4445,7 @@ packages: resolution: {integrity: sha512-XH627E9vkeqhlZFQuL+UsyAXEnibT0kWR2FWONlr4sTjvxyJYnyefgrkyECLzM5NenmKzRAy2rR/OlYLA1HkZw==} engines: {node: '>=14.17'} hasBin: true + dev: true /uglify-js@3.17.4: resolution: {integrity: sha512-T9q82TJI9e/C1TAxYvfb16xO120tMVFZrGA3f9/P4424DNu6ypK103y0GPFVa17yotwSyZW5iYXgjYHkGrJW/g==} @@ -4645,16 +4643,6 @@ packages: zod: 3.21.4 dev: false - /zod-to-ts@1.1.4(typescript@5.1.3)(zod@3.21.4): - resolution: {integrity: sha512-jsCg+pTNxLAdJOfW4ul+SpechdGYEJPPnssSbqWdR2LSIkotT22k+UvqPb1nEHwe/YbEcbUOlZUfGM0npgR+Jg==} - peerDependencies: - typescript: ^4.9.4 || ^5.0.2 - zod: ^3 - dependencies: - typescript: 5.1.3 - zod: 3.21.4 - dev: false - /zod-validation-error@1.3.0(zod@3.21.4): resolution: {integrity: sha512-4WoQnuWnj06kwKR4A+cykRxFmy+CTvwMQO5ogTXLiVx1AuvYYmMjixh7sbkSsQTr1Fvtss6d5kVz8PGeMPUQjQ==} engines: {node: '>=16.0.0'} diff --git a/legacy/src/llms/chat.ts b/legacy/src/llms/chat.ts index 5f36a95a..886380d4 100644 --- a/legacy/src/llms/chat.ts +++ b/legacy/src/llms/chat.ts @@ -2,7 +2,7 @@ import { JSONRepairError, jsonrepair } from 'jsonrepair' import { dedent } from 'ts-dedent' import { type SetRequired } from 'type-fest' import { ZodType, z } from 'zod' -import { printNode, zodToTs } from 'zod-to-ts' +import { zodToJsonSchema } from 'zod-to-json-schema' import * as errors from '@/errors' import * as types from '@/types' @@ -275,22 +275,12 @@ export abstract class BaseChatCompletion< return null } - // TODO: replace zod-to-ts with zod-to-json-schema? - const { node } = zodToTs(outputSchema) - - if (node.kind === 152) { - // Handle raw strings differently: - return dedent`Output a raw string only, without any additional text.` - } - - const tsTypeString = printNode(node, { - removeComments: false, - // TODO: this doesn't seem to actually work, so we're doing it manually below - omitTrailingSemicolon: true, - noEmitHelpers: true - }) - .replace(/^ {4}/gm, ' ') - .replace(/;$/gm, '') + const schema = zodToJsonSchema(outputSchema) as types.Jsonifiable + const schemaStr = stringifyForModel(schema, [ + 'default', + 'additionalProperties', + '$schema' + ]) let label: string if (outputSchema instanceof z.ZodArray) { label = 'JSON array (minified)' @@ -306,9 +296,9 @@ export abstract class BaseChatCompletion< label = 'JSON value' } - return dedent`Do not output code. Output a single ${label} in the following TypeScript format: - \`\`\`ts - ${tsTypeString} + return dedent`Do not output code. Output a single ${label} according to the following JSON Schema: + \`\`\`json + ${schemaStr} \`\`\`` } diff --git a/legacy/src/utils.ts b/legacy/src/utils.ts index 982a7f68..61562561 100644 --- a/legacy/src/utils.ts +++ b/legacy/src/utils.ts @@ -1,5 +1,6 @@ import { customAlphabet, urlAlphabet } from 'nanoid' import type { ThrottledFunction } from 'p-throttle' +import { JsonValue } from 'type-fest' import * as types from './types' @@ -132,7 +133,10 @@ export function chunkMultipleStrings( * @param json - JSON value to stringify * @returns stringified value with all double quotes around object keys removed */ -export function stringifyForModel(json: types.Jsonifiable): string { +export function stringifyForModel( + json: types.Jsonifiable, + omit: string[] = [] +): string { const UNIQUE_PREFIX = defaultIDGeneratorFn() return ( JSON.stringify(json, replacer) @@ -143,7 +147,11 @@ export function stringifyForModel(json: types.Jsonifiable): string { /** * Replacer function prefixing all keys with a unique identifier. */ - function replacer(_: string, value: any) { + function replacer(key: string, value: JsonValue) { + if (omit.includes(key)) { + return undefined + } + if (value && typeof value === 'object') { if (Array.isArray(value)) { return value @@ -152,7 +160,7 @@ export function stringifyForModel(json: types.Jsonifiable): string { const replacement = {} for (const k in value) { - if (Object.hasOwnProperty.call(value, k)) { + if (Object.hasOwnProperty.call(value, k) && !omit.includes(k)) { replacement[UNIQUE_PREFIX + k] = value[k] } } From f37ec4052e014346d4d9e2f4cc2d7d86eff971f8 Mon Sep 17 00:00:00 2001 From: Philipp Burckhardt Date: Tue, 20 Jun 2023 10:23:46 -0400 Subject: [PATCH 2/5] refactor: move parse functions to own file and add tests --- legacy/src/llms/chat.ts | 117 +-------- legacy/src/llms/parse-output.ts | 161 +++++++++++++ .../.snapshots/test/llms/parse-output.ts.md | 226 ++++++++++++++++++ .../.snapshots/test/llms/parse-output.ts.snap | Bin 0 -> 1347 bytes legacy/test/llms/parse-output.ts | 193 +++++++++++++++ 5 files changed, 583 insertions(+), 114 deletions(-) create mode 100644 legacy/src/llms/parse-output.ts create mode 100644 legacy/test/.snapshots/test/llms/parse-output.ts.md create mode 100644 legacy/test/.snapshots/test/llms/parse-output.ts.snap create mode 100644 legacy/test/llms/parse-output.ts diff --git a/legacy/src/llms/chat.ts b/legacy/src/llms/chat.ts index 886380d4..7e6cb6e0 100644 --- a/legacy/src/llms/chat.ts +++ b/legacy/src/llms/chat.ts @@ -6,14 +6,10 @@ import { zodToJsonSchema } from 'zod-to-json-schema' import * as errors from '@/errors' import * as types from '@/types' +import { parseOutput } from '@/llms/parse-output' import { BaseTask } from '@/task' import { getCompiledTemplate } from '@/template' -import { - extractFunctionIdentifierFromString, - extractJSONArrayFromString, - extractJSONObjectFromString, - stringifyForModel -} from '@/utils' +import { extractFunctionIdentifierFromString, stringifyForModel } from '@/utils' import { BaseLLM } from './llm' import { @@ -21,113 +17,6 @@ import { getNumTokensForChatMessages } from './llm-utils' -const BOOLEAN_OUTPUTS = { - true: true, - false: false, - t: true, - f: false, - yes: true, - no: false, - y: true, - n: false, - '1': true, - '0': false -} - -function parseArrayOutput(output: string): Array { - try { - const trimmedOutput = extractJSONArrayFromString(output) - const parsedOutput = JSON.parse(jsonrepair(trimmedOutput ?? output)) - return parsedOutput - } catch (err: any) { - if (err instanceof JSONRepairError) { - throw new errors.OutputValidationError(err.message, { cause: err }) - } else if (err instanceof SyntaxError) { - throw new errors.OutputValidationError( - `Invalid JSON array: ${err.message}`, - { cause: err } - ) - } else { - throw err - } - } -} - -function parseObjectOutput(output) { - try { - const trimmedOutput = extractJSONObjectFromString(output) - output = JSON.parse(jsonrepair(trimmedOutput ?? output)) - - if (Array.isArray(output)) { - // TODO - output = output[0] - } - - return output - } catch (err: any) { - if (err instanceof JSONRepairError) { - throw new errors.OutputValidationError(err.message, { cause: err }) - } else if (err instanceof SyntaxError) { - throw new errors.OutputValidationError( - `Invalid JSON object: ${err.message}`, - { cause: err } - ) - } else { - throw err - } - } -} - -function parseBooleanOutput(output): boolean { - output = output - .toLowerCase() - .trim() - .replace(/[.!?]+$/, '') - - const booleanOutput = BOOLEAN_OUTPUTS[output] - - if (booleanOutput !== undefined) { - return booleanOutput - } else { - throw new errors.OutputValidationError(`Invalid boolean output: ${output}`) - } -} - -function parseNumberOutput(output, outputSchema: z.ZodNumber): number { - output = output.trim() - - const numberOutput = outputSchema.isInt - ? parseInt(output) - : parseFloat(output) - - if (isNaN(numberOutput)) { - throw new errors.OutputValidationError(`Invalid number output: ${output}`) - } - - return numberOutput -} - -function parseOutput(output: any, outputSchema: ZodType) { - if (outputSchema instanceof z.ZodArray) { - output = parseArrayOutput(output) - } else if (outputSchema instanceof z.ZodObject) { - output = parseObjectOutput(output) - } else if (outputSchema instanceof z.ZodBoolean) { - output = parseBooleanOutput(output) - } else if (outputSchema instanceof z.ZodNumber) { - output = parseNumberOutput(output, outputSchema) - } - - // TODO: fix typescript issue here with recursive types - const safeResult = (outputSchema.safeParse as any)(output) - - if (!safeResult.success) { - throw new errors.ZodOutputValidationError(safeResult.error) - } - - return safeResult.data -} - export abstract class BaseChatCompletion< TInput extends types.TaskInput = void, TOutput extends types.TaskOutput = string, @@ -462,7 +351,7 @@ export abstract class BaseChatCompletion< // console.log('<<<') if (this._outputSchema) { - return parseOutput(output, this._outputSchema) + return parseOutput(output as string, this._outputSchema) } else { return output } diff --git a/legacy/src/llms/parse-output.ts b/legacy/src/llms/parse-output.ts new file mode 100644 index 00000000..8fbda33c --- /dev/null +++ b/legacy/src/llms/parse-output.ts @@ -0,0 +1,161 @@ +import { JSONRepairError, jsonrepair } from 'jsonrepair' +import { ZodType, z } from 'zod' + +import * as errors from '@/errors' +import { + extractJSONArrayFromString, + extractJSONObjectFromString +} from '@/utils' + +const BOOLEAN_OUTPUTS = { + true: true, + false: false, + t: true, + f: false, + yes: true, + no: false, + y: true, + n: false, + '1': true, + '0': false +} + +/** + * Parses an array output from a string. + * + * @param output - string to parse + * @returns parsed array + */ +export function parseArrayOutput(output: string): Array { + try { + const trimmedOutput = extractJSONArrayFromString(output) + const parsedOutput = JSON.parse(jsonrepair(trimmedOutput ?? output)) + if (!Array.isArray(parsedOutput)) { + throw new errors.OutputValidationError( + `Invalid JSON array: ${JSON.stringify(parsedOutput)}` + ) + } + + return parsedOutput + } catch (err: any) { + if (err instanceof JSONRepairError) { + throw new errors.OutputValidationError(err.message, { cause: err }) + } else if (err instanceof SyntaxError) { + throw new errors.OutputValidationError( + `Invalid JSON array: ${err.message}`, + { cause: err } + ) + } else { + throw err + } + } +} + +/** + * Parses an object output from a string. + * + * @param output - string to parse + * @returns parsed object + */ +export function parseObjectOutput(output: string) { + try { + const trimmedOutput = extractJSONObjectFromString(output) + output = JSON.parse(jsonrepair(trimmedOutput ?? output)) + + if (Array.isArray(output)) { + // TODO + output = output[0] + } else if (typeof output !== 'object') { + throw new errors.OutputValidationError( + `Invalid JSON object: ${JSON.stringify(output)}` + ) + } + + return output + } catch (err: any) { + if (err instanceof JSONRepairError) { + throw new errors.OutputValidationError(err.message, { cause: err }) + } else if (err instanceof SyntaxError) { + throw new errors.OutputValidationError( + `Invalid JSON object: ${err.message}`, + { cause: err } + ) + } else { + throw err + } + } +} + +/** + * Parses a boolean output from a string. + * + * @param output - string to parse + * @returns parsed boolean + */ +export function parseBooleanOutput(output: string): boolean { + output = output + .toLowerCase() + .trim() + .replace(/[.!?]+$/, '') + + const booleanOutput = BOOLEAN_OUTPUTS[output] + + if (booleanOutput !== undefined) { + return booleanOutput + } else { + throw new errors.OutputValidationError(`Invalid boolean output: ${output}`) + } +} + +/** + * Parses a number output from a string. + * + * @param output - string to parse + * @param outputSchema - zod number schema + * @returns parsed number + */ +export function parseNumberOutput( + output: string, + outputSchema: z.ZodNumber +): number { + output = output.trim() + + const numberOutput = outputSchema.isInt + ? parseInt(output) + : parseFloat(output) + + if (isNaN(numberOutput)) { + throw new errors.OutputValidationError(`Invalid number output: ${output}`) + } + + return numberOutput +} + +/** + * Parses an output value from a string. + * + * @param output - string to parse + * @param outputSchema - zod schema + * @returns parsed output + */ +export function parseOutput(output: string, outputSchema: ZodType) { + let result + if (outputSchema instanceof z.ZodArray) { + result = parseArrayOutput(output) + } else if (outputSchema instanceof z.ZodObject) { + result = parseObjectOutput(output) + } else if (outputSchema instanceof z.ZodBoolean) { + result = parseBooleanOutput(output) + } else if (outputSchema instanceof z.ZodNumber) { + result = parseNumberOutput(output, outputSchema) + } + + // TODO: fix typescript issue here with recursive types + const safeResult = (outputSchema.safeParse as any)(result) + + if (!safeResult.success) { + throw new errors.ZodOutputValidationError(safeResult.error) + } + + return safeResult.data +} diff --git a/legacy/test/.snapshots/test/llms/parse-output.ts.md b/legacy/test/.snapshots/test/llms/parse-output.ts.md new file mode 100644 index 00000000..54a5033e --- /dev/null +++ b/legacy/test/.snapshots/test/llms/parse-output.ts.md @@ -0,0 +1,226 @@ +# Snapshot report for `test/llms/parse-output.ts` + +The actual snapshot is saved in `parse-output.ts.snap`. + +Generated by [AVA](https://avajs.dev). + +## parseArrayOutput - handles valid arrays correctly + +> should return [1, 2, 3] for "[1,2,3]" + + [ + 1, + 2, + 3, + ] + +> should return ["a", "b", "c"] for "["a", "b", "c"] + + [ + 'a', + 'b', + 'c', + ] + +> should return [{"a": 1}, {"b": 2}] for [{"a": 1}, {"b": 2}] + + [ + { + a: 1, + }, + { + b: 2, + }, + ] + +## parseArrayOutput - handles arrays surrounded by text correctly + +> should return [1, 2, 3] for "The array is [1,2,3]" + + [ + 1, + 2, + 3, + ] + +> should return ["a", "b", "c"] for "Array: ["a", "b", "c"]. That's all!" + + [ + 'a', + 'b', + 'c', + ] + +> should return [{"a": 1}, {"b": 2}] for "This is the array [{"a": 1}, {"b": 2}] in the text" + + [ + { + a: 1, + }, + { + b: 2, + }, + ] + +## parseArrayOutput - handles and repairs broken JSON arrays correctly + +> should repair and return [1, "two", 3] for [1, "two, 3] + + [ + 1, + 'two, 3]', + ] + +> should repair and return ["a", "b", "c"] for Array: ["a, "b", "c"]. Error here! + + [ + 'a, ', + 'b', + ', ', + 'c', + ']', + ] + +> should repair and return {"arr": ["value1", "value2"]} for Array in text {"arr": ["value1, "value2"]} + + [ + 'value1, ', + 'value2', + ']', + ] + +## parseArrayOutput - throws error for invalid arrays + +> Snapshot 1 + + 'Invalid JSON array: "not a valid array"' + +## parseObjectOutput - handles valid objects correctly + +> should return {"a":1,"b":2,"c":3} for {"a":1,"b":2,"c":3} + + { + a: 1, + b: 2, + c: 3, + } + +> should return {"name":"John","age":30,"city":"New York"} for {"name":"John","age":30,"city":"New York"} + + { + age: 30, + city: 'New York', + name: 'John', + } + +## parseObjectOutput - handles objects surrounded by text correctly + +> should return {"a":1,"b":2,"c":3} for "The object is {"a":1,"b":2,"c":3}" + + { + a: 1, + b: 2, + c: 3, + } + +> should return {"name":"John","age":30,"city":"New York"} for "Object: {"name":"John","age":30,"city":"New York"}. That's all!" + + { + age: 30, + city: 'New York', + name: 'John', + } + +## parseObjectOutput - handles and repairs broken JSON objects correctly + +> should repair and return {"a":1, "b":2, "c":3} for {"a":1, "b":2, "c":3 + + { + a: 1, + b: 2, + c: 3, + } + +> should repair and return {"name":"John","age":30,"city":"New York"} for Object: {"name":"John,"age":30,"city":"New York"}. Error here! + + { + 'New York': '}', + age: ':30,', + city: ':', + name: 'John,', + } + +## parseObjectOutput - handles JSON array of objects + +> should return first object {"a":1,"b":2} for [{"a":1,"b":2},{"c":3,"d":4}] + + { + a: 1, + b: 2, + } + +## parseObjectOutput - throws error for invalid objects + +> Snapshot 1 + + 'Invalid JSON object: "not a valid object"' + +## parseBooleanOutput - handles `true` outputs correctly + +> should return true for "True" + + true + +> should return true for "TRUE" + + true + +> should return true for "true." + + true + +## parseBooleanOutput - handles `false` outputs correctly + +> should return false for "False" + + false + +> should return false for "FALSE" + + false + +> should return false for "false!" + + false + +## parseBooleanOutput - throws error for invalid outputs + +> Snapshot 1 + + 'Invalid boolean output: notbooleanvalue' + +## parseNumberOutput - handles integer outputs correctly + +> should return 42 for "42" + + 42 + +> should return -5 for " -5 " + + -5 + +## parseNumberOutput - handles float outputs correctly + +> should return 42.42 for "42.42" + + 42.42 + +> should return -5.5 for " -5.5 " + + -5.5 + +## parseNumberOutput - throws error for invalid outputs + +> Snapshot 1 + + 'Invalid number output: NotANumber' diff --git a/legacy/test/.snapshots/test/llms/parse-output.ts.snap b/legacy/test/.snapshots/test/llms/parse-output.ts.snap new file mode 100644 index 0000000000000000000000000000000000000000..67e98c66e7e6b24abbdf02344c0463fb1d784da6 GIT binary patch literal 1347 zcmV-J1-$w}RzVWr1KCRqjlPSd=Qq3K2k%Rj*r5BwASF}yH)w|if{JD=et6;U>~o}JzM z&CG9R$9X!iY`1sl{qT(P5x42d>XFGjy3Uw+x-;fSV~$tw&~*AX_3&4wJ?djqta-TS zGDdscJ{@_EdE^aU?tS~L-#5AWgTjwfa)E18Zb6t?7?^iEi9vk{{n>6tl4Z< z&5rW5(RE)2DrG zonlT;_<198njuLd-x<=t6g=|qxncFq60G)LXr$NL_|DMemmog2-6H=Fs_&JgIs_Yn z48J@KdZzBk@ihdQiAsf!i&DW6?>;g|%)=IQ52=GUcXzf+6!!aBfw@s4Fh#ilE?VTr zuHd+I(ACgE0;I$nW&;6b5@v*w2!TZTTMXsGhw!`(;~tC=j0ub%XCZE+^Py&HxJ*V= zBehzTB}!BeMat5hd`mNOC`zG325mq|V|YjzZAlM2&Z-2vFb?Og1Q+1aAJf$g^B4RT zeYz?r<7lpNz+%RTup~HLm=c^u_9v04KC?xMPV2|j731{`#`7U_k3CF%2Z>`xPP~VS z2>#8}z;WHsagE9{x^zpbN9kc;;D-Y6E3PoZNp+YnQuLX-PgJJLz z5Ub;fGQO<3qRO=*swk3_YgW!FZ*G<2jo;A(28*jMLybI7JbL_}ctir!#vJ=n4V0wU zMiIU8?*HenHr*_`VFZtGKqNd}s1hsEx<6yqH4NEvS}>l#_;t30^%UNaJ)r@UJRS-f zi2xUo5gA1+YzV4o(lOr^IA$(JW)iZPkMVXS@lS;uI!2c(b_a@tv8!1I=V83})ZzIa zjL&8VWr{?zX(yeynat$@&uJ24{U`ns+Fd6+8aryiKAql zhZ*@d`)=ReDA;!~Z=aa8LyNBzTRbprZ*G&9B!o9pJP~IF7;cDZ2KW*?*SB`F%vL*b z?oNN&ni1-J$^{-M-5FcK!E&F0fpk*jH|R+e+v7(TWmDcCIh-C)R^)wo|9l#PfKvO8 zbj52)S9J8i4;`IkZ2DVndOf*mr5kL*VwURXb5IX#*W@KI@5Wxgh#DQe-O;nd?=LCv z?GXR%!~yk{Zo4b@x6($mLoK_bwS=3$@ F006kQgMI)2 literal 0 HcmV?d00001 diff --git a/legacy/test/llms/parse-output.ts b/legacy/test/llms/parse-output.ts new file mode 100644 index 00000000..94ee932c --- /dev/null +++ b/legacy/test/llms/parse-output.ts @@ -0,0 +1,193 @@ +import test from 'ava' +import { z } from 'zod' + +import { + parseArrayOutput, + parseBooleanOutput, + parseNumberOutput, + parseObjectOutput +} from '@/llms/parse-output' + +test('parseArrayOutput - handles valid arrays correctly', (t) => { + const output1 = parseArrayOutput('[1,2,3]') + const output2 = parseArrayOutput('["a", "b", "c"]') + const output3 = parseArrayOutput('[{"a": 1}, {"b": 2}]') + + t.snapshot(output1, 'should return [1, 2, 3] for "[1,2,3]"') + t.snapshot(output2, 'should return ["a", "b", "c"] for "["a", "b", "c"]') + t.snapshot( + output3, + 'should return [{"a": 1}, {"b": 2}] for [{"a": 1}, {"b": 2}]' + ) +}) + +test('parseArrayOutput - handles arrays surrounded by text correctly', (t) => { + const output1 = parseArrayOutput('The array is [1,2,3]') + const output2 = parseArrayOutput('Array: ["a", "b", "c"]. That\'s all!') + const output3 = parseArrayOutput( + 'This is the array [{"a": 1}, {"b": 2}] in the text' + ) + + t.snapshot(output1, 'should return [1, 2, 3] for "The array is [1,2,3]"') + t.snapshot( + output2, + 'should return ["a", "b", "c"] for "Array: ["a", "b", "c"]. That\'s all!"' + ) + t.snapshot( + output3, + 'should return [{"a": 1}, {"b": 2}] for "This is the array [{"a": 1}, {"b": 2}] in the text"' + ) +}) + +test('parseArrayOutput - handles and repairs broken JSON arrays correctly', (t) => { + const output1 = parseArrayOutput('[1, "two, 3]') + const output2 = parseArrayOutput('Array: ["a, "b", "c"]. Error here!') + const output3 = parseArrayOutput('Array in text {"arr": ["value1, "value2"]}') + + t.snapshot(output1, 'should repair and return [1, "two", 3] for [1, "two, 3]') + t.snapshot( + output2, + 'should repair and return ["a", "b", "c"] for Array: ["a, "b", "c"]. Error here!' + ) + t.snapshot( + output3, + 'should repair and return {"arr": ["value1", "value2"]} for Array in text {"arr": ["value1, "value2"]}' + ) +}) + +test('parseArrayOutput - throws error for invalid arrays', (t) => { + const error = t.throws( + () => { + parseArrayOutput('not a valid array') + }, + { instanceOf: Error } + ) + + t.snapshot(error?.message) +}) + +test('parseObjectOutput - handles valid objects correctly', (t) => { + const output1 = parseObjectOutput('{"a":1,"b":2,"c":3}') + const output2 = parseObjectOutput( + '{"name":"John","age":30,"city":"New York"}' + ) + + t.snapshot( + output1, + 'should return {"a":1,"b":2,"c":3} for {"a":1,"b":2,"c":3}' + ) + t.snapshot( + output2, + 'should return {"name":"John","age":30,"city":"New York"} for {"name":"John","age":30,"city":"New York"}' + ) +}) + +test('parseObjectOutput - handles objects surrounded by text correctly', (t) => { + const output1 = parseObjectOutput('The object is {"a":1,"b":2,"c":3}') + const output2 = parseObjectOutput( + 'Object: {"name":"John","age":30,"city":"New York"}. That\'s all!' + ) + + t.snapshot( + output1, + 'should return {"a":1,"b":2,"c":3} for "The object is {"a":1,"b":2,"c":3}"' + ) + t.snapshot( + output2, + 'should return {"name":"John","age":30,"city":"New York"} for "Object: {"name":"John","age":30,"city":"New York"}. That\'s all!"' + ) +}) + +test('parseObjectOutput - handles and repairs broken JSON objects correctly', (t) => { + const output1 = parseObjectOutput('{"a":1, "b":2, "c":3') + const output2 = parseObjectOutput( + 'Object: {"name":"John,"age":30,"city":"New York"}. Error here!' + ) + + t.snapshot( + output1, + 'should repair and return {"a":1, "b":2, "c":3} for {"a":1, "b":2, "c":3' + ) + t.snapshot( + output2, + 'should repair and return {"name":"John","age":30,"city":"New York"} for Object: {"name":"John,"age":30,"city":"New York"}. Error here!' + ) +}) + +test('parseObjectOutput - handles JSON array of objects', (t) => { + const output = parseObjectOutput('[{"a":1,"b":2},{"c":3,"d":4}]') + + t.snapshot( + output, + 'should return first object {"a":1,"b":2} for [{"a":1,"b":2},{"c":3,"d":4}]' + ) +}) + +test('parseObjectOutput - throws error for invalid objects', (t) => { + const error = t.throws( + () => { + parseObjectOutput('not a valid object') + }, + { instanceOf: Error } + ) + + t.snapshot(error?.message) +}) + +test('parseBooleanOutput - handles `true` outputs correctly', (t) => { + const output1 = parseBooleanOutput('True') + const output2 = parseBooleanOutput('TRUE') + const output3 = parseBooleanOutput('true.') + + t.snapshot(output1, 'should return true for "True"') + t.snapshot(output2, 'should return true for "TRUE"') + t.snapshot(output3, 'should return true for "true."') +}) + +test('parseBooleanOutput - handles `false` outputs correctly', (t) => { + const output1 = parseBooleanOutput('False') + const output2 = parseBooleanOutput('FALSE') + const output3 = parseBooleanOutput('false!') + + t.snapshot(output1, 'should return false for "False"') + t.snapshot(output2, 'should return false for "FALSE"') + t.snapshot(output3, 'should return false for "false!"') +}) + +test('parseBooleanOutput - throws error for invalid outputs', (t) => { + const error = t.throws( + () => { + parseBooleanOutput('NotBooleanValue') + }, + { instanceOf: Error } + ) + + t.snapshot(error?.message) +}) + +test('parseNumberOutput - handles integer outputs correctly', (t) => { + const output1 = parseNumberOutput('42', z.number().int()) + const output2 = parseNumberOutput(' -5 ', z.number().int()) + + t.snapshot(output1, 'should return 42 for "42"') + t.snapshot(output2, 'should return -5 for " -5 "') +}) + +test('parseNumberOutput - handles float outputs correctly', (t) => { + const output1 = parseNumberOutput('42.42', z.number()) + const output2 = parseNumberOutput(' -5.5 ', z.number()) + + t.snapshot(output1, 'should return 42.42 for "42.42"') + t.snapshot(output2, 'should return -5.5 for " -5.5 "') +}) + +test('parseNumberOutput - throws error for invalid outputs', (t) => { + const error = t.throws( + () => { + parseNumberOutput('NotANumber', z.number()) + }, + { instanceOf: Error } + ) + + t.snapshot(error?.message) +}) From 6b4bbbf8d5ead45a907386ed7007018c385d159f Mon Sep 17 00:00:00 2001 From: Philipp Burckhardt Date: Tue, 20 Jun 2023 10:54:53 -0400 Subject: [PATCH 3/5] chore: rename file and add tests --- ...e-output.ts.md => parse-output.test.ts.md} | 41 +++++++++++++- .../test/llms/parse-output.test.ts.snap | Bin 0 -> 1482 bytes .../.snapshots/test/llms/parse-output.ts.snap | Bin 1347 -> 0 bytes .../{parse-output.ts => parse-output.test.ts} | 52 +++++++++++++++++- 4 files changed, 90 insertions(+), 3 deletions(-) rename legacy/test/.snapshots/test/llms/{parse-output.ts.md => parse-output.test.ts.md} (84%) create mode 100644 legacy/test/.snapshots/test/llms/parse-output.test.ts.snap delete mode 100644 legacy/test/.snapshots/test/llms/parse-output.ts.snap rename legacy/test/llms/{parse-output.ts => parse-output.test.ts} (81%) diff --git a/legacy/test/.snapshots/test/llms/parse-output.ts.md b/legacy/test/.snapshots/test/llms/parse-output.test.ts.md similarity index 84% rename from legacy/test/.snapshots/test/llms/parse-output.ts.md rename to legacy/test/.snapshots/test/llms/parse-output.test.ts.md index 54a5033e..e9dba807 100644 --- a/legacy/test/.snapshots/test/llms/parse-output.ts.md +++ b/legacy/test/.snapshots/test/llms/parse-output.test.ts.md @@ -1,6 +1,6 @@ -# Snapshot report for `test/llms/parse-output.ts` +# Snapshot report for `test/llms/parse-output.test.ts` -The actual snapshot is saved in `parse-output.ts.snap`. +The actual snapshot is saved in `parse-output.test.ts.snap`. Generated by [AVA](https://avajs.dev). @@ -224,3 +224,40 @@ Generated by [AVA](https://avajs.dev). > Snapshot 1 'Invalid number output: NotANumber' + +## parseOutput - handles array correctly + +> should parse and return [1, 2, 3] for "[1, 2, 3]" + + [ + 1, + 2, + 3, + ] + +## parseOutput - handles object correctly + +> should parse and return {"a": 1, "b": "two"} for "{"a": 1, "b": "two"}" + + { + a: 1, + b: 'two', + } + +## parseOutput - handles boolean correctly + +> should parse and return true for "True" + + true + +## parseOutput - handles number correctly + +> should parse and return 123.45 for "123.45" + + 123.45 + +## parseOutput - throws error for invalid data + +> Snapshot 1 + + 'Invalid number output: not a number' diff --git a/legacy/test/.snapshots/test/llms/parse-output.test.ts.snap b/legacy/test/.snapshots/test/llms/parse-output.test.ts.snap new file mode 100644 index 0000000000000000000000000000000000000000..b0f9151757d72dcb7fb25a3f5b5525ffc6145f22 GIT binary patch literal 1482 zcmV;*1vUCXRzVl2lxjMDT(HWjv@vv>Q`zoE8>@nxeF72#IJ? zE{dXdZI5F&+udk(O%erHYLVcT2VQtU(DEmE;(U1 zX1;UIcg`H|lTOq1TL;1SPf6GdU2?v17GBM!5k`8`?UwU1%>EGYdu~I>0bw zZ*Fd`U_4haN(JKukSG>jX5F%3h0%bO0+0hT*6X*-%pj$%a}2rj>$;9c&`pWPe&%f>j3v z=06gcOPAq!1=_W7c)?!J8^%X`A@nGF8f=f^577NduAkXz-Un|Q#ke~^KOc$x3L?Ly z#Y(E!i_xBQOy{9phAz}iH>E>`Vcts_<`j%zqx!%^r(g*iSp#kz-Gt=s2RYmotPT2< z`hBlW+NgPgLUJ6QH6o`Oq7r$hOSmei7oao4>YEd=Iy%r!&(+XQ*9jNFKdxKF{|~C~ zPDpidHaHpnhq2SOvOO=7U}VNJ6h6!g1&^J5==5lSn$$la9$Met+MFP<-_<$h#srSZ ziv=*zYIQi!w^y;Xbn;tMO{KkRaoGmE)#4+JD55XoP$fh zPwE==XQCNvG%HBuNTzbYVoEX7gzwH&;k)6z!BEwR*(^n;<&)x)@s@`1VVC+x0V0us z*tR_{S;Gto{^$ue+)8Y?N@2lRyd}hk;b9}}`5|(WwNM;%%Mk>gEHa8GKbq7A*<<0n zZchzhaC7`;=l84rSUU(9-c9R^TFMN8C7V?}2Bz{@cw_d& z3JkJ%$Y{hIoJU5(&SPPPQNC*(Ws5`r`ii zI0gZ!_M$MwE2=3P7Pmvg!kR^Yibb!giTnb zz5Amn3BDHNzpZRgURtTGi1pR9617+j{({xKlhR~*61=UDpejk=Mf=7a*!06yev~}^ z0#AF|Z*n@*J^By7ztx3NBgpA*59%jn{u&$kO}&=l#>i3A`Wr1KCRqjlPSd=Qq3K2k%Rj*r5BwASF}yH)w|if{JD=et6;U>~o}JzM z&CG9R$9X!iY`1sl{qT(P5x42d>XFGjy3Uw+x-;fSV~$tw&~*AX_3&4wJ?djqta-TS zGDdscJ{@_EdE^aU?tS~L-#5AWgTjwfa)E18Zb6t?7?^iEi9vk{{n>6tl4Z< z&5rW5(RE)2DrG zonlT;_<198njuLd-x<=t6g=|qxncFq60G)LXr$NL_|DMemmog2-6H=Fs_&JgIs_Yn z48J@KdZzBk@ihdQiAsf!i&DW6?>;g|%)=IQ52=GUcXzf+6!!aBfw@s4Fh#ilE?VTr zuHd+I(ACgE0;I$nW&;6b5@v*w2!TZTTMXsGhw!`(;~tC=j0ub%XCZE+^Py&HxJ*V= zBehzTB}!BeMat5hd`mNOC`zG325mq|V|YjzZAlM2&Z-2vFb?Og1Q+1aAJf$g^B4RT zeYz?r<7lpNz+%RTup~HLm=c^u_9v04KC?xMPV2|j731{`#`7U_k3CF%2Z>`xPP~VS z2>#8}z;WHsagE9{x^zpbN9kc;;D-Y6E3PoZNp+YnQuLX-PgJJLz z5Ub;fGQO<3qRO=*swk3_YgW!FZ*G<2jo;A(28*jMLybI7JbL_}ctir!#vJ=n4V0wU zMiIU8?*HenHr*_`VFZtGKqNd}s1hsEx<6yqH4NEvS}>l#_;t30^%UNaJ)r@UJRS-f zi2xUo5gA1+YzV4o(lOr^IA$(JW)iZPkMVXS@lS;uI!2c(b_a@tv8!1I=V83})ZzIa zjL&8VWr{?zX(yeynat$@&uJ24{U`ns+Fd6+8aryiKAql zhZ*@d`)=ReDA;!~Z=aa8LyNBzTRbprZ*G&9B!o9pJP~IF7;cDZ2KW*?*SB`F%vL*b z?oNN&ni1-J$^{-M-5FcK!E&F0fpk*jH|R+e+v7(TWmDcCIh-C)R^)wo|9l#PfKvO8 zbj52)S9J8i4;`IkZ2DVndOf*mr5kL*VwURXb5IX#*W@KI@5Wxgh#DQe-O;nd?=LCv z?GXR%!~yk{Zo4b@x6($mLoK_bwS=3$@ F006kQgMI)2 diff --git a/legacy/test/llms/parse-output.ts b/legacy/test/llms/parse-output.test.ts similarity index 81% rename from legacy/test/llms/parse-output.ts rename to legacy/test/llms/parse-output.test.ts index 94ee932c..267e57f9 100644 --- a/legacy/test/llms/parse-output.ts +++ b/legacy/test/llms/parse-output.test.ts @@ -5,7 +5,8 @@ import { parseArrayOutput, parseBooleanOutput, parseNumberOutput, - parseObjectOutput + parseObjectOutput, + parseOutput } from '@/llms/parse-output' test('parseArrayOutput - handles valid arrays correctly', (t) => { @@ -191,3 +192,52 @@ test('parseNumberOutput - throws error for invalid outputs', (t) => { t.snapshot(error?.message) }) + +test('parseOutput - handles arrays correctly', (t) => { + const arraySchema = z.array(z.number()) + const output = '[1, 2, 3]' + const result = parseOutput(output, arraySchema) + + t.snapshot(result, 'should parse and return [1, 2, 3] for "[1, 2, 3]"') +}) + +test('parseOutput - handles objects correctly', (t) => { + const objectSchema = z.object({ a: z.number(), b: z.string() }) + const output = '{"a": 1, "b": "two"}' + const result = parseOutput(output, objectSchema) + + t.snapshot( + result, + 'should parse and return {"a": 1, "b": "two"} for "{"a": 1, "b": "two"}"' + ) +}) + +test('parseOutput - handles booleans correctly', (t) => { + const booleanSchema = z.boolean() + const output = 'True' + const result = parseOutput(output, booleanSchema) + + t.snapshot(result, 'should parse and return true for "True"') +}) + +test('parseOutput - handles numbers correctly', (t) => { + const numberSchema = z.number() + const output = '123.45' + const result = parseOutput(output, numberSchema) + + t.snapshot(result, 'should parse and return 123.45 for "123.45"') +}) + +test('parseOutput - throws error for invalid data', (t) => { + const numberSchema = z.number() + const output = 'not a number' + + const error = t.throws( + () => { + parseOutput(output, numberSchema) + }, + { instanceOf: Error } + ) + + t.snapshot(error?.message) +}) From 39f4583499e730d79a508d3686b5955d7aee7fe2 Mon Sep 17 00:00:00 2001 From: Philipp Burckhardt Date: Tue, 20 Jun 2023 11:36:28 -0400 Subject: [PATCH 4/5] fix: always assign result and update snapshots --- legacy/src/llms/parse-output.ts | 2 + .../test/llms/parse-output.test.ts.md | 43 +++++++++++++++--- .../test/llms/parse-output.test.ts.snap | Bin 1482 -> 1516 bytes 3 files changed, 39 insertions(+), 6 deletions(-) diff --git a/legacy/src/llms/parse-output.ts b/legacy/src/llms/parse-output.ts index 8fbda33c..394c2635 100644 --- a/legacy/src/llms/parse-output.ts +++ b/legacy/src/llms/parse-output.ts @@ -148,6 +148,8 @@ export function parseOutput(output: string, outputSchema: ZodType) { result = parseBooleanOutput(output) } else if (outputSchema instanceof z.ZodNumber) { result = parseNumberOutput(output, outputSchema) + } else { + result = output } // TODO: fix typescript issue here with recursive types diff --git a/legacy/test/.snapshots/test/llms/parse-output.test.ts.md b/legacy/test/.snapshots/test/llms/parse-output.test.ts.md index e9dba807..1fadd79a 100644 --- a/legacy/test/.snapshots/test/llms/parse-output.test.ts.md +++ b/legacy/test/.snapshots/test/llms/parse-output.test.ts.md @@ -225,6 +225,43 @@ Generated by [AVA](https://avajs.dev). 'Invalid number output: NotANumber' +## parseOutput - handles arrays correctly + +> should parse and return [1, 2, 3] for "[1, 2, 3]" + + [ + 1, + 2, + 3, + ] + +## parseOutput - handles objects correctly + +> should parse and return {"a": 1, "b": "two"} for "{"a": 1, "b": "two"}" + + { + a: 1, + b: 'two', + } + +## parseOutput - handles booleans correctly + +> should parse and return true for "True" + + true + +## parseOutput - handles numbers correctly + +> should parse and return 123.45 for "123.45" + + 123.45 + +## parseOutput - throws error for invalid data + +> Snapshot 1 + + 'Invalid number output: not a number' + ## parseOutput - handles array correctly > should parse and return [1, 2, 3] for "[1, 2, 3]" @@ -255,9 +292,3 @@ Generated by [AVA](https://avajs.dev). > should parse and return 123.45 for "123.45" 123.45 - -## parseOutput - throws error for invalid data - -> Snapshot 1 - - 'Invalid number output: not a number' diff --git a/legacy/test/.snapshots/test/llms/parse-output.test.ts.snap b/legacy/test/.snapshots/test/llms/parse-output.test.ts.snap index b0f9151757d72dcb7fb25a3f5b5525ffc6145f22..eb379dba43408eec2d5737eb93883fddb4be6a9b 100644 GIT binary patch literal 1516 zcmV-E{oMepyP_77aQ?e); z8*qsM_aBQ000000000B!SX*!0L=@iHOHy%B62SvVP$q*)fI3Up*|cB~YKqdTAta(n zxu}X-@9u1N8`q9(FG&=(Qi}wyJi-HlmOsH05Bv#!3@<(7nelqOWA84gkd~?wk7v%z zeCM3+oH^?U?S|tu_ktf@dPu`==+ML3zU>Ee#rN%_&0e_Q3-KcE*lx?A0shK%x-D$8 zxd1mk->1#cIqC+ky&rVEF!<(St7V7w7fz`#U10yvDO9e*(}K1J?FO_L(9|5NLjWZg1C;vFF|P?Q5jF;Jq8-Js6l89>l;TG*#y&v)^Snt@FN zFYVyA=VJn|rm?g`1VCR0a2?vs$pBX75VBQ>Y^b4vWJ65>(@eo6HZd@1us=;A!K#A- z^PdUKrE~B+5AEV8ykM_q5&D=fgr3iyi0x7Q5xPIi^;4UT+u%)9jJvb5vys?uAo6=! ztfY!PAMH8AbRO7c;6mMWLpoF#=KYjmj==~PuJsLk43@C4+2_{LO-Sy3n8RJc+MwtA zUe9gO7H%BjkRFC7jmT++s6^iGP_7E@2KdCV`pOurjt;cab5*?EvBL%MkK@df{|D7? zj7fEHHaHpnhq2SOvRyZlU}PpT6h6ue1(%(@Z+HCwH+*l8x_E7Cb7PFeepBa|%VRhu zFBZT=)8T=~I4%ryAvO>LDe%V8KtPzp5g|v|fI#_c0_D^P@Vo@=GiY6CeQ4j0LR?A5 zL&-MqI1y1Ch?NFWB8STHkkoWDTT@L84MIqXpjAj|KJHMT&I=3N)n$S$XnPZ9f>Utm z_h?O{{!}z$4rc|a9L-b?SnT_RX~K7Bs_@;&PJf_k#B7$L)AC7i$#_%4_^{)92LYy$ zf!MZPH(A3B2!8JYH{5b;xJqFTEnF4igYd90alH`R$yz85x}^vLPZk-)lNU{DgY3R= z-gimou>fNfz7EYzrzSL%no2pTDI+PRCz`4I=sh)CMG2^8u;gPJz&~q=J4`b!^!Z7r z&*#v-o?M@jy-NkY)2XR)vl|}8tlgjo_)E{QH z7Bo5sioqc;EVn08`EkW1RW9aHMUrH^V&#PL=IVI7i7FbdFuUqBV&qBU(cOQqHJY+Ot4$dPZvhrA1VN@}MVZO~V%tVaT5;C8U$#O*bsE|R&!R3m*wj^QVY8KA9 zovb}gc)knm)6q^DBGG8t2_vqlvD}7u31zBE=Ba+-hp8Mgjw50jq(!Xy;1_CLXNb%y zW!M&?Ou3+xDQ_udiYGj#)G2&0P7liwj7y&9P}?1H$gR-t(OcMy#`As;tj92lg&ASA zS5>32twf%O9{D@#u3cZvS$8^X9qUzNjnC(6+_s%yVwGnFgx69$VS8~5mswW>evFkX z>svatZhpgXqL%K`-JnM`5 z=aU!&q}sQIDPC4hQ8&3A>L$@F`b#W&MP0PG%okzSOV#rksN0Tbhht#gN*q6pDs{73 zH}&q1rX=`kjQ^UlL3wexx-8bur#&-Aq!w+yZVbut8`s0K8O_{&OR$kDnnNwFuJ>7rla$_Y&c;KbVry)(n zS0-L0@eeMdK-JB}`oF2yUnGP)1L!jQLKeF8osw^TPR_i;x0L{!s+mjGx(ufIKE^UwF7M&T8)6x@ S7ul2lxjMDT(HWjv@vv>Q`zoE8>@nxeF72#IJ? zE{dXdZI5F&+udk(O%erHYLVcT2VQtU(DEmE;(U1 zX1;UIcg`H|lTOq1TL;1SPf6GdU2?v17GBM!5k`8`?UwU1%>EGYdu~I>0bw zZ*Fd`U_4haN(JKukSG>jX5F%3h0%bO0+0hT*6X*-%pj$%a}2rj>$;9c&`pWPe&%f>j3v z=06gcOPAq!1=_W7c)?!J8^%X`A@nGF8f=f^577NduAkXz-Un|Q#ke~^KOc$x3L?Ly z#Y(E!i_xBQOy{9phAz}iH>E>`Vcts_<`j%zqx!%^r(g*iSp#kz-Gt=s2RYmotPT2< z`hBlW+NgPgLUJ6QH6o`Oq7r$hOSmei7oao4>YEd=Iy%r!&(+XQ*9jNFKdxKF{|~C~ zPDpidHaHpnhq2SOvOO=7U}VNJ6h6!g1&^J5==5lSn$$la9$Met+MFP<-_<$h#srSZ ziv=*zYIQi!w^y;Xbn;tMO{KkRaoGmE)#4+JD55XoP$fh zPwE==XQCNvG%HBuNTzbYVoEX7gzwH&;k)6z!BEwR*(^n;<&)x)@s@`1VVC+x0V0us z*tR_{S;Gto{^$ue+)8Y?N@2lRyd}hk;b9}}`5|(WwNM;%%Mk>gEHa8GKbq7A*<<0n zZchzhaC7`;=l84rSUU(9-c9R^TFMN8C7V?}2Bz{@cw_d& z3JkJ%$Y{hIoJU5(&SPPPQNC*(Ws5`r`ii zI0gZ!_M$MwE2=3P7Pmvg!kR^Yibb!giTnb zz5Amn3BDHNzpZRgURtTGi1pR9617+j{({xKlhR~*61=UDpejk=Mf=7a*!06yev~}^ z0#AF|Z*n@*J^By7ztx3NBgpA*59%jn{u&$kO}&=l#>i3A` Date: Wed, 21 Jun 2023 14:33:15 -0400 Subject: [PATCH 5/5] feat: improve JSON extraction --- legacy/src/llms/parse-output.ts | 110 ++++++++++++++-- legacy/src/utils.ts | 20 --- .../test/llms/parse-output.test.ts.md | 81 +----------- .../test/llms/parse-output.test.ts.snap | Bin 1516 -> 1217 bytes legacy/test/llms/parse-output.test.ts | 122 +++++++++++++----- legacy/test/utils.test.ts | 26 ---- 6 files changed, 189 insertions(+), 170 deletions(-) diff --git a/legacy/src/llms/parse-output.ts b/legacy/src/llms/parse-output.ts index 394c2635..3dea6f6e 100644 --- a/legacy/src/llms/parse-output.ts +++ b/legacy/src/llms/parse-output.ts @@ -1,11 +1,87 @@ import { JSONRepairError, jsonrepair } from 'jsonrepair' +import { JsonValue } from 'type-fest' import { ZodType, z } from 'zod' import * as errors from '@/errors' -import { - extractJSONArrayFromString, - extractJSONObjectFromString -} from '@/utils' + +/** + * Checks if character at the specified index in a string is escaped. + * + * @param str - string to check + * @param i - index of the character to check + * @returns whether the character is escaped + */ +function isEscaped(str: string, i: number): boolean { + return i > 0 && str[i - 1] === '\\' && !(i > 1 && str[i - 2] === '\\') +} + +/** + * Extracts JSON objects or arrays from a string. + * + * @param input - string to extract JSON from + * @param jsonStructureType - type of JSON structure to extract + * @returns array of extracted JSON objects or arrays + */ +export function extractJSONFromString( + input: string, + jsonStructureType: 'object' | 'array' +) { + const startChar = jsonStructureType === 'object' ? '{' : '[' + const endChar = jsonStructureType === 'object' ? '}' : ']' + const extractedJSONValues: JsonValue[] = [] + let nestingLevel = 0 + let startIndex = 0 + const isInsideQuoted = { '"': false, "'": false } + + for (let i = 0; i < input.length; i++) { + const ch = input.charAt(i) + switch (ch) { + case '"': + case "'": + if (!isInsideQuoted[ch === '"' ? "'" : '"'] && !isEscaped(input, i)) { + isInsideQuoted[ch] = !isInsideQuoted[ch] + } + + break + + default: + if (!isInsideQuoted['"'] && !isInsideQuoted["'"]) { + switch (ch) { + case startChar: + if (nestingLevel === 0) { + startIndex = i + } + + nestingLevel += 1 + + break + + case endChar: + nestingLevel -= 1 + if (nestingLevel === 0) { + const candidate = input.slice(startIndex, i + 1) + const parsed = JSON.parse(jsonrepair(candidate)) + if (parsed && typeof parsed === 'object') { + extractedJSONValues.push(parsed) + } + } else if (nestingLevel < 0) { + throw new Error( + `Invalid JSON string: unexpected ${endChar} at position ${i}` + ) + } + } + } + } + } + + if (nestingLevel !== 0) { + throw new Error( + 'Invalid JSON string: unmatched ' + startChar + ' or ' + endChar + ) + } + + return extractedJSONValues +} const BOOLEAN_OUTPUTS = { true: true, @@ -28,8 +104,12 @@ const BOOLEAN_OUTPUTS = { */ export function parseArrayOutput(output: string): Array { try { - const trimmedOutput = extractJSONArrayFromString(output) - const parsedOutput = JSON.parse(jsonrepair(trimmedOutput ?? output)) + const arr = extractJSONFromString(output, 'array') + if (arr.length === 0) { + throw new errors.OutputValidationError(`Invalid JSON array: ${output}`) + } + + const parsedOutput = arr[0] if (!Array.isArray(parsedOutput)) { throw new errors.OutputValidationError( `Invalid JSON array: ${JSON.stringify(parsedOutput)}` @@ -59,19 +139,22 @@ export function parseArrayOutput(output: string): Array { */ export function parseObjectOutput(output: string) { try { - const trimmedOutput = extractJSONObjectFromString(output) - output = JSON.parse(jsonrepair(trimmedOutput ?? output)) + const arr = extractJSONFromString(output, 'object') + if (arr.length === 0) { + throw new errors.OutputValidationError(`Invalid JSON object: ${output}`) + } - if (Array.isArray(output)) { + let parsedOutput = arr[0] + if (Array.isArray(parsedOutput)) { // TODO - output = output[0] - } else if (typeof output !== 'object') { + parsedOutput = parsedOutput[0] + } else if (typeof parsedOutput !== 'object') { throw new errors.OutputValidationError( - `Invalid JSON object: ${JSON.stringify(output)}` + `Invalid JSON object: ${JSON.stringify(parsedOutput)}` ) } - return output + return parsedOutput } catch (err: any) { if (err instanceof JSONRepairError) { throw new errors.OutputValidationError(err.message, { cause: err }) @@ -149,6 +232,7 @@ export function parseOutput(output: string, outputSchema: ZodType) { } else if (outputSchema instanceof z.ZodNumber) { result = parseNumberOutput(output, outputSchema) } else { + // Default to string output... result = output } diff --git a/legacy/src/utils.ts b/legacy/src/utils.ts index 61562561..3e8a2c22 100644 --- a/legacy/src/utils.ts +++ b/legacy/src/utils.ts @@ -4,26 +4,6 @@ import { JsonValue } from 'type-fest' import * as types from './types' -/** - * Extracts a JSON object string from a given string. - * - * @param text - string from which to extract the JSON object - * @returns extracted JSON object string, or `undefined` if no JSON object is found - */ -export function extractJSONObjectFromString(text: string): string | undefined { - return text.match(/\{(.|\n)*\}/gm)?.[0] // FIXME: This breaks if there are multiple JSON objects in the string -} - -/** - * Extracts a JSON array string from a given string. - * - * @param text - string from which to extract the JSON array - * @returns extracted JSON array string, or `undefined` if no JSON array is found - */ -export function extractJSONArrayFromString(text: string): string | undefined { - return text.match(/\[(.|\n)*\]/gm)?.[0] // FIXME: This breaks if there are multiple JSON arrays in the string -} - /** * Pauses the execution of a function for a specified time. * diff --git a/legacy/test/.snapshots/test/llms/parse-output.test.ts.md b/legacy/test/.snapshots/test/llms/parse-output.test.ts.md index 1fadd79a..f59dbc49 100644 --- a/legacy/test/.snapshots/test/llms/parse-output.test.ts.md +++ b/legacy/test/.snapshots/test/llms/parse-output.test.ts.md @@ -62,38 +62,11 @@ Generated by [AVA](https://avajs.dev). }, ] -## parseArrayOutput - handles and repairs broken JSON arrays correctly - -> should repair and return [1, "two", 3] for [1, "two, 3] - - [ - 1, - 'two, 3]', - ] - -> should repair and return ["a", "b", "c"] for Array: ["a, "b", "c"]. Error here! - - [ - 'a, ', - 'b', - ', ', - 'c', - ']', - ] - -> should repair and return {"arr": ["value1", "value2"]} for Array in text {"arr": ["value1, "value2"]} - - [ - 'value1, ', - 'value2', - ']', - ] - ## parseArrayOutput - throws error for invalid arrays > Snapshot 1 - 'Invalid JSON array: "not a valid array"' + 'Invalid JSON array: not a valid array' ## parseObjectOutput - handles valid objects correctly @@ -131,25 +104,6 @@ Generated by [AVA](https://avajs.dev). name: 'John', } -## parseObjectOutput - handles and repairs broken JSON objects correctly - -> should repair and return {"a":1, "b":2, "c":3} for {"a":1, "b":2, "c":3 - - { - a: 1, - b: 2, - c: 3, - } - -> should repair and return {"name":"John","age":30,"city":"New York"} for Object: {"name":"John,"age":30,"city":"New York"}. Error here! - - { - 'New York': '}', - age: ':30,', - city: ':', - name: 'John,', - } - ## parseObjectOutput - handles JSON array of objects > should return first object {"a":1,"b":2} for [{"a":1,"b":2},{"c":3,"d":4}] @@ -163,7 +117,7 @@ Generated by [AVA](https://avajs.dev). > Snapshot 1 - 'Invalid JSON object: "not a valid object"' + 'Invalid JSON object: not a valid object' ## parseBooleanOutput - handles `true` outputs correctly @@ -261,34 +215,3 @@ Generated by [AVA](https://avajs.dev). > Snapshot 1 'Invalid number output: not a number' - -## parseOutput - handles array correctly - -> should parse and return [1, 2, 3] for "[1, 2, 3]" - - [ - 1, - 2, - 3, - ] - -## parseOutput - handles object correctly - -> should parse and return {"a": 1, "b": "two"} for "{"a": 1, "b": "two"}" - - { - a: 1, - b: 'two', - } - -## parseOutput - handles boolean correctly - -> should parse and return true for "True" - - true - -## parseOutput - handles number correctly - -> should parse and return 123.45 for "123.45" - - 123.45 diff --git a/legacy/test/.snapshots/test/llms/parse-output.test.ts.snap b/legacy/test/.snapshots/test/llms/parse-output.test.ts.snap index eb379dba43408eec2d5737eb93883fddb4be6a9b..5832b6147597987390bcf002e54960389a26e120 100644 GIT binary patch literal 1217 zcmV;y1U~ygRzVxOo`T1aD9pp6ra-;h4d7I1J-{gcJC67>)Wc6nzQo5lV57 zMJJ;q4iA$dVafN;`u&iF_o7*(EsvBxJ3cENuzRD4dqdugVjcE+`n;!H%*t!n~ghIq$ zeXn&NX@JxPaEq6(<^#B}0zha(6ir3Xumq3Bj=V<26NoXYKk@1>#(#0A`;cc7RMYD6^0GAhm7DyQH>=rZFUm2O zM9dRjCMk4vcTGYyG4JPyIprh-uzBXeQ_e&H{WHamtHWsb!xHUk)h1&~$vE!gKJ1=A zhL71r6LMZ5wvhLRScL*d3A`|>{&WUa%L#*gUjyz9L$=2A6Gf}&{~`7L8KusX&6CW3 zS(dn0*pQN=1Y*7dsGMLFXLt08{GU8kd1$41Sg5RktGBfI=I+k6 z@(Ex}7!0!qg8|Tz02FZr2MLj#&XeRjt%M)4k`^lXfR{VG#Q8LVsWjC}(o~S3d`B<0 zHs7-|CzH|iL?V2Tt2t<$NnRc8I-Z|h@!aL*oB89Zg$G#UyOQgw_eSi*yjGup@s<1BM4GELs8REvlDC}>ZvM{O4og@?j47y4A_ zSf|38bt({@v+S{;+6;(}1o06e5f0<2bUa{mj2}QE+snQ&wo(w)%95t+HJdUqlwMZ6 zhW?7*?K>Zr_+2UT6SYRFzZ>Nd55g#!JLF{z;q{zK#8?W$C!*>AKZEDS)~+jT;|U8r zWp%Rf+^H@WoMK>R!FAQZ+D~duH}U>4woz=4A9pdGVt*7fe1K^g`}#=vJOxp&fVVXj zuh>+yeZ@oDM-Hbyw9~7$(|SiaK~Y2Y^{D{$AR-}~0rPHV`b%J;?Kj%KJN>fDfN!Mu zZ(9S@*E@}l_Fv5h(MZEkFUa0?a*@n0f_E$ttQrx-a@kXld0N2jGR)j8prj)a<#921^p0&Uhn|`a&HcRYU5eQ|aV5oWu-E{oMepyP_77aQ?e); z8*qsM_aBQ000000000B!SX*!0L=@iHOHy%B62SvVP$q*)fI3Up*|cB~YKqdTAta(n zxu}X-@9u1N8`q9(FG&=(Qi}wyJi-HlmOsH05Bv#!3@<(7nelqOWA84gkd~?wk7v%z zeCM3+oH^?U?S|tu_ktf@dPu`==+ML3zU>Ee#rN%_&0e_Q3-KcE*lx?A0shK%x-D$8 zxd1mk->1#cIqC+ky&rVEF!<(St7V7w7fz`#U10yvDO9e*(}K1J?FO_L(9|5NLjWZg1C;vFF|P?Q5jF;Jq8-Js6l89>l;TG*#y&v)^Snt@FN zFYVyA=VJn|rm?g`1VCR0a2?vs$pBX75VBQ>Y^b4vWJ65>(@eo6HZd@1us=;A!K#A- z^PdUKrE~B+5AEV8ykM_q5&D=fgr3iyi0x7Q5xPIi^;4UT+u%)9jJvb5vys?uAo6=! ztfY!PAMH8AbRO7c;6mMWLpoF#=KYjmj==~PuJsLk43@C4+2_{LO-Sy3n8RJc+MwtA zUe9gO7H%BjkRFC7jmT++s6^iGP_7E@2KdCV`pOurjt;cab5*?EvBL%MkK@df{|D7? zj7fEHHaHpnhq2SOvRyZlU}PpT6h6ue1(%(@Z+HCwH+*l8x_E7Cb7PFeepBa|%VRhu zFBZT=)8T=~I4%ryAvO>LDe%V8KtPzp5g|v|fI#_c0_D^P@Vo@=GiY6CeQ4j0LR?A5 zL&-MqI1y1Ch?NFWB8STHkkoWDTT@L84MIqXpjAj|KJHMT&I=3N)n$S$XnPZ9f>Utm z_h?O{{!}z$4rc|a9L-b?SnT_RX~K7Bs_@;&PJf_k#B7$L)AC7i$#_%4_^{)92LYy$ zf!MZPH(A3B2!8JYH{5b;xJqFTEnF4igYd90alH`R$yz85x}^vLPZk-)lNU{DgY3R= z-gimou>fNfz7EYzrzSL%no2pTDI+PRCz`4I=sh)CMG2^8u;gPJz&~q=J4`b!^!Z7r z&*#v-o?M@jy-NkY)2XR)vl|}8tlgjo_)E{QH z7Bo5sioqc;EVn08`EkW1RW9aHMUrH^V&#PL=IVI7i7FbdFuUqBV&qBU(cOQqHJY+Ot4$dPZvhrA1VN@}MVZO~V%tVaT5;C8U$#O*bsE|R&!R3m*wj^QVY8KA9 zovb}gc)knm)6q^DBGG8t2_vqlvD}7u31zBE=Ba+-hp8Mgjw50jq(!Xy;1_CLXNb%y zW!M&?Ou3+xDQ_udiYGj#)G2&0P7liwj7y&9P}?1H$gR-t(OcMy#`As;tj92lg&ASA zS5>32twf%O9{D@#u3cZvS$8^X9qUzNjnC(6+_s%yVwGnFgx69$VS8~5mswW>evFkX z>svatZhpgXqL%K`-JnM`5 z=aU!&q}sQIDPC4hQ8&3A>L$@F`b#W&MP0PG%okzSOV#rksN0Tbhht#gN*q6pDs{73 zH}&q1rX=`kjQ^UlL3wexx-8bur#&-Aq!w+yZVbut8`s0K8O_{&OR$kDnnNwFuJ>7rla$_Y&c;KbVry)(n zS0-L0@eeMdK-JB}`oF2yUnGP)1L!jQLKeF8osw^TPR_i;x0L{!s+mjGx(ufIKE^UwF7M&T8)6x@ S7u { + let jsonStr = 'Some text {"name":"John Doe"} more text' + let result = extractJSONFromString(jsonStr, 'object') + t.deepEqual(result[0], { name: 'John Doe' }) + + jsonStr = + 'Some text {"name":"John Doe","age":42,"address":{"street":"Main Street","number":42}} more text' + result = extractJSONFromString(jsonStr, 'object') + t.deepEqual(result[0], { + name: 'John Doe', + age: 42, + address: { street: 'Main Street', number: 42 } + }) + + jsonStr = 'foo {"name":"John Doe","school":"St. John\'s"} bar' + result = extractJSONFromString(jsonStr, 'object') + t.deepEqual(result[0], { name: 'John Doe', school: "St. John's" }) +}) + +test('extractJSONFromString should extract an invalid JSON object from string', (t) => { + let jsonStr = 'Some text {"name":\'John Doe\'} more text' + let result = extractJSONFromString(jsonStr, 'object') + t.deepEqual(result[0], { name: 'John Doe' }) + + jsonStr = 'Some text {"name":"John Doe","age":42,} more text' + result = extractJSONFromString(jsonStr, 'object') + t.deepEqual(result[0], { name: 'John Doe', age: 42 }) +}) + +test('extractJSONFromString should extract multiple JSON objects from string', (t) => { + let jsonStr = 'Some text {"name":"John Doe"} more text {"name":"Jane Doe"}' + let result = extractJSONFromString(jsonStr, 'object') + t.deepEqual(result[0], { name: 'John Doe' }) + t.deepEqual(result[1], { name: 'Jane Doe' }) + + jsonStr = + 'Some text {"name":"John Doe","age":42,"address":{"street":"Main Street","number":42}} more text {"name":"Jane Doe","age":42,"address":{"street":"Main Street","number":42}}' + result = extractJSONFromString(jsonStr, 'object') + t.deepEqual(result[0], { + name: 'John Doe', + age: 42, + address: { street: 'Main Street', number: 42 } + }) + t.deepEqual(result[1], { + name: 'Jane Doe', + age: 42, + address: { street: 'Main Street', number: 42 } + }) +}) + +test('extractJSONFromString should extract JSON array from string', (t) => { + let jsonString = 'Some text [1,2,3] more text' + let result = extractJSONFromString(jsonString, 'array') + t.deepEqual(result[0], [1, 2, 3]) + + jsonString = 'Some text ["foo","bar","\'quoted\'"] more text' + result = extractJSONFromString(jsonString, 'array') + t.deepEqual(result[0], ['foo', 'bar', "'quoted'"]) +}) + +test('extractJSONFromString should extract an invalid JSON array from string', (t) => { + let jsonString = 'Some text [1,2,3,] more text' + let result = extractJSONFromString(jsonString, 'array') + t.deepEqual(result[0], [1, 2, 3]) + + jsonString = "Some text ['foo','bar'] more text" + result = extractJSONFromString(jsonString, 'array') + t.deepEqual(result[0], ['foo', 'bar']) +}) + +test('extractJSONFromString should extract multiple JSON arrays from string', (t) => { + const jsonString = 'Some text [1,2,3] more text [4,5,6]' + const result = extractJSONFromString(jsonString, 'array') + t.deepEqual(result[0], [1, 2, 3]) + t.deepEqual(result[1], [4, 5, 6]) +}) + +test('extractJSONFromString should return an empty array if no JSON object is found', (t) => { + const jsonString = 'Some text' + const result = extractJSONFromString(jsonString, 'object') + t.deepEqual(result, []) +}) + +test('extractJSONFromString should return an empty array if no JSON array is found', (t) => { + const jsonString = 'Some text' + const result = extractJSONFromString(jsonString, 'array') + t.deepEqual(result, []) +}) + test('parseArrayOutput - handles valid arrays correctly', (t) => { const output1 = parseArrayOutput('[1,2,3]') const output2 = parseArrayOutput('["a", "b", "c"]') @@ -40,22 +130,6 @@ test('parseArrayOutput - handles arrays surrounded by text correctly', (t) => { ) }) -test('parseArrayOutput - handles and repairs broken JSON arrays correctly', (t) => { - const output1 = parseArrayOutput('[1, "two, 3]') - const output2 = parseArrayOutput('Array: ["a, "b", "c"]. Error here!') - const output3 = parseArrayOutput('Array in text {"arr": ["value1, "value2"]}') - - t.snapshot(output1, 'should repair and return [1, "two", 3] for [1, "two, 3]') - t.snapshot( - output2, - 'should repair and return ["a", "b", "c"] for Array: ["a, "b", "c"]. Error here!' - ) - t.snapshot( - output3, - 'should repair and return {"arr": ["value1", "value2"]} for Array in text {"arr": ["value1, "value2"]}' - ) -}) - test('parseArrayOutput - throws error for invalid arrays', (t) => { const error = t.throws( () => { @@ -99,22 +173,6 @@ test('parseObjectOutput - handles objects surrounded by text correctly', (t) => ) }) -test('parseObjectOutput - handles and repairs broken JSON objects correctly', (t) => { - const output1 = parseObjectOutput('{"a":1, "b":2, "c":3') - const output2 = parseObjectOutput( - 'Object: {"name":"John,"age":30,"city":"New York"}. Error here!' - ) - - t.snapshot( - output1, - 'should repair and return {"a":1, "b":2, "c":3} for {"a":1, "b":2, "c":3' - ) - t.snapshot( - output2, - 'should repair and return {"name":"John","age":30,"city":"New York"} for Object: {"name":"John,"age":30,"city":"New York"}. Error here!' - ) -}) - test('parseObjectOutput - handles JSON array of objects', (t) => { const output = parseObjectOutput('[{"a":1,"b":2},{"c":3,"d":4}]') diff --git a/legacy/test/utils.test.ts b/legacy/test/utils.test.ts index 0e4804e8..59079f39 100644 --- a/legacy/test/utils.test.ts +++ b/legacy/test/utils.test.ts @@ -6,8 +6,6 @@ import { chunkString, defaultIDGeneratorFn, extractFunctionIdentifierFromString, - extractJSONArrayFromString, - extractJSONObjectFromString, isValidTaskIdentifier, sleep, stringifyForModel, @@ -33,30 +31,6 @@ test('isValidTaskIdentifier - invalid', async (t) => { t.false(isValidTaskIdentifier('-foo')) }) -test('extractJSONObjectFromString should extract JSON object from string', (t) => { - const jsonString = 'Some text {"name":"John Doe"} more text' - const result = extractJSONObjectFromString(jsonString) - t.is(result, '{"name":"John Doe"}') -}) - -test('extractJSONArrayFromString should extract JSON array from string', (t) => { - const jsonString = 'Some text [1,2,3] more text' - const result = extractJSONArrayFromString(jsonString) - t.is(result, '[1,2,3]') -}) - -test('extractJSONObjectFromString should return undefined if no JSON object is found', (t) => { - const jsonString = 'Some text' - const result = extractJSONObjectFromString(jsonString) - t.is(result, undefined) -}) - -test('extractJSONArrayFromString should return undefined if no JSON array is found', (t) => { - const jsonString = 'Some text' - const result = extractJSONArrayFromString(jsonString) - t.is(result, undefined) -}) - test('sleep should delay execution', async (t) => { const start = Date.now() await sleep(1000) // for example, 1000ms / 1sec