kopia lustrzana https://github.com/transitive-bullshit/chatgpt-api
feat: improve JSON extraction
rodzic
8682fa71c8
commit
9a604891b8
|
@ -1,11 +1,87 @@
|
|||
import { JSONRepairError, jsonrepair } from 'jsonrepair'
|
||||
import { JsonValue } from 'type-fest'
|
||||
import { ZodType, z } from 'zod'
|
||||
|
||||
import * as errors from '@/errors'
|
||||
import {
|
||||
extractJSONArrayFromString,
|
||||
extractJSONObjectFromString
|
||||
} from '@/utils'
|
||||
|
||||
/**
 * Checks if the character at the specified index in a string is escaped.
 *
 * A character is escaped when it is immediately preceded by an odd number of
 * consecutive backslashes: every pair of backslashes is itself an escaped
 * backslash, so only an unpaired trailing backslash escapes the character.
 *
 * @param str - string to check
 * @param i - index of the character to check
 * @returns whether the character is escaped
 */
function isEscaped(str: string, i: number): boolean {
  let backslashCount = 0

  // Walk left over the run of backslashes that directly precedes index `i`.
  for (let j = i - 1; j >= 0 && str[j] === '\\'; j--) {
    backslashCount += 1
  }

  return backslashCount % 2 === 1
}
|
||||
|
||||
/**
 * Extracts JSON objects or arrays from a string.
 *
 * Scans `input` for top-level spans delimited by `{`/`}` (or `[`/`]`),
 * ignoring delimiters that appear inside quoted strings. Each span is passed
 * through `jsonrepair` and `JSON.parse`, and kept if the parsed value is a
 * non-null object (which includes arrays).
 *
 * A single quote is only treated as a string delimiter while inside a
 * structure. Outside of one it is taken to be an apostrophe in the surrounding
 * prose (e.g. `Here's the result: {...}`), which would otherwise open a quoted
 * span that never closes and hide every structure that follows it.
 *
 * Errors raised by `jsonrepair` or `JSON.parse` for a candidate span are not
 * caught here and propagate to the caller.
 *
 * @param input - string to extract JSON from
 * @param jsonStructureType - type of JSON structure to extract
 * @returns array of extracted JSON objects or arrays
 * @throws Error if a closing delimiter has no matching opening delimiter, or
 * if a structure is still open when the end of the input is reached
 */
export function extractJSONFromString(
  input: string,
  jsonStructureType: 'object' | 'array'
): JsonValue[] {
  const startChar = jsonStructureType === 'object' ? '{' : '['
  const endChar = jsonStructureType === 'object' ? '}' : ']'
  const extractedJSONValues: JsonValue[] = []
  let nestingLevel = 0
  let startIndex = 0
  const isInsideQuoted = { '"': false, "'": false }

  for (let i = 0; i < input.length; i++) {
    const ch = input.charAt(i)

    if (ch === '"' || ch === "'") {
      const otherQuote = ch === '"' ? "'" : '"'
      // Outside any structure a single quote is prose punctuation, not a
      // string delimiter.
      const isProseApostrophe = ch === "'" && nestingLevel === 0

      // A quote character nested inside the other kind of quotes, or one that
      // is backslash-escaped, does not open or close a string.
      if (
        !isProseApostrophe &&
        !isInsideQuoted[otherQuote] &&
        !isEscaped(input, i)
      ) {
        isInsideQuoted[ch] = !isInsideQuoted[ch]
      }

      continue
    }

    // Delimiters inside a quoted string are plain text.
    if (isInsideQuoted['"'] || isInsideQuoted["'"]) {
      continue
    }

    if (ch === startChar) {
      if (nestingLevel === 0) {
        // Remember where the outermost structure begins.
        startIndex = i
      }

      nestingLevel += 1
    } else if (ch === endChar) {
      nestingLevel -= 1

      if (nestingLevel === 0) {
        // The outermost structure just closed: repair and parse the span.
        const candidate = input.slice(startIndex, i + 1)
        const parsed = JSON.parse(jsonrepair(candidate))
        if (parsed && typeof parsed === 'object') {
          extractedJSONValues.push(parsed)
        }
      } else if (nestingLevel < 0) {
        throw new Error(
          `Invalid JSON string: unexpected ${endChar} at position ${i}`
        )
      }
    }
  }

  if (nestingLevel !== 0) {
    throw new Error(
      'Invalid JSON string: unmatched ' + startChar + ' or ' + endChar
    )
  }

  return extractedJSONValues
}
|
||||
|
||||
const BOOLEAN_OUTPUTS = {
|
||||
true: true,
|
||||
|
@ -28,8 +104,12 @@ const BOOLEAN_OUTPUTS = {
|
|||
*/
|
||||
export function parseArrayOutput(output: string): Array<any> {
|
||||
try {
|
||||
const trimmedOutput = extractJSONArrayFromString(output)
|
||||
const parsedOutput = JSON.parse(jsonrepair(trimmedOutput ?? output))
|
||||
const arr = extractJSONFromString(output, 'array')
|
||||
if (arr.length === 0) {
|
||||
throw new errors.OutputValidationError(`Invalid JSON array: ${output}`)
|
||||
}
|
||||
|
||||
const parsedOutput = arr[0]
|
||||
if (!Array.isArray(parsedOutput)) {
|
||||
throw new errors.OutputValidationError(
|
||||
`Invalid JSON array: ${JSON.stringify(parsedOutput)}`
|
||||
|
@ -59,19 +139,22 @@ export function parseArrayOutput(output: string): Array<any> {
|
|||
*/
|
||||
export function parseObjectOutput(output: string) {
|
||||
try {
|
||||
const trimmedOutput = extractJSONObjectFromString(output)
|
||||
output = JSON.parse(jsonrepair(trimmedOutput ?? output))
|
||||
const arr = extractJSONFromString(output, 'object')
|
||||
if (arr.length === 0) {
|
||||
throw new errors.OutputValidationError(`Invalid JSON object: ${output}`)
|
||||
}
|
||||
|
||||
if (Array.isArray(output)) {
|
||||
let parsedOutput = arr[0]
|
||||
if (Array.isArray(parsedOutput)) {
|
||||
// TODO
|
||||
output = output[0]
|
||||
} else if (typeof output !== 'object') {
|
||||
parsedOutput = parsedOutput[0]
|
||||
} else if (typeof parsedOutput !== 'object') {
|
||||
throw new errors.OutputValidationError(
|
||||
`Invalid JSON object: ${JSON.stringify(output)}`
|
||||
`Invalid JSON object: ${JSON.stringify(parsedOutput)}`
|
||||
)
|
||||
}
|
||||
|
||||
return output
|
||||
return parsedOutput
|
||||
} catch (err: any) {
|
||||
if (err instanceof JSONRepairError) {
|
||||
throw new errors.OutputValidationError(err.message, { cause: err })
|
||||
|
@ -149,6 +232,7 @@ export function parseOutput(output: string, outputSchema: ZodType<any>) {
|
|||
} else if (outputSchema instanceof z.ZodNumber) {
|
||||
result = parseNumberOutput(output, outputSchema)
|
||||
} else {
|
||||
// Default to string output...
|
||||
result = output
|
||||
}
|
||||
|
||||
|
|
20
src/utils.ts
20
src/utils.ts
|
@ -4,26 +4,6 @@ import { JsonValue } from 'type-fest'
|
|||
|
||||
import * as types from './types'
|
||||
|
||||
/**
 * Extracts a JSON object string from a given string.
 *
 * Returns the span that starts at the first `{` and ends at the last `}` in
 * `text`, whatever characters lie in between (including `\r\n` line breaks).
 *
 * FIXME: This breaks if there are multiple JSON objects in the string, since
 * everything from the first object's `{` to the last object's `}` is returned
 * as a single span.
 *
 * @param text - string from which to extract the JSON object
 * @returns extracted JSON object string, or `undefined` if no JSON object is found
 */
export function extractJSONObjectFromString(text: string): string | undefined {
  const start = text.indexOf('{')
  const end = text.lastIndexOf('}')

  // No opening brace, or no closing brace after it.
  if (start === -1 || end < start) {
    return undefined
  }

  return text.slice(start, end + 1)
}
|
||||
|
||||
/**
 * Extracts a JSON array string from a given string.
 *
 * Returns the span that starts at the first `[` and ends at the last `]` in
 * `text`, whatever characters lie in between (including `\r\n` line breaks).
 *
 * FIXME: This breaks if there are multiple JSON arrays in the string, since
 * everything from the first array's `[` to the last array's `]` is returned
 * as a single span.
 *
 * @param text - string from which to extract the JSON array
 * @returns extracted JSON array string, or `undefined` if no JSON array is found
 */
export function extractJSONArrayFromString(text: string): string | undefined {
  const start = text.indexOf('[')
  const end = text.lastIndexOf(']')

  // No opening bracket, or no closing bracket after it.
  if (start === -1 || end < start) {
    return undefined
  }

  return text.slice(start, end + 1)
}
|
||||
|
||||
/**
|
||||
* Pauses the execution of a function for a specified time.
|
||||
*
|
||||
|
|
|
@ -62,38 +62,11 @@ Generated by [AVA](https://avajs.dev).
|
|||
},
|
||||
]
|
||||
|
||||
## parseArrayOutput - handles and repairs broken JSON arrays correctly
|
||||
|
||||
> should repair and return [1, "two", 3] for [1, "two, 3]
|
||||
|
||||
[
|
||||
1,
|
||||
'two, 3]',
|
||||
]
|
||||
|
||||
> should repair and return ["a", "b", "c"] for Array: ["a, "b", "c"]. Error here!
|
||||
|
||||
[
|
||||
'a, ',
|
||||
'b',
|
||||
', ',
|
||||
'c',
|
||||
']',
|
||||
]
|
||||
|
||||
> should repair and return {"arr": ["value1", "value2"]} for Array in text {"arr": ["value1, "value2"]}
|
||||
|
||||
[
|
||||
'value1, ',
|
||||
'value2',
|
||||
']',
|
||||
]
|
||||
|
||||
## parseArrayOutput - throws error for invalid arrays
|
||||
|
||||
> Snapshot 1
|
||||
|
||||
'Invalid JSON array: "not a valid array"'
|
||||
'Invalid JSON array: not a valid array'
|
||||
|
||||
## parseObjectOutput - handles valid objects correctly
|
||||
|
||||
|
@ -131,25 +104,6 @@ Generated by [AVA](https://avajs.dev).
|
|||
name: 'John',
|
||||
}
|
||||
|
||||
## parseObjectOutput - handles and repairs broken JSON objects correctly
|
||||
|
||||
> should repair and return {"a":1, "b":2, "c":3} for {"a":1, "b":2, "c":3
|
||||
|
||||
{
|
||||
a: 1,
|
||||
b: 2,
|
||||
c: 3,
|
||||
}
|
||||
|
||||
> should repair and return {"name":"John","age":30,"city":"New York"} for Object: {"name":"John,"age":30,"city":"New York"}. Error here!
|
||||
|
||||
{
|
||||
'New York': '}',
|
||||
age: ':30,',
|
||||
city: ':',
|
||||
name: 'John,',
|
||||
}
|
||||
|
||||
## parseObjectOutput - handles JSON array of objects
|
||||
|
||||
> should return first object {"a":1,"b":2} for [{"a":1,"b":2},{"c":3,"d":4}]
|
||||
|
@ -163,7 +117,7 @@ Generated by [AVA](https://avajs.dev).
|
|||
|
||||
> Snapshot 1
|
||||
|
||||
'Invalid JSON object: "not a valid object"'
|
||||
'Invalid JSON object: not a valid object'
|
||||
|
||||
## parseBooleanOutput - handles `true` outputs correctly
|
||||
|
||||
|
@ -261,34 +215,3 @@ Generated by [AVA](https://avajs.dev).
|
|||
> Snapshot 1
|
||||
|
||||
'Invalid number output: not a number'
|
||||
|
||||
## parseOutput - handles array correctly
|
||||
|
||||
> should parse and return [1, 2, 3] for "[1, 2, 3]"
|
||||
|
||||
[
|
||||
1,
|
||||
2,
|
||||
3,
|
||||
]
|
||||
|
||||
## parseOutput - handles object correctly
|
||||
|
||||
> should parse and return {"a": 1, "b": "two"} for "{"a": 1, "b": "two"}"
|
||||
|
||||
{
|
||||
a: 1,
|
||||
b: 'two',
|
||||
}
|
||||
|
||||
## parseOutput - handles boolean correctly
|
||||
|
||||
> should parse and return true for "True"
|
||||
|
||||
true
|
||||
|
||||
## parseOutput - handles number correctly
|
||||
|
||||
> should parse and return 123.45 for "123.45"
|
||||
|
||||
123.45
|
||||
|
|
Plik binarny nie jest wyświetlany.
|
@ -2,6 +2,7 @@ import test from 'ava'
|
|||
import { z } from 'zod'
|
||||
|
||||
import {
|
||||
extractJSONFromString,
|
||||
parseArrayOutput,
|
||||
parseBooleanOutput,
|
||||
parseNumberOutput,
|
||||
|
@ -9,6 +10,95 @@ import {
|
|||
parseOutput
|
||||
} from '@/llms/parse-output'
|
||||
|
||||
test('extractJSONFromString should extract JSON object from string', (t) => {
  // Every case compares the whole result array, so a spurious extra
  // extraction (e.g. a nested object reported separately) fails the test.
  let jsonStr = 'Some text {"name":"John Doe"} more text'
  let result = extractJSONFromString(jsonStr, 'object')
  t.deepEqual(result, [{ name: 'John Doe' }])

  // A nested object belongs to its enclosing top-level object.
  jsonStr =
    'Some text {"name":"John Doe","age":42,"address":{"street":"Main Street","number":42}} more text'
  result = extractJSONFromString(jsonStr, 'object')
  t.deepEqual(result, [
    {
      name: 'John Doe',
      age: 42,
      address: { street: 'Main Street', number: 42 }
    }
  ])

  // An apostrophe inside a double-quoted value is part of the value.
  jsonStr = 'foo {"name":"John Doe","school":"St. John\'s"} bar'
  result = extractJSONFromString(jsonStr, 'object')
  t.deepEqual(result, [{ name: 'John Doe', school: "St. John's" }])
})
|
||||
|
||||
test('extractJSONFromString should extract an invalid JSON object from string', (t) => {
  // Single-quoted value: not valid JSON, expected to be repaired.
  let jsonStr = 'Some text {"name":\'John Doe\'} more text'
  let result = extractJSONFromString(jsonStr, 'object')
  // Compare the whole result so an unexpected extra extraction fails the test.
  t.deepEqual(result, [{ name: 'John Doe' }])

  // Trailing comma: not valid JSON, expected to be repaired.
  jsonStr = 'Some text {"name":"John Doe","age":42,} more text'
  result = extractJSONFromString(jsonStr, 'object')
  t.deepEqual(result, [{ name: 'John Doe', age: 42 }])
})
|
||||
|
||||
test('extractJSONFromString should extract multiple JSON objects from string', (t) => {
  // Comparing the whole result pins both the order and the exact number of
  // extracted objects.
  let jsonStr = 'Some text {"name":"John Doe"} more text {"name":"Jane Doe"}'
  let result = extractJSONFromString(jsonStr, 'object')
  t.deepEqual(result, [{ name: 'John Doe' }, { name: 'Jane Doe' }])

  // Nested objects must not be reported as additional top-level results.
  jsonStr =
    'Some text {"name":"John Doe","age":42,"address":{"street":"Main Street","number":42}} more text {"name":"Jane Doe","age":42,"address":{"street":"Main Street","number":42}}'
  result = extractJSONFromString(jsonStr, 'object')
  t.deepEqual(result, [
    {
      name: 'John Doe',
      age: 42,
      address: { street: 'Main Street', number: 42 }
    },
    {
      name: 'Jane Doe',
      age: 42,
      address: { street: 'Main Street', number: 42 }
    }
  ])
})
|
||||
|
||||
test('extractJSONFromString should extract JSON array from string', (t) => {
  let jsonString = 'Some text [1,2,3] more text'
  let result = extractJSONFromString(jsonString, 'array')
  // Compare the whole result so an unexpected extra extraction fails the test.
  t.deepEqual(result, [[1, 2, 3]])

  // Single quotes inside a double-quoted element are part of the element.
  jsonString = 'Some text ["foo","bar","\'quoted\'"] more text'
  result = extractJSONFromString(jsonString, 'array')
  t.deepEqual(result, [['foo', 'bar', "'quoted'"]])
})
|
||||
|
||||
test('extractJSONFromString should extract an invalid JSON array from string', (t) => {
  // Trailing comma: not valid JSON, expected to be repaired.
  let jsonString = 'Some text [1,2,3,] more text'
  let result = extractJSONFromString(jsonString, 'array')
  // Compare the whole result so an unexpected extra extraction fails the test.
  t.deepEqual(result, [[1, 2, 3]])

  // Single-quoted elements: not valid JSON, expected to be repaired.
  jsonString = "Some text ['foo','bar'] more text"
  result = extractJSONFromString(jsonString, 'array')
  t.deepEqual(result, [['foo', 'bar']])
})
|
||||
|
||||
test('extractJSONFromString should extract multiple JSON arrays from string', (t) => {
  const jsonString = 'Some text [1,2,3] more text [4,5,6]'
  const result = extractJSONFromString(jsonString, 'array')
  // Comparing the whole result pins both the order and the exact number of
  // extracted arrays.
  t.deepEqual(result, [
    [1, 2, 3],
    [4, 5, 6]
  ])
})
|
||||
|
||||
test('extractJSONFromString should return an empty array if no JSON object is found', (t) => {
  // Input without any `{...}` span yields no extracted values.
  t.deepEqual(extractJSONFromString('Some text', 'object'), [])
})
|
||||
|
||||
test('extractJSONFromString should return an empty array if no JSON array is found', (t) => {
  // Input without any `[...]` span yields no extracted values.
  t.deepEqual(extractJSONFromString('Some text', 'array'), [])
})
|
||||
|
||||
test('parseArrayOutput - handles valid arrays correctly', (t) => {
|
||||
const output1 = parseArrayOutput('[1,2,3]')
|
||||
const output2 = parseArrayOutput('["a", "b", "c"]')
|
||||
|
@ -40,22 +130,6 @@ test('parseArrayOutput - handles arrays surrounded by text correctly', (t) => {
|
|||
)
|
||||
})
|
||||
|
||||
test('parseArrayOutput - handles and repairs broken JSON arrays correctly', (t) => {
  const cases = [
    {
      input: '[1, "two, 3]',
      message: 'should repair and return [1, "two", 3] for [1, "two, 3]'
    },
    {
      input: 'Array: ["a, "b", "c"]. Error here!',
      message:
        'should repair and return ["a", "b", "c"] for Array: ["a, "b", "c"]. Error here!'
    },
    {
      input: 'Array in text {"arr": ["value1, "value2"]}',
      message:
        'should repair and return {"arr": ["value1", "value2"]} for Array in text {"arr": ["value1, "value2"]}'
    }
  ]

  // Parse every input before recording any snapshot, so a parse failure
  // aborts the test before the first snapshot is taken.
  const parsedCases = cases.map(({ input, message }) => ({
    output: parseArrayOutput(input),
    message
  }))

  for (const { output, message } of parsedCases) {
    t.snapshot(output, message)
  }
})
|
||||
|
||||
test('parseArrayOutput - throws error for invalid arrays', (t) => {
|
||||
const error = t.throws(
|
||||
() => {
|
||||
|
@ -99,22 +173,6 @@ test('parseObjectOutput - handles objects surrounded by text correctly', (t) =>
|
|||
)
|
||||
})
|
||||
|
||||
test('parseObjectOutput - handles and repairs broken JSON objects correctly', (t) => {
  const cases = [
    {
      input: '{"a":1, "b":2, "c":3',
      message:
        'should repair and return {"a":1, "b":2, "c":3} for {"a":1, "b":2, "c":3'
    },
    {
      input: 'Object: {"name":"John,"age":30,"city":"New York"}. Error here!',
      message:
        'should repair and return {"name":"John","age":30,"city":"New York"} for Object: {"name":"John,"age":30,"city":"New York"}. Error here!'
    }
  ]

  // Parse every input before recording any snapshot, so a parse failure
  // aborts the test before the first snapshot is taken.
  const parsedCases = cases.map(({ input, message }) => ({
    output: parseObjectOutput(input),
    message
  }))

  for (const { output, message } of parsedCases) {
    t.snapshot(output, message)
  }
})
|
||||
|
||||
test('parseObjectOutput - handles JSON array of objects', (t) => {
|
||||
const output = parseObjectOutput('[{"a":1,"b":2},{"c":3,"d":4}]')
|
||||
|
||||
|
|
|
@ -6,8 +6,6 @@ import {
|
|||
chunkString,
|
||||
defaultIDGeneratorFn,
|
||||
extractFunctionIdentifierFromString,
|
||||
extractJSONArrayFromString,
|
||||
extractJSONObjectFromString,
|
||||
isValidTaskIdentifier,
|
||||
sleep,
|
||||
stringifyForModel,
|
||||
|
@ -33,30 +31,6 @@ test('isValidTaskIdentifier - invalid', async (t) => {
|
|||
t.false(isValidTaskIdentifier('-foo'))
|
||||
})
|
||||
|
||||
test('extractJSONObjectFromString should extract JSON object from string', (t) => {
  // The braces and everything between them come back verbatim, as a string.
  const extracted = extractJSONObjectFromString(
    'Some text {"name":"John Doe"} more text'
  )
  t.is(extracted, '{"name":"John Doe"}')
})
|
||||
|
||||
test('extractJSONArrayFromString should extract JSON array from string', (t) => {
  // The brackets and everything between them come back verbatim, as a string.
  const extracted = extractJSONArrayFromString('Some text [1,2,3] more text')
  t.is(extracted, '[1,2,3]')
})
|
||||
|
||||
test('extractJSONObjectFromString should return undefined if no JSON object is found', (t) => {
  // Input without braces has nothing to extract.
  t.is(extractJSONObjectFromString('Some text'), undefined)
})
|
||||
|
||||
test('extractJSONArrayFromString should return undefined if no JSON array is found', (t) => {
  // Input without brackets has nothing to extract.
  t.is(extractJSONArrayFromString('Some text'), undefined)
})
|
||||
|
||||
test('sleep should delay execution', async (t) => {
|
||||
const start = Date.now()
|
||||
await sleep(1000) // for example, 1000ms / 1sec
|
||||
|
|
Ładowanie…
Reference in New Issue