chatgpt-api/src/utils.ts

import { customAlphabet, urlAlphabet } from 'nanoid'

import * as types from './types'

/**
 * Finds the first balanced `open`…`close` span in `text`.
 *
 * The scan is aware of double-quoted JSON strings (including backslash
 * escapes), so delimiters that appear inside string values do not affect
 * nesting depth. Every occurrence of `open` is tried as a starting point, in
 * order, until one of them closes.
 *
 * If no balanced span exists, this falls back to the span from the first
 * `open` to the last `close` (when such a span exists), so malformed input
 * still yields a best-effort candidate instead of nothing.
 *
 * @param text - string to search
 * @param open - single-character opening delimiter, e.g. `{`
 * @param close - single-character closing delimiter, e.g. `}`
 * @returns the extracted span, or `undefined` if none is found
 */
function extractBalancedSpan(
  text: string,
  open: string,
  close: string
): string | undefined {
  const firstStart = text.indexOf(open)

  for (
    let start = firstStart;
    start !== -1;
    start = text.indexOf(open, start + 1)
  ) {
    let depth = 0
    let inString = false
    let escaped = false

    for (let i = start; i < text.length; i++) {
      const ch = text[i]

      if (inString) {
        if (escaped) {
          // The character following a backslash is always literal.
          escaped = false
        } else if (ch === '\\') {
          escaped = true
        } else if (ch === '"') {
          inString = false
        }
      } else if (ch === '"') {
        inString = true
      } else if (ch === open) {
        depth++
      } else if (ch === close && --depth === 0) {
        return text.slice(start, i + 1)
      }
    }
  }

  // Best-effort fallback for unbalanced input: first opener to last closer.
  const lastEnd = text.lastIndexOf(close)
  return firstStart !== -1 && lastEnd > firstStart
    ? text.slice(firstStart, lastEnd + 1)
    : undefined
}

/**
 * Extracts a JSON object string from a given string.
 *
 * Returns the first balanced `{...}` span, so surrounding prose, additional
 * JSON objects later in the string, and any kind of line terminator inside the
 * object (including `\r\n`) are handled. The result is not parsed or
 * validated.
 *
 * @param text - string from which to extract the JSON object
 * @returns extracted JSON object string, or `undefined` if no JSON object is found
 */
export function extractJSONObjectFromString(text: string): string | undefined {
  return extractBalancedSpan(text, '{', '}')
}

/**
 * Extracts a JSON array string from a given string.
 *
 * Returns the first balanced `[...]` span, so surrounding prose, additional
 * JSON arrays later in the string, and any kind of line terminator inside the
 * array (including `\r\n`) are handled. The result is not parsed or validated.
 *
 * @param text - string from which to extract the JSON array
 * @returns extracted JSON array string, or `undefined` if no JSON array is found
 */
export function extractJSONArrayFromString(text: string): string | undefined {
  return extractBalancedSpan(text, '[', ']')
}

/**
 * Returns a promise that settles once the given delay has elapsed.
 *
 * Typical use is `await sleep(250)` inside an async function.
 *
 * @param ms - delay in milliseconds, passed straight to `setTimeout`
 * @returns promise that resolves (with no value) after the timer fires
 */
export function sleep(ms: number) {
  return new Promise((done) => {
    setTimeout(done, ms)
  })
}

/**
 * A default ID generator function that uses a custom alphabet based on URL safe symbols.
 *
 * This is the generator returned by nanoid's `customAlphabet` when given
 * nanoid's exported `urlAlphabet`; no explicit size is passed, so the ID
 * length is whatever `customAlphabet` defaults to.
 *
 * NOTE(review): the exact alphabet and default length come from the installed
 * nanoid version — confirm there if callers depend on them.
 */
export const defaultIDGeneratorFn: types.IDGeneratorFunction =
  customAlphabet(urlAlphabet)

// A letter or underscore, followed by up to 63 letters, digits, underscores,
// or hyphens (1–64 characters in total).
const taskNameRegex = /^[a-zA-Z_][a-zA-Z0-9_-]{0,63}$/

/**
 * Checks whether a string is an acceptable task identifier.
 *
 * @param id - candidate identifier
 * @returns `true` if `id` is non-empty and matches the task name pattern, `false` otherwise
 */
export function isValidTaskIdentifier(id: string): boolean {
  // Reject falsy input up front so it never reaches the regex.
  if (!id) {
    return false
  }

  return taskNameRegex.test(id)
}

/**
 * Chunks a string into an array of chunks.
 *
 * The text is split on single spaces and words are greedily packed into
 * chunks, joined by a single space, so that no chunk exceeds `maxLength`.
 * Chunks are emitted in the same order as the words appear in `text`, and
 * empty chunks are never emitted.
 *
 * A single word longer than `maxLength` cannot be packed; it is emitted as its
 * own chunk, truncated to `maxLength` characters with a trailing `...` marker.
 * When `maxLength` is too small to fit the marker, the word is simply cut to
 * `maxLength` characters.
 *
 * @param text - string to chunk
 * @param maxLength - maximum length of each chunk; expected to be positive
 *   (a non-positive value yields no chunks for non-empty words)
 * @returns array of chunks
 */
export function chunkString(text: string, maxLength: number): string[] {
  const ellipsis = '...'
  const chunks: string[] = []
  let chunk = ''

  // Emits the pending chunk (if it has any visible content) and resets it.
  const flush = (): void => {
    const trimmed = chunk.trim()
    if (trimmed) {
      chunks.push(trimmed)
    }
    chunk = ''
  }

  for (const word of text.split(' ')) {
    if (word.length > maxLength) {
      // Flush first so the truncated word keeps its position in the output.
      flush()

      // `substring` clamps negative indices to 0, so tiny limits are safe.
      const truncated =
        maxLength >= ellipsis.length
          ? word.substring(0, maxLength - ellipsis.length) + ellipsis
          : word.substring(0, maxLength)

      if (truncated) {
        chunks.push(truncated)
      }
      continue
    }

    // A separating space is only needed when the chunk already has content.
    const separator = chunk ? ' ' : ''

    if (chunk.length + separator.length + word.length > maxLength) {
      flush()
      chunk = word
    } else {
      chunk += separator + word
    }
  }

  flush()

  return chunks
}

/**
 * Stringifies a JSON value for use in an LLM prompt.
 *
 * Works in two passes: every object key is first tagged with a freshly
 * generated prefix while serializing, then the serialized text is scanned for
 * quoted strings starting with that prefix and both the quotes and the prefix
 * are stripped. String values are left quoted because they never carry the
 * prefix.
 *
 * Keys are emitted exactly as JSON escaped them, minus the surrounding quotes.
 * A key containing a double quote therefore appears with its `\"` escape
 * intact rather than being cut off at the embedded quote.
 *
 * @param json - JSON value to stringify
 * @returns stringified value with all double quotes around object keys removed
 */
export function stringifyForModel(json: types.JsonValue): string {
  const UNIQUE_PREFIX = defaultIDGeneratorFn()

  // Matches `"<prefix><key>"`, where <key> may contain backslash escapes
  // (such as `\"`) but no unescaped double quote.
  const prefixedKeyPattern = new RegExp(
    '"' + UNIQUE_PREFIX + '((?:[^"\\\\]|\\\\.)*)"',
    'g'
  )

  return (
    JSON.stringify(json, replacer)
      // Remove all double quotes around keys:
      .replace(prefixedKeyPattern, '$1')
  )

  /**
   * Replacer function prefixing all keys with a unique identifier.
   *
   * Arrays and primitives pass through untouched; plain objects are copied
   * into a new object whose own enumerable keys carry the prefix.
   */
  function replacer(_: string, value: unknown): unknown {
    if (!value || typeof value !== 'object' || Array.isArray(value)) {
      return value
    }

    const replacement: Record<string, unknown> = {}

    for (const [k, v] of Object.entries(value)) {
      replacement[UNIQUE_PREFIX + k] = v
    }

    return replacement
  }
}
feat: add task id and improve core task design 2023-06-11 02:59:33 +00:00			`import { customAlphabet, urlAlphabet } from 'nanoid'`

			`import * as types from './types'`

fix: make quantifier lazy to not span multiple JSON blocks, tests + docs 2023-06-15 03:30:16 +00:00			`/**`
fix: revert to original regex for now 2023-06-15 14:05:52 +00:00			`* Extracts a JSON object string from a given string.`
fix: make quantifier lazy to not span multiple JSON blocks, tests + docs 2023-06-15 03:30:16 +00:00			`*`
			`* @param text - string from which to extract the JSON object`
			* @returns extracted JSON object string, or `undefined` if no JSON object is found
			`*/`
feat: improve openai function/task/tool support 2023-06-14 04:39:19 +00:00			`export function extractJSONObjectFromString(text: string): string \| undefined {`
fix: revert to original regex for now 2023-06-15 14:05:52 +00:00			`return text.match(/\{(.\|\n)*\}/gm)?.[0] // FIXME: This breaks if there are multiple JSON objects in the string`
feat: improve openai function/task/tool support 2023-06-14 04:39:19 +00:00			`}`
🍦 2023-05-24 06:15:59 +00:00
fix: make quantifier lazy to not span multiple JSON blocks, tests + docs 2023-06-15 03:30:16 +00:00			`/**`
fix: revert to original regex for now 2023-06-15 14:05:52 +00:00			`* Extracts a JSON array string from a given string.`
fix: make quantifier lazy to not span multiple JSON blocks, tests + docs 2023-06-15 03:30:16 +00:00			`*`
			`* @param text - string from which to extract the JSON array`
			* @returns extracted JSON array string, or `undefined` if no JSON array is found
			`*/`
feat: improve openai function/task/tool support 2023-06-14 04:39:19 +00:00			`export function extractJSONArrayFromString(text: string): string \| undefined {`
fix: revert to original regex for now 2023-06-15 14:05:52 +00:00			`return text.match(/\[(.\|\n)*\]/gm)?.[0] // FIXME: This breaks if there are multiple JSON arrays in the string`
feat: improve openai function/task/tool support 2023-06-14 04:39:19 +00:00			`}`
feat: add sleep util 2023-06-07 01:03:39 +00:00
fix: make quantifier lazy to not span multiple JSON blocks, tests + docs 2023-06-15 03:30:16 +00:00			`/**`
			`* Pauses the execution of a function for a specified time.`
			`*`
			`* @param ms - number of milliseconds to pause`
			`* @returns promise that resolves after the specified number of milliseconds`
			`*/`
feat: improve openai function/task/tool support 2023-06-14 04:39:19 +00:00			`export function sleep(ms: number) {`
			`return new Promise((resolve) => setTimeout(resolve, ms))`
			`}`
feat: add task id and improve core task design 2023-06-11 02:59:33 +00:00
fix: make quantifier lazy to not span multiple JSON blocks, tests + docs 2023-06-15 03:30:16 +00:00			`/**`
			`* A default ID generator function that uses a custom alphabet based on URL safe symbols.`
			`*/`
feat: add task id and improve core task design 2023-06-11 02:59:33 +00:00			`export const defaultIDGeneratorFn: types.IDGeneratorFunction =`
			`customAlphabet(urlAlphabet)`
feat: improve openai function/task/tool support 2023-06-14 04:39:19 +00:00
			`const taskNameRegex = /^[a-zA-Z_][a-zA-Z0-9_-]{0,63}$/`
			`export function isValidTaskIdentifier(id: string): boolean {`
			`return !!id && taskNameRegex.test(id)`
			`}`
feat: handle message chunking in Twilio service 2023-06-15 03:00:02 +00:00
			`/**`
fix: make quantifier lazy to not span multiple JSON blocks, tests + docs 2023-06-15 03:30:16 +00:00			`* Chunks a string into an array of chunks.`
feat: handle message chunking in Twilio service 2023-06-15 03:00:02 +00:00			`*`
			`* @param text - string to chunk`
fix: make quantifier lazy to not span multiple JSON blocks, tests + docs 2023-06-15 03:30:16 +00:00			`* @param maxLength - maximum length of each chunk`
			`* @returns array of chunks`
feat: handle message chunking in Twilio service 2023-06-15 03:00:02 +00:00			`*/`
feat: add utility to stringify JSON for prompt 2023-06-16 00:58:37 +00:00			`export function chunkString(text: string, maxLength: number): string[] {`
feat: handle message chunking in Twilio service 2023-06-15 03:00:02 +00:00			`const words = text.split(' ')`
			`const chunks: string[] = []`
			`let chunk = ''`

			`for (const word of words) {`
fix: make quantifier lazy to not span multiple JSON blocks, tests + docs 2023-06-15 03:30:16 +00:00			`if (word.length > maxLength) {`
feat: handle message chunking in Twilio service 2023-06-15 03:00:02 +00:00			`// Truncate the word if it's too long and indicate that it was truncated:`
fix: make quantifier lazy to not span multiple JSON blocks, tests + docs 2023-06-15 03:30:16 +00:00			`chunks.push(word.substring(0, maxLength - 3) + '...')`
			`} else if ((chunk + word + 1).length > maxLength) {`
			`// +1 accounts for the space between words`
feat: handle message chunking in Twilio service 2023-06-15 03:00:02 +00:00			`chunks.push(chunk.trim())`
			`chunk = word`
			`} else {`
fix: make quantifier lazy to not span multiple JSON blocks, tests + docs 2023-06-15 03:30:16 +00:00			`chunk += (chunk ? ' ' : '') + word`
feat: handle message chunking in Twilio service 2023-06-15 03:00:02 +00:00			`}`
			`}`

			`if (chunk) {`
			`chunks.push(chunk.trim())`
			`}`

			`return chunks`
			`}`
feat: add utility to stringify JSON for prompt 2023-06-16 00:58:37 +00:00
			`/**`
			`* Stringifies a JSON value for use in an LLM prompt.`
			`*`
			`* @param json - JSON value to stringify`
			`* @returns stringified value with all double quotes around object keys removed`
			`*/`
			`export function stringifyForModel(json: types.JsonValue): string {`
			`const UNIQUE_PREFIX = defaultIDGeneratorFn()`
			`return (`
			`JSON.stringify(json, replacer)`
			`// Remove all double quotes around keys:`
			`.replace(new RegExp('"' + UNIQUE_PREFIX + '(.*?)"', 'g'), '$1')`
			`)`

			`/**`
			`* Replacer function prefixing all keys with a unique identifier.`
			`*/`
			`function replacer(_: string, value: any) {`
			`if (value && typeof value === 'object') {`
			`if (Array.isArray(value)) {`
			`return value`
			`}`

			`const replacement = {}`

			`for (const k in value) {`
			`if (Object.hasOwnProperty.call(value, k)) {`
			`replacement[UNIQUE_PREFIX + k] = value[k]`
			`}`
			`}`

			`return replacement`
			`}`

			`return value`
			`}`
			`}`