kopia lustrzana https://github.com/transitive-bullshit/chatgpt-api
feat: support separator
rodzic
a723d35e78
commit
8fc71d127a
|
@ -67,28 +67,36 @@ export function extractFunctionIdentifierFromString(
|
||||||
*
|
*
|
||||||
* @param text - string to chunk
|
* @param text - string to chunk
|
||||||
* @param maxLength - maximum length of each chunk
|
* @param maxLength - maximum length of each chunk
|
||||||
|
* @param separator - character to split on (it is kept at the end of the piece that precedes it, not dropped)
|
||||||
* @returns array of chunks
|
* @returns array of chunks
|
||||||
*/
|
*/
|
||||||
export function chunkString(text: string, maxLength: number): string[] {
|
export function chunkString(
|
||||||
const words = text.split(' ')
|
text: string,
|
||||||
|
maxLength: number,
|
||||||
|
separator = ' '
|
||||||
|
): string[] {
|
||||||
|
const words = text.split(new RegExp(`(?<=${separator})`))
|
||||||
const chunks: string[] = []
|
const chunks: string[] = []
|
||||||
let chunk = ''
|
let chunk = ''
|
||||||
|
|
||||||
for (const word of words) {
|
for (const word of words) {
|
||||||
|
// If a single word is longer than maxLength, first flush the current chunk (if any), then push the word truncated to maxLength with a trailing '...'
|
||||||
if (word.length > maxLength) {
|
if (word.length > maxLength) {
|
||||||
// Truncate the word if it's too long and indicate that it was truncated:
|
if (chunk) {
|
||||||
|
chunks.push(chunk)
|
||||||
|
chunk = ''
|
||||||
|
}
|
||||||
|
|
||||||
chunks.push(word.substring(0, maxLength - 3) + '...')
|
chunks.push(word.substring(0, maxLength - 3) + '...')
|
||||||
} else if ((chunk + ' ' + word).length > maxLength) {
|
} else if ((chunk && chunk + separator + word).length > maxLength) {
|
||||||
chunks.push(chunk.trim())
|
chunks.push(chunk)
|
||||||
chunk = word
|
chunk = word
|
||||||
} else {
|
} else {
|
||||||
chunk += (chunk ? ' ' : '') + word
|
chunk += word
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (chunk) {
|
if (chunk) chunks.push(chunk)
|
||||||
chunks.push(chunk.trim())
|
|
||||||
}
|
|
||||||
|
|
||||||
return chunks
|
return chunks
|
||||||
}
|
}
|
||||||
|
|
|
@ -52,10 +52,11 @@ test('chunkString should split string into chunks', (t) => {
|
||||||
const text = 'Hello, this is a test string for chunkString function.'
|
const text = 'Hello, this is a test string for chunkString function.'
|
||||||
const chunks = chunkString(text, 12)
|
const chunks = chunkString(text, 12)
|
||||||
t.deepEqual(chunks, [
|
t.deepEqual(chunks, [
|
||||||
'Hello, this',
|
'Hello, ',
|
||||||
'is a test',
|
'this is a ',
|
||||||
'string for',
|
'test ',
|
||||||
'chunkString',
|
'string for ',
|
||||||
|
'chunkString ',
|
||||||
'function.'
|
'function.'
|
||||||
])
|
])
|
||||||
})
|
})
|
||||||
|
|
Ładowanie…
Reference in New Issue