feat: support separator

old-agentic-v1^2
Philipp Burckhardt 2023-07-07 23:27:15 -04:00
rodzic a723d35e78
commit 8fc71d127a
2 zmienionych plików z 22 dodań i 13 usunięć

Wyświetl plik

@@ -67,28 +67,36 @@ export function extractFunctionIdentifierFromString(
* *
* @param text - string to chunk * @param text - string to chunk
* @param maxLength - maximum length of each chunk * @param maxLength - maximum length of each chunk
* @param separator - character to split on (will be included in each previous chunk)
* @returns array of chunks * @returns array of chunks
*/ */
export function chunkString(text: string, maxLength: number): string[] { export function chunkString(
const words = text.split(' ') text: string,
maxLength: number,
separator = ' '
): string[] {
const words = text.split(new RegExp(`(?<=${separator})`))
const chunks: string[] = [] const chunks: string[] = []
let chunk = '' let chunk = ''
for (const word of words) { for (const word of words) {
// If the word length is more than maxLength, push the current chunk and the truncated word
if (word.length > maxLength) { if (word.length > maxLength) {
// Truncate the word if it's too long and indicate that it was truncated: if (chunk) {
chunks.push(chunk)
chunk = ''
}
chunks.push(word.substring(0, maxLength - 3) + '...') chunks.push(word.substring(0, maxLength - 3) + '...')
} else if ((chunk + ' ' + word).length > maxLength) { } else if ((chunk && chunk + separator + word).length > maxLength) {
chunks.push(chunk.trim()) chunks.push(chunk)
chunk = word chunk = word
} else { } else {
chunk += (chunk ? ' ' : '') + word chunk += word
} }
} }
if (chunk) { if (chunk) chunks.push(chunk)
chunks.push(chunk.trim())
}
return chunks return chunks
} }

Wyświetl plik

@@ -52,10 +52,11 @@ test('chunkString should split string into chunks', (t) => {
const text = 'Hello, this is a test string for chunkString function.' const text = 'Hello, this is a test string for chunkString function.'
const chunks = chunkString(text, 12) const chunks = chunkString(text, 12)
t.deepEqual(chunks, [ t.deepEqual(chunks, [
'Hello, this', 'Hello, ',
'is a test', 'this is a ',
'string for', 'test ',
'chunkString', 'string for ',
'chunkString ',
'function.' 'function.'
]) ])
}) })