From c67df0ad871593a7cbd9b6b53e7f323850a3afc7 Mon Sep 17 00:00:00 2001 From: Michael Ball Date: Sat, 11 Dec 2021 13:34:37 -0800 Subject: [PATCH 1/5] Use `Array.from`, which is a more Unicode-friendly form of `split()`. --- src/threads.js | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/threads.js b/src/threads.js index 6f4b5aa1..fdbd171e 100644 --- a/src/threads.js +++ b/src/threads.js @@ -4352,7 +4352,7 @@ Process.prototype.reportUnicode = function (string) { } str = isNil(string) ? '\u0000' : string.toString(); if (str.length > 1) { - return this.reportUnicode(new List(str.split(''))); + return this.reportUnicode(new List(Array.from(str))); } } else { str = isNil(string) ? '\u0000' : string.toString(); @@ -4421,8 +4421,7 @@ Process.prototype.reportBasicTextSplit = function (string, delimiter) { del = /\s+/; break; case 'letter': - del = ''; - break; + return Array.from(str); case 'csv': return this.parseCSV(string); case 'json': From 665c2d6e3b0dc9ca8b9626f58dc39cd8aeb783b2 Mon Sep 17 00:00:00 2001 From: Michael Ball Date: Sat, 11 Dec 2021 14:11:22 -0800 Subject: [PATCH 2/5] CSV: Use `Array.from`, which is a more Unicode-friendly form of `split()`. --- src/lists.js | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/lists.js b/src/lists.js index b676c00c..90653ea8 100644 --- a/src/lists.js +++ b/src/lists.js @@ -922,7 +922,7 @@ List.prototype.asCSV = function () { var items = this.itemsArray(), rows = []; - + function encodeCell(atomicValue) { var string = isNil(atomicValue) ? 
'' : atomicValue.toString(), cell; @@ -932,7 +932,7 @@ List.prototype.asCSV = function () { return string; } cell = ['\"']; - string.split('').forEach(letter => { + Array.from(string).forEach(letter => { cell.push(letter); if (letter === '\"') { cell.push(letter); @@ -1094,7 +1094,7 @@ List.prototype.blockify = function (limit = 500, count = [0]) { block.isDraggable = true; slots.removeInput(); - + // fill the slots with the data for (i = 0; i < len && count[0] < limit; i += 1) { value = this.at(i + 1); From 9b555f578b4c5792d6e80a278766cd7d5f50ee84 Mon Sep 17 00:00:00 2001 From: Michael Ball Date: Sat, 11 Dec 2021 14:19:52 -0800 Subject: [PATCH 3/5] Split: Properly turn JS array into list --- src/threads.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/threads.js b/src/threads.js index fdbd171e..e71e1d57 100644 --- a/src/threads.js +++ b/src/threads.js @@ -4421,7 +4421,7 @@ Process.prototype.reportBasicTextSplit = function (string, delimiter) { del = /\s+/; break; case 'letter': - return Array.from(str); + return new List(Array.from(str)); case 'csv': return this.parseCSV(string); case 'json': From 5af67eae8bc35f7e27191bd95f12807105558b8c Mon Sep 17 00:00:00 2001 From: Michael Ball Date: Sat, 11 Dec 2021 14:27:56 -0800 Subject: [PATCH 4/5] Split: Handle empty delimiters in a Unicode-safe way --- src/threads.js | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/threads.js b/src/threads.js index e71e1d57..4bc2815e 100644 --- a/src/threads.js +++ b/src/threads.js @@ -4420,6 +4420,8 @@ Process.prototype.reportBasicTextSplit = function (string, delimiter) { str = str.trim(); del = /\s+/; break; + case isNil(delimiter): + case '': case 'letter': return new List(Array.from(str)); case 'csv': @@ -4433,7 +4435,7 @@ Process.prototype.reportBasicTextSplit = function (string, delimiter) { return this.parseCSVfields(string); */ default: - del = isNil(delimiter) ? 
'' : delimiter.toString(); + del = delimiter.toString(); } return new List(str.split(del)); }; From b9c7198a77f1a03d77d8139bc0b4aa0a4f0fe9fe Mon Sep 17 00:00:00 2001 From: Michael Ball Date: Sat, 11 Dec 2021 14:37:33 -0800 Subject: [PATCH 5/5] Unicode: Properly handle multi-byte emoji with hyperblocks --- src/threads.js | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/threads.js b/src/threads.js index 4bc2815e..05d8b82d 100644 --- a/src/threads.js +++ b/src/threads.js @@ -4344,15 +4344,16 @@ Process.prototype.reportStringSize = function (data) { }; Process.prototype.reportUnicode = function (string) { - var str; + var str, unicodeList; if (this.enableHyperOps) { if (string instanceof List) { return string.map(each => this.reportUnicode(each)); } str = isNil(string) ? '\u0000' : string.toString(); - if (str.length > 1) { - return this.reportUnicode(new List(Array.from(str))); + unicodeList = Array.from(str); + if (unicodeList.length > 1) { + return this.reportUnicode(new List(unicodeList)); } } else { str = isNil(string) ? '\u0000' : string.toString();