Skip to content

Commit 0d24d6d

Browse files
BridgeAR authored and danielleadams committed
util: escape lone surrogate code points using .inspect()
Unpaired surrogate code points have no representation in UTF8. Therefore, such code points are just "random" output that is unreadable. Instead, escape the code points similar to C0 and C1 control characters.

Refs: https://unicodebook.readthedocs.io/unicode_encodings.html#utf-16-surrogate-pairs

Signed-off-by: Ruben Bridgewater <[email protected]>

PR-URL: #41001
Reviewed-By: James M Snell <[email protected]>
1 parent a6460e2 commit 0d24d6d

File tree

2 files changed

+62
-8
lines changed

2 files changed

+62
-8
lines changed

lib/internal/util/inspect.js

Lines changed: 20 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -176,10 +176,10 @@ const kArrayType = 1;
176176
const kArrayExtrasType = 2;
177177

178178
/* eslint-disable no-control-regex */
179-
const strEscapeSequencesRegExp = /[\x00-\x1f\x27\x5c\x7f-\x9f]/;
180-
const strEscapeSequencesReplacer = /[\x00-\x1f\x27\x5c\x7f-\x9f]/g;
181-
const strEscapeSequencesRegExpSingle = /[\x00-\x1f\x5c\x7f-\x9f]/;
182-
const strEscapeSequencesReplacerSingle = /[\x00-\x1f\x5c\x7f-\x9f]/g;
179+
const strEscapeSequencesRegExp = /[\x00-\x1f\x27\x5c\x7f-\x9f]|[\ud800-\udbff](?![\udc00-\udfff])|(?<![\ud800-\udbff])[\udc00-\udfff]/;
180+
const strEscapeSequencesReplacer = /[\x00-\x1f\x27\x5c\x7f-\x9f]|[\ud800-\udbff](?![\udc00-\udfff])|(?<![\ud800-\udbff])[\udc00-\udfff]/g;
181+
const strEscapeSequencesRegExpSingle = /[\x00-\x1f\x5c\x7f-\x9f]|[\ud800-\udbff](?![\udc00-\udfff])|(?<![\ud800-\udbff])[\udc00-\udfff]/;
182+
const strEscapeSequencesReplacerSingle = /[\x00-\x1f\x5c\x7f-\x9f]|[\ud800-\udbff](?![\udc00-\udfff])|(?<![\ud800-\udbff])[\udc00-\udfff]/g;
183183
/* eslint-enable no-control-regex */
184184

185185
const keyStrRegExp = /^[a-zA-Z_][a-zA-Z_0-9]*$/;
@@ -458,7 +458,10 @@ function addQuotes(str, quotes) {
458458
return `'${str}'`;
459459
}
460460

461-
const escapeFn = (str) => meta[StringPrototypeCharCodeAt(str)];
461+
function escapeFn(str) {
462+
const charCode = StringPrototypeCharCodeAt(str);
463+
return meta.length > charCode ? meta[charCode] : `\\u${charCode.toString(16)}`;
464+
}
462465

463466
// Escape control characters, single quotes and the backslash.
464467
// This is similar to JSON stringify escaping.
@@ -496,8 +499,7 @@ function strEscape(str) {
496499

497500
let result = '';
498501
let last = 0;
499-
const lastIndex = str.length;
500-
for (let i = 0; i < lastIndex; i++) {
502+
for (let i = 0; i < str.length; i++) {
501503
const point = StringPrototypeCharCodeAt(str, i);
502504
if (point === singleQuote ||
503505
point === 92 ||
@@ -509,10 +511,20 @@ function strEscape(str) {
509511
result += `${StringPrototypeSlice(str, last, i)}${meta[point]}`;
510512
}
511513
last = i + 1;
514+
} else if (point >= 0xd800 && point <= 0xdfff) {
515+
if (point <= 0xdbff && i + 1 < str.length) {
516+
const point = StringPrototypeCharCodeAt(str, i + 1);
517+
if (point >= 0xdc00 && point <= 0xdfff) {
518+
i++;
519+
continue;
520+
}
521+
}
522+
result += `${StringPrototypeSlice(str, last, i)}${`\\u${point.toString(16)}`}`;
523+
last = i + 1;
512524
}
513525
}
514526

515-
if (last !== lastIndex) {
527+
if (last !== str.length) {
516528
result += StringPrototypeSlice(str, last);
517529
}
518530
return addQuotes(result, singleQuote);

test/parallel/test-util-inspect.js

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -837,6 +837,48 @@ assert.strictEqual(util.inspect(Object.create(Date.prototype)), 'Date {}');
837837
);
838838
}
839839

840+
// Escape unpaired surrogate pairs.
841+
{
842+
const edgeChar = String.fromCharCode(0xd799);
843+
844+
for (let charCode = 0xD800; charCode < 0xDFFF; charCode++) {
845+
const surrogate = String.fromCharCode(charCode);
846+
847+
assert.strictEqual(
848+
util.inspect(surrogate),
849+
`'\\u${charCode.toString(16)}'`
850+
);
851+
assert.strictEqual(
852+
util.inspect(`${'a'.repeat(200)}${surrogate}`),
853+
`'${'a'.repeat(200)}\\u${charCode.toString(16)}'`
854+
);
855+
assert.strictEqual(
856+
util.inspect(`${surrogate}${'a'.repeat(200)}`),
857+
`'\\u${charCode.toString(16)}${'a'.repeat(200)}'`
858+
);
859+
if (charCode < 0xdc00) {
860+
const highSurrogate = surrogate;
861+
const lowSurrogate = String.fromCharCode(charCode + 1024);
862+
assert(
863+
!util.inspect(
864+
`${edgeChar}${highSurrogate}${lowSurrogate}${edgeChar}`
865+
).includes('\\u')
866+
);
867+
assert.strictEqual(
868+
(util.inspect(
869+
`${highSurrogate}${highSurrogate}${lowSurrogate}`
870+
).match(/\\u/g) ?? []).length,
871+
1
872+
);
873+
} else {
874+
assert.strictEqual(
875+
util.inspect(`${edgeChar}${surrogate}${edgeChar}`),
876+
`'${edgeChar}\\u${charCode.toString(16)}${edgeChar}'`
877+
);
878+
}
879+
}
880+
}
881+
840882
// Test util.inspect.styles and util.inspect.colors.
841883
{
842884
function testColorStyle(style, input, implicit) {

0 commit comments

Comments
 (0)