diff --git a/src/compiler/parser.ts b/src/compiler/parser.ts index be81d789c57d0..62f7502c8c9c6 100644 --- a/src/compiler/parser.ts +++ b/src/compiler/parser.ts @@ -8571,10 +8571,6 @@ namespace Parser { let tags: JSDocTag[]; let tagsPos: number; let tagsEnd: number; - let linkEnd: number; - let commentsPos: number | undefined; - let comments: string[] = []; - const parts: JSDocComment[] = []; // + 3 for leading /**, - 5 in total for /** */ return scanner.scanRange(start + 3, length - 5, () => { @@ -8585,27 +8581,46 @@ namespace Parser { // + 4 for leading '/** ' // + 1 because the last index of \n is always one index before the first character in the line and coincidentally, if there is no \n before start, it is -1, which is also one index before the first character let indent = start - (content.lastIndexOf("\n", start) + 1) + 4; - function pushComment(text: string) { + let commentsPos: number | undefined; + let linkEnd: number | undefined; // TODO: This isn't set anywhere that I can see, so should be removed + const parts: JSDocComment[] = []; + function indentMargin() { if (!margin) { margin = indent; } - comments.push(text); - indent += text.length; + indent += scanner.getTextPos() - scanner.getTokenPos() + } + function addComment() { + // TODO: addComment likely needs to be aware of indent/margin, at least when adding things after a newline + if (cpos === cend) return + comments += scanner.getText().slice(cpos, cend) + cpos = cend // TODO: Most callers (I think) immediately set cpos and cend themselves; TODO: if cpos > cend, I'm not sure what that means } + // TODO: These might be the same as some existing variable (specifically, maybe cend = commentsPos) + let comments = ""; + let cpos = scanner.getTextPos() // -OR- 0 ? getStartPos ? + let cend = cpos + nextTokenJSDoc(); - while (parseOptionalJsdoc(SyntaxKind.WhitespaceTrivia)); + while (parseOptionalJsdoc(SyntaxKind.WhitespaceTrivia)) { + cpos = cend = scanner.getTokenPos(); + } if (parseOptionalJsdoc(SyntaxKind.NewLineTrivia)) { state = JSDocState.BeginningOfLine; indent = 0; + cpos = cend = scanner.getTokenPos(); } loop: while (true) { switch (token()) { case SyntaxKind.AtToken: if (state === JSDocState.BeginningOfLine || state === JSDocState.SawAsterisk) { - removeTrailingWhitespace(comments); - if (!commentsPos) commentsPos = getNodePos(); + cend = scanner.getStartPos() + addComment() + comments = comments.trimEnd() + if (!commentsPos) commentsPos = scanner.getStartPos(); addTag(parseTag(indent)); + cend = cpos = scanner.getTokenPos() // NOTE: According to usejsdoc.org, a tag goes to end of line, except the last tag. // Real-world comments may break this rule, so "BeginningOfLine" will not be a real line beginning // for malformed examples like `/** @param {string} x @returns {number} the length */` @@ -8613,53 +8628,63 @@ namespace Parser { margin = undefined; } else { - pushComment(scanner.getTokenText()); + cend = scanner.getTextPos() + indentMargin(); } break; case SyntaxKind.NewLineTrivia: - comments.push(scanner.getTokenText()); + cend = scanner.getTextPos() state = JSDocState.BeginningOfLine; indent = 0; break; case SyntaxKind.AsteriskToken: - const asterisk = scanner.getTokenText(); if (state === JSDocState.SawAsterisk || state === JSDocState.SavingComments) { // If we've already seen an asterisk, then we can no longer parse a tag on this line state = JSDocState.SavingComments; - pushComment(asterisk); + cend = scanner.getTextPos() + indentMargin(); } else { // Ignore the first asterisk on a line + // cend = scanner.getTokenPos() + // addComment() + cpos = cend = scanner.getTextPos() state = JSDocState.SawAsterisk; - indent += asterisk.length; + indent += scanner.getTextPos() - scanner.getTokenPos() // asterisk.length; } break; case SyntaxKind.WhitespaceTrivia: // only collect whitespace if we're already saving comments or have just crossed the comment indent margin - const whitespace = scanner.getTokenText(); + const whitespaceLength = scanner.getTextPos() - scanner.getTokenPos() if (state === JSDocState.SavingComments) { - comments.push(whitespace); + cend = scanner.getTextPos() } - else if (margin !== undefined && indent + whitespace.length > margin) { - comments.push(whitespace.slice(margin - indent)); + else if (margin !== undefined && indent + whitespaceLength > margin) { + cend = scanner.getTokenPos() + addComment() + cpos = cend = scanner.getTokenPos() + (margin - indent) + // comments += whitespace.slice(margin - indent); } - indent += whitespace.length; + indent += whitespaceLength; break; case SyntaxKind.EndOfFileToken: + addComment() break loop; case SyntaxKind.OpenBraceToken: state = JSDocState.SavingComments; const commentEnd = scanner.getStartPos(); - const linkStart = scanner.getTextPos() - 1; + cend = scanner.getTokenPos() + addComment() + const linkStart = scanner.getTextPos() - 1; // TODO: SHould probably be scanner.getTokenPos() const link = parseJSDocLink(linkStart); if (link) { if (!linkEnd) { - removeLeadingNewlines(comments); + comments = removeLeadingNewlines(comments); } - parts.push(finishNode(factory.createJSDocText(comments.join("")), linkEnd ?? start, commentEnd)); + parts.push(finishNode(factory.createJSDocText(comments), linkEnd ?? start, commentEnd)); parts.push(link); - comments = []; - linkEnd = scanner.getTextPos(); + cpos = cend = linkEnd ?? start // this just makes no sense + comments = ""; break; } // fallthrough if it's not a {@link sequence @@ -8668,30 +8693,29 @@ namespace Parser { // wasn't a tag, we can no longer parse a tag on this line until we hit the next // line break. state = JSDocState.SavingComments; - pushComment(scanner.getTokenText()); + cend = scanner.getTextPos() + indentMargin(); break; } nextTokenJSDoc(); } - removeTrailingWhitespace(comments); + addComment() + comments = comments.trimEnd() if (parts.length && comments.length) { - parts.push(finishNode(factory.createJSDocText(comments.join("")), linkEnd ?? start, commentsPos)); + parts.push(finishNode(factory.createJSDocText(comments), linkEnd ?? start, commentsPos)); } if (parts.length && tags) Debug.assertIsDefined(commentsPos, "having parsed tags implies that the end of the comment span should be set"); const tagsArray = tags && createNodeArray(tags, tagsPos, tagsEnd); - return finishNode(factory.createJSDocComment(parts.length ? createNodeArray(parts, start, commentsPos) : comments.length ? comments.join("") : undefined, tagsArray), start, end); + return finishNode(factory.createJSDocComment(parts.length ? createNodeArray(parts, start, commentsPos) : comments.length ? comments : undefined, tagsArray), start, end); }); - function removeLeadingNewlines(comments: string[]) { - while (comments.length && (comments[0] === "\n" || comments[0] === "\r")) { - comments.shift(); - } - } - - function removeTrailingWhitespace(comments: string[]) { - while (comments.length && comments[comments.length - 1].trim() === "") { - comments.pop(); + function removeLeadingNewlines(comments: string) { + // TODO: Also a regex would work, and perhaps faster + let i = 0 + while (i < comments.length && (comments[i] === "\n" || comments[i] === "\r")) { + i++; } + return i > 0 ? comments.slice(i) : comments; } function isNextNonwhitespaceTokenEndOfFile(): boolean { @@ -8842,23 +8866,30 @@ namespace Parser { function parseTagComments(indent: number, initialMargin?: string): string | NodeArray | undefined { const commentsPos = getNodePos(); - let comments: string[] = []; + let comments = ""; const parts: JSDocComment[] = []; let linkEnd; let state = JSDocState.BeginningOfLine; let previousWhitespace = true; let margin: number | undefined; - function pushComment(text: string) { + let cpos = scanner.getTokenPos() + let cend = cpos; + function indentMargin() { if (!margin) { margin = indent; } - comments.push(text); - indent += text.length; + indent += scanner.getTextPos() - scanner.getTokenPos() // text.length; + } + function addComment() { + if (cpos === end) return + comments += scanner.getText().slice(cpos, cend) + cpos = cend // TODO: if cpos > cend, I'm not sure what the means, but maybe don't reset it } if (initialMargin !== undefined) { // jump straight to saving comments if there is some initial indentation if (initialMargin !== "") { - pushComment(initialMargin); + comments = initialMargin; + indentMargin(); } state = JSDocState.SawAsterisk; } @@ -8867,17 +8898,18 @@ namespace Parser { switch (tok) { case SyntaxKind.NewLineTrivia: state = JSDocState.BeginningOfLine; - // don't use pushComment here because we want to keep the margin unchanged - comments.push(scanner.getTokenText()); + // don't indent margin here because we want to keep the margin unchanged + cend = scanner.getTextPos() indent = 0; break; case SyntaxKind.AtToken: if (state === JSDocState.SavingBackticks || state === JSDocState.SavingComments && (!previousWhitespace || lookAhead(isNextJSDocTokenWhitespace))) { // @ doesn't start a new tag inside ``, and inside a comment, only after whitespace or not before whitespace - comments.push(scanner.getTokenText()); + cend = scanner.getTextPos() break; } + cend--; scanner.setTextPos(scanner.getTextPos() - 1); // falls through case SyntaxKind.EndOfFileToken: @@ -8885,30 +8917,42 @@ namespace Parser { break loop; case SyntaxKind.WhitespaceTrivia: if (state === JSDocState.SavingComments || state === JSDocState.SavingBackticks) { - pushComment(scanner.getTokenText()); + cend = scanner.getTextPos() + indentMargin() } else { - const whitespace = scanner.getTokenText(); + const whitespaceLength = scanner.getTextPos() - scanner.getTokenPos() // if the whitespace crosses the margin, take only the whitespace that passes the margin - if (margin !== undefined && indent + whitespace.length > margin) { - comments.push(whitespace.slice(margin - indent)); + if (margin !== undefined && indent + whitespaceLength > margin) { + cend = scanner.getTokenPos() + addComment() + cpos = cend = scanner.getTokenPos() + (margin - indent) + // comments += whitespace.slice(margin - indent); } - indent += whitespace.length; + else { + addComment() + cpos = cend = scanner.getTextPos() + } + // TODO: Why is this only in the else branch, but in both branches for top-level comments? Seems wrong. + indent += whitespaceLength; } break; case SyntaxKind.OpenBraceToken: state = JSDocState.SavingComments; const commentEnd = scanner.getStartPos(); - const linkStart = scanner.getTextPos() - 1; + const linkStart = scanner.getTextPos() - 1; // TODO: Should probably be scanner.getTokenPos + cend = scanner.getTokenPos(); + addComment() const link = parseJSDocLink(linkStart); if (link) { - parts.push(finishNode(factory.createJSDocText(comments.join("")), linkEnd ?? commentsPos, commentEnd)); + parts.push(finishNode(factory.createJSDocText(comments), linkEnd ?? commentsPos, commentEnd)); parts.push(link); - comments = []; + comments = ""; linkEnd = scanner.getTextPos(); + cpos = cend = linkEnd } else { - pushComment(scanner.getTokenText()); + cend = scanner.getTextPos() } break; case SyntaxKind.BacktickToken: @@ -8918,11 +8962,14 @@ namespace Parser { else { state = JSDocState.SavingBackticks; } - pushComment(scanner.getTokenText()); + cend = scanner.getTextPos() break; case SyntaxKind.AsteriskToken: if (state === JSDocState.BeginningOfLine) { // leading asterisks start recording on the *next* (non-whitespace) token + // cend = scanner.getTokenPos() + // addComment() + cpos = cend = scanner.getTextPos() state = JSDocState.SawAsterisk; indent += 1; break; @@ -8933,23 +8980,22 @@ namespace Parser { if (state !== JSDocState.SavingBackticks) { state = JSDocState.SavingComments; // leading identifiers start recording as well } - pushComment(scanner.getTokenText()); + cend = scanner.getTextPos() break; } previousWhitespace = token() === SyntaxKind.WhitespaceTrivia; tok = nextTokenJSDoc(); } - - removeLeadingNewlines(comments); - removeTrailingWhitespace(comments); + addComment() + comments = removeLeadingNewlines(comments).trimEnd(); if (parts.length) { if (comments.length) { - parts.push(finishNode(factory.createJSDocText(comments.join("")), linkEnd ?? commentsPos)); + parts.push(finishNode(factory.createJSDocText(comments), linkEnd ?? commentsPos)); } return createNodeArray(parts, commentsPos, scanner.getTextPos()); } else if (comments.length) { - return comments.join(""); + return comments; } } @@ -9151,7 +9197,7 @@ namespace Parser { } function parseAuthorNameAndEmail(): JSDocText { - const comments: string[] = []; + let comments = ""; // TODO: Should be named authorNameAndEmail, sheesh let inEmail = false; let token = scanner.getToken(); while (token !== SyntaxKind.EndOfFileToken && token !== SyntaxKind.NewLineTrivia) { @@ -9162,15 +9208,15 @@ namespace Parser { break; } else if (token === SyntaxKind.GreaterThanToken && inEmail) { - comments.push(scanner.getTokenText()); + comments += scanner.getTokenText(); scanner.setTextPos(scanner.getTokenPos() + 1); break; } - comments.push(scanner.getTokenText()); + comments += scanner.getTokenText(); token = nextTokenJSDoc(); } - return factory.createJSDocText(comments.join("")); + return factory.createJSDocText(comments); // TODO: Should return undefined if (!comments) } function parseImplementsTag(start: number, tagName: Identifier, margin: number, indentText: string): JSDocImplementsTag {