diff --git a/src/services/services.ts b/src/services/services.ts index def895880e683..0a901f0df0bd0 100644 --- a/src/services/services.ts +++ b/src/services/services.ts @@ -1143,6 +1143,9 @@ module ts { InMultiLineCommentTrivia, InSingleQuoteStringLiteral, InDoubleQuoteStringLiteral, + InTemplateHeadOrNoSubstitutionTemplate, + InTemplateMiddleOrTail, + InTemplateSubstitutionPosition, } export enum TokenClass { @@ -1168,7 +1171,26 @@ module ts { } export interface Classifier { - getClassificationsForLine(text: string, lexState: EndOfLineState, classifyKeywordsInGenerics?: boolean): ClassificationResult; + /** + * Gives lexical classifications of tokens on a line without any syntactic context. + * For instance, a token consisting of the text 'string' can be either an identifier + * named 'string' or the keyword 'string', however, because this classifier is not aware, + * it relies on certain heuristics to give acceptable results. For classifications where + * speed trumps accuracy, this function is preferable; however, for true accuracy, the + * syntactic classifier is ideal. In fact, in certain editing scenarios, combining the + * lexical, syntactic, and semantic classifiers may issue the best user experience. + * + * @param text The text of a line to classify. + * @param lexState The state of the lexical classifier at the end of the previous line. + * @param syntacticClassifierAbsent Whether the client is *not* using a syntactic classifier. + * If there is no syntactic classifier (syntacticClassifierAbsent=true), + * certain heuristics may be used in its place; however, if there is a + * syntactic classifier (syntacticClassifierAbsent=false), certain + * classifications which may be incorrectly categorized will be given + * back as Identifiers in order to allow the syntactic classifier to + * subsume the classification. + */ + getClassificationsForLine(text: string, lexState: EndOfLineState, syntacticClassifierAbsent: boolean): ClassificationResult; } /** @@ -5617,6 +5639,28 @@ module ts { noRegexTable[SyntaxKind.TrueKeyword] = true; noRegexTable[SyntaxKind.FalseKeyword] = true; + // Just a stack of TemplateHeads and OpenCurlyBraces, used to perform rudimentary (inexact) + // classification on template strings. Because of the context free nature of templates, + // the only precise way to classify a template portion would be by propagating the stack across + // lines, just as we do with the end-of-line state. However, this is a burden for implementers, + // and the behavior is entirely subsumed by the syntactic classifier anyway, so we instead + // flatten any nesting when the template stack is non-empty and encode it in the end-of-line state. + // Situations in which this fails are + // 1) When template strings are nested across different lines: + // `hello ${ `world + // ` }` + // + // Where on the second line, you will get the closing of a template, + // a closing curly, and a new template. + // + // 2) When substitution expressions have curly braces and the curly brace falls on the next line: + // `hello ${ () => { + // return "world" } } ` + // + // Where on the second line, you will get the 'return' keyword, + // a string literal, and a template end consisting of '} } `'. + var templateStack: SyntaxKind[] = []; + function isAccessibilityModifier(kind: SyntaxKind) { switch (kind) { case SyntaxKind.PublicKeyword: @@ -5650,13 +5694,19 @@ module ts { // if there are more cases we want the classifier to be better at. return true; } - - // 'classifyKeywordsInGenerics' should be 'true' when a syntactic classifier is not present. - function getClassificationsForLine(text: string, lexState: EndOfLineState, classifyKeywordsInGenerics?: boolean): ClassificationResult { + + // If there is a syntactic classifier ('syntacticClassifierAbsent' is false), + // we will be more conservative in order to avoid conflicting with the syntactic classifier. + function getClassificationsForLine(text: string, lexState: EndOfLineState, syntacticClassifierAbsent?: boolean): ClassificationResult { var offset = 0; var token = SyntaxKind.Unknown; var lastNonTriviaToken = SyntaxKind.Unknown; + // Empty out the template stack for reuse. + while (templateStack.length > 0) { + templateStack.pop(); + } + // If we're in a string literal, then prepend: "\ // (and a newline). That way when we lex we'll think we're still in a string literal. // @@ -5675,6 +5725,17 @@ module ts { text = "/*\n" + text; offset = 3; break; + case EndOfLineState.InTemplateHeadOrNoSubstitutionTemplate: + text = "`\n" + text; + offset = 2; + break; + case EndOfLineState.InTemplateMiddleOrTail: + text = "}\n" + text; + offset = 2; + // fallthrough + case EndOfLineState.InTemplateSubstitutionPosition: + templateStack.push(SyntaxKind.TemplateHead); + break; } scanner.setText(text); @@ -5739,12 +5800,45 @@ module ts { token === SyntaxKind.StringKeyword || token === SyntaxKind.NumberKeyword || token === SyntaxKind.BooleanKeyword) { - if (angleBracketStack > 0 && !classifyKeywordsInGenerics) { - // If it looks like we're could be in something generic, don't classify this - // as a keyword. We may just get overwritten by the syntactic classifier, - // causing a noisy experience for the user. - token = SyntaxKind.Identifier; - } + if (angleBracketStack > 0 && !syntacticClassifierAbsent) { + // If it looks like we're could be in something generic, don't classify this + // as a keyword. We may just get overwritten by the syntactic classifier, + // causing a noisy experience for the user. + token = SyntaxKind.Identifier; + } + } + else if (token === SyntaxKind.TemplateHead) { + templateStack.push(token); + } + else if (token === SyntaxKind.OpenBraceToken) { + // If we don't have anything on the template stack, + // then we aren't trying to keep track of a previously scanned template head. + if (templateStack.length > 0) { + templateStack.push(token); + } + } + else if (token === SyntaxKind.CloseBraceToken) { + // If we don't have anything on the template stack, + // then we aren't trying to keep track of a previously scanned template head. + if (templateStack.length > 0) { + var lastTemplateStackToken = lastOrUndefined(templateStack); + + if (lastTemplateStackToken === SyntaxKind.TemplateHead) { + token = scanner.reScanTemplateToken(); + + // Only pop on a TemplateTail; a TemplateMiddle indicates there is more for us. + if (token === SyntaxKind.TemplateTail) { + templateStack.pop(); + } + else { + Debug.assert(token === SyntaxKind.TemplateMiddle, "Should have been a template middle. Was " + token); + } + } + else { + Debug.assert(lastTemplateStackToken === SyntaxKind.OpenBraceToken, "Should have been an open brace. Was: " + token); + templateStack.pop(); + } + } } lastNonTriviaToken = token; @@ -5789,6 +5883,22 @@ module ts { result.finalLexState = EndOfLineState.InMultiLineCommentTrivia; } } + else if (isTemplateLiteralKind(token)) { + if (scanner.isUnterminated()) { + if (token === SyntaxKind.TemplateTail) { + result.finalLexState = EndOfLineState.InTemplateMiddleOrTail; + } + else if (token === SyntaxKind.NoSubstitutionTemplateLiteral) { + result.finalLexState = EndOfLineState.InTemplateHeadOrNoSubstitutionTemplate; + } + else { + Debug.fail("Only 'NoSubstitutionTemplateLiteral's and 'TemplateTail's can be unterminated; got SyntaxKind #" + token); + } + } + } + else if (templateStack.length > 0 && lastOrUndefined(templateStack) === SyntaxKind.TemplateHead) { + result.finalLexState = EndOfLineState.InTemplateSubstitutionPosition; + } } } @@ -5892,6 +6002,9 @@ module ts { return TokenClass.Whitespace; case SyntaxKind.Identifier: default: + if (isTemplateLiteralKind(token)) { + return TokenClass.StringLiteral; + } return TokenClass.Identifier; } } diff --git a/src/services/shims.ts b/src/services/shims.ts index 0f9f1a12cd199..a0eb1cb1ac42c 100644 --- a/src/services/shims.ts +++ b/src/services/shims.ts @@ -165,7 +165,7 @@ module ts { } export interface ClassifierShim extends Shim { - getClassificationsForLine(text: string, lexState: EndOfLineState, classifyKeywordsInGenerics?: boolean): string; + getClassificationsForLine(text: string, lexState: EndOfLineState, syntacticClassifierAbsent?: boolean): string; } export interface CoreServicesShim extends Shim { diff --git a/tests/baselines/reference/APISample_compile.js b/tests/baselines/reference/APISample_compile.js index 9a44ae12df01a..b91a706aa4519 100644 --- a/tests/baselines/reference/APISample_compile.js +++ b/tests/baselines/reference/APISample_compile.js @@ -1741,6 +1741,9 @@ declare module "typescript" { InMultiLineCommentTrivia = 1, InSingleQuoteStringLiteral = 2, InDoubleQuoteStringLiteral = 3, + InTemplateHeadOrNoSubstitutionTemplate = 4, + InTemplateMiddleOrTail = 5, + InTemplateSubstitutionPosition = 6, } enum TokenClass { Punctuation = 0, @@ -1762,7 +1765,26 @@ declare module "typescript" { classification: TokenClass; } interface Classifier { - getClassificationsForLine(text: string, lexState: EndOfLineState, classifyKeywordsInGenerics?: boolean): ClassificationResult; + /** + * Gives lexical classifications of tokens on a line without any syntactic context. + * For instance, a token consisting of the text 'string' can be either an identifier + * named 'string' or the keyword 'string', however, because this classifier is not aware, + * it relies on certain heuristics to give acceptable results. For classifications where + * speed trumps accuracy, this function is preferable; however, for true accuracy, the + * syntactic classifier is ideal. In fact, in certain editing scenarios, combining the + * lexical, syntactic, and semantic classifiers may issue the best user experience. + * + * @param text The text of a line to classify. + * @param lexState The state of the lexical classifier at the end of the previous line. + * @param syntacticClassifierAbsent Whether the client is *not* using a syntactic classifier. + * If there is no syntactic classifier (syntacticClassifierAbsent=true), + * certain heuristics may be used in its place; however, if there is a + * syntactic classifier (syntacticClassifierAbsent=false), certain + * classifications which may be incorrectly categorized will be given + * back as Identifiers in order to allow the syntactic classifier to + * subsume the classification. + */ + getClassificationsForLine(text: string, lexState: EndOfLineState, syntacticClassifierAbsent: boolean): ClassificationResult; } /** * The document registry represents a store of SourceFile objects that can be shared between diff --git a/tests/baselines/reference/APISample_compile.types b/tests/baselines/reference/APISample_compile.types index 6945832023689..ae53c66680708 100644 --- a/tests/baselines/reference/APISample_compile.types +++ b/tests/baselines/reference/APISample_compile.types @@ -5539,6 +5539,15 @@ declare module "typescript" { InDoubleQuoteStringLiteral = 3, >InDoubleQuoteStringLiteral : EndOfLineState + + InTemplateHeadOrNoSubstitutionTemplate = 4, +>InTemplateHeadOrNoSubstitutionTemplate : EndOfLineState + + InTemplateMiddleOrTail = 5, +>InTemplateMiddleOrTail : EndOfLineState + + InTemplateSubstitutionPosition = 6, +>InTemplateSubstitutionPosition : EndOfLineState } enum TokenClass { >TokenClass : TokenClass @@ -5594,12 +5603,31 @@ declare module "typescript" { interface Classifier { >Classifier : Classifier - getClassificationsForLine(text: string, lexState: EndOfLineState, classifyKeywordsInGenerics?: boolean): ClassificationResult; ->getClassificationsForLine : (text: string, lexState: EndOfLineState, classifyKeywordsInGenerics?: boolean) => ClassificationResult + /** + * Gives lexical classifications of tokens on a line without any syntactic context. + * For instance, a token consisting of the text 'string' can be either an identifier + * named 'string' or the keyword 'string', however, because this classifier is not aware, + * it relies on certain heuristics to give acceptable results. For classifications where + * speed trumps accuracy, this function is preferable; however, for true accuracy, the + * syntactic classifier is ideal. In fact, in certain editing scenarios, combining the + * lexical, syntactic, and semantic classifiers may issue the best user experience. + * + * @param text The text of a line to classify. + * @param lexState The state of the lexical classifier at the end of the previous line. + * @param syntacticClassifierAbsent Whether the client is *not* using a syntactic classifier. + * If there is no syntactic classifier (syntacticClassifierAbsent=true), + * certain heuristics may be used in its place; however, if there is a + * syntactic classifier (syntacticClassifierAbsent=false), certain + * classifications which may be incorrectly categorized will be given + * back as Identifiers in order to allow the syntactic classifier to + * subsume the classification. + */ + getClassificationsForLine(text: string, lexState: EndOfLineState, syntacticClassifierAbsent: boolean): ClassificationResult; +>getClassificationsForLine : (text: string, lexState: EndOfLineState, syntacticClassifierAbsent: boolean) => ClassificationResult >text : string >lexState : EndOfLineState >EndOfLineState : EndOfLineState ->classifyKeywordsInGenerics : boolean +>syntacticClassifierAbsent : boolean >ClassificationResult : ClassificationResult } /** diff --git a/tests/baselines/reference/APISample_linter.js b/tests/baselines/reference/APISample_linter.js index 5413cdb8a7890..68c13edb7dbca 100644 --- a/tests/baselines/reference/APISample_linter.js +++ b/tests/baselines/reference/APISample_linter.js @@ -1772,6 +1772,9 @@ declare module "typescript" { InMultiLineCommentTrivia = 1, InSingleQuoteStringLiteral = 2, InDoubleQuoteStringLiteral = 3, + InTemplateHeadOrNoSubstitutionTemplate = 4, + InTemplateMiddleOrTail = 5, + InTemplateSubstitutionPosition = 6, } enum TokenClass { Punctuation = 0, @@ -1793,7 +1796,26 @@ declare module "typescript" { classification: TokenClass; } interface Classifier { - getClassificationsForLine(text: string, lexState: EndOfLineState, classifyKeywordsInGenerics?: boolean): ClassificationResult; + /** + * Gives lexical classifications of tokens on a line without any syntactic context. + * For instance, a token consisting of the text 'string' can be either an identifier + * named 'string' or the keyword 'string', however, because this classifier is not aware, + * it relies on certain heuristics to give acceptable results. For classifications where + * speed trumps accuracy, this function is preferable; however, for true accuracy, the + * syntactic classifier is ideal. In fact, in certain editing scenarios, combining the + * lexical, syntactic, and semantic classifiers may issue the best user experience. + * + * @param text The text of a line to classify. + * @param lexState The state of the lexical classifier at the end of the previous line. + * @param syntacticClassifierAbsent Whether the client is *not* using a syntactic classifier. + * If there is no syntactic classifier (syntacticClassifierAbsent=true), + * certain heuristics may be used in its place; however, if there is a + * syntactic classifier (syntacticClassifierAbsent=false), certain + * classifications which may be incorrectly categorized will be given + * back as Identifiers in order to allow the syntactic classifier to + * subsume the classification. + */ + getClassificationsForLine(text: string, lexState: EndOfLineState, syntacticClassifierAbsent: boolean): ClassificationResult; } /** * The document registry represents a store of SourceFile objects that can be shared between diff --git a/tests/baselines/reference/APISample_linter.types b/tests/baselines/reference/APISample_linter.types index 55613f8d3e3d5..0c153e7b1dc20 100644 --- a/tests/baselines/reference/APISample_linter.types +++ b/tests/baselines/reference/APISample_linter.types @@ -5683,6 +5683,15 @@ declare module "typescript" { InDoubleQuoteStringLiteral = 3, >InDoubleQuoteStringLiteral : EndOfLineState + + InTemplateHeadOrNoSubstitutionTemplate = 4, +>InTemplateHeadOrNoSubstitutionTemplate : EndOfLineState + + InTemplateMiddleOrTail = 5, +>InTemplateMiddleOrTail : EndOfLineState + + InTemplateSubstitutionPosition = 6, +>InTemplateSubstitutionPosition : EndOfLineState } enum TokenClass { >TokenClass : TokenClass @@ -5738,12 +5747,31 @@ declare module "typescript" { interface Classifier { >Classifier : Classifier - getClassificationsForLine(text: string, lexState: EndOfLineState, classifyKeywordsInGenerics?: boolean): ClassificationResult; ->getClassificationsForLine : (text: string, lexState: EndOfLineState, classifyKeywordsInGenerics?: boolean) => ClassificationResult + /** + * Gives lexical classifications of tokens on a line without any syntactic context. + * For instance, a token consisting of the text 'string' can be either an identifier + * named 'string' or the keyword 'string', however, because this classifier is not aware, + * it relies on certain heuristics to give acceptable results. For classifications where + * speed trumps accuracy, this function is preferable; however, for true accuracy, the + * syntactic classifier is ideal. In fact, in certain editing scenarios, combining the + * lexical, syntactic, and semantic classifiers may issue the best user experience. + * + * @param text The text of a line to classify. + * @param lexState The state of the lexical classifier at the end of the previous line. + * @param syntacticClassifierAbsent Whether the client is *not* using a syntactic classifier. + * If there is no syntactic classifier (syntacticClassifierAbsent=true), + * certain heuristics may be used in its place; however, if there is a + * syntactic classifier (syntacticClassifierAbsent=false), certain + * classifications which may be incorrectly categorized will be given + * back as Identifiers in order to allow the syntactic classifier to + * subsume the classification. + */ + getClassificationsForLine(text: string, lexState: EndOfLineState, syntacticClassifierAbsent: boolean): ClassificationResult; +>getClassificationsForLine : (text: string, lexState: EndOfLineState, syntacticClassifierAbsent: boolean) => ClassificationResult >text : string >lexState : EndOfLineState >EndOfLineState : EndOfLineState ->classifyKeywordsInGenerics : boolean +>syntacticClassifierAbsent : boolean >ClassificationResult : ClassificationResult } /** diff --git a/tests/baselines/reference/APISample_transform.js b/tests/baselines/reference/APISample_transform.js index 13ec31cd7d205..0e80f6b55b963 100644 --- a/tests/baselines/reference/APISample_transform.js +++ b/tests/baselines/reference/APISample_transform.js @@ -1773,6 +1773,9 @@ declare module "typescript" { InMultiLineCommentTrivia = 1, InSingleQuoteStringLiteral = 2, InDoubleQuoteStringLiteral = 3, + InTemplateHeadOrNoSubstitutionTemplate = 4, + InTemplateMiddleOrTail = 5, + InTemplateSubstitutionPosition = 6, } enum TokenClass { Punctuation = 0, @@ -1794,7 +1797,26 @@ declare module "typescript" { classification: TokenClass; } interface Classifier { - getClassificationsForLine(text: string, lexState: EndOfLineState, classifyKeywordsInGenerics?: boolean): ClassificationResult; + /** + * Gives lexical classifications of tokens on a line without any syntactic context. + * For instance, a token consisting of the text 'string' can be either an identifier + * named 'string' or the keyword 'string', however, because this classifier is not aware, + * it relies on certain heuristics to give acceptable results. For classifications where + * speed trumps accuracy, this function is preferable; however, for true accuracy, the + * syntactic classifier is ideal. In fact, in certain editing scenarios, combining the + * lexical, syntactic, and semantic classifiers may issue the best user experience. + * + * @param text The text of a line to classify. + * @param lexState The state of the lexical classifier at the end of the previous line. + * @param syntacticClassifierAbsent Whether the client is *not* using a syntactic classifier. + * If there is no syntactic classifier (syntacticClassifierAbsent=true), + * certain heuristics may be used in its place; however, if there is a + * syntactic classifier (syntacticClassifierAbsent=false), certain + * classifications which may be incorrectly categorized will be given + * back as Identifiers in order to allow the syntactic classifier to + * subsume the classification. + */ + getClassificationsForLine(text: string, lexState: EndOfLineState, syntacticClassifierAbsent: boolean): ClassificationResult; } /** * The document registry represents a store of SourceFile objects that can be shared between diff --git a/tests/baselines/reference/APISample_transform.types b/tests/baselines/reference/APISample_transform.types index a8ba6e9f52177..0c201b0a5fd44 100644 --- a/tests/baselines/reference/APISample_transform.types +++ b/tests/baselines/reference/APISample_transform.types @@ -5635,6 +5635,15 @@ declare module "typescript" { InDoubleQuoteStringLiteral = 3, >InDoubleQuoteStringLiteral : EndOfLineState + + InTemplateHeadOrNoSubstitutionTemplate = 4, +>InTemplateHeadOrNoSubstitutionTemplate : EndOfLineState + + InTemplateMiddleOrTail = 5, +>InTemplateMiddleOrTail : EndOfLineState + + InTemplateSubstitutionPosition = 6, +>InTemplateSubstitutionPosition : EndOfLineState } enum TokenClass { >TokenClass : TokenClass @@ -5690,12 +5699,31 @@ declare module "typescript" { interface Classifier { >Classifier : Classifier - getClassificationsForLine(text: string, lexState: EndOfLineState, classifyKeywordsInGenerics?: boolean): ClassificationResult; ->getClassificationsForLine : (text: string, lexState: EndOfLineState, classifyKeywordsInGenerics?: boolean) => ClassificationResult + /** + * Gives lexical classifications of tokens on a line without any syntactic context. + * For instance, a token consisting of the text 'string' can be either an identifier + * named 'string' or the keyword 'string', however, because this classifier is not aware, + * it relies on certain heuristics to give acceptable results. For classifications where + * speed trumps accuracy, this function is preferable; however, for true accuracy, the + * syntactic classifier is ideal. In fact, in certain editing scenarios, combining the + * lexical, syntactic, and semantic classifiers may issue the best user experience. + * + * @param text The text of a line to classify. + * @param lexState The state of the lexical classifier at the end of the previous line. + * @param syntacticClassifierAbsent Whether the client is *not* using a syntactic classifier. + * If there is no syntactic classifier (syntacticClassifierAbsent=true), + * certain heuristics may be used in its place; however, if there is a + * syntactic classifier (syntacticClassifierAbsent=false), certain + * classifications which may be incorrectly categorized will be given + * back as Identifiers in order to allow the syntactic classifier to + * subsume the classification. + */ + getClassificationsForLine(text: string, lexState: EndOfLineState, syntacticClassifierAbsent: boolean): ClassificationResult; +>getClassificationsForLine : (text: string, lexState: EndOfLineState, syntacticClassifierAbsent: boolean) => ClassificationResult >text : string >lexState : EndOfLineState >EndOfLineState : EndOfLineState ->classifyKeywordsInGenerics : boolean +>syntacticClassifierAbsent : boolean >ClassificationResult : ClassificationResult } /** diff --git a/tests/baselines/reference/APISample_watcher.js b/tests/baselines/reference/APISample_watcher.js index 5c3f944faf18a..27a2ce5e41556 100644 --- a/tests/baselines/reference/APISample_watcher.js +++ b/tests/baselines/reference/APISample_watcher.js @@ -1810,6 +1810,9 @@ declare module "typescript" { InMultiLineCommentTrivia = 1, InSingleQuoteStringLiteral = 2, InDoubleQuoteStringLiteral = 3, + InTemplateHeadOrNoSubstitutionTemplate = 4, + InTemplateMiddleOrTail = 5, + InTemplateSubstitutionPosition = 6, } enum TokenClass { Punctuation = 0, @@ -1831,7 +1834,26 @@ declare module "typescript" { classification: TokenClass; } interface Classifier { - getClassificationsForLine(text: string, lexState: EndOfLineState, classifyKeywordsInGenerics?: boolean): ClassificationResult; + /** + * Gives lexical classifications of tokens on a line without any syntactic context. + * For instance, a token consisting of the text 'string' can be either an identifier + * named 'string' or the keyword 'string', however, because this classifier is not aware, + * it relies on certain heuristics to give acceptable results. For classifications where + * speed trumps accuracy, this function is preferable; however, for true accuracy, the + * syntactic classifier is ideal. In fact, in certain editing scenarios, combining the + * lexical, syntactic, and semantic classifiers may issue the best user experience. + * + * @param text The text of a line to classify. + * @param lexState The state of the lexical classifier at the end of the previous line. + * @param syntacticClassifierAbsent Whether the client is *not* using a syntactic classifier. + * If there is no syntactic classifier (syntacticClassifierAbsent=true), + * certain heuristics may be used in its place; however, if there is a + * syntactic classifier (syntacticClassifierAbsent=false), certain + * classifications which may be incorrectly categorized will be given + * back as Identifiers in order to allow the syntactic classifier to + * subsume the classification. + */ + getClassificationsForLine(text: string, lexState: EndOfLineState, syntacticClassifierAbsent: boolean): ClassificationResult; } /** * The document registry represents a store of SourceFile objects that can be shared between diff --git a/tests/baselines/reference/APISample_watcher.types b/tests/baselines/reference/APISample_watcher.types index 54304c2e1b9d8..4e137cae59d1a 100644 --- a/tests/baselines/reference/APISample_watcher.types +++ b/tests/baselines/reference/APISample_watcher.types @@ -5808,6 +5808,15 @@ declare module "typescript" { InDoubleQuoteStringLiteral = 3, >InDoubleQuoteStringLiteral : EndOfLineState + + InTemplateHeadOrNoSubstitutionTemplate = 4, +>InTemplateHeadOrNoSubstitutionTemplate : EndOfLineState + + InTemplateMiddleOrTail = 5, +>InTemplateMiddleOrTail : EndOfLineState + + InTemplateSubstitutionPosition = 6, +>InTemplateSubstitutionPosition : EndOfLineState } enum TokenClass { >TokenClass : TokenClass @@ -5863,12 +5872,31 @@ declare module "typescript" { interface Classifier { >Classifier : Classifier - getClassificationsForLine(text: string, lexState: EndOfLineState, classifyKeywordsInGenerics?: boolean): ClassificationResult; ->getClassificationsForLine : (text: string, lexState: EndOfLineState, classifyKeywordsInGenerics?: boolean) => ClassificationResult + /** + * Gives lexical classifications of tokens on a line without any syntactic context. + * For instance, a token consisting of the text 'string' can be either an identifier + * named 'string' or the keyword 'string', however, because this classifier is not aware, + * it relies on certain heuristics to give acceptable results. For classifications where + * speed trumps accuracy, this function is preferable; however, for true accuracy, the + * syntactic classifier is ideal. In fact, in certain editing scenarios, combining the + * lexical, syntactic, and semantic classifiers may issue the best user experience. + * + * @param text The text of a line to classify. + * @param lexState The state of the lexical classifier at the end of the previous line. + * @param syntacticClassifierAbsent Whether the client is *not* using a syntactic classifier. + * If there is no syntactic classifier (syntacticClassifierAbsent=true), + * certain heuristics may be used in its place; however, if there is a + * syntactic classifier (syntacticClassifierAbsent=false), certain + * classifications which may be incorrectly categorized will be given + * back as Identifiers in order to allow the syntactic classifier to + * subsume the classification. + */ + getClassificationsForLine(text: string, lexState: EndOfLineState, syntacticClassifierAbsent: boolean): ClassificationResult; +>getClassificationsForLine : (text: string, lexState: EndOfLineState, syntacticClassifierAbsent: boolean) => ClassificationResult >text : string >lexState : EndOfLineState >EndOfLineState : EndOfLineState ->classifyKeywordsInGenerics : boolean +>syntacticClassifierAbsent : boolean >ClassificationResult : ClassificationResult } /** diff --git a/tests/cases/unittests/services/colorization.ts b/tests/cases/unittests/services/colorization.ts index 011497643059c..c834f85806d86 100644 --- a/tests/cases/unittests/services/colorization.ts +++ b/tests/cases/unittests/services/colorization.ts @@ -4,6 +4,7 @@ interface ClassificationEntry { value: any; classification: ts.TokenClass; + position?: number; } describe('Colorization', function () { @@ -23,16 +24,23 @@ describe('Colorization', function () { return undefined; } - function punctuation(text: string): ClassificationEntry { return { value: text, classification: ts.TokenClass.Punctuation }; } - function keyword(text: string): ClassificationEntry { return { value: text, classification: ts.TokenClass.Keyword }; } - function operator(text: string): ClassificationEntry { return { value: text, classification: ts.TokenClass.Operator }; } - function comment(text: string): ClassificationEntry { return { value: text, classification: ts.TokenClass.Comment }; } - function whitespace(text: string): ClassificationEntry { return { value: text, classification: ts.TokenClass.Whitespace }; } - function identifier(text: string): ClassificationEntry { return { value: text, classification: ts.TokenClass.Identifier }; } - function numberLiteral(text: string): ClassificationEntry { return { value: text, classification: ts.TokenClass.NumberLiteral }; } - function stringLiteral(text: string): ClassificationEntry { return { value: text, classification: ts.TokenClass.StringLiteral }; } - function regExpLiteral(text: string): ClassificationEntry { return { value: text, classification: ts.TokenClass.RegExpLiteral }; } - function finalEndOfLineState(value: number): ClassificationEntry { return { value: value, classification: undefined }; } + function punctuation(text: string, position?: number) { return createClassification(text, ts.TokenClass.Punctuation, position); } + function keyword(text: string, position?: number) { return createClassification(text, ts.TokenClass.Keyword, position); } + function operator(text: string, position?: number) { return createClassification(text, ts.TokenClass.Operator, position); } + function comment(text: string, position?: number) { return createClassification(text, ts.TokenClass.Comment, position); } + function whitespace(text: string, position?: number) { return createClassification(text, ts.TokenClass.Whitespace, position); } + function identifier(text: string, position?: number) { return createClassification(text, ts.TokenClass.Identifier, position); } + function numberLiteral(text: string, position?: number) { return createClassification(text, ts.TokenClass.NumberLiteral, position); } + function stringLiteral(text: string, position?: number) { return createClassification(text, ts.TokenClass.StringLiteral, position); } + function regExpLiteral(text: string, position?: number) { return createClassification(text, ts.TokenClass.RegExpLiteral, position); } + function finalEndOfLineState(value: number): ClassificationEntry { return { value: value, classification: undefined, position: 0 }; } + function createClassification(text: string, tokenClass: ts.TokenClass, position?: number): ClassificationEntry { + return { + value: text, + classification: tokenClass, + position: position, + }; + } function testLexicalClassification(text: string, initialEndOfLineState: ts.EndOfLineState, ...expectedEntries: ClassificationEntry[]): void { var result = classifier.getClassificationsForLine(text, initialEndOfLineState); @@ -44,7 +52,7 @@ describe('Colorization', function () { assert.equal(result.finalLexState, expectedEntry.value, "final endOfLineState does not match expected."); } else { - var actualEntryPosition = text.indexOf(expectedEntry.value); + var actualEntryPosition = expectedEntry.position !== undefined ? expectedEntry.position : text.indexOf(expectedEntry.value); assert(actualEntryPosition >= 0, "token: '" + expectedEntry.value + "' does not exit in text: '" + text + "'."); var actualEntry = getEntryAtPosistion(result, actualEntryPosition); @@ -254,6 +262,106 @@ describe('Colorization', function () { finalEndOfLineState(ts.EndOfLineState.Start)); }); + it("classifies a single line no substitution template string correctly", () => { + testLexicalClassification("`number number public string`", + ts.EndOfLineState.Start, + stringLiteral("`number number public string`"), + finalEndOfLineState(ts.EndOfLineState.Start)); + }); + it("classifies substitution parts of a template string correctly", () => { + testLexicalClassification("`number '${ 1 + 1 }' string '${ 'hello' }'`", + ts.EndOfLineState.Start, + stringLiteral("`number '${"), + numberLiteral("1"), + operator("+"), + numberLiteral("1"), + stringLiteral("}' string '${"), + stringLiteral("'hello'"), + stringLiteral("}'`"), + finalEndOfLineState(ts.EndOfLineState.Start)); + }); + it("classifies an unterminated no substitution template string correctly", () => { + testLexicalClassification("`hello world", + ts.EndOfLineState.Start, + stringLiteral("`hello world"), + finalEndOfLineState(ts.EndOfLineState.InTemplateHeadOrNoSubstitutionTemplate)); + }); + it("classifies the entire line of an unterminated multiline no-substitution/head template", () => { + testLexicalClassification("...", + ts.EndOfLineState.InTemplateHeadOrNoSubstitutionTemplate, + stringLiteral("..."), + finalEndOfLineState(ts.EndOfLineState.InTemplateHeadOrNoSubstitutionTemplate)); + }); + it("classifies the entire line of an unterminated multiline template middle/end",() => { + testLexicalClassification("...", + ts.EndOfLineState.InTemplateMiddleOrTail, + stringLiteral("..."), + finalEndOfLineState(ts.EndOfLineState.InTemplateMiddleOrTail)); + }); + it("classifies a termination of a multiline template head", () => { + testLexicalClassification("...${", + ts.EndOfLineState.InTemplateHeadOrNoSubstitutionTemplate, + stringLiteral("...${"), + finalEndOfLineState(ts.EndOfLineState.InTemplateSubstitutionPosition)); + }); + it("classifies the termination of a multiline no substitution template", () => { + testLexicalClassification("...`", + ts.EndOfLineState.InTemplateHeadOrNoSubstitutionTemplate, + stringLiteral("...`"), + finalEndOfLineState(ts.EndOfLineState.Start)); + }); + it("classifies the substitution parts and middle/tail of a multiline template string", () => { + testLexicalClassification("${ 1 + 1 }...`", + ts.EndOfLineState.InTemplateHeadOrNoSubstitutionTemplate, + stringLiteral("${"), + numberLiteral("1"), + operator("+"), + numberLiteral("1"), + stringLiteral("}...`"), + finalEndOfLineState(ts.EndOfLineState.Start)); + }); + it("classifies a template middle and propagates the end of line state",() => { + testLexicalClassification("${ 1 + 1 }...`", + ts.EndOfLineState.InTemplateHeadOrNoSubstitutionTemplate, + stringLiteral("${"), + numberLiteral("1"), + operator("+"), + numberLiteral("1"), + stringLiteral("}...`"), + finalEndOfLineState(ts.EndOfLineState.Start)); + }); + it("classifies substitution expressions with curly braces appropriately", () => { + var pos = 0; + var lastLength = 0; + + testLexicalClassification("...${ () => { } } ${ { x: `1` } }...`", + ts.EndOfLineState.InTemplateHeadOrNoSubstitutionTemplate, + stringLiteral(track("...${"), pos), + punctuation(track(" ", "("), pos), + punctuation(track(")"), pos), + punctuation(track(" ", "=>"), pos), + punctuation(track(" ", "{"), pos), + punctuation(track(" ", "}"), pos), + stringLiteral(track(" ", "} ${"), pos), + punctuation(track(" ", "{"), pos), + identifier(track(" ", "x"), pos), + punctuation(track(":"), pos), + stringLiteral(track(" ", "`1`"), pos), + punctuation(track(" ", "}"), pos), + stringLiteral(track(" ", "}...`"), pos), + finalEndOfLineState(ts.EndOfLineState.Start)); + + // Adjusts 'pos' by accounting for the length of each portion of the string, + // but only return the last given string + function track(...vals: string[]): string { + for (var i = 0, n = vals.length; i < n; i++) { + pos += lastLength; + lastLength = vals[i].length; + } + return ts.lastOrUndefined(vals); + } + }); + it("classifies partially written generics correctly.", function () { testLexicalClassification("Foo