Skip to content

Spelling correction fixes should not be case-agnostic for two equally weighed options #39060

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 37 commits into from
Dec 15, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
362032b
Spelling correction fixes should not be case-agnostic when two equall…
Zzzen Jun 13, 2020
0ce23d5
update tests
Zzzen Jun 16, 2020
46d1239
Merge remote-tracking branch 'origin/master'
Zzzen Jun 17, 2020
923a243
Update src/compiler/core.ts
Zzzen Jun 30, 2020
a48eaca
Merge remote-tracking branch 'ts/master'
Zzzen Jul 20, 2020
2d21abc
init bestCaseSensitiveDistance lazily
Zzzen Jul 21, 2020
bd93aba
Add a test case with a class named the same as an instance, except fo…
Zzzen Nov 2, 2020
9754a1f
make the core levenshtein distance check case-aware
Zzzen Nov 2, 2020
e4d8ed9
Merge remote-tracking branch 'ts/master'
Zzzen Nov 3, 2020
0576bfa
Update package-lock.json
typescript-bot Nov 3, 2020
d88b03f
use fractional Levenshtein distance
Zzzen Nov 4, 2020
cfa904a
fix weight of Levenshtein distance
Zzzen Nov 5, 2020
f96f8f5
Update package-lock.json
typescript-bot Nov 10, 2020
e9b8afa
Update package-lock.json
typescript-bot Nov 11, 2020
7f61bcc
Update package-lock.json
typescript-bot Nov 12, 2020
fc4a305
Update package-lock.json
typescript-bot Nov 13, 2020
2f9d83b
Update package-lock.json
typescript-bot Nov 15, 2020
e6fbb24
Update package-lock.json
typescript-bot Nov 18, 2020
5737f16
Update package-lock.json
typescript-bot Nov 20, 2020
ea056d7
Update package-lock.json
typescript-bot Nov 23, 2020
383f92d
Update package-lock.json
typescript-bot Nov 26, 2020
167e679
Update package-lock.json
typescript-bot Nov 27, 2020
53e78bc
Update package-lock.json
typescript-bot Dec 1, 2020
1a2d5c2
Update package-lock.json
typescript-bot Dec 2, 2020
325bf67
Update package-lock.json
typescript-bot Dec 3, 2020
6e1e876
Update package-lock.json
typescript-bot Dec 4, 2020
b37c118
Update src/compiler/core.ts
Zzzen Dec 4, 2020
32d9595
refactor
Zzzen Dec 4, 2020
797b55d
Update package-lock.json
typescript-bot Dec 5, 2020
5f1fed4
revert unnecessary changes
Zzzen Dec 5, 2020
6511c0a
Update package-lock.json
typescript-bot Dec 6, 2020
70280bc
increase bestDistance
Zzzen Dec 8, 2020
63f835f
increase bestDistance again
Zzzen Dec 8, 2020
958708a
make changes minimal
Zzzen Dec 8, 2020
7f1168f
Update package-lock.json
typescript-bot Dec 9, 2020
97bb757
Update package-lock.json
typescript-bot Dec 11, 2020
99a5074
Update package-lock.json
typescript-bot Dec 13, 2020
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
619 changes: 427 additions & 192 deletions package-lock.json

Large diffs are not rendered by default.

55 changes: 22 additions & 33 deletions src/compiler/core.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1927,11 +1927,9 @@ namespace ts {

/**
* Given a name and a list of names that are *not* equal to the name, return a spelling suggestion if there is one that is close enough.
* Names less than length 3 only check for case-insensitive equality, not Levenshtein distance.
* Names less than length 3 only check for case-insensitive equality.
*
* If there is a candidate that's the same except for case, return that.
* If there is a candidate that's within one edit of the name, return that.
* Otherwise, return the candidate with the smallest Levenshtein distance,
* find the candidate with the smallest Levenshtein distance,
* except for candidates:
* * With no name
* * Whose length differs from the target name by more than 0.34 of the length of the name.
Expand All @@ -1941,41 +1939,28 @@ namespace ts {
*/
export function getSpellingSuggestion<T>(name: string, candidates: T[], getName: (candidate: T) => string | undefined): T | undefined {
const maximumLengthDifference = Math.min(2, Math.floor(name.length * 0.34));
let bestDistance = Math.floor(name.length * 0.4) + 1; // If the best result isn't better than this, don't bother.
let bestDistance = Math.floor(name.length * 0.4) + 1; // If the best result is worse than this, don't bother.
let bestCandidate: T | undefined;
let justCheckExactMatches = false;
const nameLowerCase = name.toLowerCase();
for (const candidate of candidates) {
const candidateName = getName(candidate);
if (candidateName !== undefined && Math.abs(candidateName.length - nameLowerCase.length) <= maximumLengthDifference) {
const candidateNameLowerCase = candidateName.toLowerCase();
if (candidateNameLowerCase === nameLowerCase) {
if (candidateName === name) {
continue;
}
return candidate;
}
if (justCheckExactMatches) {
if (candidateName !== undefined && Math.abs(candidateName.length - name.length) <= maximumLengthDifference) {
if (candidateName === name) {
continue;
}
if (candidateName.length < 3) {
// Don't bother, user would have noticed a 2-character name having an extra character
// Only consider candidates less than 3 characters long when they differ by case.
// Otherwise, don't bother, since a user would usually notice differences of a 2-character name.
if (candidateName.length < 3 && candidateName.toLowerCase() !== name.toLowerCase()) {
continue;
}
// Only care about a result better than the best so far.
const distance = levenshteinWithMax(nameLowerCase, candidateNameLowerCase, bestDistance - 1);

const distance = levenshteinWithMax(name, candidateName, bestDistance - 0.1);
if (distance === undefined) {
continue;
}
if (distance < 3) {
justCheckExactMatches = true;
bestCandidate = candidate;
}
else {
Debug.assert(distance < bestDistance); // Else `levenshteinWithMax` should return undefined
bestDistance = distance;
bestCandidate = candidate;
}

Debug.assert(distance < bestDistance); // Else `levenshteinWithMax` should return undefined
bestDistance = distance;
bestCandidate = candidate;
}
}
return bestCandidate;
Expand All @@ -1985,26 +1970,30 @@ namespace ts {
let previous = new Array(s2.length + 1);
let current = new Array(s2.length + 1);
/** Represents any value > max. We don't care about the particular value. */
const big = max + 1;
const big = max + 0.01;

for (let i = 0; i <= s2.length; i++) {
previous[i] = i;
}

for (let i = 1; i <= s1.length; i++) {
const c1 = s1.charCodeAt(i - 1);
const minJ = i > max ? i - max : 1;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think I understand the need for Math.ceil and Math.floor, but why change to Math.max and Math.min?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Math.max/min is more readable for me. Anyway, reverted now.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, makes sense. After this PR is in, maybe you can re-submit that change separately where it would be obvious that Math.max/min are equivalent. I thought it was more readable too, but I prefer to reduce code change per PR in complicated functions like this.

const maxJ = s2.length > max + i ? max + i : s2.length;
const minJ = Math.ceil(i > max ? i - max : 1);
const maxJ = Math.floor(s2.length > max + i ? max + i : s2.length);
current[0] = i;
/** Smallest value of the matrix in the ith column. */
let colMin = i;
for (let j = 1; j < minJ; j++) {
current[j] = big;
}
for (let j = minJ; j <= maxJ; j++) {
// case difference should be significantly cheaper than other differences
const substitutionDistance = s1[i - 1].toLowerCase() === s2[j-1].toLowerCase()
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would it make sense to cache the lowercase versions of s1 and s2 and index into those instead?

This piece of code needs a microbenchmark, since I have no idea. I'm not sure how to do that off the top of my head though.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How about extracting all symbols from some mostly used type definitions and then apply getSpellingSuggestion randomly?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe? One way to stress the system would be to have a scope that fills in all possible neighbours, so that the algorithm has to run on lots of candidates, all under the maximum distance cutoff.

I'm going to run the perf suite on this too, since all those projects compile with lots of errors now -- maybe enough of them are name resolution errors to notice a slowdown.

? (previous[j - 1] + 0.1)
: (previous[j - 1] + 2);
const dist = c1 === s2.charCodeAt(j - 1)
? previous[j - 1]
: Math.min(/*delete*/ previous[j] + 1, /*insert*/ current[j - 1] + 1, /*substitute*/ previous[j - 1] + 2);
: Math.min(/*delete*/ previous[j] + 1, /*insert*/ current[j - 1] + 1, /*substitute*/ substitutionDistance);
current[j] = dist;
colMin = Math.min(colMin, dist);
}
Expand Down
7 changes: 7 additions & 0 deletions tests/cases/fourslash/codeFixSpellingCaseSensitive1.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
/// <reference path='fourslash.ts' />

////export let Console = 1;
////export let console = 1;
////[|conole|] = 1;

verify.rangeAfterCodeFix('console');
7 changes: 7 additions & 0 deletions tests/cases/fourslash/codeFixSpellingCaseSensitive2.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
/// <reference path='fourslash.ts' />

////export let console = 1;
////export let Console = 1;
////[|conole|] = 1;

verify.rangeAfterCodeFix('console');
7 changes: 7 additions & 0 deletions tests/cases/fourslash/codeFixSpellingCaseSensitive3.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
/// <reference path='fourslash.ts' />

////class Node {}
////let node = new Node();
////[|nodes|]

verify.rangeAfterCodeFix('node');
7 changes: 7 additions & 0 deletions tests/cases/fourslash/codeFixSpellingCaseWeight1.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
/// <reference path='fourslash.ts' />

////let ABCDEFGHIJKLMNOPQR = 1;
////let abcdefghijklmnopqrs = 1;
////[|abcdefghijklmnopqr|]

verify.rangeAfterCodeFix('abcdefghijklmnopqrs');
7 changes: 7 additions & 0 deletions tests/cases/fourslash/codeFixSpellingCaseWeight2.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
/// <reference path='fourslash.ts' />

////let ABCDEFGHI = 1;
////let abcdefghij = 1;
////[|abcdefghi|]

verify.rangeAfterCodeFix('ABCDEFGHI');
6 changes: 6 additions & 0 deletions tests/cases/fourslash/codeFixSpellingShortName1.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
/// <reference path='fourslash.ts' />

////export let ab = 1;
////[|aB|] = 1;

verify.rangeAfterCodeFix('ab');
6 changes: 6 additions & 0 deletions tests/cases/fourslash/codeFixSpellingShortName2.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
/// <reference path='fourslash.ts' />

////export let ab = 1;
////abc;

verify.not.codeFixAvailable();