(todo) Tests and extractFoldableRegions done

glennsarti · glennsarti · commit 8549cb1864dd · 2018-10-22T20:59:52.000+08:00
diff --git a/src/PowerShellEditorServices/Language/FoldingReference.cs b/src/PowerShellEditorServices/Language/FoldingReference.cs
@@ -4,15 +4,13 @@
 //
 
 using System;
-using System.Diagnostics;
-using System.Management.Automation.Language;
 
 namespace Microsoft.PowerShell.EditorServices
 {
     /// <summary>
     /// A class that holds the information for a foldable region of text in a document
     /// </summary>
-    public class FoldingReference
+    public class FoldingReference: IComparable<FoldingReference>
     {
         /// <summary>
         /// The zero-based line number from where the folded range starts.
@@ -38,5 +36,54 @@ public class FoldingReference
         /// Describes the kind of the folding range such as `comment' or 'region'.
         /// <summary>
         public string kind { get; set; }
+
+        // TODO: Do constructors go at the top?
+        /// <summary>
+        /// Creates a folding reference from explicit start/end lines and character offsets
+        /// </summary>
+        public FoldingReference(
+            int startLine, int startCharacter,
+            int endLine, int endCharacter, string kind)
+        {
+            this.startLine = startLine;
+            this.startCharacter = startCharacter;
+            this.endLine = endLine;
+            this.endCharacter = endCharacter;
+            this.kind = kind;
+        }
+
+        /// <summary>
+        /// Creates a folding reference from a start and end line only; both
+        /// character offsets are set to zero
+        /// </summary>
+        public FoldingReference(
+            int startLine,
+            int endLine,
+            string kind)
+            : this(startLine, 0, endLine, 0, kind)
+        {
+        }
+
+        /// <summary>
+        /// A custom comparable method which can properly sort FoldingReference objects.
+        /// A null kind is ordered before any non-null kind.
+        /// </summary>
+        public int CompareTo(FoldingReference that) {
+            // Initially look at the start line
+            if (this.startLine < that.startLine) { return -1; }
+            if (this.startLine > that.startLine) { return 1; }
+            // They have the same start line so now consider the end line.
+            // The biggest line range is sorted first
+            if (this.endLine > that.endLine) { return -1; }
+            if (this.endLine < that.endLine) { return 1; }
+            // They have the same lines, but what about character offsets
+            if (this.startCharacter < that.startCharacter) { return -1; }
+            if (this.startCharacter > that.startCharacter) { return 1; }
+            if (this.endCharacter < that.endCharacter) { return -1; }
+            if (this.endCharacter > that.endCharacter) { return 1; }
+            // They're the same range, so order by kind. string.Compare accepts null on either
+            // side, whereas this.kind.CompareTo(...) threw when only this.kind was null
+            return string.Compare(this.kind, that.kind);
+        }
     }
 }
diff --git a/src/PowerShellEditorServices/Language/TokenOperations.cs b/src/PowerShellEditorServices/Language/TokenOperations.cs
@@ -3,29 +3,259 @@
 // Licensed under the MIT license. See LICENSE file in the project root for full license information.
 //
 
-// using Microsoft.PowerShell.EditorServices.Utility;
-// using System;
-// using System.Diagnostics;
-// using System.Collections.Generic;
-// using System.Linq;
-// using System.Reflection;
-// using System.Threading;
-// using System.Threading.Tasks;
+using System.Collections.Generic;
+using System.Text.RegularExpressions;
 using System.Management.Automation.Language;
-// using System.Management.Automation.Runspaces;
 
 namespace Microsoft.PowerShell.EditorServices
 {
-    using System.Management.Automation;
 
     /// <summary>
     /// Provides common operations for the tokens of a parsed script.
     /// </summary>
     internal static class TokenOperations
     {
+        /// <summary>
+        /// Extracts all of the unique foldable regions in a script given the list of tokens
+        /// </summary>
+        /// <param name="tokens">The tokens of the parsed script</param>
+        /// <returns>The foldable regions in sorted order, with at most one region per starting line</returns>
         static public FoldingReference[] FoldableRegions(Token[] tokens) {
-            FoldingReference[] result = new FoldingReference[] {};
+            List<FoldingReference> foldableRegions = new List<FoldingReference>();
+
+            // Find matching braces { -> }
+            foldableRegions.AddRange(
+                MatchTokenElements(tokens, TokenKind.LCurly, TokenKind.RCurly, null)
+            );
+
+            // Find matching parentheses ( -> )
+            foldableRegions.AddRange(
+                MatchTokenElements(tokens, TokenKind.LParen, TokenKind.RParen, null)
+            );
+
+            // Find matching arrays @( -> )
+            foldableRegions.AddRange(
+                MatchTokenElements(tokens, TokenKind.AtParen, TokenKind.RParen, null)
+            );
+
+            // Find matching hashes @{ -> }   (closed by a curly brace, not a parenthesis)
+            foldableRegions.AddRange(
+                MatchTokenElements(tokens, TokenKind.AtCurly, TokenKind.RCurly, null)
+            );
+
+            // Find contiguous here strings @' -> '@
+            foldableRegions.AddRange(
+                MatchTokenElement(tokens, TokenKind.HereStringLiteral, null)
+            );
+
+            // Find contiguous here strings @" -> "@
+            foldableRegions.AddRange(
+                MatchTokenElement(tokens, TokenKind.HereStringExpandable, null)
+            );
+
+            // Find matching comment regions   #region -> #endregion
+            foldableRegions.AddRange(
+                MatchCustomCommentRegionTokenElements(tokens, "region")
+            );
+
+            // Find blocks of line comments # comment1\n# comment2\n...
+            foldableRegions.AddRange(
+                MatchBlockCommentTokenElement(tokens, "comment")
+            );
+
+            // Find comment regions <# -> #>
+            foldableRegions.AddRange(
+                MatchTokenElement(tokens, TokenKind.Comment, "comment")
+            );
+
+            // Remove any null entries. Nulls appear if the folding reference is invalid or missing
+            foldableRegions.RemoveAll(item => item == null);
+
+            // Sort the FoldingReferences, starting at the top of the document, and ensure that, in the case of
+            // multiple ranges starting the same line, that the largest range (i.e. most number of lines spanned)
+            // is sorted first. This is needed to detect duplicate regions. The first in the list will be used
+            // and subsequent duplicates ignored.
+            foldableRegions.Sort();
+
+            // It's possible to have duplicate or overlapping ranges, that is, regions which have the same starting
+            // line number as the previous region. Therefore only emit ranges which have a different starting line
+            // than the previous range. The sorted list is compacted in one forward pass rather than by calling
+            // BinarySearch from a RemoveAll predicate: that searched a list which was being modified, and for
+            // items that compare equal BinarySearch may return the index of any one of them.
+            int keptCount = 0;
+            for (int index = 0; index < foldableRegions.Count; index++) {
+                if ((keptCount > 0) && (foldableRegions[index].startLine == foldableRegions[keptCount - 1].startLine)) { continue; }
+                foldableRegions[keptCount++] = foldableRegions[index];
+            }
+            foldableRegions.RemoveRange(keptCount, foldableRegions.Count - keptCount);
+
+            return foldableRegions.ToArray();
+        }
+
+        /// <summary>
+        /// Builds a FoldingReference that runs from the start of one Token to the end of another.
+        /// Yields null when the end token finishes on the same line that the start token begins on
+        /// </summary>
+        static private FoldingReference CreateFoldingReference(
+            Token startToken,
+            Token endToken,
+            string matchKind)
+        {
+            IScriptExtent startExtent = startToken.Extent;
+            IScriptExtent endExtent = endToken.Extent;
+            if (startExtent.StartLineNumber == endExtent.EndLineNumber) { return null; }
+            // Extents are base 1, but LSP is base 0, so each position is shifted down by one
+            return new FoldingReference(
+                startExtent.StartLineNumber - 1, startExtent.StartColumnNumber - 1,
+                endExtent.EndLineNumber - 1, endExtent.EndColumnNumber - 1,
+                matchKind);
+        }
+
+        /// <summary>
+        /// Builds a FoldingReference that begins at a Token and finishes at character zero of the
+        /// given zero-based end line. Yields null when that end line is the token's own start line
+        /// </summary>
+        static private FoldingReference CreateFoldingReference(
+            Token startToken,
+            int endLine,
+            string matchKind)
+        {
+            // Extents are base 1, but LSP is base 0
+            int zeroBasedStartLine = startToken.Extent.StartLineNumber - 1;
+            int zeroBasedStartColumn = startToken.Extent.StartColumnNumber - 1;
+            if (zeroBasedStartLine == endLine) { return null; }
+            return new FoldingReference(
+                zeroBasedStartLine, zeroBasedStartColumn,
+                endLine, 0,
+                matchKind);
+        }
+
+        /// <summary>
+        /// Pairs each opening token with a later closing token of a different TokenKind,
+        /// using a stack so that the most recently opened token is closed first
+        /// </summary>
+        static private List<FoldingReference> MatchTokenElements(
+            Token[] tokens,
+            TokenKind startTokenKind,
+            TokenKind endTokenKind,
+            string matchKind)
+        {
+            Stack<Token> openTokens = new Stack<Token>();
+            List<FoldingReference> regions = new List<FoldingReference>();
+            for (int index = 0; index < tokens.Length; index++)
+            {
+                Token current = tokens[index];
+                if (current.Kind == startTokenKind) { openTokens.Push(current); }
+                // A closing token with nothing open has no partner, so it is skipped
+                if ((current.Kind != endTokenKind) || (openTokens.Count == 0)) { continue; }
+                regions.Add(CreateFoldingReference(openTokens.Pop(), current, matchKind));
+            }
+            return regions;
+        }
+
+        /// <summary>
+        /// Collects a region for every token of the given TokenKind whose extent starts and ends on different lines
+        /// </summary>
+        static private List<FoldingReference> MatchTokenElement(
+            Token[] tokens,
+            TokenKind tokenKind,
+            string matchKind)
+        {
+            List<FoldingReference> regions = new List<FoldingReference>();
+            for (int index = 0; index < tokens.Length; index++)
+            {
+                Token candidate = tokens[index];
+                if ((candidate.Kind != tokenKind) || (candidate.Extent.StartLineNumber == candidate.Extent.EndLineNumber)) { continue; }
+                regions.Add(CreateFoldingReference(candidate, candidate, matchKind));
+            }
+            return regions;
+        }
+
+        /// <summary>
+        /// Returns true if a Token is a block comment;
+        /// - Must be a TokenKind.Comment
+        /// - Must be the first token, or be preceded by TokenKind.NewLine
+        /// - Token text must start with a '#'. This is because comment regions
+        ///   start with '&lt;#' but have the same TokenKind
+        /// </summary>
+        static private bool IsBlockComment(int index, Token[] tokens) {
+            Token thisToken = tokens[index];
+            if (thisToken.Kind != TokenKind.Comment) { return false; }
+            // The first token has no predecessor; every other token must follow a newline
+            if ((index > 0) && (tokens[index - 1].Kind != TokenKind.NewLine)) { return false; }
+            return thisToken.Text.StartsWith("#", System.StringComparison.Ordinal);
+        }
+
+        /// <summary>
+        /// Finding blocks of comment tokens is more complicated as the newline characters are not
+        /// classed as comments.  To workaround this we search for valid block comments (See IsBlockComment)
+        /// and then determine contiguous line numbers from there
+        /// </summary>
+        static private List<FoldingReference> MatchBlockCommentTokenElement(
+            Token[] tokens,
+            string matchKind)
+        {
+            // This regular expression is used to detect a line comment (as opposed to an inline comment), that is not a region
+            // block directive i.e.
+            // - No text between the beginning of the line and `#`
+            // - Comment does not start with region or endregion
+            // The pattern is anchored with ^ so that only the leading `#` is tested; unanchored, a later `#` in the
+            // text (e.g. `#region Setup # notes`) let a region directive pass as an ordinary line comment
+            string lineCommentText = @"^\s*#(?!region\b|endregion\b)";
+
+            List<FoldingReference> result = new List<FoldingReference>();
+            Token startToken = null;
+            int nextLine = -1;
+            for (int index = 0; index < tokens.Length; index++)
+            {
+                Token thisToken = tokens[index];
+                if (IsBlockComment(index, tokens) && Regex.IsMatch(thisToken.Text, lineCommentText, RegexOptions.IgnoreCase)) {
+                    int thisLine = thisToken.Extent.StartLineNumber - 1;
+                    if ((startToken != null) && (thisLine != nextLine)) {
+                        result.Add(CreateFoldingReference(startToken, nextLine - 1, matchKind));
+                        startToken = thisToken;
+                    }
+                    if (startToken == null) { startToken = thisToken; }
+                    nextLine = thisLine + 1;
+                }
+            }
+            // If the tokens run out while a comment block is still open, close it at its last comment line
+            if (startToken != null) {
+                result.Add(CreateFoldingReference(startToken, nextLine - 1, matchKind));
+            }
+            return result;
+        }
+
+        /// <summary>
+        /// Scans the tokens for comments accepted by IsBlockComment whose text is a
+        /// `#region` or `#endregion` directive, and pairs them up with a stack so
+        /// that the most recently opened region is closed first
+        /// </summary>
+        static private List<FoldingReference> MatchCustomCommentRegionTokenElements(
+            Token[] tokens,
+            string matchKind)
+        {
+            // Patterns for the lines which mark the start and end of a region comment in a PowerShell script.
+            // They are based on the defaults in the VS Code Language Configuration at;
+            // https://github.com/Microsoft/vscode/blob/64186b0a26/extensions/powershell/language-configuration.json#L26-L31
+            Regex regionStart = new Regex(@"^\s*#region\b", RegexOptions.IgnoreCase);
+            Regex regionEnd = new Regex(@"^\s*#endregion\b", RegexOptions.IgnoreCase);
+
+            Stack<Token> openRegions = new Stack<Token>();
+            List<FoldingReference> result = new List<FoldingReference>();
+            for (int index = 0; index < tokens.Length; index++)
+            {
+                if (!IsBlockComment(index, tokens)) { continue; }
+                string commentText = tokens[index].Text;
+                if (regionStart.IsMatch(commentText)) {
+                    openRegions.Push(tokens[index]);
+                }
+                // An end marker only closes something when a region is currently open;
+                // otherwise it is skipped
+                if ((openRegions.Count == 0) || !regionEnd.IsMatch(commentText)) { continue; }
+                result.Add(CreateFoldingReference(openRegions.Pop(), tokens[index], matchKind));
+            }
             return result;
         }
-   }
+    }
 }
diff --git a/test/PowerShellEditorServices.Test/Language/TokenOperationsTests.cs b/test/PowerShellEditorServices.Test/Language/TokenOperationsTests.cs