Skip to content

Commit 847c239

Browse files
authored
Update Data.Text.Utf16.Rope to Data.Text.Utf16.Rope.Mixed (#542)
* fix typo * update Data.Text.Utf16.Rope to Data.Text.Utf16.Rope.Mixed * fix omitting newline * revert extractLine * update Note [Converting between code points and code units] * format * add back note * simplified * update comment * fix typo
1 parent 6faa9b5 commit 847c239

File tree

2 files changed

+26
-55
lines changed

2 files changed

+26
-55
lines changed

lsp/src/Language/LSP/VFS.hs

Lines changed: 25 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -81,10 +81,11 @@ import Data.Row
8181
import Data.Text (Text)
8282
import Data.Text qualified as T
8383
import Data.Text.IO qualified as T
84+
import Data.Text.Lines as Char (Position (..))
8485
import Data.Text.Prettyprint.Doc hiding (line)
85-
import Data.Text.Rope qualified as URope
86-
import Data.Text.Utf16.Rope (Rope)
87-
import Data.Text.Utf16.Rope qualified as Rope
86+
import Data.Text.Utf16.Lines as Utf16 (Position (..))
87+
import Data.Text.Utf16.Rope.Mixed (Rope)
88+
import Data.Text.Utf16.Rope.Mixed qualified as Rope
8889
import Language.LSP.Protocol.Lens qualified as J
8990
import Language.LSP.Protocol.Message qualified as J
9091
import Language.LSP.Protocol.Types qualified as J
@@ -115,7 +116,7 @@ data VFS = VFS
115116
deriving (Show)
116117

117118
data VfsLog
118-
= SplitInsideCodePoint Rope.Position Rope
119+
= SplitInsideCodePoint Utf16.Position Rope
119120
| URINotFound J.NormalizedUri
120121
| Opening J.NormalizedUri
121122
| Closing J.NormalizedUri
@@ -350,7 +351,7 @@ applyChange :: (Monad m) => LogAction m (WithSeverity VfsLog) -> Rope -> J.TextD
350351
applyChange logger str (J.TextDocumentContentChangeEvent (J.InL e))
351352
| J.Range (J.Position sl sc) (J.Position fl fc) <- e .! #range
352353
, txt <- e .! #text =
353-
changeChars logger str (Rope.Position (fromIntegral sl) (fromIntegral sc)) (Rope.Position (fromIntegral fl) (fromIntegral fc)) txt
354+
changeChars logger str (Utf16.Position (fromIntegral sl) (fromIntegral sc)) (Utf16.Position (fromIntegral fl) (fromIntegral fc)) txt
354355
applyChange _ _ (J.TextDocumentContentChangeEvent (J.InR e)) =
355356
pure $ Rope.fromText $ e .! #text
356357

@@ -360,11 +361,11 @@ applyChange _ _ (J.TextDocumentContentChangeEvent (J.InR e)) =
360361
the given range with the new text. If the given positions lie within
361362
a code point then this does nothing (returns the original 'Rope') and logs.
362363
-}
363-
changeChars :: (Monad m) => LogAction m (WithSeverity VfsLog) -> Rope -> Rope.Position -> Rope.Position -> Text -> m Rope
364+
changeChars :: (Monad m) => LogAction m (WithSeverity VfsLog) -> Rope -> Utf16.Position -> Utf16.Position -> Text -> m Rope
364365
changeChars logger str start finish new = do
365-
case Rope.splitAtPosition finish str of
366+
case Rope.utf16SplitAtPosition finish str of
366367
Nothing -> logger <& SplitInsideCodePoint finish str `WithSeverity` Warning >> pure str
367-
Just (before, after) -> case Rope.splitAtPosition start before of
368+
Just (before, after) -> case Rope.utf16SplitAtPosition start before of
368369
Nothing -> logger <& SplitInsideCodePoint start before `WithSeverity` Warning >> pure str
369370
Just (before', _) -> pure $ mconcat [before', Rope.fromText new, after]
370371

@@ -402,11 +403,14 @@ In particular, we use the good asymptotics of 'Rope' to our advantage:
402403
- We then split the line at the given position, and check how long the prefix is, which takes
403404
linear time in the length of the (single) line.
404405
405-
We also may need to convert the line back and forth between ropes with different indexing. Again
406-
this is linear time in the length of the line.
407-
408406
So the overall process is logarithmic in the number of lines, and linear in the length of the specific
409407
line. Which is okay-ish, so long as we don't have very long lines.
408+
409+
We are not able to use the `Rope.splitAtPosition`
410+
Because when column index out of range or when the column indexing at the newline char.
411+
The prefix result would wrap over the line and having the same result (nextLineNum, 0).
412+
We would not be able to distinguish them. When the first case should return `Nothing`,
413+
second case should return a `Just (CurrentLineNum, columnNumberConverted)`.
410414
-}
411415

412416
{- | Extracts a specific line from a 'Rope.Rope'.
@@ -415,41 +419,12 @@ line. Which is okay-ish, so long as we don't have very long lines.
415419
extractLine :: Rope.Rope -> Word -> Maybe Rope.Rope
416420
extractLine rope l = do
417421
-- Check for the line being out of bounds
418-
let lastLine = Rope.posLine $ Rope.lengthAsPosition rope
422+
let lastLine = Utf16.posLine $ Rope.utf16LengthAsPosition rope
419423
guard $ l <= lastLine
420-
421424
let (_, suffix) = Rope.splitAtLine l rope
422425
(prefix, _) = Rope.splitAtLine 1 suffix
423426
pure prefix
424427

425-
{- | Translate a code-point offset into a code-unit offset.
426-
Linear in the length of the rope.
427-
-}
428-
codePointOffsetToCodeUnitOffset :: URope.Rope -> Word -> Maybe Word
429-
codePointOffsetToCodeUnitOffset rope offset = do
430-
-- Check for the position being out of bounds
431-
guard $ offset <= URope.length rope
432-
-- Split at the given position in *code points*
433-
let (prefix, _) = URope.splitAt offset rope
434-
-- Convert the prefix to a rope using *code units*
435-
utf16Prefix = Rope.fromText $ URope.toText prefix
436-
-- Get the length of the prefix in *code units*
437-
pure $ Rope.length utf16Prefix
438-
439-
{- | Translate a UTF-16 code-unit offset into a code-point offset.
440-
Linear in the length of the rope.
441-
-}
442-
codeUnitOffsetToCodePointOffset :: Rope.Rope -> Word -> Maybe Word
443-
codeUnitOffsetToCodePointOffset rope offset = do
444-
-- Check for the position being out of bounds
445-
guard $ offset <= Rope.length rope
446-
-- Split at the given position in *code units*
447-
(prefix, _) <- Rope.splitAt offset rope
448-
-- Convert the prefix to a rope using *code points*
449-
let utfPrefix = URope.fromText $ Rope.toText prefix
450-
-- Get the length of the prefix in *code points*
451-
pure $ URope.length utfPrefix
452-
453428
{- | Given a virtual file, translate a 'CodePointPosition' in that file into a 'J.Position' in that file.
454429
455430
Will return 'Nothing' if the requested position is out of bounds of the document.
@@ -458,15 +433,12 @@ codeUnitOffsetToCodePointOffset rope offset = do
458433
the position.
459434
-}
460435
codePointPositionToPosition :: VirtualFile -> CodePointPosition -> Maybe J.Position
461-
codePointPositionToPosition vFile (CodePointPosition l cpc) = do
436+
codePointPositionToPosition vFile (CodePointPosition l c) = do
462437
-- See Note [Converting between code points and code units]
463438
let text = _file_text vFile
464-
utf16Line <- extractLine text (fromIntegral l)
465-
-- Convert the line a rope using *code points*
466-
let utfLine = URope.fromText $ Rope.toText utf16Line
467-
468-
cuc <- codePointOffsetToCodeUnitOffset utfLine (fromIntegral cpc)
469-
pure $ J.Position l (fromIntegral cuc)
439+
lineRope <- extractLine text $ fromIntegral l
440+
guard $ c <= fromIntegral (Rope.charLength lineRope)
441+
return $ J.Position l (fromIntegral $ Rope.utf16Length $ fst $ Rope.charSplitAt (fromIntegral c) lineRope)
470442

471443
{- | Given a virtual file, translate a 'CodePointRange' in that file into a 'J.Range' in that file.
472444
@@ -487,13 +459,12 @@ codePointRangeToRange vFile (CodePointRange b e) =
487459
the position.
488460
-}
489461
positionToCodePointPosition :: VirtualFile -> J.Position -> Maybe CodePointPosition
490-
positionToCodePointPosition vFile (J.Position l cuc) = do
462+
positionToCodePointPosition vFile (J.Position l c) = do
491463
-- See Note [Converting between code points and code units]
492464
let text = _file_text vFile
493-
utf16Line <- extractLine text (fromIntegral l)
494-
495-
cpc <- codeUnitOffsetToCodePointOffset utf16Line (fromIntegral cuc)
496-
pure $ CodePointPosition l (fromIntegral cpc)
465+
lineRope <- extractLine text $ fromIntegral l
466+
guard $ c <= fromIntegral (Rope.utf16Length lineRope)
467+
CodePointPosition l . fromIntegral . Rope.charLength . fst <$> Rope.utf16SplitAt (fromIntegral c) lineRope
497468

498469
{- | Given a virtual file, translate a 'J.Range' in that file into a 'CodePointRange' in that file.
499470
@@ -535,7 +506,7 @@ getCompletionPrefix pos@(J.Position l c) (VirtualFile _ _ ropetext) =
535506
lastMaybe xs = Just $ last xs
536507

537508
let curRope = fst $ Rope.splitAtLine 1 $ snd $ Rope.splitAtLine (fromIntegral l) ropetext
538-
beforePos <- Rope.toText . fst <$> Rope.splitAt (fromIntegral c) curRope
509+
beforePos <- Rope.toText . fst <$> Rope.utf16SplitAt (fromIntegral c) curRope
539510
curWord <-
540511
if
541512
| T.null beforePos -> Just ""

lsp/test/VspSpec.hs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ module VspSpec where
66
import Data.Row
77
import Data.String
88
import Data.Text qualified as T
9-
import Data.Text.Utf16.Rope qualified as Rope
9+
import Data.Text.Utf16.Rope.Mixed qualified as Rope
1010
import Language.LSP.Protocol.Types qualified as J
1111
import Language.LSP.VFS
1212

0 commit comments

Comments
 (0)