@@ -74,10 +74,9 @@ import Data.Ord
7474import qualified Data.HashMap.Strict as HashMap
7575import qualified Data.Map.Strict as Map
7676import Data.Maybe
77- import qualified Data.Text.Lines as ULines
77+ import qualified Data.Text.Rope as URope
7878import Data.Text.Utf16.Rope ( Rope )
7979import qualified Data.Text.Utf16.Rope as Rope
80- import qualified Data.Text.Utf16.Lines as Lines
8180import Data.Text.Prettyprint.Doc
8281import qualified Language.LSP.Types as J
8382import qualified Language.LSP.Types.Lens as J
@@ -363,32 +362,71 @@ data CodePointPosition =
363362 , _character :: J. UInt
364363 } deriving (Show , Read , Eq , Ord )
365364
365+ {- Note [Converting between code points and code units]
366+ This is inherently a somewhat expensive operation, but we take some care to minimize the cost.
367+ In particular, we use the good asymptotics of 'Rope' to our advantage:
368+ - We extract the single line that we are interested in, which takes time logarithmic in the number of lines.
369+ - We then split the line at the given position, and check how long the prefix is, which takes
370+ linear time in the length of the (single) line.
371+
372+ We also may need to convert the line back and forth between ropes with different indexing. Again
373+ this is linear time in the length of the line.
374+
375+ So the overall process is logarithmic in the number of lines, and linear in the length of the specific
376+ line, which is okay-ish so long as we don't have very long lines.
377+ -}
378+
379+ -- | Extracts a specific line from a 'Rope.Rope'.
380+ -- Logarithmic in the number of lines.
381+ extractLine :: Rope. Rope -> Word -> Rope. Rope
382+ extractLine rope l =
383+ let (_, suffix) = Rope. splitAtLine l rope
384+ (prefix, _) = Rope. splitAtLine 1 suffix
385+ in prefix
386+
366387-- | Given a virtual file, translate a 'CodePointPosition' in that file into a 'J.Position' in that file.
367388--
389+ -- If the position is out of bounds (i.e. beyond the last line or the last character in a line), then the
390+ -- greatest valid position less than that will be returned.
391+ --
368392-- We need the file itself because this requires translating between code points and code units.
369393codePointPositionToPosition :: VirtualFile -> CodePointPosition -> J. Position
370- codePointPositionToPosition vFile (CodePointPosition cpl cpc) =
394+ codePointPositionToPosition vFile (CodePointPosition l cpc) =
395+ -- See Note [Converting between code points and code units]
371396 let text = _file_text vFile
372- lines = Rope. toTextLines text
397+ utf16Line = extractLine text (fromIntegral l)
398+
399+ -- Convert the line to a rope using *code points*
400+ utfLine = URope. fromText $ Rope. toText utf16Line
373401 -- Split at the given position in *code points*
374- (prefix, _) = ULines. splitAtPosition (ULines. Position (fromIntegral cpl) (fromIntegral cpc)) lines
402+ (utfLinePrefix, _) = URope. splitAt (fromIntegral cpc) utfLine
403+ -- Convert the prefix to a rope using *code units*
404+ utf16LinePrefix = Rope. fromText $ URope. toText utfLinePrefix
375405 -- Get the length of the prefix in *code units*
376- ( Lines. Position cul cuc) = Lines. lengthAsPosition prefix
377- in J. Position ( fromIntegral cul) (fromIntegral cuc)
406+ cuc = Rope. length utf16LinePrefix
407+ in J. Position l (fromIntegral cuc)
378408
379409-- | Given a virtual file, translate a 'J.Position' in that file into a 'CodePointPosition' in that file.
410+ --
380411-- May fail if the requested position lies inside a code point.
381412--
413+ -- If the position is out of bounds (i.e. beyond the last line or the last character in a line), then the
414+ -- greatest valid position less than that will be returned.
415+ --
382416-- We need the file itself because this requires translating between code unit and code points.
383417positionToCodePointPosition :: VirtualFile -> J. Position -> Maybe CodePointPosition
384- positionToCodePointPosition vFile (J. Position cul cuc) = do
418+ positionToCodePointPosition vFile (J. Position l cuc) = do
419+ -- See Note [Converting between code points and code units]
385420 let text = _file_text vFile
386- lines = Rope. toTextLines text
387- -- Split at the given location in *code units*
388- (prefix, _) <- Lines. splitAtPosition (Lines. Position (fromIntegral cul) (fromIntegral cuc)) lines
389- -- Get the length of the prefix in *code points*
390- let (ULines. Position cpl cpc) = ULines. lengthAsPosition prefix
391- pure $ CodePointPosition (fromIntegral cpl) (fromIntegral cpc)
421+ utf16Line = extractLine text (fromIntegral l)
422+
423+ -- Split at the given position in *code units*
424+ (utf16LinePrefix, _) <- Rope. splitAt (fromIntegral cuc) utf16Line
425+ -- Convert the prefix to a rope using *code points*
426+ let utfLinePrefix = URope. fromText $ Rope. toText utf16LinePrefix
427+ -- Get the length of the prefix in *code points*
428+ cpc = URope. length utfLinePrefix
429+ pure $ CodePointPosition l (fromIntegral cpc)
392430
393431-- ---------------------------------------------------------------------
394432
0 commit comments