@@ -100,8 +100,13 @@ function chunkHelper.get_chunk_range(opts)
100100 end
101101end
102102
103- function chunkHelper .calc (str , col , leftcol )
104- local len = vim .api .nvim_strwidth (str )
103+ --- @param str string
104+ --- @param col integer
105+ --- @param leftcol integer
106+ --- @param shiftwidth integer
107+ --- @return string , integer
108+ function chunkHelper .calc (str , col , leftcol , shiftwidth )
109+ local len = chunkHelper .virtTextStrWidth (str , shiftwidth )
105110 if col < leftcol then
106111 local byte_idx = math.min (leftcol - col , len )
107112 local utf_beg = vim .str_byteindex (str , byte_idx )
@@ -113,6 +118,8 @@ function chunkHelper.calc(str, col, leftcol)
113118 return str , col
114119end
115120
121+ --- @param inputstr string
122+ --- @return string[]
116123function chunkHelper .utf8Split (inputstr )
117124 local list = {}
118125 for uchar in string.gmatch (inputstr , " [^\128 -\191 ][\128 -\191 ]*" ) do
@@ -133,6 +140,76 @@ function chunkHelper.rangeFromTo(i, j, step)
133140 return t
134141end
135142
--- Compute the starting column of every entry in `char_list`.
---
--- The first entry starts at `leftcol`; each following entry starts where the
--- previous one ends, using `chunkHelper.virtTextStrWidth` to measure entries.
---@param char_list table<integer, string> entries to lay out, in order
---@param leftcol integer column assigned to the first entry
---@param shiftwidth integer width used for TAB characters when measuring
---@return integer[] start column of each entry, same length as `char_list`
function chunkHelper.getColList(char_list, leftcol, shiftwidth)
    local columns = {}
    local column = leftcol
    for idx = 1, #char_list do
        columns[idx] = column
        column = column + chunkHelper.virtTextStrWidth(char_list[idx], shiftwidth)
    end
    return columns
end
156+
--- Repeat `str` until the result is `width` display cells wide.
---
--- Whole copies of `str` are used while they fit. The remainder is filled with
--- leading characters of `str`; when the next character would overshoot the
--- target, the rest is padded with spaces instead.
---
--- A non-positive `width`, or a `str` that measures zero cells, yields an empty
--- string.
---@param str string pattern to repeat
---@param width integer target width in display cells
---@param shiftwidth integer width used for TAB characters when measuring
---@return string
function chunkHelper.repeatToWidth(str, width, shiftwidth)
    local str_width = chunkHelper.virtTextStrWidth(str, shiftwidth)

    -- Bail out before the modulo/division below can see a zero divisor, and
    -- before a negative target can make the tail loop emit a stray character.
    if width <= 0 or str_width <= 0 then
        return ""
    end

    -- "1" -> "1111"
    if str_width == 1 then
        return str:rep(width)
    end

    -- "12" -> "1212"
    if width % str_width == 0 then
        return str:rep(width / str_width)
    end

    -- "12" -> "12121"
    -- a character too wide for the remaining cells is replaced by space padding
    -- an out-of-bounds glyph followed by a space is kept together as one unit
    local full_repeats = math.floor(width / str_width)
    local parts = { str:rep(full_repeats) }
    local filled = str_width * full_repeats
    local chars = chunkHelper.utf8Split(str)
    local i = 1
    while i <= #chars do
        local ch = chars[i]
        local ch_width = chunkHelper.virtTextStrWidth(ch, shiftwidth)
        -- assumed to be an out-of-bounds char (like in nerd fonts) followed by
        -- a whitespace: single-cell, non-ASCII, and the next char is a space
        local likely_oob_char = ch_width == 1 and chars[i + 1] == " " and ch:byte(1) > 0x7F

        local piece, piece_width, consumed = ch, ch_width, 1
        if likely_oob_char then
            -- take the glyph and its trailing space as a two-cell unit
            piece, piece_width, consumed = ch .. " ", 2, 2
        end

        local next_width = filled + piece_width
        if next_width > width then
            -- the unit does not fit: pad what is left with spaces
            parts[#parts + 1] = string.rep(" ", width - filled)
            break
        end

        parts[#parts + 1] = piece
        if next_width == width then
            break
        end
        filled = next_width
        i = i + consumed
    end
    return table.concat(parts)
end
212+
136213function chunkHelper .shallowCmp (t1 , t2 )
137214 if # t1 ~= # t2 then
138215 return false
@@ -147,4 +224,150 @@ function chunkHelper.shallowCmp(t1, t2)
147224 return flag
148225end
149226
-- Characters that are treated as blank cells.
--
-- "%s" cannot be used for this check: it also matches characters such as "\v",
-- which is printed as ^K. Control characters other than TAB are rendered like
-- "^[" or "<200b>", so they are not blank either.
--
-- Entries are written as byte escapes so that the invisible characters stay
-- unambiguous in the source.
--
-- 2000..200A (EN QUAD..HAIR SPACE) are matched by a byte-range test inside
-- chunkHelper.checkCellsBlank rather than listed here.
--
-- Deliberately NOT treated as blank:
--   1680 OGHAM SPACE MARK ("\225\154\128")
--     usually rendered as "-", see https://www.unicode.org/charts/PDF/U1680.pdf
--   2028 LINE SEPARATOR ("\226\128\168"), 2029 PARAGRAPH SEPARATOR ("\226\128\169")
--     some fonts lack these and may render them as "?" or "█", as these
--     characters are usually treated as line breaks
--   3164 HANGUL FILLER ("\227\133\164"), FFA0 HALFWIDTH HANGUL FILLER ("\239\190\160")
--     technically "blank" but can easily break the rendering
local BLANK_CELL_CHARS = {
    -- indent characters
    [" "] = true, -- 0020 SPACE
    ["\t"] = true, -- 0009 TAB
    -- non-indent characters
    ["\194\160"] = true, -- 00A0 NO-BREAK SPACE
    ["\226\128\175"] = true, -- 202F NARROW NO-BREAK SPACE
    ["\226\129\159"] = true, -- 205F MEDIUM MATHEMATICAL SPACE
    ["\227\128\128"] = true, -- 3000 IDEOGRAPHIC SPACE
    -- others
    ["\226\160\128"] = true, -- 2800 BRAILLE PATTERN BLANK
}

--- Check whether the cells from `start_col` to `end_col` of `line` are blank.
---
--- The line is walked character by character while a running 1-based display
--- column is tracked. Any character reaching into the range that is not one of
--- the accepted blank characters makes the function return false. Running out
--- of line before `end_col` counts as blank.
---@param line string
---@param start_col integer first cell to check (1-based)
---@param end_col integer last cell to check (1-based)
---@param shiftwidth integer number of cells a TAB is counted as
---@return boolean
function chunkHelper.checkCellsBlank(line, start_col, end_col, shiftwidth)
    local col, byte_pos, char_idx = 1, 1, 1
    while byte_pos <= #line and col <= end_col do
        local last_byte = vim.str_byteindex(line, char_idx)
        local char = line:sub(byte_pos, last_byte)
        if char == "" then
            break
        end
        local b1, b2, b3 = char:byte(1, 3)

        -- how many cells this step covers, and whether a trailing space is
        -- consumed together with the character
        local cell_width
        local swallow_space = false
        if char == " " then
            cell_width = 1
        elseif char == "\t" then
            cell_width = shiftwidth
        elseif b1 <= 0x1F or char == "\127" then
            -- despite nvim_strwidth returning 0 or 1, control chars are 2 cells wide
            cell_width = 2
        elseif b1 <= 0x7F then
            -- other ASCII chars are single cell wide
            cell_width = 1
        else
            cell_width = vim.api.nvim_strwidth(char)
            if cell_width == 1 and line:byte(last_byte + 1) == 0x20 then
                -- the char is assumed to be an out-of-bounds char (like in nerd
                -- fonts) followed by a whitespace; count both as one 2-cell unit
                cell_width = 2
                swallow_space = true
            end
        end
        local next_col = col + cell_width

        -- only characters that reach into the requested range are inspected
        if col >= start_col or next_col - 1 >= start_col then
            local is_blank = BLANK_CELL_CHARS[char]
                -- 2000..200A - EN QUAD..HAIR SPACE
                or (b1 == 0xe2 and b2 == 0x80 and b3 >= 0x80 and b3 <= 0x8a)
            if not is_blank then
                return false
            end
        end

        local step = swallow_space and 2 or 1
        col = next_col
        byte_pos = last_byte + step
        char_idx = char_idx + step
    end
    return true
end
342+
--- Measure how many display cells `str` is counted as.
---
--- Width rules, applied per character from `chunkHelper.utf8Split`:
---   * NUL: contributes nothing; ends the measurement early when `stop_on_null`
---   * TAB: `shiftwidth` cells
---   * other control characters (0x01-0x1F, 0x7F): 2 cells
---   * remaining ASCII: 1 cell
---   * everything else: whatever `vim.api.nvim_strwidth` reports
---@param str string
---@param shiftwidth integer number of cells a TAB is counted as
---@param stop_on_null? boolean return the width accumulated so far at the first NUL
---@return integer
function chunkHelper.virtTextStrWidth(str, shiftwidth, stop_on_null)
    local total = 0
    local chars = chunkHelper.utf8Split(str)
    for idx = 1, #chars do
        local ch = chars[idx]
        local lead = ch:byte(1)
        if ch == "\0" then
            if stop_on_null then
                return total
            end
            -- otherwise a NUL simply adds no width
        elseif ch == "\t" then
            total = total + shiftwidth
        elseif lead <= 0x1F or lead == 0x7F then
            -- control chars other than NULL and TAB are two cells wide
            total = total + 2
        elseif lead <= 0x7F then
            -- other ASCII chars are single cell wide
            total = total + 1
        else
            total = total + vim.api.nvim_strwidth(ch)
        end
    end
    return total
end
372+
150373return chunkHelper
0 commit comments