@@ -104,8 +104,13 @@ function chunkHelper.get_chunk_range(opts)
104104 end
105105end
106106
107- function chunkHelper .calc (str , col , leftcol )
108- local len = vim .api .nvim_strwidth (str )
107+ --- @param str string
108+ --- @param col integer
109+ --- @param leftcol integer
110+ --- @param shiftwidth integer
111+ --- @return string , integer
112+ function chunkHelper .calc (str , col , leftcol , shiftwidth )
113+ local len = chunkHelper .virtTextStrWidth (str , shiftwidth )
109114 if col < leftcol then
110115 local byte_idx = math.min (leftcol - col , len )
111116 local utf_beg = vim .str_byteindex (str , byte_idx )
@@ -117,6 +122,8 @@ function chunkHelper.calc(str, col, leftcol)
117122 return str , col
118123end
119124
125+ --- @param inputstr string
126+ --- @return string[]
120127function chunkHelper .utf8Split (inputstr )
121128 local list = {}
122129 for uchar in string.gmatch (inputstr , " [^\128 -\191 ][\128 -\191 ]*" ) do
@@ -137,6 +144,76 @@ function chunkHelper.rangeFromTo(i, j, step)
137144 return t
138145end
139146
--- Compute the starting column of each entry of `char_list` when the entries
--- are laid out one after another beginning at `leftcol`.
---@param char_list table<integer, string> entries to place, in display order
---@param leftcol integer column given to the first entry
---@param shiftwidth integer forwarded to `virtTextStrWidth` when measuring an entry
---@return integer[] # one start column per entry, in the same order as `char_list`
function chunkHelper.getColList(char_list, leftcol, shiftwidth)
    local columns = {}
    local column = leftcol
    for idx = 1, #char_list do
        -- record where this entry begins, then advance by its measured width
        columns[idx] = column
        column = column + chunkHelper.virtTextStrWidth(char_list[idx], shiftwidth)
    end
    return columns
end
160+
--- Repeat `str` until the result is `width` cells wide, as measured by
--- `virtTextStrWidth`.
---
--- When `width` is not a multiple of the width of `str`, the result ends with a
--- partial copy of `str`; if the next character of that partial copy would
--- overshoot, the remaining cells are filled with spaces instead.
---@param str string pattern to repeat
---@param width integer target width in cells
---@param shiftwidth integer forwarded to `virtTextStrWidth` when measuring
---@return string
function chunkHelper.repeatToWidth(str, width, shiftwidth)
    -- nothing to fill
    if width <= 0 then
        return ""
    end

    local str_width = chunkHelper.virtTextStrWidth(str, shiftwidth)

    -- a pattern that measures zero cells (e.g. "" or a lone NUL) can never fill
    -- the target, and the modulo/division below would divide by zero, so pad
    -- with spaces just like the overshoot case further down
    if str_width <= 0 then
        return string.rep(" ", width)
    end

    -- "1" -> "1111"
    if str_width == 1 then
        return str:rep(width)
    end

    -- "12" -> "1212"
    if width % str_width == 0 then
        return str:rep(width / str_width)
    end

    -- "12" -> "12121": as many whole copies as fit, then a partial copy
    local repeatable_len = math.floor(width / str_width)
    local s = str:rep(repeatable_len)
    local chars = chunkHelper.utf8Split(str)
    local current_width = str_width * repeatable_len
    local i = 1
    while i <= #chars do
        local char_width = chunkHelper.virtTextStrWidth(chars[i], shiftwidth)
        -- assumed to be an out-of-bounds char (like in nerd fonts) followed by
        -- a whitespace if true: single-cell, non-ASCII, and followed by " "
        local likely_oob_char = char_width == 1
            and chars[i + 1] == " "
            and chars[i]:byte(1) > 0x7F
        -- such a pair is kept together and counted as two cells
        local char = likely_oob_char and chars[i] .. " " or chars[i]
        local next_width = current_width + (likely_oob_char and 2 or char_width)
        if next_width < width then
            s = s .. char
            current_width = next_width
        elseif next_width == width then
            s = s .. char
            break
        else
            -- the next piece would overshoot: fill what is left with spaces
            s = s .. string.rep(" ", width - current_width)
            break
        end
        if likely_oob_char then
            -- skip the whitespace part of out-of-bounds char + " "
            i = i + 2
        else
            i = i + 1
        end
    end
    return s
end
216+
140217function chunkHelper .shallowCmp (t1 , t2 )
141218 if # t1 ~= # t2 then
142219 return false
@@ -151,4 +228,150 @@ function chunkHelper.shallowCmp(t1, t2)
151228 return flag
152229end
153230
--- Tell whether a single character is one of the characters this module treats
--- as a blank cell.
---
--- The characters are matched by hand because "%s" is not usable here
--- (e.g. "%s" matches "\v", but that is printed as ^K). Non-ASCII blanks are
--- written as decimal byte escapes of their UTF-8 encoding so that no
--- invisible characters live in the source.
---
--- Deliberately NOT treated as blank:
---   U+1680 OGHAM SPACE MARK (usually rendered as "-"),
---   U+2028 LINE SEPARATOR / U+2029 PARAGRAPH SEPARATOR (some fonts render
---   them as "?" or a block),
---   U+3164 HANGUL FILLER / U+FFA0 HALFWIDTH HANGUL FILLER (can easily break
---   the rendering).
---@param char string a single character
---@return boolean
local function isBlankCellChar(char)
    -- indent characters: U+0020 SPACE and U+0009 TAB
    -- (control characters other than TAB are rendered like "^[" or "<200b>")
    if char == " " or char == "\t" then
        return true
    end

    -- U+2000..U+200A - EN QUAD..HAIR SPACE (UTF-8: E2 80 80 .. E2 80 8A)
    local b1, b2, b3 = char:byte(1, 3)
    if b1 == 0xe2 and b2 == 0x80 and b3 ~= nil and b3 >= 0x80 and b3 <= 0x8a then
        return true
    end

    return char == "\194\160" -- U+00A0 - NO-BREAK SPACE
        or char == "\226\128\175" -- U+202F - NARROW NO-BREAK SPACE
        or char == "\226\129\159" -- U+205F - MEDIUM MATHEMATICAL SPACE
        or char == "\227\128\128" -- U+3000 - IDEOGRAPHIC SPACE
        or char == "\226\160\128" -- U+2800 - BRAILLE PATTERN BLANK
end

--- Check that no non-blank character occupies the cells from `start_col` to
--- `end_col` of `line`.
---
--- Columns are counted from 1. The line is walked character by character while
--- the running column is `<= end_col`; a character is examined when it starts
--- at or after `start_col`, or when its last cell reaches `start_col`.
--- Returns true when the walk ends (end of line or past `end_col`) without
--- meeting a non-blank character in that range.
---@param line string
---@param start_col integer
---@param end_col integer
---@param shiftwidth integer number of cells a TAB is counted as
---@return boolean
function chunkHelper.checkCellsBlank(line, start_col, end_col, shiftwidth)
    local current_col = 1
    local current_byte = 1
    local current_char = 1
    while current_byte <= #line and current_col <= end_col do
        local final_byte = vim.str_byteindex(line, current_char)
        local char = line:sub(current_byte, final_byte)
        if char == "" then
            break
        end
        local b1 = char:byte(1)
        ---@type integer
        local next_col
        local next_byte = final_byte + 1
        local next_char = current_char + 1
        if char == " " then
            next_col = current_col + 1
        elseif char == "\t" then
            next_col = current_col + shiftwidth
        elseif b1 <= 0x1F or char == "\127" then
            -- despite nvim_strwidth returning 0 or 1, control chars are 2 cells wide
            next_col = current_col + 2
        elseif b1 <= 0x7F then
            -- other ASCII chars are single cell wide
            next_col = current_col + 1
        else
            local char_width = vim.api.nvim_strwidth(char)
            local next_byte_peek = line:byte(final_byte + 1)
            if char_width == 1 and next_byte_peek == 0x20 then
                -- the char is assumed to be an out-of-bounds char (like in nerd fonts)
                -- followed by a whitespace
                next_col = current_col + 2
                -- skip the whitespace part of out-of-bounds char + " "
                next_byte = next_byte + 1
                next_char = next_char + 1
            else
                next_col = current_col + char_width
            end
        end
        -- the character overlaps the requested range when it starts inside it
        -- or when its last cell (next_col - 1) reaches into it
        local in_range = current_col >= start_col or next_col - 1 >= start_col
        if in_range and not isBlankCellChar(char) then
            return false
        end
        current_col = next_col
        current_byte = next_byte
        current_char = next_char
    end
    return true
end
346+
--- Measure how many cells `str` is counted as by this module.
---
--- Per character: NUL contributes nothing (or ends the measurement when
--- `stop_on_null` is set), TAB counts as `shiftwidth`, other control characters
--- count as 2, remaining ASCII counts as 1, and everything else is measured
--- with `vim.api.nvim_strwidth`.
---@param str string
---@param shiftwidth integer number of cells a TAB is counted as
---@param stop_on_null? boolean when truthy, return the width accumulated before the first NUL
---@return integer
function chunkHelper.virtTextStrWidth(str, shiftwidth, stop_on_null)
    local total = 0
    for _, piece in ipairs(chunkHelper.utf8Split(str)) do
        local lead = piece:byte(1)
        if piece == "\0" then
            if stop_on_null then
                return total
            end
            -- otherwise a NUL simply adds no width
        elseif piece == "\t" then
            total = total + shiftwidth
        elseif lead <= 0x1F or lead == 0x7F then
            -- control chars other than NULL and TAB are two cells wide
            total = total + 2
        elseif lead <= 0x7F then
            -- other ASCII chars are single cell wide
            total = total + 1
        else
            total = total + vim.api.nvim_strwidth(piece)
        end
    end
    return total
end
376+
154377return chunkHelper
0 commit comments