@@ -238,164 +238,3 @@ class SplitPreTokenizer: PreTokenizer {
238238 return pattern. split ( text, invert: invert)
239239 }
240240}
241-
/// Describes how a piece of text should be split: either by a regular
/// expression or by a literal substring.
enum StringSplitPattern {
    /// Split wherever the given regular-expression pattern matches.
    case regexp(regexp: String)

    /// Split wherever the given literal substring occurs.
    case string(pattern: String)
}
246-
extension StringSplitPattern {
    /// Splits `text` according to this pattern.
    ///
    /// - Parameters:
    ///   - text: The string to split.
    ///   - invert: Only consulted for `.string` patterns, where the separators
    ///     are kept in the output when `invert` is `false`. `.regexp` patterns
    ///     always keep the matched separators.
    /// - Returns: The resulting pieces, in order of appearance.
    func split(_ text: String, invert: Bool = true) -> [String] {
        switch self {
        case .regexp(let expression):
            // Regular-expression split: separators are always included.
            return text.split(by: expression, includeSeparators: true)
        case .string(let literal):
            // Literal split: empty options disable regex interpretation.
            return text.split(by: literal, options: [], includeSeparators: !invert)
        }
    }
}
257-
extension StringSplitPattern {
    /// Builds a split pattern from a configuration object.
    ///
    /// `config.pattern.String` is consulted first and produces a `.string`
    /// pattern; otherwise `config.pattern.Regex` produces a `.regexp` pattern.
    ///
    /// - Parameter config: The configuration to read the pattern from.
    /// - Returns: The matching pattern, or `nil` when neither entry yields a string.
    static func from(config: Config) -> StringSplitPattern? {
        if let literal = config.pattern.String.string() {
            return .string(pattern: literal)
        }
        guard let expression = config.pattern.Regex.string() else {
            return nil
        }
        return .regexp(regexp: expression)
    }
}
269-
public extension String {
    /// Finds every occurrence of `string` in the receiver, scanning left to right.
    ///
    /// - Parameters:
    ///   - string: The text or pattern to search for, interpreted according to `options`.
    ///   - options: Options forwarded to `range(of:options:range:)`.
    ///     Defaults to `.regularExpression`.
    /// - Returns: The matched ranges in order of appearance.
    func ranges(of string: String, options: CompareOptions = .regularExpression) -> [Range<Index>] {
        var result: [Range<Index>] = []
        var start = startIndex
        while let match = range(of: string, options: options, range: start..<endIndex) {
            result.append(match)
            if match.isEmpty {
                // A zero-length match does not advance on its own: step one
                // character forward, and stop once there is nowhere left to go
                // so the same empty match cannot be reported forever.
                guard let next = index(match.lowerBound, offsetBy: 1, limitedBy: endIndex) else { break }
                start = next
            } else {
                start = match.upperBound
            }
        }
        return result
    }

    /// Splits the receiver around every occurrence of `string`.
    ///
    /// - Parameters:
    ///   - string: The separator text or pattern, interpreted according to `options`.
    ///   - options: Options forwarded to `range(of:options:range:)`.
    ///     Defaults to `.regularExpression`.
    ///   - includeSeparators: When `true`, each matched separator is emitted as
    ///     its own element between the surrounding pieces.
    ///   - omittingEmptySubsequences: When `true` (the default), empty pieces
    ///     between, before, or after separators are dropped. When `false`,
    ///     they are kept as empty strings.
    /// - Returns: The pieces (and optionally separators) in order of appearance.
    func split(by string: String, options: CompareOptions = .regularExpression, includeSeparators: Bool = false, omittingEmptySubsequences: Bool = true) -> [String] {
        var result: [String] = []
        var pieceStart = startIndex
        var searchStart = startIndex
        while let match = range(of: string, options: options, range: searchStart..<endIndex) {
            if match.isEmpty {
                // A zero-length match cannot act as a separator and would never
                // advance the cursor; skip one character to guarantee progress.
                guard let next = index(match.lowerBound, offsetBy: 1, limitedBy: endIndex) else { break }
                searchStart = next
                continue
            }
            // Keep the piece unless it is empty and empty pieces are being omitted.
            if !omittingEmptySubsequences || pieceStart < match.lowerBound {
                result.append(String(self[pieceStart..<match.lowerBound]))
            }
            if includeSeparators {
                result.append(String(self[match]))
            }
            pieceStart = match.upperBound
            searchStart = match.upperBound
        }

        // Trailing piece after the last separator (or the whole string if none matched).
        if !omittingEmptySubsequences || pieceStart < endIndex {
            result.append(String(self[pieceStart...]))
        }
        return result
    }

    /// Splits the receiver around every match of `captureRegex`.
    ///
    /// This version supports capture groups, whereas the string-based one above doesn't.
    /// For each match, the text before it is emitted (if non-empty), followed by
    /// the highest-numbered capture group that participated in the match
    /// (falling back to the whole match).
    ///
    /// - Parameter captureRegex: The compiled regular expression to split on.
    /// - Returns: The pieces and separators in order; `[self]` when nothing matches.
    func split(by captureRegex: NSRegularExpression) -> [String] {
        // Find the matching capture groups
        let selfRange = NSRange(startIndex..<endIndex, in: self)
        let matches = captureRegex.matches(in: self, options: [], range: selfRange)

        if matches.isEmpty { return [self] }

        var result: [String] = []
        var start = startIndex

        for match in matches {
            // IMPORTANT: convert from NSRange to Range<String.Index>
            // https://stackoverflow.com/questions/75543272/convert-a-given-utf8-nsrange-in-a-string-to-a-utf16-nsrange
            guard let matchRange = Range(match.range, in: self) else { continue }

            // Add text before the match
            if start < matchRange.lowerBound {
                result.append(String(self[start..<matchRange.lowerBound]))
            }

            // Move start to after the match
            start = matchRange.upperBound

            // Append separator, supporting capture groups: walk the groups from
            // last to first and take the first one that converts to a valid range.
            for r in (0..<match.numberOfRanges).reversed() {
                let nsRange = match.range(at: r)
                if let sepRange = Range(nsRange, in: self) {
                    result.append(String(self[sepRange]))
                    break
                }
            }
        }

        // Append remaining suffix
        if start < endIndex {
            result.append(String(self[start...]))
        }

        return result
    }
}
343-
/// Selects what happens to the matched delimiters when a string is split.
public enum SplitDelimiterBehavior {
    /// Delimiters are dropped from the output.
    case removed

    /// Each delimiter is emitted as its own element.
    case isolated

    /// Each delimiter is appended to the piece that precedes it.
    case mergedWithPrevious

    /// Each delimiter is prepended to the piece that follows it.
    case mergedWithNext
}
350-
public extension String {
    /// Splits the receiver around every occurrence of `string`, treating the
    /// delimiters as requested by `behavior`.
    ///
    /// Splitting `"the-final--countdown"` on `"-"` yields:
    /// - `.mergedWithNext`: `["the", "-final", "-", "-countdown"]`
    /// - `.mergedWithPrevious`: `["the-", "final-", "-", "countdown"]`
    ///
    /// - Parameters:
    ///   - string: The delimiter text or pattern, interpreted according to `options`.
    ///   - options: Options forwarded to the underlying search.
    ///     Defaults to `.regularExpression`.
    ///   - behavior: How delimiters are represented in the output.
    /// - Returns: The resulting pieces in order of appearance.
    func split(by string: String, options: CompareOptions = .regularExpression, behavior: SplitDelimiterBehavior) -> [String] {
        switch behavior {
        case .removed:
            return split(by: string, options: options, includeSeparators: false)

        case .isolated:
            return split(by: string, options: options, includeSeparators: true)

        case .mergedWithNext:
            // Every piece starts at a delimiter and runs up to the next one.
            let delimiters = ranges(of: string, options: options)
            var pieces: [String] = []
            var pieceStart = startIndex
            // A delimiter at the very start would produce an empty leading piece; skip it.
            for delimiter in delimiters where delimiter.lowerBound != startIndex {
                pieces.append(String(self[pieceStart..<delimiter.lowerBound]))
                pieceStart = delimiter.lowerBound
            }
            if pieceStart < endIndex {
                pieces.append(String(self[pieceStart..<endIndex]))
            }
            return pieces

        case .mergedWithPrevious:
            // Every piece runs up to and including the delimiter that ends it.
            let delimiters = ranges(of: string, options: options)
            var pieces: [String] = []
            var pieceStart = startIndex
            for delimiter in delimiters {
                pieces.append(String(self[pieceStart..<delimiter.upperBound]))
                pieceStart = delimiter.upperBound
            }
            if pieceStart < endIndex {
                pieces.append(String(self[pieceStart..<endIndex]))
            }
            return pieces
        }
    }
}
0 commit comments