1- fn sa_naive ( s : & [ i32 ] ) -> Vec < usize > {
1+ fn sa_naive < T : Ord > ( s : & [ T ] ) -> Vec < usize > {
22 let n = s. len ( ) ;
33 let mut sa: Vec < usize > = ( 0 ..n) . collect ( ) ;
44 sa. sort_by ( |& ( mut l) , & ( mut r) | {
@@ -52,16 +52,173 @@ fn sa_doubling(s: &[i32]) -> Vec<usize> {
5252 sa
5353}
5454
55- fn sa_is ( s : & [ i32 ] , upper : i32 ) -> Vec < usize > {
56- sa_doubling ( s)
/// Input-length cut-offs used by `sa_is` to pick a construction algorithm.
///
/// The values are exposed as associated functions so the choice is made
/// through a type parameter rather than through run-time arguments.
trait Threshold {
    /// Inputs strictly shorter than this are handled by `sa_naive`.
    fn threshold_naive() -> usize;
    /// Inputs strictly shorter than this (and not shorter than
    /// `threshold_naive`) are handled by `sa_doubling`.
    fn threshold_doubling() -> usize;
}
59+
60+ enum DefaultThreshold { }
61+ impl Threshold for DefaultThreshold {
62+ fn threshold_naive ( ) -> usize {
63+ 10
64+ }
65+ fn threshold_doubling ( ) -> usize {
66+ 40
67+ }
68+ }
69+
70+ fn sa_is < T : Threshold > ( s : & [ usize ] , upper : usize ) -> Vec < usize > {
71+ let n = s. len ( ) ;
72+ match n {
73+ 0 => return vec ! [ ] ,
74+ 1 => return vec ! [ 0 ] ,
75+ 2 => return if s[ 0 ] < s[ 1 ] { vec ! [ 0 , 1 ] } else { vec ! [ 1 , 0 ] } ,
76+ _ => ( ) ,
77+ }
78+ if n < T :: threshold_naive ( ) {
79+ return sa_naive ( s) ;
80+ }
81+ if n < T :: threshold_doubling ( ) {
82+ let s: Vec < i32 > = s. iter ( ) . map ( |& x| x as i32 ) . collect ( ) ;
83+ return sa_doubling ( & s) ;
84+ }
85+ let mut sa = vec ! [ 0 ; n] ;
86+ let mut ls = vec ! [ false ; n] ;
87+ for i in ( 0 ..n - 1 ) . rev ( ) {
88+ ls[ i] = if s[ i] == s[ i + 1 ] {
89+ ls[ i + 1 ]
90+ } else {
91+ s[ i] < s[ i + 1 ]
92+ } ;
93+ }
94+ let mut sum_l = vec ! [ 0 ; upper + 1 ] ;
95+ let mut sum_s = vec ! [ 0 ; upper + 1 ] ;
96+ for i in 0 ..n {
97+ if !ls[ i] {
98+ sum_s[ s[ i] ] += 1 ;
99+ } else {
100+ sum_l[ s[ i] + 1 ] += 1 ;
101+ }
102+ }
103+ for i in 0 ..=upper {
104+ sum_s[ i] += sum_l[ i] ;
105+ if i < upper {
106+ sum_l[ i + 1 ] += sum_s[ i] ;
107+ }
108+ }
109+
110+ // sa's origin is 1.
111+ let induce = |sa : & mut [ usize ] , lms : & [ usize ] | {
112+ for elem in sa. iter_mut ( ) {
113+ * elem = 0 ;
114+ }
115+ let mut buf = sum_s. clone ( ) ;
116+ for & d in lms {
117+ if d == n {
118+ continue ;
119+ }
120+ let old = buf[ s[ d] ] ;
121+ buf[ s[ d] ] += 1 ;
122+ sa[ old] = d + 1 ;
123+ }
124+ buf. copy_from_slice ( & sum_l) ;
125+ let old = buf[ s[ n - 1 ] ] ;
126+ buf[ s[ n - 1 ] ] += 1 ;
127+ sa[ old] = n;
128+ for i in 0 ..n {
129+ let v = sa[ i] ;
130+ if v >= 2 && !ls[ v - 2 ] {
131+ let old = buf[ s[ v - 2 ] ] ;
132+ buf[ s[ v - 2 ] ] += 1 ;
133+ sa[ old] = v - 1 ;
134+ }
135+ }
136+ buf. copy_from_slice ( & sum_l) ;
137+ for i in ( 0 ..n) . rev ( ) {
138+ let v = sa[ i] ;
139+ if v >= 2 && ls[ v - 2 ] {
140+ buf[ s[ v - 2 ] + 1 ] -= 1 ;
141+ sa[ buf[ s[ v - 2 ] + 1 ] ] = v - 1 ;
142+ }
143+ }
144+ } ;
145+ // origin: 1
146+ let mut lms_map = vec ! [ 0 ; n + 1 ] ;
147+ let mut m = 0 ;
148+ for i in 1 ..n {
149+ if !ls[ i - 1 ] && ls[ i] {
150+ lms_map[ i] = m + 1 ;
151+ m += 1 ;
152+ }
153+ }
154+ let mut lms = Vec :: with_capacity ( m) ;
155+ for i in 1 ..n {
156+ if !ls[ i - 1 ] && ls[ i] {
157+ lms. push ( i) ;
158+ }
159+ }
160+ assert_eq ! ( lms. len( ) , m) ;
161+ induce ( & mut sa, & lms) ;
162+
163+ if m > 0 {
164+ let mut sorted_lms = Vec :: with_capacity ( m) ;
165+ for & v in & sa {
166+ if lms_map[ v - 1 ] != 0 {
167+ sorted_lms. push ( v - 1 ) ;
168+ }
169+ }
170+ let mut rec_s = vec ! [ 0 ; m] ;
171+ let mut rec_upper = 0 ;
172+ rec_s[ lms_map[ sorted_lms[ 0 ] ] - 1 ] = 0 ;
173+ for i in 1 ..m {
174+ let mut l = sorted_lms[ i - 1 ] ;
175+ let mut r = sorted_lms[ i] ;
176+ let end_l = if lms_map[ l] < m { lms[ lms_map[ l] ] } else { n } ;
177+ let end_r = if lms_map[ r] < m { lms[ lms_map[ r] ] } else { n } ;
178+ let mut same = true ;
179+ if end_l - l != end_r - r {
180+ same = false ;
181+ } else {
182+ while l < end_l {
183+ if s[ l] != s[ r] {
184+ break ;
185+ }
186+ l += 1 ;
187+ r += 1 ;
188+ }
189+ if l == n || s[ l] != s[ r] {
190+ same = false ;
191+ }
192+ }
193+ if !same {
194+ rec_upper += 1 ;
195+ }
196+ rec_s[ lms_map[ sorted_lms[ i] ] - 1 ] = rec_upper;
197+ }
198+
199+ let rec_sa = sa_is :: < T > ( & rec_s, rec_upper) ;
200+ for i in 0 ..m {
201+ sorted_lms[ i] = lms[ rec_sa[ i] ] ;
202+ }
203+ induce ( & mut sa, & mut sorted_lms) ;
204+ }
205+ for i in 0 ..n {
206+ sa[ i] -= 1 ;
207+ }
208+ sa
209+ }
210+
211+ fn sa_is_i32 < T : Threshold > ( s : & [ i32 ] , upper : i32 ) -> Vec < usize > {
212+ let s: Vec < usize > = s. iter ( ) . map ( |& x| x as usize ) . collect ( ) ;
213+ sa_is :: < T > ( & s, upper as usize )
57214}
58215
59216pub fn suffix_array_manual ( s : & [ i32 ] , upper : i32 ) -> Vec < usize > {
60217 assert ! ( upper >= 0 ) ;
61218 for & elem in s {
62219 assert ! ( 0 <= elem && elem <= upper) ;
63220 }
64- sa_is ( s, upper)
221+ sa_is_i32 :: < DefaultThreshold > ( s, upper)
65222}
66223
67224pub fn suffix_array_arbitrary < T : Ord > ( s : & [ T ] ) -> Vec < usize > {
@@ -76,18 +233,41 @@ pub fn suffix_array_arbitrary<T: Ord>(s: &[T]) -> Vec<usize> {
76233 }
77234 s2[ idx[ i] ] = now;
78235 }
79- sa_is ( & s2, now)
236+ sa_is_i32 :: < DefaultThreshold > ( & s2, now)
80237}
81238
82239pub fn suffix_array ( s : impl IntoIterator < Item = char > ) -> Vec < usize > {
83- let mut s2: Vec < i32 > = s. into_iter ( ) . map ( |x| x as i32 ) . collect ( ) ;
84- sa_is ( & s2, 255 )
240+ let s2: Vec < usize > = s. into_iter ( ) . map ( |x| x as usize ) . collect ( ) ;
241+ sa_is :: < DefaultThreshold > ( & s2, 255 )
85242}
86243
87244#[ cfg( test) ]
88245mod tests {
89246 use super :: * ;
90247
248+ enum ZeroThreshold { }
249+ impl Threshold for ZeroThreshold {
250+ fn threshold_naive ( ) -> usize {
251+ 0
252+ }
253+ fn threshold_doubling ( ) -> usize {
254+ 0
255+ }
256+ }
257+
258+ fn verify_all ( str : & str , expected_array : & [ usize ] ) {
259+ let array: Vec < i32 > = str. bytes ( ) . map ( |x| x as i32 ) . collect ( ) ;
260+ let sa = sa_doubling ( & array) ;
261+ assert_eq ! ( sa, expected_array) ;
262+ let sa_naive = sa_naive ( & array) ;
263+ assert_eq ! ( sa_naive, expected_array) ;
264+ let sa_is = sa_is_i32 :: < ZeroThreshold > ( & array, 255 ) ;
265+ assert_eq ! ( sa_is, expected_array) ;
266+
267+ let sa_str = suffix_array ( str. chars ( ) ) ;
268+ assert_eq ! ( sa_str, expected_array) ;
269+ }
270+
91271 #[ test]
92272 fn test_sa_0 ( ) {
93273 let array = vec ! [ 0 , 1 , 2 , 3 , 4 ] ;
@@ -98,15 +278,12 @@ mod tests {
#[test]
fn test_sa_1() {
    let str = "abracadabra";
    verify_all(str, &[10, 7, 0, 3, 5, 8, 1, 4, 6, 9, 2]);
}

#[test]
fn test_sa_2() {
    // An example taken from https://mametter.hatenablog.com/entry/20180130/p1
    let str = "mmiissiissiippii";
    verify_all(str, &[15, 14, 10, 6, 2, 11, 7, 3, 1, 0, 13, 12, 9, 5, 8, 4]);
}
112289}
0 commit comments