55 'find'
66]
77
# Default base (PRIME_NUMBER) and modulus (MOD) of the polynomial hash
# used by the Rabin-Karp helpers in this module.
PRIME_NUMBER = 257
MOD = 1000000007
9+
810def find (text , query , algorithm ):
911 """
1012 Finds occurrence of a query string within the text string.
@@ -22,6 +24,7 @@ def find(text, query, algorithm):
2224 Currently the following algorithms are
2325 supported,
2426 'kmp' -> Knuth-Morris-Pratt as given in [1].
27+ 'rabin_karp' -> Rabin–Karp algorithm as given in [2].
2528
2629 Returns
2730 =======
@@ -52,6 +55,7 @@ def find(text, query, algorithm):
5255 ==========
5356
5457 .. [1] https://en.wikipedia.org/wiki/Knuth–Morris–Pratt_algorithm
58+ .. [2] https://en.wikipedia.org/wiki/Rabin%E2%80%93Karp_algorithm
5559 """
5660 import pydatastructs .strings .algorithms as algorithms
5761 func = "_" + algorithm
@@ -64,6 +68,8 @@ def find(text, query, algorithm):
6468
6569
def _knuth_morris_pratt(text, query):
    """
    Searches for ``query`` inside ``text`` using Knuth-Morris-Pratt.

    When either argument has length zero an empty
    ``DynamicOneDimensionalArray`` of ints is returned immediately.
    Otherwise the table built by ``_build_kmp_table`` for ``query`` is
    handed to ``_do_match`` and its result is returned unchanged.
    """
    if not len(text) or not len(query):
        return DynamicOneDimensionalArray(int, 0)
    return _do_match(text, query, _build_kmp_table(query))
6975
@@ -107,3 +113,38 @@ def _do_match(string, query, kmp_table):
107113 k = k + 1
108114
109115 return positions
116+
def _p_pow(length, p=PRIME_NUMBER, m=MOD):
    """
    Builds a table of the successive powers of ``p`` reduced modulo ``m``.

    Parameters
    ==========

    length
        Number of entries in the table.
    p
        Base whose powers are tabulated.
    m
        Modulus applied after every multiplication.

    Returns
    =======

    OneDimensionalArray
        Array of ``length`` ints whose entry ``k`` is ``p**k % m``.
    """
    table = OneDimensionalArray(int, length)
    table[0] = 1
    exponent = 1
    while exponent < length:
        # Each entry is derived from the one stored just before it.
        table[exponent] = (table[exponent - 1] * p) % m
        exponent += 1
    return table
123+
def _hash_str(string, p=PRIME_NUMBER, m=MOD):
    """
    Computes the polynomial hash of ``string``.

    The hash is the sum of ``ord(string[i]) * p**i`` over every index
    ``i``, reduced modulo ``m``.

    Parameters
    ==========

    string
        The string to hash. An empty string hashes to ``0``.
    p
        Base of the polynomial.
    m
        Modulus applied to the running sum and to the running power.

    Returns
    =======

    int
        The hash value, in the range ``[0, m)`` for positive ``m``.
    """
    hash_value = 0
    # Carry the current power of p along instead of materialising a whole
    # table of powers that would be read exactly once; this also keeps the
    # empty string from requesting a zero-length table.
    power = 1
    for char in string:
        hash_value = (hash_value + ord(char) * power) % m
        power = (power * p) % m
    return hash_value
130+
def _rabin_karp(text, query):
    """
    Finds every occurrence of ``query`` in ``text`` with the Rabin-Karp
    hashing technique.

    Prefix hashes of ``text`` are computed once, so the hash of any
    window of ``len(query)`` characters is obtained by a subtraction.
    A window whose hash agrees with the hash of ``query`` is then compared
    character by character, so that a hash collision can never be
    reported as a match.

    Parameters
    ==========

    text
        The string in which the search is performed.
    query
        The string which is searched for.

    Returns
    =======

    DynamicOneDimensionalArray
        Start indices of the occurrences of ``query`` in ``text``, in
        increasing order. Empty when either string is empty or when
        ``query`` is longer than ``text``.
    """
    t = len(text)
    q = len(query)
    positions = DynamicOneDimensionalArray(int, 0)
    if q == 0 or t == 0:
        return positions

    query_hash = _hash_str(query)
    text_hash = OneDimensionalArray(int, t + 1)
    text_hash.fill(0)
    p_pow = _p_pow(t)

    # text_hash[i] holds the hash of text[:i]; term i is weighted by p**i.
    for i in range(t):
        text_hash[i + 1] = (text_hash[i] + ord(text[i]) * p_pow[i]) % MOD

    for i in range(t - q + 1):
        # Hash of text[i:i + q], still carrying an extra factor of p**i;
        # adding MOD keeps the difference non-negative before reduction.
        curr_hash = (text_hash[i + q] + MOD - text_hash[i]) % MOD
        # Scale the query hash by the same p**i rather than dividing the
        # window hash, then confirm the hit to rule out a collision.
        if (curr_hash == (query_hash * p_pow[i]) % MOD
                and text[i:i + q] == query):
            positions.append(i)

    return positions
0 commit comments