Skip to content

Commit 22dea95

Browse files
committed
Use 64 bit integers when matching pages
1 parent c3c82f3 commit 22dea95

File tree

1 file changed

+6
-6
lines changed

1 file changed

+6
-6
lines changed

scrapely/extraction/_similarity.pyx

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@ cimport numpy as np
33
cimport cython
44
from cpython.version cimport PY_MAJOR_VERSION
55

6-
cdef np_kmp_match_length(np.ndarray[np.int_t, ndim=1] sequence,
7-
np.ndarray[np.int_t, ndim=1] pattern,
6+
cdef np_kmp_match_length(np.ndarray[np.int64_t, ndim=1] sequence,
7+
np.ndarray[np.int64_t, ndim=1] pattern,
88
int start=0,
99
int end=-1):
1010
"""Adapted from KMP substring search:
@@ -17,7 +17,7 @@ cdef np_kmp_match_length(np.ndarray[np.int_t, ndim=1] sequence,
1717
if end == -1:
1818
end = m
1919
# build table of shift amounts
20-
cdef np.ndarray[np.int_t, ndim=1] shifts = np.ones((m + 1,), dtype=int)
20+
cdef np.ndarray[np.int64_t, ndim=1] shifts = np.ones((m + 1,), dtype=int)
2121
cdef int shift = 1
2222
cdef int pos
2323
for pos in range(m):
@@ -56,7 +56,7 @@ cdef u_kmp_match_length(unicode sequence, unicode pattern, int start=0, int end=
5656
if end == -1:
5757
end = m
5858
# build table of shift amounts
59-
cdef np.ndarray[np.int_t, ndim=1] shifts = np.ones((m + 1,), dtype=int)
59+
cdef np.ndarray[np.int64_t, ndim=1] shifts = np.ones((m + 1,), dtype=int)
6060
cdef int shift = 1
6161
cdef int pos
6262
for pos in range(m):
@@ -84,8 +84,8 @@ cdef u_kmp_match_length(unicode sequence, unicode pattern, int start=0, int end=
8484
return ret
8585

8686

87-
cdef np_naive_match_length(np.ndarray[np.int_t, ndim=1] sequence,
88-
np.ndarray[np.int_t, ndim=1] pattern,
87+
cdef np_naive_match_length(np.ndarray[np.int64_t, ndim=1] sequence,
88+
np.ndarray[np.int64_t, ndim=1] pattern,
8989
int start=0,
9090
int end=-1):
9191
ret = []

0 commit comments

Comments
 (0)