@@ -1121,11 +1121,13 @@ cdef class StringHashTable(HashTable):
11211121 const char **vecs
11221122 khiter_t k
11231123 bint use_na_value
1124+ bint non_null_na_value
11241125
11251126 if return_inverse:
11261127 labels = np.zeros(n, dtype=np.intp)
11271128 uindexer = np.empty(n, dtype=np.int64)
11281129 use_na_value = na_value is not None
1130+ non_null_na_value = not checknull(na_value)
11291131
11301132 # assign pointers and pre-filter out missing (if ignore_na)
11311133 vecs = <const char **>malloc(n * sizeof(char *))
@@ -1134,7 +1136,12 @@ cdef class StringHashTable(HashTable):
11341136
11351137 if (ignore_na
11361138 and (not isinstance(val, str)
1137- or (use_na_value and val == na_value))):
1139+ or (use_na_value and (
1140+ (non_null_na_value and val == na_value) or
1141+ (not non_null_na_value and is_matching_na(val, na_value)))
1142+ )
1143+ )
1144+ ):
11381145 # if missing values do not count as unique values (i.e. if
11391146 # ignore_na is True), we can skip the actual value, and
11401147 # replace the label with na_sentinel directly
@@ -1400,18 +1407,23 @@ cdef class PyObjectHashTable(HashTable):
14001407 object val
14011408 khiter_t k
14021409 bint use_na_value
1403-
1410+ bint non_null_na_value
14041411 if return_inverse:
14051412 labels = np.empty(n, dtype=np.intp)
14061413 use_na_value = na_value is not None
1414+ non_null_na_value = not checknull(na_value)
14071415
14081416 for i in range(n):
14091417 val = values[i]
14101418 hash(val)
14111419
14121420 if ignore_na and (
14131421 checknull(val)
1414- or (use_na_value and val == na_value)
1422+ or (use_na_value and (
1423+ (non_null_na_value and val == na_value) or
1424+ (not non_null_na_value and is_matching_na(val, na_value))
1425+ )
1426+ )
14151427 ):
14161428 # if missing values do not count as unique values (i.e. if
14171429 # ignore_na is True), skip the hashtable entry for them, and
0 commit comments