1111 cast ,
1212 overload ,
1313)
14+ import warnings
1415
1516import numpy as np
1617
2324)
2425from pandas ._libs .arrays import NDArrayBacked
2526from pandas .compat .numpy import function as nv
27+ from pandas .errors import Pandas4Warning
28+ from pandas .util ._exceptions import find_stack_level
2629from pandas .util ._validators import validate_bool_kwarg
2730
2831from pandas .core .dtypes .cast import (
@@ -476,7 +479,11 @@ def __init__(
476479 elif isinstance (values .dtype , CategoricalDtype ):
477480 old_codes = extract_array (values )._codes
478481 codes = recode_for_categories (
479- old_codes , values .dtype .categories , dtype .categories , copy = copy
482+ old_codes ,
483+ values .dtype .categories ,
484+ dtype .categories ,
485+ copy = copy ,
486+ warn = True ,
480487 )
481488
482489 else :
@@ -528,7 +535,12 @@ def _from_sequence(
528535
529536 def _cast_pointwise_result (self , values ) -> ArrayLike :
530537 res = super ()._cast_pointwise_result (values )
531- cat = type (self )._from_sequence (res , dtype = self .dtype )
538+ with warnings .catch_warnings ():
539+ warnings .filterwarnings (
540+ "ignore" ,
541+ "Constructing a Categorical with a dtype and values containing" ,
542+ )
543+ cat = type (self )._from_sequence (res , dtype = self .dtype )
532544 if (cat .isna () == isna (res )).all ():
533545 # i.e. the conversion was non-lossy
534546 return cat
@@ -565,6 +577,15 @@ def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike:
565577 dtype = self .dtype .update_dtype (dtype )
566578 self = self .copy () if copy else self
567579 result = self ._set_dtype (dtype , copy = False )
580+ wrong = result .isna () & ~ self .isna ()
581+ if wrong .any ():
582+ warnings .warn (
583+ "Constructing a Categorical with a dtype and values containing "
584+ "non-null entries not in that dtype's categories is deprecated "
585+ "and will raise in a future version." ,
586+ Pandas4Warning ,
587+ stacklevel = find_stack_level (),
588+ )
568589
569590 elif isinstance (dtype , ExtensionDtype ):
570591 return super ().astype (dtype , copy = copy )
@@ -659,14 +680,16 @@ def _from_inferred_categories(
659680 if known_categories :
660681 # Recode from observation order to dtype.categories order.
661682 categories = dtype .categories
662- codes = recode_for_categories (inferred_codes , cats , categories , copy = False )
683+ codes = recode_for_categories (
684+ inferred_codes , cats , categories , copy = False , warn = True
685+ )
663686 elif not cats .is_monotonic_increasing :
664687 # Sort categories and recode for unknown categories.
665688 unsorted = cats .copy ()
666689 categories = cats .sort_values ()
667690
668691 codes = recode_for_categories (
669- inferred_codes , unsorted , categories , copy = False
692+ inferred_codes , unsorted , categories , copy = False , warn = True
670693 )
671694 dtype = CategoricalDtype (categories , ordered = False )
672695 else :
@@ -787,7 +810,7 @@ def categories(self) -> Index:
787810 >>> ser.cat.categories
788811 Index(['a', 'b', 'c'], dtype='str')
789812
790- >>> raw_cat = pd.Categorical(["a" , "b", "c", "a" ], categories=["b", "c", "d"])
813+ >>> raw_cat = pd.Categorical([None , "b", "c", None ], categories=["b", "c", "d"])
791814 >>> ser = pd.Series(raw_cat)
792815 >>> ser.cat.categories
793816 Index(['b', 'c', 'd'], dtype='str')
@@ -1095,7 +1118,7 @@ def set_categories(
10951118 For :class:`pandas.Series`:
10961119
10971120 >>> raw_cat = pd.Categorical(
1098- ... ["a", "b", "c", "A" ], categories=["a", "b", "c"], ordered=True
1121+ ... ["a", "b", "c", None ], categories=["a", "b", "c"], ordered=True
10991122 ... )
11001123 >>> ser = pd.Series(raw_cat)
11011124 >>> ser
@@ -1117,7 +1140,7 @@ def set_categories(
11171140 For :class:`pandas.CategoricalIndex`:
11181141
11191142 >>> ci = pd.CategoricalIndex(
1120- ... ["a", "b", "c", "A" ], categories=["a", "b", "c"], ordered=True
1143+ ... ["a", "b", "c", None ], categories=["a", "b", "c"], ordered=True
11211144 ... )
11221145 >>> ci
11231146 CategoricalIndex(['a', 'b', 'c', nan], categories=['a', 'b', 'c'],
@@ -1145,7 +1168,7 @@ def set_categories(
11451168 codes = cat ._codes
11461169 else :
11471170 codes = recode_for_categories (
1148- cat .codes , cat .categories , new_dtype .categories , copy = False
1171+ cat .codes , cat .categories , new_dtype .categories , copy = False , warn = False
11491172 )
11501173 NDArrayBacked .__init__ (cat , codes , new_dtype )
11511174 return cat
@@ -2956,7 +2979,7 @@ def codes(self) -> Series:
29562979
29572980 Examples
29582981 --------
2959- >>> raw_cate = pd.Categorical(["a", "b", "c" , "a"], categories=["a", "b"])
2982+ >>> raw_cate = pd.Categorical(["a", "b", None , "a"], categories=["a", "b"])
29602983 >>> ser = pd.Series(raw_cate)
29612984 >>> ser.cat.codes
29622985 0 0
@@ -2991,11 +3014,25 @@ def _get_codes_for_values(
29913014 If `values` is known to be a Categorical, use recode_for_categories instead.
29923015 """
29933016 codes = categories .get_indexer_for (values )
3017+ wrong = (codes == - 1 ) & ~ isna (values )
3018+ if wrong .any ():
3019+ warnings .warn (
3020+ "Constructing a Categorical with a dtype and values containing "
3021+ "non-null entries not in that dtype's categories is deprecated "
3022+ "and will raise in a future version." ,
3023+ Pandas4Warning ,
3024+ stacklevel = find_stack_level (),
3025+ )
29943026 return coerce_indexer_dtype (codes , categories )
29953027
29963028
29973029def recode_for_categories (
2998- codes : np .ndarray , old_categories , new_categories , * , copy : bool
3030+ codes : np .ndarray ,
3031+ old_categories ,
3032+ new_categories ,
3033+ * ,
3034+ copy : bool = True ,
3035+ warn : bool = False ,
29993036) -> np .ndarray :
30003037 """
30013038 Convert a set of codes to a new set of categories
@@ -3006,6 +3043,8 @@ def recode_for_categories(
30063043 old_categories, new_categories : Index
30073044 copy: bool, default True
30083045 Whether to copy if the codes are unchanged.
3046+ warn : bool, default False
3047+ Whether to warn when an entry of old_categories is missing from new_categories, so that codes pointing at it are recoded to -1 (NA).
30093048
30103049 Returns
30113050 -------
@@ -3030,9 +3069,18 @@ def recode_for_categories(
30303069 return codes .copy ()
30313070 return codes
30323071
3033- indexer = coerce_indexer_dtype (
3034- new_categories .get_indexer_for (old_categories ), new_categories
3035- )
3072+ codes_in_old_cats = new_categories .get_indexer_for (old_categories )
3073+ if warn :
3074+ wrong = codes_in_old_cats == - 1
3075+ if wrong .any ():
3076+ warnings .warn (
3077+ "Constructing a Categorical with a dtype and values containing "
3078+ "non-null entries not in that dtype's categories is deprecated "
3079+ "and will raise in a future version." ,
3080+ Pandas4Warning ,
3081+ stacklevel = find_stack_level (),
3082+ )
3083+ indexer = coerce_indexer_dtype (codes_in_old_cats , new_categories )
30363084 new_codes = take_nd (indexer , codes , fill_value = - 1 )
30373085 return new_codes
30383086
0 commit comments