From b1a226724190be4c6e80836b5fd7bd6bcf30d56e Mon Sep 17 00:00:00 2001 From: Lu Yibo <1478354316@qq.com> Date: Wed, 5 Nov 2025 00:22:50 +0800 Subject: [PATCH 1/2] Enhance zip calls with strict parameter Added 'strict=True' parameter to zip calls for better error handling. --- pandas/core/reshape/encoding.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/core/reshape/encoding.py b/pandas/core/reshape/encoding.py index 5c5fed272b925..fad2cf4b3cd1e 100644 --- a/pandas/core/reshape/encoding.py +++ b/pandas/core/reshape/encoding.py @@ -211,7 +211,7 @@ def check_len(item, name: str) -> None: # columns to prepend to result. with_dummies = [data.select_dtypes(exclude=dtypes_to_encode)] - for col, pre, sep in zip(data_to_encode.items(), prefix, prefix_sep): + for col, pre, sep in zip(data_to_encode.items(), prefix, prefix_sep, strict=True): # col is (column_name, column), use just column data here dummy = _get_dummies_1d( col[1], @@ -325,7 +325,7 @@ def get_empty_frame(data) -> DataFrame: codes = codes[mask] n_idx = np.arange(N)[mask] - for ndx, code in zip(n_idx, codes): + for ndx, code in zip(n_idx, codes, strict=True): sp_indices[code].append(ndx) if drop_first: @@ -333,7 +333,7 @@ def get_empty_frame(data) -> DataFrame: # GH12042 sp_indices = sp_indices[1:] dummy_cols = dummy_cols[1:] - for col, ixs in zip(dummy_cols, sp_indices): + for col, ixs in zip(dummy_cols, sp_indices, strict=True): sarr = SparseArray( np.ones(len(ixs), dtype=dtype), sparse_index=IntIndex(N, ixs), @@ -538,7 +538,7 @@ def from_dummies( raise ValueError(len_msg) elif isinstance(default_category, Hashable): default_category = dict( - zip(variables_slice, [default_category] * len(variables_slice)) + zip(variables_slice, [default_category] * len(variables_slice), strict=True) ) else: raise TypeError( From d43efb99758f4300e4a85b4ddd4fa240c7e76ea9 Mon Sep 17 00:00:00 2001 From: Lu Yibo <1478354316@qq.com> Date: Wed, 5 Nov 2025 00:35:25 +0800 Subject: [PATCH 2/2] some 
zip calls revert to non-strict (strict=True removed) to fix bugs --- pandas/core/reshape/encoding.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/reshape/encoding.py b/pandas/core/reshape/encoding.py index fad2cf4b3cd1e..0dfd721dee312 100644 --- a/pandas/core/reshape/encoding.py +++ b/pandas/core/reshape/encoding.py @@ -211,7 +211,7 @@ def check_len(item, name: str) -> None: # columns to prepend to result. with_dummies = [data.select_dtypes(exclude=dtypes_to_encode)] - for col, pre, sep in zip(data_to_encode.items(), prefix, prefix_sep, strict=True): + for col, pre, sep in zip(data_to_encode.items(), prefix, prefix_sep): # col is (column_name, column), use just column data here dummy = _get_dummies_1d( col[1], @@ -538,7 +538,7 @@ def from_dummies( raise ValueError(len_msg) elif isinstance(default_category, Hashable): default_category = dict( - zip(variables_slice, [default_category] * len(variables_slice), strict=True) + zip(variables_slice, [default_category] * len(variables_slice)) ) else: raise TypeError(