-
-
Notifications
You must be signed in to change notification settings - Fork 19.3k
Zip Strict specification for pandas/core/indexes
#62533
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 4 commits
f6f1210
7b352dc
a57bea0
418f19f
6dd8988
998a26b
f302b33
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -720,7 +720,7 @@ def from_frame( | |
| if not isinstance(df, ABCDataFrame): | ||
| raise TypeError("Input must be a DataFrame") | ||
|
|
||
| column_names, columns = zip(*df.items()) | ||
| column_names, columns = zip(*df.items(), strict=True) | ||
| names = column_names if names is None else names | ||
| return cls.from_arrays(columns, sortorder=sortorder, names=names) | ||
|
|
||
|
|
@@ -878,7 +878,10 @@ def levels(self) -> FrozenList: | |
| # Use cache_readonly to ensure that self.get_locs doesn't repeatedly | ||
| # create new IndexEngine | ||
| # https://github.com/pandas-dev/pandas/issues/31648 | ||
| result = [x._rename(name=name) for x, name in zip(self._levels, self._names)] | ||
| result = [ | ||
| x._rename(name=name) | ||
| for x, name in zip(self._levels, self._names, strict=True) | ||
| ] | ||
| for level in result: | ||
| # disallow midx.levels[0].name = "foo" | ||
| level._no_setting_name = True | ||
|
|
@@ -912,7 +915,7 @@ def _set_levels( | |
| else: | ||
| level_numbers = [self._get_level_number(lev) for lev in level] | ||
| new_levels_list = list(self._levels) | ||
| for lev_num, lev in zip(level_numbers, levels): | ||
| for lev_num, lev in zip(level_numbers, levels, strict=False): | ||
| new_levels_list[lev_num] = ensure_index(lev, copy=copy)._view() | ||
| new_levels = FrozenList(new_levels_list) | ||
|
|
||
|
|
@@ -1148,13 +1151,13 @@ def _set_codes( | |
| if level is None: | ||
| new_codes = FrozenList( | ||
| _coerce_indexer_frozen(level_codes, lev, copy=copy).view() | ||
| for lev, level_codes in zip(self._levels, codes) | ||
| for lev, level_codes in zip(self._levels, codes, strict=False) | ||
| ) | ||
| level_numbers = range(len(new_codes)) | ||
| else: | ||
| level_numbers = [self._get_level_number(lev) for lev in level] | ||
| new_codes_list = list(self._codes) | ||
| for lev_num, level_codes in zip(level_numbers, codes): | ||
| for lev_num, level_codes in zip(level_numbers, codes, strict=False): | ||
| lev = self.levels[lev_num] | ||
| new_codes_list[lev_num] = _coerce_indexer_frozen( | ||
| level_codes, lev, copy=copy | ||
|
|
@@ -1478,7 +1481,7 @@ def _formatter_func(self, tup): | |
| Formats each item in tup according to its level's formatter function. | ||
| """ | ||
| formatter_funcs = (level._formatter_func for level in self.levels) | ||
| return tuple(func(val) for func, val in zip(formatter_funcs, tup)) | ||
| return tuple(func(val) for func, val in zip(formatter_funcs, tup, strict=False)) | ||
|
|
||
| def _get_values_for_csv( | ||
| self, *, na_rep: str = "nan", **kwargs | ||
|
|
@@ -1487,7 +1490,7 @@ def _get_values_for_csv( | |
| new_codes = [] | ||
|
|
||
| # go through the levels and format them | ||
| for level, level_codes in zip(self.levels, self.codes): | ||
| for level, level_codes in zip(self.levels, self.codes, strict=False): | ||
| level_strs = level._get_values_for_csv(na_rep=na_rep, **kwargs) | ||
| # add nan values, if there are any | ||
| mask = level_codes == -1 | ||
|
|
@@ -1527,7 +1530,7 @@ def _format_multi( | |
| return [] | ||
|
|
||
| stringified_levels = [] | ||
| for lev, level_codes in zip(self.levels, self.codes): | ||
| for lev, level_codes in zip(self.levels, self.codes, strict=False): | ||
| na = _get_na_rep(lev.dtype) | ||
|
|
||
| if len(lev) > 0: | ||
|
|
@@ -1550,7 +1553,7 @@ def _format_multi( | |
| stringified_levels.append(formatted) | ||
|
|
||
| result_levels = [] | ||
| for lev, lev_name in zip(stringified_levels, self.names): | ||
| for lev, lev_name in zip(stringified_levels, self.names, strict=False): | ||
| level = [] | ||
|
|
||
| if include_names: | ||
|
|
@@ -1627,7 +1630,7 @@ def _set_names(self, names, *, level=None) -> None: | |
| level = (self._get_level_number(lev) for lev in level) | ||
|
|
||
| # set the name | ||
| for lev, name in zip(level, names): | ||
| for lev, name in zip(level, names, strict=False): | ||
| if name is not None: | ||
| # GH 20527 | ||
| # All items in 'names' need to be hashable: | ||
|
|
@@ -2094,7 +2097,7 @@ def _sort_levels_monotonic(self, raise_if_incomparable: bool = False) -> MultiIn | |
| new_levels = [] | ||
| new_codes = [] | ||
|
|
||
| for lev, level_codes in zip(self.levels, self.codes): | ||
| for lev, level_codes in zip(self.levels, self.codes, strict=False): | ||
| if not lev.is_monotonic_increasing: | ||
| try: | ||
| # indexer to reorder the levels | ||
|
|
@@ -2173,7 +2176,7 @@ def remove_unused_levels(self) -> MultiIndex: | |
| new_codes = [] | ||
|
|
||
| changed = False | ||
| for lev, level_codes in zip(self.levels, self.codes): | ||
| for lev, level_codes in zip(self.levels, self.codes, strict=False): | ||
|
||
| # Since few levels are typically unused, bincount() is more | ||
| # efficient than unique() - however it only accepts positive values | ||
| # (and drops order): | ||
|
|
@@ -2240,7 +2243,7 @@ def __getitem__(self, key): | |
| key = com.cast_scalar_indexer(key) | ||
|
|
||
| retval = [] | ||
| for lev, level_codes in zip(self.levels, self.codes): | ||
| for lev, level_codes in zip(self.levels, self.codes, strict=False): | ||
|
||
| if level_codes[key] == -1: | ||
| retval.append(np.nan) | ||
| else: | ||
|
|
@@ -3078,7 +3081,7 @@ def _partial_tup_index(self, tup: tuple, side: Literal["left", "right"] = "left" | |
|
|
||
| n = len(tup) | ||
| start, end = 0, len(self) | ||
| zipped = zip(tup, self.levels, self.codes) | ||
| zipped = zip(tup, self.levels, self.codes, strict=False) | ||
| for k, (lab, lev, level_codes) in enumerate(zipped): | ||
| section = level_codes[start:end] | ||
|
|
||
|
|
@@ -3362,7 +3365,7 @@ def maybe_mi_droplevels(indexer, levels): | |
| "Key for location must have same length as number of levels" | ||
| ) | ||
| result = None | ||
| for lev, k in zip(level, key): | ||
| for lev, k in zip(level, key, strict=False): | ||
| loc, new_index = self._get_loc_level(k, level=lev) | ||
| if isinstance(loc, slice): | ||
| mask = np.zeros(len(self), dtype=bool) | ||
|
|
@@ -3948,7 +3951,7 @@ def _union(self, other, sort) -> MultiIndex: | |
| if isinstance(result, MultiIndex): | ||
| return result | ||
| return MultiIndex.from_arrays( | ||
| zip(*result), sortorder=None, names=result_names | ||
| zip(*result, strict=False), sortorder=None, names=result_names | ||
| ) | ||
|
|
||
| else: | ||
|
|
@@ -3995,7 +3998,7 @@ def _maybe_match_names(self, other): | |
| if len(self.names) != len(other.names): | ||
| return [None] * len(self.names) | ||
| names = [] | ||
| for a_name, b_name in zip(self.names, other.names): | ||
| for a_name, b_name in zip(self.names, other.names, strict=False): | ||
| if a_name == b_name: | ||
| names.append(a_name) | ||
| else: | ||
|
|
@@ -4092,7 +4095,7 @@ def putmask(self, mask, value: MultiIndex) -> MultiIndex: | |
| new_codes = [] | ||
|
|
||
| for i, (value_level, level, level_codes) in enumerate( | ||
| zip(subset.levels, self.levels, self.codes) | ||
| zip(subset.levels, self.levels, self.codes, strict=False) | ||
| ): | ||
| new_level = level.union(value_level, sort=False) | ||
| value_codes = new_level.get_indexer_for(subset.get_level_values(i)) | ||
|
|
@@ -4123,7 +4126,7 @@ def insert(self, loc: int, item) -> MultiIndex: | |
|
|
||
| new_levels = [] | ||
| new_codes = [] | ||
| for k, level, level_codes in zip(item, self.levels, self.codes): | ||
| for k, level, level_codes in zip(item, self.levels, self.codes, strict=True): | ||
| if k not in level: | ||
| # have to insert into level | ||
| # must insert at end otherwise you have to recompute all the | ||
|
|
@@ -4219,7 +4222,7 @@ def _lexsort_depth(codes: list[np.ndarray], nlevels: int) -> int: | |
|
|
||
|
|
||
| def sparsify_labels(label_list, start: int = 0, sentinel: object = ""): | ||
| pivoted = list(zip(*label_list)) | ||
| pivoted = list(zip(*label_list, strict=True)) | ||
| k = len(label_list) | ||
|
|
||
| result = pivoted[: start + 1] | ||
|
|
@@ -4228,7 +4231,7 @@ def sparsify_labels(label_list, start: int = 0, sentinel: object = ""): | |
| for cur in pivoted[start + 1 :]: | ||
| sparse_cur = [] | ||
|
|
||
| for i, (p, t) in enumerate(zip(prev, cur)): | ||
| for i, (p, t) in enumerate(zip(prev, cur, strict=True)): | ||
| if i == k - 1: | ||
| sparse_cur.append(t) | ||
| result.append(sparse_cur) # type: ignore[arg-type] | ||
|
|
@@ -4243,7 +4246,7 @@ def sparsify_labels(label_list, start: int = 0, sentinel: object = ""): | |
|
|
||
| prev = cur | ||
|
|
||
| return list(zip(*result)) | ||
| return list(zip(*result, strict=True)) | ||
|
|
||
|
|
||
| def _get_na_rep(dtype: DtypeObj) -> str: | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What fails when this is `True`?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
None of the tests are failing for any of the cases flagged. I originally set this to
`False`, but after some more digging, I see that the MultiIndex constructor enforces the same length for the levels and codes. Setting these to `True`