Skip to content

Commit 1ca0f5e

Browse files
committed
Provide a new object for copy routines that require information about their destination.
1 parent 6d0095a commit 1ca0f5e

File tree

1 file changed

+125
-36
lines changed

1 file changed

+125
-36
lines changed

pandas/tests/generic/test_attrs_deepcopy_destination.py

Lines changed: 125 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
21
import weakref
32

43
import numpy as np
@@ -21,11 +20,23 @@ class StatsSummary(dict):
2120

2221
def __init__(self, owner, *, cols=None):
2322
import pandas as pd
23+
2424
assert isinstance(owner, pd.core.generic.NDFrame)
2525
self._owner_ref = weakref.ref(owner)
26-
super(StatsSummary, self).__init__(dict((column, type(self)(owner[column])) for column in (list(getattr(owner, "columns", {})) or super(StatsSummary, self).__init__(
27-
(name, function(owner)) for name, function in self.stats().items()
28-
) or {}) if owner[column].dtype.kind in "if"))
26+
super(StatsSummary, self).__init__(
27+
dict(
28+
(column, type(self)(owner[column]))
29+
for column in (
30+
list(getattr(owner, "columns", {}))
31+
or super(StatsSummary, self).__init__(
32+
(name, function(owner))
33+
for name, function in self.stats().items()
34+
)
35+
or {}
36+
)
37+
if owner[column].dtype.kind in "if"
38+
)
39+
)
2940
pass
3041

3142
@classmethod
@@ -34,12 +45,25 @@ def stats(cls):
3445
cummin=lambda series: series.cummin().sum(),
3546
cummax=lambda series: series.cummax().sum(),
3647
kurtosis=lambda series: series.kurt(),
37-
median=lambda series:series.median(),
48+
median=lambda series: series.median(),
3849
)
3950

4051
@classmethod
4152
def gauge(cls, obj, columns):
42-
return dict(((column,dict([[name, function(obj[column])] for name, function in cls.stats().items()])) for column,dtyp in columns))
53+
return dict(
54+
(
55+
(
56+
column,
57+
dict(
58+
[
59+
[name, function(obj[column])]
60+
for name, function in cls.stats().items()
61+
]
62+
),
63+
)
64+
for column, dtyp in columns
65+
)
66+
)
4367

4468
@property
4569
def owner(self):
@@ -51,12 +75,17 @@ def __eq__(self, other) -> bool:
5175

5276
def __deepcopy__(self, memo):
5377
import pandas as pd
78+
5479
# Find destination NDFrame in memo. The patch injects {id(dest): dest}.
55-
new_owner = next(
80+
new_owner = next(
5681
(v for v in memo.values() if isinstance(v, pd.core.generic.NDFrame)),
5782
None,
5883
)
59-
return type(self)(new_owner) if hasattr(new_owner, "select_dtypes") or new_owner.dtype.kind in "if" else None
84+
return (
85+
type(self)(new_owner)
86+
if hasattr(new_owner, "select_dtypes") or new_owner.dtype.kind in "if"
87+
else None
88+
)
6089

6190

6291
class FrozenHeadTail(dict):
@@ -68,9 +97,14 @@ class FrozenHeadTail(dict):
6897

6998
def __init__(self, owner, *, cols=None):
7099
import pandas as pd
100+
71101
assert isinstance(owner, pd.core.generic.NDFrame)
72102
self._owner_ref = weakref.ref(owner)
73-
super(FrozenHeadTail, self).__init__(dict((name, function(self.owner)) for name, function in self.stats().items()))
103+
super(FrozenHeadTail, self).__init__(
104+
dict(
105+
(name, function(self.owner)) for name, function in self.stats().items()
106+
)
107+
)
74108
pass
75109

76110
@property
@@ -80,8 +114,16 @@ def owner(self):
80114
@classmethod
81115
def stats(cls):
82116
return dict(
83-
head=lambda x:pd.DataFrame(x.values[:2], columns=list(getattr(x,"columns",[])) or [x.name], index=x.index[:2]),
84-
tail=lambda x:pd.DataFrame(x.values[-2:], columns=list(getattr(x,"columns",[])) or [x.name], index=x.index[-2:]),
117+
head=lambda x: pd.DataFrame(
118+
x.values[:2],
119+
columns=list(getattr(x, "columns", [])) or [x.name],
120+
index=x.index[:2],
121+
),
122+
tail=lambda x: pd.DataFrame(
123+
x.values[-2:],
124+
columns=list(getattr(x, "columns", [])) or [x.name],
125+
index=x.index[-2:],
126+
),
85127
)
86128

87129
def __eq__(self, other) -> bool:
@@ -92,7 +134,7 @@ def __eq__(self, other) -> bool:
92134
return False
93135

94136
def __deepcopy__(self, memo):
95-
new_owner = next(
137+
new_owner = next(
96138
(v for v in memo.values() if isinstance(v, pd.core.generic.NDFrame)),
97139
None,
98140
)
@@ -101,96 +143,143 @@ def __deepcopy__(self, memo):
101143

102144
def test_attrs_stats_summary_binds_to_destination_on_copy():
103145
# Sample Data
104-
dset = np.arange(8,dtype=float)
146+
dset = np.arange(8, dtype=float)
105147
np.random.shuffle(dset)
106148

107-
df = pd.DataFrame({"foo": dset, "bar": dset*2, "qux": np.array(["waldo","fred","plugh","thud"]).repeat(len(dset)//4)}) # mixed dtypes
149+
df = pd.DataFrame(
150+
{
151+
"foo": dset,
152+
"bar": dset * 2,
153+
"qux": np.array(["waldo", "fred", "plugh", "thud"]).repeat(len(dset) // 4),
154+
}
155+
) # mixed dtypes
108156

109157
df.attrs["summary"] = StatsSummary(df)
110158

111159
# --------------------------------------
112160
# Copy triggered by panel Y axis slicing
113161
# --------------------------------------
114-
out = df.iloc[:len(df)//2]
162+
out = df.iloc[: len(df) // 2]
115163
summ = out.attrs.get("summary")
116-
gage = StatsSummary.gauge(out, list(filter(lambda x:x[-1].kind in "if", out.dtypes.to_dict().items())))
164+
gage = StatsSummary.gauge(
165+
out, list(filter(lambda x: x[-1].kind in "if", out.dtypes.to_dict().items()))
166+
)
117167

118168
assert isinstance(summ, StatsSummary)
119169

120170
# The cache should now belong to the *new* DataFrame
121171
assert summ.owner is out
122172
# pandas.DataFrame propagates to its pandas.Series correspondingly
123-
assert all([out[column].attrs["summary"] == out.attrs["summary"][column] for column in list(gage)])
173+
assert all(
174+
[
175+
out[column].attrs["summary"] == out.attrs["summary"][column]
176+
for column in list(gage)
177+
]
178+
)
124179
# And stats reflect the destination (shape matches numeric subset)
125180
assert summ == gage
126181

127182
# -----------------------------------
128183
# Copy triggered by columns selection
129184
# -----------------------------------
130-
out = df[["foo","qux"]]
185+
out = df[["foo", "qux"]]
131186
summ = out.attrs.get("summary")
132-
gage = StatsSummary.gauge(out, list(filter(lambda x:x[-1].kind in "if", out.dtypes.to_dict().items())))
187+
gage = StatsSummary.gauge(
188+
out, list(filter(lambda x: x[-1].kind in "if", out.dtypes.to_dict().items()))
189+
)
133190

134191
assert isinstance(summ, StatsSummary)
135192

136193
# The cache should now belong to the *new* DataFrame
137194
assert summ.owner is out
138195
# pandas.DataFrame propagates to its pandas.Series correspondingly
139-
assert all([out[column].attrs["summary"] == out.attrs["summary"][column] for column in list(gage)])
196+
assert all(
197+
[
198+
out[column].attrs["summary"] == out.attrs["summary"][column]
199+
for column in list(gage)
200+
]
201+
)
140202
# And stats reflect the destination (shape matches numeric subset)
141203
assert summ == gage
142204

143205
# ----------------------------------
144206
# Copy triggered by DataFrame concat
145207
# ----------------------------------
146-
left = df.iloc[len(df)//4:].copy(deep=True)
147-
right = df.iloc[len(df)//4:].copy(deep=True)
148-
out = pd.concat([left,right])
208+
left = df.iloc[len(df) // 4 :].copy(deep=True)
209+
right = df.iloc[len(df) // 4 :].copy(deep=True)
210+
out = pd.concat([left, right])
149211

150212
summ = out.attrs.get("summary")
151-
gage = StatsSummary.gauge(out, list(filter(lambda x:x[-1].kind in "if", out.dtypes.to_dict().items())))
213+
gage = StatsSummary.gauge(
214+
out, list(filter(lambda x: x[-1].kind in "if", out.dtypes.to_dict().items()))
215+
)
152216

153217
assert isinstance(summ, StatsSummary)
154218

155219
# The cache should now belong to the *new* DataFrame
156220
assert summ.owner is out
157221
# pandas.DataFrame propagates to its pandas.Series correspondingly
158-
assert all([out[column].attrs["summary"] == out.attrs["summary"][column] for column in list(gage)])
222+
assert all(
223+
[
224+
out[column].attrs["summary"] == out.attrs["summary"][column]
225+
for column in list(gage)
226+
]
227+
)
159228
# And stats reflect the destination (shape matches numeric subset)
160229
assert summ == gage
161230

162231
# -----------------------------------
163232
# Arithmetic operations on DataFrame
164233
# -----------------------------------
165-
out = df[["foo","bar"]]
166-
out = out.multiply(np.random.random_integers(0, 1, len(out))*np.lib.stride_tricks.as_strided(np.asarray(2, dtype=np.int8), shape=(len(out),), strides=(0,))-1, axis=0)
234+
out = df[["foo", "bar"]]
235+
out = out.multiply(
236+
np.random.random_integers(0, 1, len(out))
237+
* np.lib.stride_tricks.as_strided(
238+
np.asarray(2, dtype=np.int8), shape=(len(out),), strides=(0,)
239+
)
240+
- 1,
241+
axis=0,
242+
)
167243

168244
summ = out.attrs.get("summary")
169-
gage = StatsSummary.gauge(out, list(filter(lambda x:x[-1].kind in "if", out.dtypes.to_dict().items())))
245+
gage = StatsSummary.gauge(
246+
out, list(filter(lambda x: x[-1].kind in "if", out.dtypes.to_dict().items()))
247+
)
170248

171249
assert isinstance(summ, StatsSummary)
172250

173251
# The cache should now belong to the *new* DataFrame
174252
assert summ.owner is out
175253
# pandas.DataFrame propagates to its pandas.Series correspondingly
176-
assert all([out[column].attrs["summary"] == out.attrs["summary"][column] for column in list(gage)])
254+
assert all(
255+
[
256+
out[column].attrs["summary"] == out.attrs["summary"][column]
257+
for column in list(gage)
258+
]
259+
)
177260
# And stats reflect the destination (shape matches numeric subset)
178261
assert summ == gage
179262

180263

181264
def test_attrs_stats_summary_works_for_series_too():
182265
# Sample Data
183-
dset = np.arange(8,dtype=float)
266+
dset = np.arange(8, dtype=float)
184267
np.random.shuffle(dset)
185268

186-
df = pd.DataFrame({"foo": dset, "bar": dset*2, "qux": np.array(["waldo","fred","plugh","thud"]).repeat(len(dset)//4)}) # mixed dtypes
269+
df = pd.DataFrame(
270+
{
271+
"foo": dset,
272+
"bar": dset * 2,
273+
"qux": np.array(["waldo", "fred", "plugh", "thud"]).repeat(len(dset) // 4),
274+
}
275+
) # mixed dtypes
187276
df.attrs["summary"] = StatsSummary(df)
188277

189278
# ------------------------------------------
190279
# Directly to pandas.Series, complex slicing
191280
# ------------------------------------------
192281
sr = df["bar"]
193-
out = pd.concat([sr.iloc[:len(sr)//2],sr.iloc[len(sr)//4:]])
282+
out = pd.concat([sr.iloc[: len(sr) // 2], sr.iloc[len(sr) // 4 :]])
194283

195284
summ = out.attrs["summary"] = StatsSummary(out)
196285
gage = StatsSummary.gauge(out, [(Ellipsis, sr.dtype)])[...]
@@ -205,15 +294,15 @@ def test_attrs_stats_summary_works_for_series_too():
205294

206295
def test_attrs_headtail_probe_rebinds_on_concat_have_same_attrs():
207296
# Sample Data
208-
dset = np.arange(8,dtype=float)
297+
dset = np.arange(8, dtype=float)
209298
np.random.shuffle(dset)
210-
df = pd.DataFrame(dict(foo=dset*2, bar=dset*4, baz=dset*8, qux=dset*16))
299+
df = pd.DataFrame(dict(foo=dset * 2, bar=dset * 4, baz=dset * 8, qux=dset * 16))
211300

212301
df.attrs["preview"] = FrozenHeadTail(df)
213302

214303
# same attrs object on both inputs -> triggers have_same_attrs=True branch
215304
fred = df.copy(deep=True)
216-
thud = df.iloc[list(range(-2,2))].sort_index()
305+
thud = df.iloc[list(range(-2, 2))].sort_index()
217306

218307
out = pd.concat([fred, thud], ignore_index=True)
219308

@@ -232,4 +321,4 @@ def test_attrs_empty_remains_empty_on_deepcopy():
232321
df = pd.DataFrame({"a": [1, 2]})
233322
assert df.attrs == {}
234323
out = df.copy(deep=True)
235-
assert out.attrs == {}
324+
assert out.attrs == {}

0 commit comments

Comments
 (0)