1-
21import weakref
32
43import numpy as np
@@ -21,11 +20,23 @@ class StatsSummary(dict):
2120
def __init__(self, owner, *, cols=None):
    """Build the summary mapping for ``owner``.

    For an owner exposing ``columns`` (a DataFrame) the mapping holds one
    nested summary per column whose dtype kind is integer or float
    (``"i"`` / ``"f"``).  For any other NDFrame (a Series) the mapping
    holds one entry per statistic returned by ``self.stats()``, each
    evaluated on ``owner`` itself.

    Args:
        owner: The pandas object the summary describes.  Only a weak
            reference to it is stored.
        cols: Accepted for signature compatibility; not used here.
    """
    import pandas as pd

    assert isinstance(owner, pd.core.generic.NDFrame), (
        "owner must be a pandas NDFrame"
    )
    self._owner_ref = weakref.ref(owner)

    if hasattr(owner, "columns"):
        # Frame: recurse into each numeric column.  A frame with no
        # columns yields an empty summary rather than being mistaken for
        # a Series and having the statistics applied to the whole frame.
        entries = {
            column: type(self)(owner[column])
            for column in list(owner.columns)
            if owner[column].dtype.kind in "if"
        }
    else:
        # Series: evaluate every statistic directly on the owner.
        entries = {
            name: function(owner) for name, function in self.stats().items()
        }
    super(StatsSummary, self).__init__(entries)
3041
3142 @classmethod
@@ -34,12 +45,25 @@ def stats(cls):
3445 cummin = lambda series : series .cummin ().sum (),
3546 cummax = lambda series : series .cummax ().sum (),
3647 kurtosis = lambda series : series .kurt (),
37- median = lambda series :series .median (),
48+ median = lambda series : series .median (),
3849 )
3950
@classmethod
def gauge(cls, obj, columns):
    """Compute the reference statistics for the given columns of ``obj``.

    Args:
        obj: Any object supporting ``obj[column]`` lookups.
        columns: Iterable of ``(column, dtype)`` pairs.  Only the column
            key is used; the dtype element is ignored.

    Returns:
        A dict mapping each column key to a dict of
        ``{statistic name: statistic(obj[column])}`` built from
        ``cls.stats()``.
    """
    # The statistic table does not depend on the column, so fetch it once.
    functions = cls.stats()
    return {
        column: {name: function(obj[column]) for name, function in functions.items()}
        for column, _dtype in columns
    }
4367
4468 @property
4569 def owner (self ):
@@ -51,12 +75,17 @@ def __eq__(self, other) -> bool:
5175
def __deepcopy__(self, memo):
    """Return a summary bound to the destination object found in ``memo``.

    Args:
        memo: The deep-copy memo dict.  Per the original note, the patch
            injects ``{id(dest): dest}`` so the destination NDFrame can
            be found among its values.

    Returns:
        A new instance of ``type(self)`` built from the first NDFrame in
        ``memo`` when that object has ``select_dtypes`` (a frame) or a
        dtype of kind ``"i"`` / ``"f"``; otherwise ``None``, including
        when ``memo`` holds no NDFrame at all.
    """
    import pandas as pd

    new_owner = next(
        (v for v in memo.values() if isinstance(v, pd.core.generic.NDFrame)),
        None,
    )
    # Without this guard a memo lacking a destination would hit
    # ``None.dtype`` and raise AttributeError instead of returning None.
    if new_owner is None:
        return None
    if hasattr(new_owner, "select_dtypes") or new_owner.dtype.kind in "if":
        return type(self)(new_owner)
    return None
6089
6190
6291class FrozenHeadTail (dict ):
@@ -68,9 +97,14 @@ class FrozenHeadTail(dict):
6897
def __init__(self, owner, *, cols=None):
    """Populate the mapping with every probe from ``self.stats()``.

    Each probe is called with ``self.owner`` and stored under the
    probe's name.

    Args:
        owner: The pandas object being previewed.  Only a weak reference
            to it is stored.
        cols: Accepted for signature compatibility; not used here.
    """
    import pandas as pd

    assert isinstance(owner, pd.core.generic.NDFrame), (
        "owner must be a pandas NDFrame"
    )
    # The weak reference must be set before ``self.owner`` is read below.
    self._owner_ref = weakref.ref(owner)
    super(FrozenHeadTail, self).__init__(
        {name: function(self.owner) for name, function in self.stats().items()}
    )
75109
76110 @property
@@ -80,8 +114,16 @@ def owner(self):
@classmethod
def stats(cls):
    """Return the named preview probes.

    Returns:
        A dict with two callables, ``head`` and ``tail``.  Each takes a
        pandas object ``x`` and returns a new ``pd.DataFrame`` built from
        the first two / last two rows of ``x.values``, indexed by the
        matching slice of ``x.index``.  Column labels come from
        ``x.columns`` when present and non-empty, otherwise ``[x.name]``.
    """

    def preview(rows):
        # ``rows`` is bound per call, so each probe keeps its own slice.
        return lambda x: pd.DataFrame(
            x.values[rows],
            columns=list(getattr(x, "columns", [])) or [x.name],
            index=x.index[rows],
        )

    return dict(
        head=preview(slice(None, 2)),
        tail=preview(slice(-2, None)),
    )
86128
87129 def __eq__ (self , other ) -> bool :
@@ -92,7 +134,7 @@ def __eq__(self, other) -> bool:
92134 return False
93135
94136 def __deepcopy__ (self , memo ):
95- new_owner = next (
137+ new_owner = next (
96138 (v for v in memo .values () if isinstance (v , pd .core .generic .NDFrame )),
97139 None ,
98140 )
@@ -101,96 +143,143 @@ def __deepcopy__(self, memo):
101143
def _assert_summary_rebound(out):
    """Check that ``out.attrs["summary"]`` is a summary bound to ``out``.

    The expected values are recomputed with ``StatsSummary.gauge`` over the
    integer/float columns of ``out``.
    """
    summ = out.attrs.get("summary")
    numeric = [
        (column, dtype)
        for column, dtype in out.dtypes.items()
        if dtype.kind in "if"
    ]
    gage = StatsSummary.gauge(out, numeric)

    assert isinstance(summ, StatsSummary)

    # The cache should now belong to the *new* DataFrame
    assert summ.owner is out
    # pandas.DataFrame propagate to its pandas.Series correspondingly
    assert all(
        out[column].attrs["summary"] == out.attrs["summary"][column]
        for column in gage
    )
    # And stats reflect the destination (shape matches numeric subset)
    assert summ == gage


def test_attrs_stats_summary_binds_to_destination_on_copy():
    """The summary in ``attrs`` follows each DataFrame derived from ``df``."""
    # Sample Data
    dset = np.arange(8, dtype=float)
    np.random.shuffle(dset)

    df = pd.DataFrame(
        {
            "foo": dset,
            "bar": dset * 2,
            "qux": np.array(["waldo", "fred", "plugh", "thud"]).repeat(len(dset) // 4),
        }
    )  # mixed dtypes

    df.attrs["summary"] = StatsSummary(df)

    # --------------------------------------
    # Copy triggered by panel Y axis slicing
    # --------------------------------------
    out = df.iloc[: len(df) // 2]
    _assert_summary_rebound(out)

    # -----------------------------------
    # Copy triggered by columns selection
    # -----------------------------------
    out = df[["foo", "qux"]]
    _assert_summary_rebound(out)

    # ----------------------------------
    # Copy triggered by DataFrame concat
    # ----------------------------------
    left = df.iloc[len(df) // 4 :].copy(deep=True)
    right = df.iloc[len(df) // 4 :].copy(deep=True)
    out = pd.concat([left, right])
    _assert_summary_rebound(out)

    # ----------------------------------
    # Arithmetic operations on DataFrame
    # ----------------------------------
    out = df[["foo", "bar"]]
    # Random +1 / -1 per row.  ``randint`` has an exclusive upper bound, so
    # (0, 2) draws from {0, 1} like the deprecated ``random_integers(0, 1)``.
    signs = np.random.randint(0, 2, len(out)) * 2 - 1
    out = out.multiply(signs, axis=0)
    _assert_summary_rebound(out)
179262
180263
181264def test_attrs_stats_summary_works_for_series_too ():
182265 # Sample Data
183- dset = np .arange (8 ,dtype = float )
266+ dset = np .arange (8 , dtype = float )
184267 np .random .shuffle (dset )
185268
186- df = pd .DataFrame ({"foo" : dset , "bar" : dset * 2 , "qux" : np .array (["waldo" ,"fred" ,"plugh" ,"thud" ]).repeat (len (dset )// 4 )}) # mixed dtypes
269+ df = pd .DataFrame (
270+ {
271+ "foo" : dset ,
272+ "bar" : dset * 2 ,
273+ "qux" : np .array (["waldo" , "fred" , "plugh" , "thud" ]).repeat (len (dset ) // 4 ),
274+ }
275+ ) # mixed dtypes
187276 df .attrs ["summary" ] = StatsSummary (df )
188277
189278 # ------------------------------------------
190279 # Directly to pandas.Series, complex slicing
191280 # ------------------------------------------
192281 sr = df ["bar" ]
193- out = pd .concat ([sr .iloc [:len (sr )// 2 ],sr .iloc [len (sr )// 4 :]])
282+ out = pd .concat ([sr .iloc [: len (sr ) // 2 ], sr .iloc [len (sr ) // 4 :]])
194283
195284 summ = out .attrs ["summary" ] = StatsSummary (out )
196285 gage = StatsSummary .gauge (out , [(Ellipsis , sr .dtype )])[...]
@@ -205,15 +294,15 @@ def test_attrs_stats_summary_works_for_series_too():
205294
206295def test_attrs_headtail_probe_rebinds_on_concat_have_same_attrs ():
207296 # Sample Data
208- dset = np .arange (8 ,dtype = float )
297+ dset = np .arange (8 , dtype = float )
209298 np .random .shuffle (dset )
210- df = pd .DataFrame (dict (foo = dset * 2 , bar = dset * 4 , baz = dset * 8 , qux = dset * 16 ))
299+ df = pd .DataFrame (dict (foo = dset * 2 , bar = dset * 4 , baz = dset * 8 , qux = dset * 16 ))
211300
212301 df .attrs ["preview" ] = FrozenHeadTail (df )
213302
214303 # same attrs object on both inputs -> triggers have_same_attrs=True branch
215304 fred = df .copy (deep = True )
216- thud = df .iloc [list (range (- 2 ,2 ))].sort_index ()
305+ thud = df .iloc [list (range (- 2 , 2 ))].sort_index ()
217306
218307 out = pd .concat ([fred , thud ], ignore_index = True )
219308
@@ -232,4 +321,4 @@ def test_attrs_empty_remains_empty_on_deepcopy():
232321 df = pd .DataFrame ({"a" : [1 , 2 ]})
233322 assert df .attrs == {}
234323 out = df .copy (deep = True )
235- assert out .attrs == {}
324+ assert out .attrs == {}
0 commit comments