26
26
from pandas .util import _test_decorators as td
27
27
28
28
29
@pytest.fixture(
    params=[np.int32, np.int64, np.float32, np.float64],
    ids=["np.int32", "np.int64", "np.float32", "np.float64"],
)
def numpy_dtypes_for_minmax(request):
    """
    Fixture providing a numpy dtype together with its extreme values.

    Parametrized over int32, int64, float32 and float64; used for testing
    cummin and cummax.

    Returns
    -------
    tuple
        ``(dtype, min_val, max_val)`` where ``min_val`` / ``max_val`` are the
        smallest and largest values reported by numpy for ``dtype``.
    """
    dtype = request.param

    # Signed-integer kinds are described by np.iinfo; the remaining
    # parametrized dtypes are described by np.finfo.
    if np.dtype(dtype).kind == "i":
        limits = np.iinfo(dtype)
    else:
        limits = np.finfo(dtype)

    return (dtype, limits.min, limits.max)
47
+
48
+
29
49
@pytest .mark .parametrize ("agg_func" , ["any" , "all" ])
30
50
@pytest .mark .parametrize ("skipna" , [True , False ])
31
51
@pytest .mark .parametrize (
@@ -174,11 +194,10 @@ def test_arg_passthru():
174
194
)
175
195
176
196
for attr in ["mean" , "median" ]:
177
- f = getattr (df .groupby ("group" ), attr )
178
- result = f ()
197
+ result = getattr (df .groupby ("group" ), attr )()
179
198
tm .assert_index_equal (result .columns , expected_columns_numeric )
180
199
181
- result = f (numeric_only = False )
200
+ result = getattr ( df . groupby ( "group" ), attr ) (numeric_only = False )
182
201
tm .assert_frame_equal (result .reindex_like (expected ), expected )
183
202
184
203
# TODO: min, max *should* handle
@@ -195,11 +214,10 @@ def test_arg_passthru():
195
214
]
196
215
)
197
216
for attr in ["min" , "max" ]:
198
- f = getattr (df .groupby ("group" ), attr )
199
- result = f ()
217
+ result = getattr (df .groupby ("group" ), attr )()
200
218
tm .assert_index_equal (result .columns , expected_columns )
201
219
202
- result = f (numeric_only = False )
220
+ result = getattr ( df . groupby ( "group" ), attr ) (numeric_only = False )
203
221
tm .assert_index_equal (result .columns , expected_columns )
204
222
205
223
expected_columns = Index (
@@ -215,52 +233,47 @@ def test_arg_passthru():
215
233
]
216
234
)
217
235
for attr in ["first" , "last" ]:
218
- f = getattr (df .groupby ("group" ), attr )
219
- result = f ()
236
+ result = getattr (df .groupby ("group" ), attr )()
220
237
tm .assert_index_equal (result .columns , expected_columns )
221
238
222
- result = f (numeric_only = False )
239
+ result = getattr ( df . groupby ( "group" ), attr ) (numeric_only = False )
223
240
tm .assert_index_equal (result .columns , expected_columns )
224
241
225
242
expected_columns = Index (["int" , "float" , "string" , "category_int" , "timedelta" ])
226
- for attr in ["sum" ]:
227
- f = getattr (df .groupby ("group" ), attr )
228
- result = f ()
229
- tm .assert_index_equal (result .columns , expected_columns_numeric )
230
243
231
- result = f (numeric_only = False )
232
- tm .assert_index_equal (result .columns , expected_columns )
244
+ result = df .groupby ("group" ).sum ()
245
+ tm .assert_index_equal (result .columns , expected_columns_numeric )
246
+
247
+ result = df .groupby ("group" ).sum (numeric_only = False )
248
+ tm .assert_index_equal (result .columns , expected_columns )
233
249
234
250
expected_columns = Index (["int" , "float" , "category_int" ])
235
251
for attr in ["prod" , "cumprod" ]:
236
- f = getattr (df .groupby ("group" ), attr )
237
- result = f ()
252
+ result = getattr (df .groupby ("group" ), attr )()
238
253
tm .assert_index_equal (result .columns , expected_columns_numeric )
239
254
240
- result = f (numeric_only = False )
255
+ result = getattr ( df . groupby ( "group" ), attr ) (numeric_only = False )
241
256
tm .assert_index_equal (result .columns , expected_columns )
242
257
243
258
# like min, max, but don't include strings
244
259
expected_columns = Index (
245
260
["int" , "float" , "category_int" , "datetime" , "datetimetz" , "timedelta" ]
246
261
)
247
262
for attr in ["cummin" , "cummax" ]:
248
- f = getattr (df .groupby ("group" ), attr )
249
- result = f ()
263
+ result = getattr (df .groupby ("group" ), attr )()
250
264
# GH 15561: numeric_only=False set by default like min/max
251
265
tm .assert_index_equal (result .columns , expected_columns )
252
266
253
- result = f (numeric_only = False )
267
+ result = getattr ( df . groupby ( "group" ), attr ) (numeric_only = False )
254
268
tm .assert_index_equal (result .columns , expected_columns )
255
269
256
270
expected_columns = Index (["int" , "float" , "category_int" , "timedelta" ])
257
- for attr in ["cumsum" ]:
258
- f = getattr (df .groupby ("group" ), attr )
259
- result = f ()
260
- tm .assert_index_equal (result .columns , expected_columns_numeric )
261
271
262
- result = f (numeric_only = False )
263
- tm .assert_index_equal (result .columns , expected_columns )
272
+ result = getattr (df .groupby ("group" ), "cumsum" )()
273
+ tm .assert_index_equal (result .columns , expected_columns_numeric )
274
+
275
+ result = getattr (df .groupby ("group" ), "cumsum" )(numeric_only = False )
276
+ tm .assert_index_equal (result .columns , expected_columns )
264
277
265
278
266
279
def test_non_cython_api ():
@@ -691,59 +704,31 @@ def test_numpy_compat(func):
691
704
reason = "https://github.com/pandas-dev/pandas/issues/31992" ,
692
705
strict = False ,
693
706
)
694
- def test_cummin_cummax ():
707
+ def test_cummin (numpy_dtypes_for_minmax ):
708
+ dtype = numpy_dtypes_for_minmax [0 ]
709
+ min_val = numpy_dtypes_for_minmax [1 ]
710
+
695
711
# GH 15048
696
- num_types = [np .int32 , np .int64 , np .float32 , np .float64 ]
697
- num_mins = [
698
- np .iinfo (np .int32 ).min ,
699
- np .iinfo (np .int64 ).min ,
700
- np .finfo (np .float32 ).min ,
701
- np .finfo (np .float64 ).min ,
702
- ]
703
- num_max = [
704
- np .iinfo (np .int32 ).max ,
705
- np .iinfo (np .int64 ).max ,
706
- np .finfo (np .float32 ).max ,
707
- np .finfo (np .float64 ).max ,
708
- ]
709
712
base_df = pd .DataFrame (
710
713
{"A" : [1 , 1 , 1 , 1 , 2 , 2 , 2 , 2 ], "B" : [3 , 4 , 3 , 2 , 2 , 3 , 2 , 1 ]}
711
714
)
712
715
expected_mins = [3 , 3 , 3 , 2 , 2 , 2 , 2 , 1 ]
713
- expected_maxs = [3 , 4 , 4 , 4 , 2 , 3 , 3 , 3 ]
714
716
715
- for dtype , min_val , max_val in zip (num_types , num_mins , num_max ):
716
- df = base_df .astype (dtype )
717
+ df = base_df .astype (dtype )
717
718
718
- # cummin
719
- expected = pd .DataFrame ({"B" : expected_mins }).astype (dtype )
720
- result = df .groupby ("A" ).cummin ()
721
- tm .assert_frame_equal (result , expected )
722
- result = df .groupby ("A" ).B .apply (lambda x : x .cummin ()).to_frame ()
723
- tm .assert_frame_equal (result , expected )
724
-
725
- # Test cummin w/ min value for dtype
726
- df .loc [[2 , 6 ], "B" ] = min_val
727
- expected .loc [[2 , 3 , 6 , 7 ], "B" ] = min_val
728
- result = df .groupby ("A" ).cummin ()
729
- tm .assert_frame_equal (result , expected )
730
- expected = df .groupby ("A" ).B .apply (lambda x : x .cummin ()).to_frame ()
731
- tm .assert_frame_equal (result , expected )
732
-
733
- # cummax
734
- expected = pd .DataFrame ({"B" : expected_maxs }).astype (dtype )
735
- result = df .groupby ("A" ).cummax ()
736
- tm .assert_frame_equal (result , expected )
737
- result = df .groupby ("A" ).B .apply (lambda x : x .cummax ()).to_frame ()
738
- tm .assert_frame_equal (result , expected )
719
+ expected = pd .DataFrame ({"B" : expected_mins }).astype (dtype )
720
+ result = df .groupby ("A" ).cummin ()
721
+ tm .assert_frame_equal (result , expected )
722
+ result = df .groupby ("A" ).B .apply (lambda x : x .cummin ()).to_frame ()
723
+ tm .assert_frame_equal (result , expected )
739
724
740
- # Test cummax w/ max value for dtype
741
- df .loc [[2 , 6 ], "B" ] = max_val
742
- expected .loc [[2 , 3 , 6 , 7 ], "B" ] = max_val
743
- result = df .groupby ("A" ).cummax ()
744
- tm .assert_frame_equal (result , expected )
745
- expected = df .groupby ("A" ).B .apply (lambda x : x .cummax ()).to_frame ()
746
- tm .assert_frame_equal (result , expected )
725
+ # Test w/ min value for dtype
726
+ df .loc [[2 , 6 ], "B" ] = min_val
727
+ expected .loc [[2 , 3 , 6 , 7 ], "B" ] = min_val
728
+ result = df .groupby ("A" ).cummin ()
729
+ tm .assert_frame_equal (result , expected )
730
+ expected = df .groupby ("A" ).B .apply (lambda x : x .cummin ()).to_frame ()
731
+ tm .assert_frame_equal (result , expected )
747
732
748
733
# Test nan in some values
749
734
base_df .loc [[0 , 2 , 4 , 6 ], "B" ] = np .nan
@@ -753,41 +738,101 @@ def test_cummin_cummax():
753
738
expected = base_df .groupby ("A" ).B .apply (lambda x : x .cummin ()).to_frame ()
754
739
tm .assert_frame_equal (result , expected )
755
740
756
- expected = pd .DataFrame ({"B" : [np .nan , 4 , np .nan , 4 , np .nan , 3 , np .nan , 3 ]})
757
- result = base_df .groupby ("A" ).cummax ()
758
- tm .assert_frame_equal (result , expected )
759
- expected = base_df .groupby ("A" ).B .apply (lambda x : x .cummax ()).to_frame ()
760
- tm .assert_frame_equal (result , expected )
741
+ # GH 15561
742
+ df = pd .DataFrame (dict (a = [1 ], b = pd .to_datetime (["2001" ])))
743
+ expected = pd .Series (pd .to_datetime ("2001" ), index = [0 ], name = "b" )
744
+
745
+ result = df .groupby ("a" )["b" ].cummin ()
746
+ tm .assert_series_equal (expected , result )
747
+
748
+ # GH 15635
749
+ df = pd .DataFrame (dict (a = [1 , 2 , 1 ], b = [1 , 2 , 2 ]))
750
+ result = df .groupby ("a" ).b .cummin ()
751
+ expected = pd .Series ([1 , 2 , 1 ], name = "b" )
752
+ tm .assert_series_equal (result , expected )
753
+
754
+
755
@pytest.mark.xfail(
    _is_numpy_dev,
    reason="https://github.com/pandas-dev/pandas/issues/31992",
    strict=False,
)
def test_cummin_all_nan_column():
    """
    Check groupby cummin on a column that contains only NaN.

    Both the frame-level ``cummin`` and the per-group ``apply`` on column
    ``B`` are compared against an all-NaN ``B`` column.
    """
    all_nan = [np.nan] * 8
    base_df = pd.DataFrame({"A": [1, 1, 1, 1, 2, 2, 2, 2], "B": all_nan})
    expected = pd.DataFrame({"B": [np.nan] * 8})

    # Frame-level cumulative minimum
    result = base_df.groupby("A").cummin()
    tm.assert_frame_equal(expected, result)

    # Same computation routed through apply on the selected column
    applied = base_df.groupby("A").B.apply(lambda ser: ser.cummin())
    result = applied.to_frame()
    tm.assert_frame_equal(expected, result)
768
+
769
+
770
@pytest.mark.xfail(
    _is_numpy_dev,
    reason="https://github.com/pandas-dev/pandas/issues/31992",
    strict=False,
)
def test_cummax(numpy_dtypes_for_minmax):
    """
    Check groupby cummax for each dtype supplied by the fixture.

    Covers plain values, the dtype's maximum value, partially-NaN input,
    a datetime column (GH 15561) and a small integer frame (GH 15635).
    """
    dtype, _, max_val = numpy_dtypes_for_minmax

    # GH 15048
    base_df = pd.DataFrame(
        {"A": [1, 1, 1, 1, 2, 2, 2, 2], "B": [3, 4, 3, 2, 2, 3, 2, 1]}
    )
    expected_maxs = [3, 4, 4, 4, 2, 3, 3, 3]

    df = base_df.astype(dtype)
    expected = pd.DataFrame({"B": expected_maxs}).astype(dtype)

    result = df.groupby("A").cummax()
    tm.assert_frame_equal(result, expected)
    result = df.groupby("A").B.apply(lambda ser: ser.cummax()).to_frame()
    tm.assert_frame_equal(result, expected)

    # Test w/ max value for dtype
    df.loc[[2, 6], "B"] = max_val
    expected.loc[[2, 3, 6, 7], "B"] = max_val
    result = df.groupby("A").cummax()
    tm.assert_frame_equal(result, expected)
    # The apply-based computation now serves as the reference
    expected = df.groupby("A").B.apply(lambda ser: ser.cummax()).to_frame()
    tm.assert_frame_equal(result, expected)

    # Test nan in some values
    base_df.loc[[0, 2, 4, 6], "B"] = np.nan
    expected = pd.DataFrame({"B": [np.nan, 4, np.nan, 4, np.nan, 3, np.nan, 3]})
    result = base_df.groupby("A").cummax()
    tm.assert_frame_equal(result, expected)
    expected = base_df.groupby("A").B.apply(lambda ser: ser.cummax()).to_frame()
    tm.assert_frame_equal(result, expected)

    # GH 15561
    df = pd.DataFrame({"a": [1], "b": pd.to_datetime(["2001"])})
    expected = pd.Series(pd.to_datetime("2001"), index=[0], name="b")
    result = df.groupby("a")["b"].cummax()
    tm.assert_series_equal(expected, result)

    # GH 15635
    df = pd.DataFrame({"a": [1, 2, 1], "b": [2, 1, 1]})
    result = df.groupby("a").b.cummax()
    expected = pd.Series([2, 1, 2], name="b")
    tm.assert_series_equal(result, expected)
786
821
787
- df = pd .DataFrame (dict (a = [1 , 2 , 1 ], b = [1 , 2 , 2 ]))
788
- result = df .groupby ("a" ).b .cummin ()
789
- expected = pd .Series ([1 , 2 , 1 ], name = "b" )
790
- tm .assert_series_equal (result , expected )
822
+
823
@pytest.mark.xfail(
    _is_numpy_dev,
    reason="https://github.com/pandas-dev/pandas/issues/31992",
    strict=False,
)
def test_cummax_all_nan_column():
    """
    Check groupby cummax on a column that contains only NaN.

    Both the frame-level ``cummax`` and the per-group ``apply`` on column
    ``B`` are compared against an all-NaN ``B`` column.
    """
    all_nan = [np.nan] * 8
    base_df = pd.DataFrame({"A": [1, 1, 1, 1, 2, 2, 2, 2], "B": all_nan})
    expected = pd.DataFrame({"B": [np.nan] * 8})

    # Frame-level cumulative maximum
    result = base_df.groupby("A").cummax()
    tm.assert_frame_equal(expected, result)

    # Same computation routed through apply on the selected column
    applied = base_df.groupby("A").B.apply(lambda ser: ser.cummax())
    result = applied.to_frame()
    tm.assert_frame_equal(expected, result)
791
836
792
837
793
838
@pytest .mark .parametrize (
0 commit comments