Skip to content

Commit df1cd38

Browse files
authored
Add ignore_index keyword arg in dropna and drop_duplicates (Part of GH624) (#1030)
* Add the ignore_index keyword parameter to Series and DataFrame dropna and drop_duplicates
* Use assert_type instead
* Reverse the overload order; remove the ellipsis default when inplace=True
1 parent 0ab562c commit df1cd38

File tree

4 files changed

+76
-9
lines changed

4 files changed

+76
-9
lines changed

pandas-stubs/core/frame.pyi

Lines changed: 23 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -880,6 +880,7 @@ class DataFrame(NDFrame, OpsMixin):
880880
thresh: int | None = ...,
881881
subset: ListLikeU | Scalar | None = ...,
882882
inplace: Literal[True],
883+
ignore_index: _bool = ...,
883884
) -> None: ...
884885
@overload
885886
def dropna(
@@ -890,6 +891,7 @@ class DataFrame(NDFrame, OpsMixin):
890891
thresh: int | None = ...,
891892
subset: ListLikeU | Scalar | None = ...,
892893
inplace: Literal[False] = ...,
894+
ignore_index: _bool = ...,
893895
) -> DataFrame: ...
894896
@overload
895897
def dropna(
@@ -900,15 +902,35 @@ class DataFrame(NDFrame, OpsMixin):
900902
thresh: int | None = ...,
901903
subset: ListLikeU | Scalar | None = ...,
902904
inplace: _bool | None = ...,
905+
ignore_index: _bool = ...,
903906
) -> DataFrame | None: ...
907+
@overload
904908
def drop_duplicates(
905909
self,
906910
subset: Hashable | Iterable[Hashable] | None = ...,
907911
*,
908912
keep: NaPosition | _bool = ...,
909-
inplace: _bool = ...,
913+
inplace: Literal[True],
914+
ignore_index: _bool = ...,
915+
) -> None: ...
916+
@overload
917+
def drop_duplicates(
918+
self,
919+
subset: Hashable | Iterable[Hashable] | None = ...,
920+
*,
921+
keep: NaPosition | _bool = ...,
922+
inplace: Literal[False] = ...,
910923
ignore_index: _bool = ...,
911924
) -> DataFrame: ...
925+
@overload
926+
def drop_duplicates(
927+
self,
928+
subset: Hashable | Iterable[Hashable] | None = ...,
929+
*,
930+
keep: NaPosition | _bool = ...,
931+
inplace: _bool = ...,
932+
ignore_index: _bool = ...,
933+
) -> DataFrame | None: ...
912934
def duplicated(
913935
self,
914936
subset: Hashable | Iterable[Hashable] | None = ...,

pandas-stubs/core/series.pyi

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -726,15 +726,27 @@ class Series(IndexOpsMixin[S1], NDFrame):
726726
def unique(self) -> np.ndarray: ...
727727
@overload
728728
def drop_duplicates(
729-
self, *, keep: NaPosition | Literal[False] = ..., inplace: Literal[False] = ...
730-
) -> Series[S1]: ...
729+
self,
730+
*,
731+
keep: NaPosition | Literal[False] = ...,
732+
inplace: Literal[True],
733+
ignore_index: _bool = ...,
734+
) -> None: ...
731735
@overload
732736
def drop_duplicates(
733-
self, *, keep: NaPosition | Literal[False] = ..., inplace: Literal[True]
734-
) -> None: ...
737+
self,
738+
*,
739+
keep: NaPosition | Literal[False] = ...,
740+
inplace: Literal[False] = ...,
741+
ignore_index: _bool = ...,
742+
) -> Series[S1]: ...
735743
@overload
736744
def drop_duplicates(
737-
self, *, keep: NaPosition | Literal[False] = ..., inplace: bool = ...
745+
self,
746+
*,
747+
keep: NaPosition | Literal[False] = ...,
748+
inplace: bool = ...,
749+
ignore_index: _bool = ...,
738750
) -> Series[S1] | None: ...
739751
def duplicated(self, keep: NaPosition | Literal[False] = ...) -> Series[_bool]: ...
740752
def idxmax(
@@ -1148,6 +1160,7 @@ class Series(IndexOpsMixin[S1], NDFrame):
11481160
axis: AxisIndex = ...,
11491161
inplace: Literal[True],
11501162
how: Literal["any", "all"] | None = ...,
1163+
ignore_index: _bool = ...,
11511164
) -> None: ...
11521165
@overload
11531166
def dropna(
@@ -1156,6 +1169,7 @@ class Series(IndexOpsMixin[S1], NDFrame):
11561169
axis: AxisIndex = ...,
11571170
inplace: Literal[False] = ...,
11581171
how: Literal["any", "all"] | None = ...,
1172+
ignore_index: _bool = ...,
11591173
) -> Series[S1]: ...
11601174
@overload
11611175
def dropna(
@@ -1164,6 +1178,7 @@ class Series(IndexOpsMixin[S1], NDFrame):
11641178
axis: AxisIndex = ...,
11651179
inplace: _bool = ...,
11661180
how: Literal["any", "all"] | None = ...,
1181+
ignore_index: _bool = ...,
11671182
) -> Series[S1] | None: ...
11681183
def to_timestamp(
11691184
self,

tests/test_frame.py

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -373,9 +373,22 @@ def test_arguments_drop() -> None:
373373

374374
def test_types_dropna() -> None:
375375
df = pd.DataFrame(data={"col1": [np.nan, np.nan], "col2": [3, np.nan]})
376-
res: pd.DataFrame = df.dropna()
377-
res2: pd.DataFrame = df.dropna(axis=1, thresh=1)
378-
res3: None = df.dropna(axis=0, how="all", subset=["col1"], inplace=True)
376+
check(assert_type(df.dropna(), pd.DataFrame), pd.DataFrame)
377+
check(assert_type(df.dropna(ignore_index=True), pd.DataFrame), pd.DataFrame)
378+
check(assert_type(df.dropna(axis=1, thresh=1), pd.DataFrame), pd.DataFrame)
379+
assert (
380+
assert_type(df.dropna(axis=0, how="all", subset=["col1"], inplace=True), None)
381+
is None
382+
)
383+
assert (
384+
assert_type(
385+
df.dropna(
386+
axis=0, how="all", subset=["col1"], inplace=True, ignore_index=False
387+
),
388+
None,
389+
)
390+
is None
391+
)
379392

380393

381394
def test_types_drop_duplicates() -> None:
@@ -392,6 +405,13 @@ def test_types_drop_duplicates() -> None:
392405
check(assert_type(df.drop_duplicates(["AAA"]), pd.DataFrame), pd.DataFrame)
393406
check(assert_type(df.drop_duplicates(("AAA",)), pd.DataFrame), pd.DataFrame)
394407
check(assert_type(df.drop_duplicates("AAA"), pd.DataFrame), pd.DataFrame)
408+
assert assert_type(df.drop_duplicates("AAA", inplace=True), None) is None
409+
check(
410+
assert_type(
411+
df.drop_duplicates("AAA", inplace=False, ignore_index=True), pd.DataFrame
412+
),
413+
pd.DataFrame,
414+
)
395415

396416
if not PD_LTE_22:
397417
check(assert_type(df.drop_duplicates({"AAA"}), pd.DataFrame), pd.DataFrame)

tests/test_series.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -308,10 +308,20 @@ def test_types_drop_multilevel() -> None:
308308
res: pd.Series = s.drop(labels="first", level=1)
309309

310310

311+
def test_types_drop_duplicates() -> None:
312+
s = pd.Series([1.0, 2.0, 2.0])
313+
check(assert_type(s.drop_duplicates(), "pd.Series[float]"), pd.Series, float)
314+
assert assert_type(s.drop_duplicates(inplace=True), None) is None
315+
assert (
316+
assert_type(s.drop_duplicates(inplace=True, ignore_index=False), None) is None
317+
)
318+
319+
311320
def test_types_dropna() -> None:
312321
s = pd.Series([1.0, np.nan, np.nan])
313322
check(assert_type(s.dropna(), "pd.Series[float]"), pd.Series, float)
314323
assert assert_type(s.dropna(axis=0, inplace=True), None) is None
324+
assert assert_type(s.dropna(axis=0, inplace=True, ignore_index=True), None) is None
315325

316326

317327
def test_pop() -> None:

0 commit comments

Comments
 (0)