Skip to content

Commit 32e335e

Browse files
authored
Merge pull request #147 from pandas-dev/master
Sync Fork from Upstream Repo
2 parents 012a635 + 8945a42 commit 32e335e

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

77 files changed

+1454
-697
lines changed

.github/workflows/ci.yml

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -163,12 +163,10 @@ jobs:
163163
pytest pandas/tests/resample/
164164
pytest pandas/tests/reshape/merge
165165
pytest pandas/tests/series/
166-
167-
# indexing subset (temporary since other tests don't pass yet)
168-
pytest pandas/tests/indexing/multiindex/test_setitem.py::TestMultiIndexSetItem::test_astype_assignment_with_dups
169-
pytest pandas/tests/indexing/multiindex/test_setitem.py::TestMultiIndexSetItem::test_frame_setitem_multi_column
166+
pytest pandas/tests/indexing/
170167
171168
pytest pandas/tests/api/
169+
pytest pandas/tests/apply/
172170
pytest pandas/tests/arrays/
173171
pytest pandas/tests/base/
174172
pytest pandas/tests/computation/

Makefile

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
.PHONY : develop build clean clean_pyc doc lint-diff black
1+
.PHONY : develop build clean clean_pyc doc lint-diff black test-scripts
22

33
all: develop
44

@@ -25,3 +25,6 @@ doc:
2525
cd doc; \
2626
python make.py clean; \
2727
python make.py html
28+
29+
test-scripts:
30+
pytest scripts

doc/source/whatsnew/v1.3.0.rst

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -225,6 +225,41 @@ In pandas 1.3.0, ``df`` continues to share data with ``values``
225225
np.shares_memory(df["A"], values)
226226
227227
228+
.. _whatsnew_130.notable_bug_fixes.setitem_never_inplace:
229+
230+
Never Operate Inplace When Setting ``frame[keys] = values``
231+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
232+
233+
When setting multiple columns using ``frame[keys] = values``, new arrays will
234+
replace the pre-existing arrays for these keys; the old arrays will *not* be
235+
overwritten (:issue:`39510`). As a result, the columns will retain the dtype(s)
236+
of ``values``, never casting to the dtypes of the existing arrays.
237+
238+
.. ipython:: python
239+
240+
df = pd.DataFrame(range(3), columns=["A"], dtype="float64")
241+
df[["A"]] = 5
242+
243+
In the old behavior, ``5`` was cast to ``float64`` and inserted into the existing
244+
array backing ``df``:
245+
246+
*pandas 1.2.x*
247+
248+
.. code-block:: ipython
249+
250+
In [1]: df.dtypes
251+
Out[1]:
252+
A float64
253+
254+
In the new behavior, we get a new array, and retain an integer-dtyped ``5``:
255+
256+
*pandas 1.3.0*
257+
258+
.. ipython:: python
259+
260+
df.dtypes
261+
262+
228263
.. _whatsnew_130.notable_bug_fixes.setitem_with_bool_casting:
229264

230265
Consistent Casting With Setting Into Boolean Series

pandas/_libs/hashing.pyi

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
import numpy as np
2+
3+
def hash_object_array(
4+
arr: np.ndarray, # np.ndarray[object]
5+
key: str,
6+
encoding: str = ...,
7+
) -> np.ndarray: ... # np.ndarray[np.uint64]

pandas/_libs/index.pyx

Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
import warnings
22

3+
cimport cython
4+
35
import numpy as np
46

57
cimport numpy as cnp
@@ -47,6 +49,7 @@ cdef inline bint is_definitely_invalid_key(object val):
4749
_SIZE_CUTOFF = 1_000_000
4850

4951

52+
@cython.freelist(32)
5053
cdef class IndexEngine:
5154

5255
cdef readonly:
@@ -256,11 +259,11 @@ cdef class IndexEngine:
256259
self.monotonic_inc = 0
257260
self.monotonic_dec = 0
258261

259-
def get_indexer(self, values):
262+
def get_indexer(self, ndarray values):
260263
self._ensure_mapping_populated()
261264
return self.mapping.lookup(values)
262265

263-
def get_indexer_non_unique(self, targets):
266+
def get_indexer_non_unique(self, ndarray targets):
264267
"""
265268
Return an indexer suitable for taking from a non unique index
266269
return the labels in the same order as the target
@@ -448,11 +451,11 @@ cdef class DatetimeEngine(Int64Engine):
448451
except KeyError:
449452
raise KeyError(val)
450453

451-
def get_indexer_non_unique(self, targets):
454+
def get_indexer_non_unique(self, ndarray targets):
452455
# we may get datetime64[ns] or timedelta64[ns], cast these to int64
453456
return super().get_indexer_non_unique(targets.view("i8"))
454457

455-
def get_indexer(self, values):
458+
def get_indexer(self, ndarray values):
456459
self._ensure_mapping_populated()
457460
if values.dtype != self._get_box_dtype():
458461
return np.repeat(-1, len(values)).astype('i4')
@@ -591,15 +594,15 @@ cdef class BaseMultiIndexCodesEngine:
591594
in zip(self.levels, zip(*target))]
592595
return self._codes_to_ints(np.array(level_codes, dtype='uint64').T)
593596

594-
def get_indexer_no_fill(self, object target) -> np.ndarray:
597+
def get_indexer(self, ndarray[object] target) -> np.ndarray:
595598
"""
596599
Returns an array giving the positions of each value of `target` in
597600
`self.values`, where -1 represents a value in `target` which does not
598601
appear in `self.values`
599602

600603
Parameters
601604
----------
602-
target : list-like of keys
605+
target : ndarray[object]
603606
Each key is a tuple, with a label for each level of the index
604607

605608
Returns
@@ -610,8 +613,8 @@ cdef class BaseMultiIndexCodesEngine:
610613
lab_ints = self._extract_level_codes(target)
611614
return self._base.get_indexer(self, lab_ints)
612615

613-
def get_indexer(self, object target, object values = None,
614-
object method = None, object limit = None) -> np.ndarray:
616+
def get_indexer_with_fill(self, ndarray target, ndarray values,
617+
str method, object limit) -> np.ndarray:
615618
"""
616619
Returns an array giving the positions of each value of `target` in
617620
`values`, where -1 represents a value in `target` which does not
@@ -627,25 +630,22 @@ cdef class BaseMultiIndexCodesEngine:
627630

628631
Parameters
629632
----------
630-
target: list-like of tuples
633+
target: ndarray[object] of tuples
631634
need not be sorted, but all must have the same length, which must be
632635
the same as the length of all tuples in `values`
633-
values : list-like of tuples
636+
values : ndarray[object] of tuples
634637
must be sorted and all have the same length. Should be the set of
635638
the MultiIndex's values. Needed only if `method` is not None
636639
method: string
637640
"backfill" or "pad"
638-
limit: int, optional
641+
limit: int or None
639642
if provided, limit the number of fills to this value
640643

641644
Returns
642645
-------
643646
np.ndarray[int64_t, ndim=1] of the indexer of `target` into `values`,
644647
filled with the `method` (and optionally `limit`) specified
645648
"""
646-
if method is None:
647-
return self.get_indexer_no_fill(target)
648-
649649
assert method in ("backfill", "pad")
650650
cdef:
651651
int64_t i, j, next_code
@@ -655,8 +655,8 @@ cdef class BaseMultiIndexCodesEngine:
655655
ndarray[int64_t, ndim=1] new_codes, new_target_codes
656656
ndarray[int64_t, ndim=1] sorted_indexer
657657

658-
target_order = np.argsort(target.values).astype('int64')
659-
target_values = target.values[target_order]
658+
target_order = np.argsort(target).astype('int64')
659+
target_values = target[target_order]
660660
num_values, num_target_values = len(values), len(target_values)
661661
new_codes, new_target_codes = (
662662
np.empty((num_values,)).astype('int64'),
@@ -715,7 +715,7 @@ cdef class BaseMultiIndexCodesEngine:
715715

716716
return self._base.get_loc(self, lab_int)
717717

718-
def get_indexer_non_unique(self, object target):
718+
def get_indexer_non_unique(self, ndarray target):
719719
# This needs to be overridden just because the default one works on
720720
# target._values, and target can be itself a MultiIndex.
721721

pandas/_libs/missing.pyx

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,7 @@ cpdef bint checknull(object val):
104104
- np.datetime64 representation of NaT
105105
- np.timedelta64 representation of NaT
106106
- NA
107+
- Decimal("NaN")
107108
108109
Parameters
109110
----------
@@ -143,6 +144,8 @@ cpdef bint checknull_old(object val):
143144
- NaT
144145
- np.datetime64 representation of NaT
145146
- np.timedelta64 representation of NaT
147+
- NA
148+
- Decimal("NaN")
146149
147150
Parameters
148151
----------
@@ -175,6 +178,8 @@ cpdef ndarray[uint8_t] isnaobj(ndarray arr):
175178
- NaT
176179
- np.datetime64 representation of NaT
177180
- np.timedelta64 representation of NaT
181+
- NA
182+
- Decimal("NaN")
178183
179184
Parameters
180185
----------
@@ -211,6 +216,7 @@ def isnaobj_old(arr: ndarray) -> ndarray:
211216
- NEGINF
212217
- NaT
213218
- NA
219+
- Decimal("NaN")
214220

215221
Parameters
216222
----------
@@ -249,6 +255,8 @@ def isnaobj2d(arr: ndarray) -> ndarray:
249255
- NaT
250256
- np.datetime64 representation of NaT
251257
- np.timedelta64 representation of NaT
258+
- NA
259+
- Decimal("NaN")
252260

253261
Parameters
254262
----------
@@ -293,6 +301,8 @@ def isnaobj2d_old(arr: ndarray) -> ndarray:
293301
- NaT
294302
- np.datetime64 representation of NaT
295303
- np.timedelta64 representation of NaT
304+
- NA
305+
- Decimal("NaN")
296306

297307
Parameters
298308
----------

pandas/_libs/ops.pyi

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
from typing import (
2+
Any,
3+
Callable,
4+
)
5+
6+
import numpy as np
7+
8+
_BinOp = Callable[[Any, Any], Any]
9+
_BoolOp = Callable[[Any, Any], bool]
10+
11+
12+
def scalar_compare(
13+
values: np.ndarray, # object[:]
14+
val: object,
15+
op: _BoolOp, # {operator.eq, operator.ne, ...}
16+
) -> np.ndarray: ... # np.ndarray[bool]
17+
18+
def vec_compare(
19+
left: np.ndarray, # np.ndarray[object]
20+
right: np.ndarray, # np.ndarray[object]
21+
op: _BoolOp, # {operator.eq, operator.ne, ...}
22+
) -> np.ndarray: ... # np.ndarray[bool]
23+
24+
25+
def scalar_binop(
26+
values: np.ndarray, # object[:]
27+
val: object,
28+
op: _BinOp, # binary operator
29+
) -> np.ndarray: ...
30+
31+
32+
def vec_binop(
33+
left: np.ndarray, # object[:]
34+
right: np.ndarray, # object[:]
35+
op: _BinOp, # binary operator
36+
) -> np.ndarray: ...
37+
38+
39+
def maybe_convert_bool(
40+
arr: np.ndarray, # np.ndarray[object]
41+
true_values=...,
42+
false_values=...
43+
) -> np.ndarray: ...

pandas/_libs/ops_dispatch.pyi

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
import numpy as np
2+
3+
def maybe_dispatch_ufunc_to_dunder_op(
4+
self, ufunc: np.ufunc, method: str, *inputs, **kwargs
5+
): ...

pandas/_libs/reshape.pyi

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
import numpy as np
2+
3+
def unstack(
4+
values: np.ndarray, # reshape_t[:, :]
5+
mask: np.ndarray, # const uint8_t[:]
6+
stride: int,
7+
length: int,
8+
width: int,
9+
new_values: np.ndarray, # reshape_t[:, :]
10+
new_mask: np.ndarray, # uint8_t[:, :]
11+
) -> None: ...
12+
13+
14+
def explode(
15+
values: np.ndarray, # np.ndarray[object]
16+
) -> tuple[
17+
np.ndarray, # np.ndarray[object]
18+
np.ndarray, # np.ndarray[np.int64]
19+
]: ...

pandas/_libs/tslibs/ccalendar.pyi

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
2+
DAYS: list[str]
3+
MONTH_ALIASES: dict[int, str]
4+
MONTH_NUMBERS: dict[str, int]
5+
MONTHS: list[str]
6+
int_to_weekday: dict[int, str]
7+
8+
def get_firstbday(year: int, month: int) -> int: ...
9+
def get_lastbday(year: int, month: int) -> int: ...
10+
def get_day_of_year(year: int, month: int, day: int) -> int: ...
11+
def get_iso_calendar(year: int, month: int, day: int) -> tuple[int, int, int]: ...
12+
def get_week_of_year(year: int, month: int, day: int) -> int: ...
13+
def get_days_in_month(year: int, month: int) -> int: ...

pandas/_libs/tslibs/strptime.pyi

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
from typing import Optional
2+
3+
import numpy as np
4+
5+
def array_strptime(
6+
values: np.ndarray, # np.ndarray[object]
7+
fmt: Optional[str],
8+
exact: bool = True,
9+
errors: str = "raise"
10+
) -> tuple[np.ndarray, np.ndarray]: ...
11+
# first ndarray is M8[ns], second is object ndarray of Optional[tzinfo]

pandas/_libs/tslibs/timezones.pyi

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
from datetime import (
2+
datetime,
3+
tzinfo,
4+
)
5+
from typing import (
6+
Callable,
7+
Optional,
8+
Union,
9+
)
10+
11+
import numpy as np
12+
13+
# imported from dateutil.tz
14+
dateutil_gettz: Callable[[str], tzinfo]
15+
16+
17+
def tz_standardize(tz: tzinfo) -> tzinfo: ...
18+
19+
def tz_compare(start: Optional[tzinfo], end: Optional[tzinfo]) -> bool: ...
20+
21+
def infer_tzinfo(
22+
start: Optional[datetime], end: Optional[datetime],
23+
) -> Optional[tzinfo]: ...
24+
25+
# ndarrays returned are both int64_t
26+
def get_dst_info(tz: tzinfo) -> tuple[np.ndarray, np.ndarray, str]: ...
27+
28+
def maybe_get_tz(tz: Optional[Union[str, int, np.int64, tzinfo]]) -> Optional[tzinfo]: ...
29+
30+
def get_timezone(tz: tzinfo) -> Union[tzinfo, str]: ...
31+
32+
def is_utc(tz: Optional[tzinfo]) -> bool: ...

0 commit comments

Comments
 (0)