Sync Fork from Upstream Repo #147


Merged
merged 21 commits into from
Mar 17, 2021
Changes from all commits

21 commits
b172a60
[ArrayManager] TST: enable apply tests (#40345)
jorisvandenbossche Mar 16, 2021
940695b
TYP: stubs for tslibs (#40433)
jbrockmendel Mar 16, 2021
ae12817
CLN: docstrings, annotations, raising corner cases (#40409)
jbrockmendel Mar 16, 2021
edbc6ea
CLN: remove unreached boolean-mask case from _preprocess_slice_or_ind…
jbrockmendel Mar 16, 2021
cb31e5a
REF: consistent arguments for create_block_manager_from_blocks (#40403)
jbrockmendel Mar 16, 2021
5dc5b1a
[ArrayManager] Add Series._as_manager (consolidate with DataFrame) (#…
jorisvandenbossche Mar 16, 2021
dc548fd
CI: fix failing script/tests (#40457)
jbrockmendel Mar 16, 2021
b8b3fbf
BUG: constructing DTA/TDA from xarray/dask/pandasarray (#40210)
jbrockmendel Mar 16, 2021
203f901
PERF: repeated slicing along index in groupby (#40353)
jbrockmendel Mar 16, 2021
fefd999
REF/API: DataFrame.__setitem__ never operate in-place (#39510)
jbrockmendel Mar 16, 2021
126bb92
REF: simplify operating-columnwise dispatch (#40256)
jbrockmendel Mar 16, 2021
8349b02
PERF/REF: require BlockPlacement in Block.__init__ (#40361)
jbrockmendel Mar 16, 2021
d83d715
pre-commit fixups (#40468)
jbrockmendel Mar 16, 2021
bbab54a
compat: sqlalchemy deprecations (#40471)
jbrockmendel Mar 16, 2021
c13027c
TST: csv parser segfaulting on malformed input. (#40458)
ahawryluk Mar 16, 2021
d2acc1e
TYP: Add typing for remaining IO XML methods with conditional for lxm…
ParfaitG Mar 16, 2021
cb130fd
CLN: _maybe_upcast_for_op doesn't need to check timedelta ndarray (#4…
jorisvandenbossche Mar 17, 2021
9664284
TYP: fix ignores (#40452)
jbrockmendel Mar 17, 2021
3a9c94b
TYP: stubs for reshape, ops, ops_dispatch, hashing (#40455)
jbrockmendel Mar 17, 2021
b4c554f
TYP: stronger typing in libindex (#40465)
jbrockmendel Mar 17, 2021
8945a42
[ArrayManager] TST: run (+fix/skip) pandas/tests/indexing tests (#40325)
jorisvandenbossche Mar 17, 2021
6 changes: 2 additions & 4 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
@@ -163,12 +163,10 @@ jobs:
pytest pandas/tests/resample/
pytest pandas/tests/reshape/merge
pytest pandas/tests/series/

# indexing subset (temporary since other tests don't pass yet)
pytest pandas/tests/indexing/multiindex/test_setitem.py::TestMultiIndexSetItem::test_astype_assignment_with_dups
pytest pandas/tests/indexing/multiindex/test_setitem.py::TestMultiIndexSetItem::test_frame_setitem_multi_column
pytest pandas/tests/indexing/

pytest pandas/tests/api/
pytest pandas/tests/apply/
pytest pandas/tests/arrays/
pytest pandas/tests/base/
pytest pandas/tests/computation/
5 changes: 4 additions & 1 deletion Makefile
@@ -1,4 +1,4 @@
.PHONY : develop build clean clean_pyc doc lint-diff black
.PHONY : develop build clean clean_pyc doc lint-diff black test-scripts

all: develop

@@ -25,3 +25,6 @@ doc:
cd doc; \
python make.py clean; \
python make.py html

test-scripts:
pytest scripts
35 changes: 35 additions & 0 deletions doc/source/whatsnew/v1.3.0.rst
@@ -225,6 +225,41 @@ In pandas 1.3.0, ``df`` continues to share data with ``values``
np.shares_memory(df["A"], values)


.. _whatsnew_130.notable_bug_fixes.setitem_never_inplace:

Never Operate Inplace When Setting ``frame[keys] = values``
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

When setting multiple columns using ``frame[keys] = values``, new arrays will
replace the pre-existing arrays for these keys, which will *not* be over-written
(:issue:`39510`). As a result, the columns will retain the dtype(s) of ``values``,
never casting to the dtypes of the existing arrays.

.. ipython:: python

df = pd.DataFrame(range(3), columns=["A"], dtype="float64")
df[["A"]] = 5

In the old behavior, ``5`` was cast to ``float64`` and inserted into the existing
array backing ``df``:

*pandas 1.2.x*

.. code-block:: ipython

In [1]: df.dtypes
Out[1]:
A float64

In the new behavior, we get a new array, and retain an integer-dtyped ``5``:

*pandas 1.3.0*

.. ipython:: python

df.dtypes


.. _whatsnew_130.notable_bug_fixes.setitem_with_bool_casting:

Consistent Casting With Setting Into Boolean Series
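The whatsnew entry in this diff can be exercised directly. A minimal sketch (the dtype printed differs between pandas 1.2.x and 1.3+, which is the point of the change; the assigned values themselves are the same either way):

```python
import pandas as pd

# Start from a float64 column, then assign an integer via frame[keys] = values.
df = pd.DataFrame(range(3), columns=["A"], dtype="float64")
df[["A"]] = 5

# On pandas >= 1.3 the column's backing array is replaced rather than
# written into, so it keeps the integer dtype of the assigned value;
# on 1.2.x the 5 was cast to float64 in-place.
print(df["A"].dtype)
```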
7 changes: 7 additions & 0 deletions pandas/_libs/hashing.pyi
@@ -0,0 +1,7 @@
import numpy as np

def hash_object_array(
arr: np.ndarray, # np.ndarray[object]
key: str,
encoding: str = ...,
) -> np.ndarray: ... # np.ndarray[np.uint64]
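The stub above types the internal `hash_object_array` routine. Its public counterpart is `pandas.util.hash_array`, which likewise produces one deterministic uint64 per element:

```python
import numpy as np
import pandas as pd

# Public wrapper over the internal hashing routine typed by this stub:
# hashing an object array yields a uint64 ndarray, equal inputs hashing equal.
vals = np.array(["a", "b", "a"], dtype=object)
hashes = pd.util.hash_array(vals)
print(hashes.dtype)  # uint64
```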
34 changes: 17 additions & 17 deletions pandas/_libs/index.pyx
@@ -1,5 +1,7 @@
import warnings

cimport cython

import numpy as np

cimport numpy as cnp
@@ -47,6 +49,7 @@ cdef inline bint is_definitely_invalid_key(object val):
_SIZE_CUTOFF = 1_000_000


@cython.freelist(32)
cdef class IndexEngine:

cdef readonly:
@@ -256,11 +259,11 @@ cdef class IndexEngine:
self.monotonic_inc = 0
self.monotonic_dec = 0

def get_indexer(self, values):
def get_indexer(self, ndarray values):
self._ensure_mapping_populated()
return self.mapping.lookup(values)

def get_indexer_non_unique(self, targets):
def get_indexer_non_unique(self, ndarray targets):
"""
Return an indexer suitable for taking from a non unique index
return the labels in the same order as the target
@@ -448,11 +451,11 @@ cdef class DatetimeEngine(Int64Engine):
except KeyError:
raise KeyError(val)

def get_indexer_non_unique(self, targets):
def get_indexer_non_unique(self, ndarray targets):
# we may get datetime64[ns] or timedelta64[ns], cast these to int64
return super().get_indexer_non_unique(targets.view("i8"))

def get_indexer(self, values):
def get_indexer(self, ndarray values):
self._ensure_mapping_populated()
if values.dtype != self._get_box_dtype():
return np.repeat(-1, len(values)).astype('i4')
@@ -591,15 +594,15 @@ cdef class BaseMultiIndexCodesEngine:
in zip(self.levels, zip(*target))]
return self._codes_to_ints(np.array(level_codes, dtype='uint64').T)

def get_indexer_no_fill(self, object target) -> np.ndarray:
def get_indexer(self, ndarray[object] target) -> np.ndarray:
"""
Returns an array giving the positions of each value of `target` in
`self.values`, where -1 represents a value in `target` which does not
appear in `self.values`

Parameters
----------
target : list-like of keys
target : ndarray[object]
Each key is a tuple, with a label for each level of the index

Returns
@@ -610,8 +613,8 @@
lab_ints = self._extract_level_codes(target)
return self._base.get_indexer(self, lab_ints)

def get_indexer(self, object target, object values = None,
object method = None, object limit = None) -> np.ndarray:
def get_indexer_with_fill(self, ndarray target, ndarray values,
str method, object limit) -> np.ndarray:
"""
Returns an array giving the positions of each value of `target` in
`values`, where -1 represents a value in `target` which does not
@@ -627,25 +630,22 @@

Parameters
----------
target: list-like of tuples
target: ndarray[object] of tuples
need not be sorted, but all must have the same length, which must be
the same as the length of all tuples in `values`
values : list-like of tuples
values : ndarray[object] of tuples
must be sorted and all have the same length. Should be the set of
the MultiIndex's values. Needed only if `method` is not None
method: string
"backfill" or "pad"
limit: int, optional
limit: int or None
if provided, limit the number of fills to this value

Returns
-------
np.ndarray[int64_t, ndim=1] of the indexer of `target` into `values`,
filled with the `method` (and optionally `limit`) specified
"""
if method is None:
return self.get_indexer_no_fill(target)

assert method in ("backfill", "pad")
cdef:
int64_t i, j, next_code
@@ -655,8 +655,8 @@
ndarray[int64_t, ndim=1] new_codes, new_target_codes
ndarray[int64_t, ndim=1] sorted_indexer

target_order = np.argsort(target.values).astype('int64')
target_values = target.values[target_order]
target_order = np.argsort(target).astype('int64')
target_values = target[target_order]
num_values, num_target_values = len(values), len(target_values)
new_codes, new_target_codes = (
np.empty((num_values,)).astype('int64'),
@@ -715,7 +715,7 @@ cdef class BaseMultiIndexCodesEngine:

return self._base.get_loc(self, lab_int)

def get_indexer_non_unique(self, object target):
def get_indexer_non_unique(self, ndarray target):
# This needs to be overridden just because the default one works on
# target._values, and target can be itself a MultiIndex.

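The renamed `get_indexer_with_fill` above handles the "pad"/"backfill" cases against a sorted `values` array. The "pad" (forward-fill) case can be sketched in pure NumPy — `pad_indexer` is a hypothetical stand-in, not the pandas implementation, and assumes `values` is sorted as the real method requires:

```python
import numpy as np

def pad_indexer(values: np.ndarray, target: np.ndarray) -> np.ndarray:
    """For each target element, index of the last value <= it, or -1 if none.

    Illustrative analog of the 'pad' branch of get_indexer_with_fill.
    """
    return np.searchsorted(values, target, side="right") - 1

values = np.array([10, 20, 30])
target = np.array([5, 15, 25, 35])
print(pad_indexer(values, target))  # [-1  0  1  2]
```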
10 changes: 10 additions & 0 deletions pandas/_libs/missing.pyx
@@ -104,6 +104,7 @@ cpdef bint checknull(object val):
- np.datetime64 representation of NaT
- np.timedelta64 representation of NaT
- NA
- Decimal("NaN")

Parameters
----------
@@ -143,6 +144,8 @@ cpdef bint checknull_old(object val):
- NaT
- np.datetime64 representation of NaT
- np.timedelta64 representation of NaT
- NA
- Decimal("NaN")

Parameters
----------
@@ -175,6 +178,8 @@ cpdef ndarray[uint8_t] isnaobj(ndarray arr):
- NaT
- np.datetime64 representation of NaT
- np.timedelta64 representation of NaT
- NA
- Decimal("NaN")

Parameters
----------
@@ -211,6 +216,7 @@ def isnaobj_old(arr: ndarray) -> ndarray:
- NEGINF
- NaT
- NA
- Decimal("NaN")

Parameters
----------
@@ -249,6 +255,8 @@ def isnaobj2d(arr: ndarray) -> ndarray:
- NaT
- np.datetime64 representation of NaT
- np.timedelta64 representation of NaT
- NA
- Decimal("NaN")

Parameters
----------
@@ -293,6 +301,8 @@ def isnaobj2d_old(arr: ndarray) -> ndarray:
- NaT
- np.datetime64 representation of NaT
- np.timedelta64 representation of NaT
- NA
- Decimal("NaN")

Parameters
----------
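The docstring updates above enumerate the values these null checks recognize. The public entry point over `checknull` and friends is `pd.isna`, which can be seen covering the listed cases:

```python
import numpy as np
import pandas as pd

# pd.isna is the public face of the checknull/isnaobj routines touched here.
print(pd.isna(None))                  # True
print(pd.isna(float("nan")))          # True
print(pd.isna(pd.NaT))                # True
print(pd.isna(np.datetime64("NaT")))  # True
```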
43 changes: 43 additions & 0 deletions pandas/_libs/ops.pyi
@@ -0,0 +1,43 @@
from typing import (
Any,
Callable,
)

import numpy as np

_BinOp = Callable[[Any, Any], Any]
_BoolOp = Callable[[Any, Any], bool]


def scalar_compare(
values: np.ndarray, # object[:]
val: object,
op: _BoolOp, # {operator.eq, operator.ne, ...}
) -> np.ndarray: ... # np.ndarray[bool]

def vec_compare(
left: np.ndarray, # np.ndarray[object]
right: np.ndarray, # np.ndarray[object]
op: _BoolOp, # {operator.eq, operator.ne, ...}
) -> np.ndarray: ... # np.ndarray[bool]


def scalar_binop(
values: np.ndarray, # object[:]
val: object,
op: _BinOp, # binary operator
) -> np.ndarray: ...


def vec_binop(
left: np.ndarray, # object[:]
right: np.ndarray, # object[:]
op: _BinOp, # binary operator
) -> np.ndarray: ...


def maybe_convert_bool(
arr: np.ndarray, # np.ndarray[object]
true_values=...,
false_values=...
) -> np.ndarray: ...
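The stub above types elementwise comparison/binop helpers over object arrays. A plain-Python analog of `scalar_compare` (hypothetical — the real routine is Cython, not a list comprehension) shows the contract the signature describes:

```python
import operator
import numpy as np

def scalar_compare_py(values: np.ndarray, val, op) -> np.ndarray:
    # Elementwise op(element, val) over an object array, returning a bool
    # ndarray — a stand-in for the scalar_compare typed in the stub.
    return np.array([op(x, val) for x in values], dtype=bool)

vals = np.array([1, 5, 3], dtype=object)
print(scalar_compare_py(vals, 3, operator.gt))  # [False  True False]
```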
5 changes: 5 additions & 0 deletions pandas/_libs/ops_dispatch.pyi
@@ -0,0 +1,5 @@
import numpy as np

def maybe_dispatch_ufunc_to_dunder_op(
self, ufunc: np.ufunc, method: str, *inputs, **kwargs
): ...
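`maybe_dispatch_ufunc_to_dunder_op` reroutes a NumPy ufunc call to the corresponding dunder method. A toy class (illustrative only, handling just `np.add`) shows the `__array_ufunc__` dispatch pattern the stub's signature belongs to:

```python
import numpy as np

class Wrapped:
    """Toy container: a ufunc call is rerouted to the matching dunder op."""
    def __init__(self, data):
        self.data = np.asarray(data)

    def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
        # The real helper maps e.g. np.add -> __add__ generically;
        # here we handle only the one case needed for the demo.
        if ufunc is np.add and method == "__call__":
            other = inputs[0] if inputs[1] is self else inputs[1]
            return self.__add__(other)
        return NotImplemented

    def __add__(self, other):
        return Wrapped(self.data + other)

w = np.add(Wrapped([1, 2]), 10)  # dispatched through __array_ufunc__
print(w.data)  # [11 12]
```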
19 changes: 19 additions & 0 deletions pandas/_libs/reshape.pyi
@@ -0,0 +1,19 @@
import numpy as np

def unstack(
values: np.ndarray, # reshape_t[:, :]
mask: np.ndarray, # const uint8_t[:]
stride: int,
length: int,
width: int,
new_values: np.ndarray, # reshape_t[:, :]
new_mask: np.ndarray, # uint8_t[:, :]
) -> None: ...


def explode(
values: np.ndarray, # np.ndarray[object]
) -> tuple[
np.ndarray, # np.ndarray[object]
np.ndarray, # np.ndarray[np.int64]
]: ...
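The `explode` routine typed above backs the public `Series.explode`, which expands each list-like element to its own row while repeating the index — matching the stub's two returned arrays (values and repeat counts/positions):

```python
import pandas as pd

# Series.explode is the public entry point over the stubbed explode routine.
s = pd.Series([[1, 2], [3]])
exploded = s.explode()
print(list(exploded))        # [1, 2, 3]
print(list(exploded.index))  # [0, 0, 1]
```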
13 changes: 13 additions & 0 deletions pandas/_libs/tslibs/ccalendar.pyi
@@ -0,0 +1,13 @@

DAYS: list[str]
MONTH_ALIASES: dict[int, str]
MONTH_NUMBERS: dict[str, int]
MONTHS: list[str]
int_to_weekday: dict[int, str]

def get_firstbday(year: int, month: int) -> int: ...
def get_lastbday(year: int, month: int) -> int: ...
def get_day_of_year(year: int, month: int, day: int) -> int: ...
def get_iso_calendar(year: int, month: int, day: int) -> tuple[int, int, int]: ...
def get_week_of_year(year: int, month: int, day: int) -> int: ...
def get_days_in_month(year: int, month: int) -> int: ...
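The `get_firstbday` declared above returns the first business day of a month. A hypothetical stdlib analog (not pandas' implementation, which is O(1) arithmetic in Cython) makes the contract concrete:

```python
import calendar

def first_bday(year: int, month: int) -> int:
    # First weekday (Mon-Fri) of the month; illustrative analog of the
    # get_firstbday signature in the stub.
    day = 1
    while calendar.weekday(year, month, day) >= 5:  # 5 = Sat, 6 = Sun
        day += 1
    return day

print(first_bday(2021, 3))  # 1  (Mar 1, 2021 was a Monday)
print(first_bday(2021, 5))  # 3  (May 1, 2021 was a Saturday)
```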
11 changes: 11 additions & 0 deletions pandas/_libs/tslibs/strptime.pyi
@@ -0,0 +1,11 @@
from typing import Optional

import numpy as np

def array_strptime(
values: np.ndarray, # np.ndarray[object]
fmt: Optional[str],
exact: bool = True,
errors: str = "raise"
) -> tuple[np.ndarray, np.ndarray]: ...
# first ndarray is M8[ns], second is object ndarray of Optional[tzinfo]
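`array_strptime` is reached through the public `pd.to_datetime` when an explicit `format` is given, yielding the `M8[ns]` values the comment above describes:

```python
import pandas as pd

# An explicit format routes parsing through array_strptime,
# producing datetime64[ns] values.
result = pd.to_datetime(["2021-03-16", "2021-03-17"], format="%Y-%m-%d")
print(result.dtype)  # datetime64[ns]
```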
32 changes: 32 additions & 0 deletions pandas/_libs/tslibs/timezones.pyi
@@ -0,0 +1,32 @@
from datetime import (
datetime,
tzinfo,
)
from typing import (
Callable,
Optional,
Union,
)

import numpy as np

# imported from dateutil.tz
dateutil_gettz: Callable[[str], tzinfo]


def tz_standardize(tz: tzinfo) -> tzinfo: ...

def tz_compare(start: Optional[tzinfo], end: Optional[tzinfo]) -> bool: ...

def infer_tzinfo(
start: Optional[datetime], end: Optional[datetime],
) -> Optional[tzinfo]: ...

# ndarrays returned are both int64_t
def get_dst_info(tz: tzinfo) -> tuple[np.ndarray, np.ndarray, str]: ...

def maybe_get_tz(tz: Optional[Union[str, int, np.int64, tzinfo]]) -> Optional[tzinfo]: ...

def get_timezone(tz: tzinfo) -> Union[tzinfo, str]: ...

def is_utc(tz: Optional[tzinfo]) -> bool: ...
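`tz_compare` above takes two optional `tzinfo` objects and returns a bool. A rough stdlib sketch of that signature (illustrative only — the real routine compares zone identity, not just one offset) using fixed-offset timezones:

```python
from datetime import datetime, timedelta, timezone

def tz_compare_py(start, end) -> bool:
    # Hypothetical analog of the tz_compare signature: equal if both are
    # None, or both yield the same UTC offset at a reference instant.
    if start is None or end is None:
        return start is end
    ref = datetime(2021, 3, 17)
    return start.utcoffset(ref) == end.utcoffset(ref)

utc = timezone.utc
plus1 = timezone(timedelta(hours=1))
print(tz_compare_py(utc, timezone(timedelta(0))))  # True
print(tz_compare_py(utc, plus1))                   # False
```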