pandas-dev
diff --git a/.github/ISSUE_TEMPLATE/documentation_improvement.md
Lines changed: 0 additions & 22 deletions b/.github/ISSUE_TEMPLATE/documentation_improvement.md
Lines changed: 0 additions & 22 deletions
diff --git a/.github/ISSUE_TEMPLATE/documentation_improvement.yaml
Lines changed: 40 additions & 0 deletions b/.github/ISSUE_TEMPLATE/documentation_improvement.yaml
Lines changed: 40 additions & 0 deletions
diff --git a/doc/source/user_guide/timeseries.rst
Lines changed: 4 additions & 2 deletions b/doc/source/user_guide/timeseries.rst
Lines changed: 4 additions & 2 deletions
diff --git a/doc/source/whatsnew/v1.4.0.rst
Lines changed: 14 additions & 3 deletions b/doc/source/whatsnew/v1.4.0.rst
Lines changed: 14 additions & 3 deletions
diff --git a/pandas/_libs/tslibs/parsing.pyx
Lines changed: 24 additions & 0 deletions b/pandas/_libs/tslibs/parsing.pyx
Lines changed: 24 additions & 0 deletions
diff --git a/pandas/core/generic.py
Lines changed: 1 addition & 1 deletion b/pandas/core/generic.py
Lines changed: 1 addition & 1 deletion
diff --git a/pandas/core/internals/managers.py
Lines changed: 11 additions & 3 deletions b/pandas/core/internals/managers.py
Lines changed: 11 additions & 3 deletions
diff --git a/pandas/core/tools/datetimes.py
Lines changed: 13 additions & 4 deletions b/pandas/core/tools/datetimes.py
Lines changed: 13 additions & 4 deletions
diff --git a/pandas/tests/frame/indexing/test_where.py
Lines changed: 10 additions & 0 deletions b/pandas/tests/frame/indexing/test_where.py
Lines changed: 10 additions & 0 deletions
diff --git a/pandas/tests/indexing/multiindex/test_multiindex.py
Lines changed: 21 additions & 0 deletions b/pandas/tests/indexing/multiindex/test_multiindex.py
Lines changed: 21 additions & 0 deletions
@@ -0,0 +1,40 @@
+name: Documentation Improvement
+description: Report wrong or missing documentation
+title: "DOC: "
+labels: [Docs, Needs Triage]
+
+body:
+  - type: checkboxes
+    attributes:
+      options:
+        - label: >
+            I have checked that the issue still exists on the latest versions of the docs
+            on `master` [here](https://pandas.pydata.org/docs/dev/)
+          required: true
+  - type: textarea
+    id: location
+    attributes:
+      label: Location of the documentation
+      description: >
+        Please provide the location of the documentation, e.g. "pandas.read_csv" or the
+        URL of the documentation, e.g.
+        "https://pandas.pydata.org/docs/reference/api/pandas.read_csv.html"
+      placeholder: https://pandas.pydata.org/docs/reference/api/pandas.read_csv.html
+    validations:
+      required: true
+  - type: textarea
+    id: problem
+    attributes:
+      label: Documentation problem
+      description: >
+        Please provide a description of what documentation you believe needs to be fixed/improved
+    validations:
+      required: true
+  - type: textarea
+    id: suggested-fix
+    attributes:
+      label: Suggested fix for documentation
+      description: >
+        Please explain the suggested fix and **why** it's better than the existing documentation
+    validations:
+      required: true
@@ -204,16 +204,18 @@ If you use dates which start with the day first (i.e. European style),
 you can pass the ``dayfirst`` flag:
 
 .. ipython:: python
+   :okwarning:
 
     pd.to_datetime(["04-01-2012 10:00"], dayfirst=True)
 
     pd.to_datetime(["14-01-2012", "01-14-2012"], dayfirst=True)
 
 .. warning::
 
-   You see in the above example that ``dayfirst`` isn't strict, so if a date
+   You see in the above example that ``dayfirst`` isn't strict. If a date
    can't be parsed with the day being first it will be parsed as if
-   ``dayfirst`` were False.
+   ``dayfirst`` were False. When parsing delimited date strings
+   (e.g. ``31-12-2012``), a warning will also be raised.
 
 If you pass a single string to ``to_datetime``, it returns a single ``Timestamp``.
 ``Timestamp`` can also accept string input, but it doesn't accept string parsing
 
@@ -103,10 +103,20 @@ Notable bug fixes
 
 These are bug fixes that might have notable behavior changes.
 
-.. _whatsnew_140.notable_bug_fixes.notable_bug_fix1:
+.. _whatsnew_140.notable_bug_fixes.inconsistent_date_string_parsing:
 
-notable_bug_fix1
-^^^^^^^^^^^^^^^^
+Inconsistent date string parsing
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The ``dayfirst`` option of :func:`to_datetime` isn't strict, and this can lead to surprising behaviour:
+
+.. ipython:: python
+    :okwarning:
+
+    pd.to_datetime(["31-12-2021"], dayfirst=False)
+
+Now, a warning will be raised if a date string cannot be parsed in accordance with the given ``dayfirst`` value when
+the value is a delimited date string (e.g. ``31-12-2012``).
 
 .. _whatsnew_140.notable_bug_fixes.notable_bug_fix2:
 
@@ -253,6 +263,7 @@ Categorical
 Datetimelike
 ^^^^^^^^^^^^
 - Bug in :class:`DataFrame` constructor unnecessarily copying non-datetimelike 2D object arrays (:issue:`39272`)
+- :func:`to_datetime` would silently swap ``MM/DD/YYYY`` and ``DD/MM/YYYY`` formats if the given ``dayfirst`` option could not be respected - now, a warning is raised in the case of delimited date strings (e.g. ``31-12-2012``) (:issue:`12585`)
 -
 
 Timedelta
 
@@ -3,6 +3,7 @@ Parsing functions for datetime and datetime-like strings.
 """
 import re
 import time
+import warnings
 
 from libc.string cimport strchr
 
@@ -81,6 +82,11 @@ class DateParseError(ValueError):
 _DEFAULT_DATETIME = datetime(1, 1, 1).replace(hour=0, minute=0,
                                               second=0, microsecond=0)
 
+PARSING_WARNING_MSG = (
+    "Parsing '{date_string}' in {format} format. Provide format "
+    "or specify infer_datetime_format=True for consistent parsing."
+)
+
 cdef:
     set _not_datelike_strings = {'a', 'A', 'm', 'M', 'p', 'P', 't', 'T'}
 
@@ -168,10 +174,28 @@ cdef inline object _parse_delimited_date(str date_string, bint dayfirst):
         # date_string can't be converted to date, above format
         return None, None
 
+    swapped_day_and_month = False
     if 1 <= month <= MAX_DAYS_IN_MONTH and 1 <= day <= MAX_DAYS_IN_MONTH \
             and (month <= MAX_MONTH or day <= MAX_MONTH):
         if (month > MAX_MONTH or (day <= MAX_MONTH and dayfirst)) and can_swap:
             day, month = month, day
+            swapped_day_and_month = True
+        if dayfirst and not swapped_day_and_month:
+            warnings.warn(
+                PARSING_WARNING_MSG.format(
+                    date_string=date_string,
+                    format='MM/DD/YYYY'
+                ),
+                stacklevel=4,
+            )
+        elif not dayfirst and swapped_day_and_month:
+            warnings.warn(
+                PARSING_WARNING_MSG.format(
+                    date_string=date_string,
+                    format='DD/MM/YYYY'
+                ),
+                stacklevel=4,
+            )
         if PY_VERSION_HEX >= 0x03060100:
             # In Python <= 3.6.0 there is no range checking for invalid dates
             # in C api, thus we call faster C version for 3.6.1 or newer
 
@@ -2645,7 +2645,7 @@ def to_hdf(
               which may perform worse but allow more flexible operations
               like searching / selecting subsets of the data.
             - If None, pd.get_option('io.hdf.default_format') is checked,
-              followed by fallback to "fixed"
+              followed by fallback to "fixed".
         errors : str, default 'strict'
             Specifies how encoding and decoding errors are to be handled.
             See the errors argument for :func:`open` for a full list
 
@@ -1859,12 +1859,20 @@ def construction_error(
 # -----------------------------------------------------------------------
 
 
-def _grouping_func(tup: tuple[int, ArrayLike]) -> tuple[bool, DtypeObj]:
+def _grouping_func(tup: tuple[int, ArrayLike]) -> tuple[int, bool, DtypeObj]:
     # compat for numpy<1.21, in which comparing a np.dtype with an ExtensionDtype
     # raises instead of returning False. Once earlier numpy versions are dropped,
     # this can be simplified to `return tup[1].dtype`
     dtype = tup[1].dtype
-    return isinstance(dtype, np.dtype), dtype
+
+    if is_1d_only_ea_dtype(dtype):
+        # We know these won't be consolidated, so don't need to group these.
+        # This avoids expensive comparisons of CategoricalDtype objects
+        sep = id(dtype)
+    else:
+        sep = 0
+
+    return sep, isinstance(dtype, np.dtype), dtype
 
 
 def _form_blocks(arrays: list[ArrayLike], consolidate: bool) -> list[Block]:
@@ -1878,7 +1886,7 @@ def _form_blocks(arrays: list[ArrayLike], consolidate: bool) -> list[Block]:
     grouper = itertools.groupby(tuples, _grouping_func)
 
     nbs = []
-    for (_, dtype), tup_block in grouper:
+    for (_, _, dtype), tup_block in grouper:
         block_type = get_block_type(None, dtype)
 
         if isinstance(dtype, np.dtype):
 
@@ -701,8 +701,14 @@ def to_datetime(
         Specify a date parse order if `arg` is str or its list-likes.
         If True, parses dates with the day first, eg 10/11/12 is parsed as
         2012-11-10.
-        Warning: dayfirst=True is not strict, but will prefer to parse
-        with day first (this is a known bug, based on dateutil behavior).
+
+        .. warning::
+
+            dayfirst=True is not strict, but will prefer to parse
+            with day first. If a delimited date string cannot be parsed in
+            accordance with the given `dayfirst` option, e.g.
+            ``to_datetime(['31-12-2021'])``, then a warning will be shown.
+
     yearfirst : bool, default False
         Specify a date parse order if `arg` is str or its list-likes.
 
@@ -711,8 +717,11 @@ def to_datetime(
         - If both dayfirst and yearfirst are True, yearfirst is preceded (same
           as dateutil).
 
-        Warning: yearfirst=True is not strict, but will prefer to parse
-        with year first (this is a known bug, based on dateutil behavior).
+        .. warning::
+
+            yearfirst=True is not strict, but will prefer to parse
+            with year first.
+
     utc : bool, default None
         Return UTC DatetimeIndex if True (converting any tz-aware
         datetime.datetime objects as well).
 
@@ -771,3 +771,13 @@ def test_where_non_keyword_deprecation():
         result = s.where(s > 1, 10, False)
     expected = DataFrame([10, 10, 2, 3, 4])
     tm.assert_frame_equal(expected, result)
+
+
+def test_where_columns_casting():
+    # GH 42295
+
+    df = DataFrame({"a": [1.0, 2.0], "b": [3, np.nan]})
+    expected = df.copy()
+    result = df.where(pd.notnull(df), None)
+    # make sure dtypes don't change
+    tm.assert_frame_equal(expected, result)
@@ -98,3 +98,24 @@ def test_multiindex_with_datatime_level_preserves_freq(self):
         result = df.loc[0].index
         tm.assert_index_equal(result, dti)
         assert result.freq == dti.freq
+
+    def test_multiindex_complex(self):
+        # GH#42145
+        complex_data = [1 + 2j, 4 - 3j, 10 - 1j]
+        non_complex_data = [3, 4, 5]
+        result = DataFrame(
+            {
+                "x": complex_data,
+                "y": non_complex_data,
+                "z": non_complex_data,
+            }
+        )
+        result.set_index(["x", "y"], inplace=True)
+        expected = DataFrame(
+            {"z": non_complex_data},
+            index=MultiIndex.from_arrays(
+                [complex_data, non_complex_data],
+                names=("x", "y"),
+            ),
+        )
+        tm.assert_frame_equal(result, expected)