Skip to content

Commit ff83813

Browse files
Merge branch 'pandas-dev:master' into master
2 parents ce929d2 + cc5ef4f commit ff83813

File tree

14 files changed

+334
-265
lines changed

14 files changed

+334
-265
lines changed

.github/ISSUE_TEMPLATE/documentation_improvement.md

Lines changed: 0 additions & 22 deletions
This file was deleted.
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
name: Documentation Improvement
2+
description: Report wrong or missing documentation
3+
title: "DOC: "
4+
labels: [Docs, Needs Triage]
5+
6+
body:
7+
- type: checkboxes
8+
attributes:
9+
options:
10+
- label: >
11+
I have checked that the issue still exists on the latest versions of the docs
12+
on `master` [here](https://pandas.pydata.org/docs/dev/)
13+
required: true
14+
- type: textarea
15+
id: location
16+
attributes:
17+
label: Location of the documentation
18+
description: >
19+
Please provide the location of the documentation, e.g. "pandas.read_csv" or the
20+
URL of the documentation, e.g.
21+
"https://pandas.pydata.org/docs/reference/api/pandas.read_csv.html"
22+
placeholder: https://pandas.pydata.org/docs/reference/api/pandas.read_csv.html
23+
validations:
24+
required: true
25+
- type: textarea
26+
id: problem
27+
attributes:
28+
label: Documentation problem
29+
description: >
30+
Please provide a description of what documentation you believe needs to be fixed/improved
31+
validations:
32+
required: true
33+
- type: textarea
34+
id: suggested-fix
35+
attributes:
36+
label: Suggested fix for documentation
37+
description: >
38+
Please explain the suggested fix and **why** it's better than the existing documentation
39+
validations:
40+
required: true

doc/source/user_guide/timeseries.rst

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -204,16 +204,18 @@ If you use dates which start with the day first (i.e. European style),
204204
you can pass the ``dayfirst`` flag:
205205

206206
.. ipython:: python
207+
:okwarning:
207208
208209
pd.to_datetime(["04-01-2012 10:00"], dayfirst=True)
209210
210211
pd.to_datetime(["14-01-2012", "01-14-2012"], dayfirst=True)
211212
212213
.. warning::
213214

214-
You see in the above example that ``dayfirst`` isn't strict, so if a date
215+
You see in the above example that ``dayfirst`` isn't strict. If a date
215216
can't be parsed with the day being first it will be parsed as if
216-
``dayfirst`` were False.
217+
``dayfirst`` were False, and in the case of parsing delimited date strings
218+
(e.g. ``31-12-2012``) then a warning will also be raised.
217219

218220
If you pass a single string to ``to_datetime``, it returns a single ``Timestamp``.
219221
``Timestamp`` can also accept string input, but it doesn't accept string parsing

doc/source/whatsnew/v1.4.0.rst

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -103,10 +103,20 @@ Notable bug fixes
103103

104104
These are bug fixes that might have notable behavior changes.
105105

106-
.. _whatsnew_140.notable_bug_fixes.notable_bug_fix1:
106+
.. _whatsnew_140.notable_bug_fixes.inconsistent_date_string_parsing:
107107

108-
notable_bug_fix1
109-
^^^^^^^^^^^^^^^^
108+
Inconsistent date string parsing
109+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
110+
111+
The ``dayfirst`` option of :func:`to_datetime` isn't strict, and this can lead to surprising behaviour:
112+
113+
.. ipython:: python
114+
:okwarning:
115+
116+
pd.to_datetime(["31-12-2021"], dayfirst=False)
117+
118+
Now, a warning will be raised if a date string cannot be parsed in accordance with the given ``dayfirst`` value when
119+
the value is a delimited date string (e.g. ``31-12-2012``).
110120

111121
.. _whatsnew_140.notable_bug_fixes.notable_bug_fix2:
112122

@@ -253,6 +263,7 @@ Categorical
253263
Datetimelike
254264
^^^^^^^^^^^^
255265
- Bug in :class:`DataFrame` constructor unnecessarily copying non-datetimelike 2D object arrays (:issue:`39272`)
266+
- :func:`to_datetime` would silently swap ``MM/DD/YYYY`` and ``DD/MM/YYYY`` formats if the given ``dayfirst`` option could not be respected - now, a warning is raised in the case of delimited date strings (e.g. ``31-12-2012``) (:issue:`12585`)
256267
-
257268

258269
Timedelta

pandas/_libs/tslibs/parsing.pyx

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ Parsing functions for datetime and datetime-like strings.
33
"""
44
import re
55
import time
6+
import warnings
67

78
from libc.string cimport strchr
89

@@ -81,6 +82,11 @@ class DateParseError(ValueError):
8182
_DEFAULT_DATETIME = datetime(1, 1, 1).replace(hour=0, minute=0,
8283
second=0, microsecond=0)
8384

85+
PARSING_WARNING_MSG = (
86+
"Parsing '{date_string}' in {format} format. Provide format "
87+
"or specify infer_datetime_format=True for consistent parsing."
88+
)
89+
8490
cdef:
8591
set _not_datelike_strings = {'a', 'A', 'm', 'M', 'p', 'P', 't', 'T'}
8692

@@ -168,10 +174,28 @@ cdef inline object _parse_delimited_date(str date_string, bint dayfirst):
168174
# date_string can't be converted to date, above format
169175
return None, None
170176

177+
swapped_day_and_month = False
171178
if 1 <= month <= MAX_DAYS_IN_MONTH and 1 <= day <= MAX_DAYS_IN_MONTH \
172179
and (month <= MAX_MONTH or day <= MAX_MONTH):
173180
if (month > MAX_MONTH or (day <= MAX_MONTH and dayfirst)) and can_swap:
174181
day, month = month, day
182+
swapped_day_and_month = True
183+
if dayfirst and not swapped_day_and_month:
184+
warnings.warn(
185+
PARSING_WARNING_MSG.format(
186+
date_string=date_string,
187+
format='MM/DD/YYYY'
188+
),
189+
stacklevel=4,
190+
)
191+
elif not dayfirst and swapped_day_and_month:
192+
warnings.warn(
193+
PARSING_WARNING_MSG.format(
194+
date_string=date_string,
195+
format='DD/MM/YYYY'
196+
),
197+
stacklevel=4,
198+
)
175199
if PY_VERSION_HEX >= 0x03060100:
176200
# In Python <= 3.6.0 there is no range checking for invalid dates
177201
# in C api, thus we call faster C version for 3.6.1 or newer

pandas/core/generic.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2645,7 +2645,7 @@ def to_hdf(
26452645
which may perform worse but allow more flexible operations
26462646
like searching / selecting subsets of the data.
26472647
- If None, pd.get_option('io.hdf.default_format') is checked,
2648-
followed by fallback to "fixed"
2648+
followed by fallback to "fixed".
26492649
errors : str, default 'strict'
26502650
Specifies how encoding and decoding errors are to be handled.
26512651
See the errors argument for :func:`open` for a full list

pandas/core/internals/managers.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1859,12 +1859,20 @@ def construction_error(
18591859
# -----------------------------------------------------------------------
18601860

18611861

1862-
def _grouping_func(tup: tuple[int, ArrayLike]) -> tuple[bool, DtypeObj]:
1862+
def _grouping_func(tup: tuple[int, ArrayLike]) -> tuple[int, bool, DtypeObj]:
18631863
# compat for numpy<1.21, in which comparing a np.dtype with an ExtensionDtype
18641864
# raises instead of returning False. Once earlier numpy versions are dropped,
18651865
# this can be simplified to `return tup[1].dtype`
18661866
dtype = tup[1].dtype
1867-
return isinstance(dtype, np.dtype), dtype
1867+
1868+
if is_1d_only_ea_dtype(dtype):
1869+
# We know these won't be consolidated, so don't need to group these.
1870+
# This avoids expensive comparisons of CategoricalDtype objects
1871+
sep = id(dtype)
1872+
else:
1873+
sep = 0
1874+
1875+
return sep, isinstance(dtype, np.dtype), dtype
18681876

18691877

18701878
def _form_blocks(arrays: list[ArrayLike], consolidate: bool) -> list[Block]:
@@ -1878,7 +1886,7 @@ def _form_blocks(arrays: list[ArrayLike], consolidate: bool) -> list[Block]:
18781886
grouper = itertools.groupby(tuples, _grouping_func)
18791887

18801888
nbs = []
1881-
for (_, dtype), tup_block in grouper:
1889+
for (_, _, dtype), tup_block in grouper:
18821890
block_type = get_block_type(None, dtype)
18831891

18841892
if isinstance(dtype, np.dtype):

pandas/core/tools/datetimes.py

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -701,8 +701,14 @@ def to_datetime(
701701
Specify a date parse order if `arg` is str or its list-likes.
702702
If True, parses dates with the day first, e.g. 10/11/12 is parsed as
703703
2012-11-10.
704-
Warning: dayfirst=True is not strict, but will prefer to parse
705-
with day first (this is a known bug, based on dateutil behavior).
704+
705+
.. warning::
706+
707+
dayfirst=True is not strict, but will prefer to parse
708+
with day first. If a delimited date string cannot be parsed in
709+
accordance with the given `dayfirst` option, e.g.
710+
``to_datetime(['31-12-2021'])``, then a warning will be shown.
711+
706712
yearfirst : bool, default False
707713
Specify a date parse order if `arg` is str or its list-likes.
708714
@@ -711,8 +717,11 @@ def to_datetime(
711717
- If both dayfirst and yearfirst are True, yearfirst takes precedence (same
712718
as dateutil).
713719
714-
Warning: yearfirst=True is not strict, but will prefer to parse
715-
with year first (this is a known bug, based on dateutil behavior).
720+
.. warning::
721+
722+
yearfirst=True is not strict, but will prefer to parse
723+
with year first.
724+
716725
utc : bool, default None
717726
Return UTC DatetimeIndex if True (converting any tz-aware
718727
datetime.datetime objects as well).

pandas/tests/frame/indexing/test_where.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -771,3 +771,13 @@ def test_where_non_keyword_deprecation():
771771
result = s.where(s > 1, 10, False)
772772
expected = DataFrame([10, 10, 2, 3, 4])
773773
tm.assert_frame_equal(expected, result)
774+
775+
776+
def test_where_columns_casting():
777+
# GH 42295
778+
779+
df = DataFrame({"a": [1.0, 2.0], "b": [3, np.nan]})
780+
expected = df.copy()
781+
result = df.where(pd.notnull(df), None)
782+
# make sure dtypes don't change
783+
tm.assert_frame_equal(expected, result)

pandas/tests/indexing/multiindex/test_multiindex.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,3 +98,24 @@ def test_multiindex_with_datatime_level_preserves_freq(self):
9898
result = df.loc[0].index
9999
tm.assert_index_equal(result, dti)
100100
assert result.freq == dti.freq
101+
102+
def test_multiindex_complex(self):
103+
# GH#42145
104+
complex_data = [1 + 2j, 4 - 3j, 10 - 1j]
105+
non_complex_data = [3, 4, 5]
106+
result = DataFrame(
107+
{
108+
"x": complex_data,
109+
"y": non_complex_data,
110+
"z": non_complex_data,
111+
}
112+
)
113+
result.set_index(["x", "y"], inplace=True)
114+
expected = DataFrame(
115+
{"z": non_complex_data},
116+
index=MultiIndex.from_arrays(
117+
[complex_data, non_complex_data],
118+
names=("x", "y"),
119+
),
120+
)
121+
tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)