Skip to content

Commit 964580b

Browse files
committed
Merge remote-tracking branch 'upstream/master' into krey-master
2 parents a70741d + ef77b57 commit 964580b

File tree

11 files changed

+181
-43
lines changed

11 files changed

+181
-43
lines changed

asv_bench/benchmarks/join_merge.py

Lines changed: 46 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -273,10 +273,10 @@ def time_merge_ordered(self):
273273

274274

275275
class MergeAsof:
276-
params = [["backward", "forward", "nearest"]]
277-
param_names = ["direction"]
276+
params = [["backward", "forward", "nearest"], [None, 5]]
277+
param_names = ["direction", "tolerance"]
278278

279-
def setup(self, direction):
279+
def setup(self, direction, tolerance):
280280
one_count = 200000
281281
two_count = 1000000
282282

@@ -303,6 +303,9 @@ def setup(self, direction):
303303
df1["time32"] = np.int32(df1.time)
304304
df2["time32"] = np.int32(df2.time)
305305

306+
df1["timeu64"] = np.uint64(df1.time)
307+
df2["timeu64"] = np.uint64(df2.time)
308+
306309
self.df1a = df1[["time", "value1"]]
307310
self.df2a = df2[["time", "value2"]]
308311
self.df1b = df1[["time", "key", "value1"]]
@@ -313,22 +316,52 @@ def setup(self, direction):
313316
self.df2d = df2[["time32", "value2"]]
314317
self.df1e = df1[["time", "key", "key2", "value1"]]
315318
self.df2e = df2[["time", "key", "key2", "value2"]]
319+
self.df1f = df1[["timeu64", "value1"]]
320+
self.df2f = df2[["timeu64", "value2"]]
321+
322+
def time_on_int(self, direction, tolerance):
323+
merge_asof(
324+
self.df1a, self.df2a, on="time", direction=direction, tolerance=tolerance
325+
)
316326

317-
def time_on_int(self, direction):
318-
merge_asof(self.df1a, self.df2a, on="time", direction=direction)
327+
def time_on_int32(self, direction, tolerance):
328+
merge_asof(
329+
self.df1d, self.df2d, on="time32", direction=direction, tolerance=tolerance
330+
)
319331

320-
def time_on_int32(self, direction):
321-
merge_asof(self.df1d, self.df2d, on="time32", direction=direction)
332+
def time_on_uint64(self, direction, tolerance):
333+
merge_asof(
334+
self.df1f, self.df2f, on="timeu64", direction=direction, tolerance=tolerance
335+
)
322336

323-
def time_by_object(self, direction):
324-
merge_asof(self.df1b, self.df2b, on="time", by="key", direction=direction)
337+
def time_by_object(self, direction, tolerance):
338+
merge_asof(
339+
self.df1b,
340+
self.df2b,
341+
on="time",
342+
by="key",
343+
direction=direction,
344+
tolerance=tolerance,
345+
)
325346

326-
def time_by_int(self, direction):
327-
merge_asof(self.df1c, self.df2c, on="time", by="key2", direction=direction)
347+
def time_by_int(self, direction, tolerance):
348+
merge_asof(
349+
self.df1c,
350+
self.df2c,
351+
on="time",
352+
by="key2",
353+
direction=direction,
354+
tolerance=tolerance,
355+
)
328356

329-
def time_multiby(self, direction):
357+
def time_multiby(self, direction, tolerance):
330358
merge_asof(
331-
self.df1e, self.df2e, on="time", by=["key", "key2"], direction=direction
359+
self.df1e,
360+
self.df2e,
361+
on="time",
362+
by=["key", "key2"],
363+
direction=direction,
364+
tolerance=tolerance,
332365
)
333366

334367

doc/source/whatsnew/v1.0.0.rst

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -355,7 +355,8 @@ I/O
355355
- Bug in :func:`read_hdf` closing stores that it didn't open when Exceptions are raised (:issue:`28699`)
356356
- Bug in :meth:`DataFrame.read_json` where using ``orient="index"`` would not maintain the order (:issue:`28557`)
357357
- Bug in :meth:`DataFrame.to_html` where the length of the ``formatters`` argument was not verified (:issue:`28469`)
358-
- Bug in :meth:`DataFrame.to_clipboard` which did not work reliably in ipython (:issue:`22707`)
358+
- Bug in :class:`pandas.io.formats.style.Styler` formatting for floating values not displaying decimals correctly (:issue:`13257`)
359+
- Bug in :meth:`DataFrame.to_clipboard` which did not work reliably in ipython (:issue:`22707`)
359360

360361
Plotting
361362
^^^^^^^^
@@ -373,8 +374,9 @@ Plotting
373374
Groupby/resample/rolling
374375
^^^^^^^^^^^^^^^^^^^^^^^^
375376

376-
-
377377
- Bug in :meth:`DataFrame.rolling` not allowing for rolling over datetimes when ``axis=1`` (:issue:`28192`)
378+
- Bug in :meth:`DataFrame.rolling` not allowing rolling over multi-index levels (:issue:`15584`).
379+
- Bug in :meth:`DataFrame.rolling` not allowing rolling on monotonic decreasing time indexes (:issue:`19248`).
378380
- Bug in :meth:`DataFrame.groupby` not offering selection by column name when ``axis=1`` (:issue:`27614`)
379381
- Bug in :meth:`DataFrameGroupby.agg` not able to use lambda function with named aggregation (:issue:`27519`)
380382
- Bug in :meth:`DataFrame.groupby` losing column name information when grouping by a categorical column (:issue:`28787`)
@@ -388,6 +390,7 @@ Reshaping
388390
- Bug in :func:`merge`, did not append suffixes correctly with MultiIndex (:issue:`28518`)
389391
- :func:`qcut` and :func:`cut` now handle boolean input (:issue:`20303`)
390392
- Fix to ensure all int dtypes can be used in :func:`merge_asof` when using a tolerance value. Previously every non-int64 type would raise an erroneous ``MergeError`` (:issue:`28870`).
393+
- Better error message in :func:`get_dummies` when ``columns`` isn't a list-like value (:issue:`28383`)
391394

392395
Sparse
393396
^^^^^^

pandas/core/reshape/reshape.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -864,6 +864,8 @@ def get_dummies(
864864
# determine columns being encoded
865865
if columns is None:
866866
data_to_encode = data.select_dtypes(include=dtypes_to_encode)
867+
elif not is_list_like(columns):
868+
raise TypeError("Input must be a list-like for parameter `columns`")
867869
else:
868870
data_to_encode = data[columns]
869871

pandas/core/window/rolling.py

Lines changed: 14 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ def __init__(
7070
center: Optional[bool] = False,
7171
win_type: Optional[str] = None,
7272
axis: Axis = 0,
73-
on: Optional[str] = None,
73+
on: Optional[Union[str, Index]] = None,
7474
closed: Optional[str] = None,
7575
**kwargs
7676
):
@@ -126,7 +126,7 @@ def _create_blocks(self):
126126
obj = self._selected_obj
127127

128128
# filter out the on from the object
129-
if self.on is not None:
129+
if self.on is not None and not isinstance(self.on, Index):
130130
if obj.ndim == 2:
131131
obj = obj.reindex(columns=obj.columns.difference([self.on]), copy=False)
132132
blocks = obj._to_dict_of_blocks(copy=False).values()
@@ -637,10 +637,10 @@ class Window(_Window):
637637
Provide a window type. If ``None``, all points are evenly weighted.
638638
See the notes below for further information.
639639
on : str, optional
640-
For a DataFrame, a datetime-like column on which to calculate the rolling
641-
window, rather than the DataFrame's index. Provided integer column is
642-
ignored and excluded from result since an integer index is not used to
643-
calculate the rolling window.
640+
For a DataFrame, a datetime-like column or MultiIndex level on which
641+
to calculate the rolling window, rather than the DataFrame's index.
642+
Provided integer column is ignored and excluded from result since
643+
an integer index is not used to calculate the rolling window.
644644
axis : int or str, default 0
645645
closed : str, default None
646646
Make the interval closed on the 'right', 'left', 'both' or
@@ -1651,18 +1651,19 @@ def is_datetimelike(self):
16511651

16521652
@cache_readonly
16531653
def _on(self):
1654-
16551654
if self.on is None:
16561655
if self.axis == 0:
16571656
return self.obj.index
16581657
elif self.axis == 1:
16591658
return self.obj.columns
1659+
elif isinstance(self.on, Index):
1660+
return self.on
16601661
elif isinstance(self.obj, ABCDataFrame) and self.on in self.obj.columns:
16611662
return Index(self.obj[self.on])
16621663
else:
16631664
raise ValueError(
16641665
"invalid on specified as {0}, "
1665-
"must be a column (if DataFrame) "
1666+
"must be a column (of DataFrame), an Index "
16661667
"or None".format(self.on)
16671668
)
16681669

@@ -1706,10 +1707,12 @@ def validate(self):
17061707

17071708
def _validate_monotonic(self):
17081709
"""
1709-
Validate on is_monotonic.
1710+
Validate monotonic (increasing or decreasing).
17101711
"""
1711-
if not self._on.is_monotonic:
1712-
formatted = self.on or "index"
1712+
if not (self._on.is_monotonic_increasing or self._on.is_monotonic_decreasing):
1713+
formatted = self.on
1714+
if self.on is None:
1715+
formatted = "index"
17131716
raise ValueError("{0} must be monotonic".format(formatted))
17141717

17151718
def _validate_freq(self):

pandas/io/formats/style.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -156,7 +156,8 @@ def __init__(
156156

157157
def default_display_func(x):
158158
if is_float(x):
159-
return "{:>.{precision}g}".format(x, precision=self.precision)
159+
display_format = "{0:.{precision}f}".format(x, precision=self.precision)
160+
return display_format
160161
else:
161162
return x
162163

pandas/tests/io/formats/test_style.py

Lines changed: 32 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1157,28 +1157,51 @@ def test_display_format_raises(self):
11571157
with pytest.raises(TypeError):
11581158
df.style.format(True)
11591159

1160+
def test_display_set_precision(self):
1161+
# Issue #13257
1162+
df = pd.DataFrame(data=[[1.0, 2.0090], [3.2121, 4.566]], columns=["a", "b"])
1163+
s = Styler(df)
1164+
1165+
ctx = s.set_precision(1)._translate()
1166+
1167+
assert s.precision == 1
1168+
assert ctx["body"][0][1]["display_value"] == "1.0"
1169+
assert ctx["body"][0][2]["display_value"] == "2.0"
1170+
assert ctx["body"][1][1]["display_value"] == "3.2"
1171+
assert ctx["body"][1][2]["display_value"] == "4.6"
1172+
1173+
ctx = s.set_precision(2)._translate()
1174+
assert s.precision == 2
1175+
assert ctx["body"][0][1]["display_value"] == "1.00"
1176+
assert ctx["body"][0][2]["display_value"] == "2.01"
1177+
assert ctx["body"][1][1]["display_value"] == "3.21"
1178+
assert ctx["body"][1][2]["display_value"] == "4.57"
1179+
1180+
ctx = s.set_precision(3)._translate()
1181+
assert s.precision == 3
1182+
assert ctx["body"][0][1]["display_value"] == "1.000"
1183+
assert ctx["body"][0][2]["display_value"] == "2.009"
1184+
assert ctx["body"][1][1]["display_value"] == "3.212"
1185+
assert ctx["body"][1][2]["display_value"] == "4.566"
1186+
11601187
def test_display_subset(self):
11611188
df = pd.DataFrame([[0.1234, 0.1234], [1.1234, 1.1234]], columns=["a", "b"])
11621189
ctx = df.style.format(
11631190
{"a": "{:0.1f}", "b": "{0:.2%}"}, subset=pd.IndexSlice[0, :]
11641191
)._translate()
11651192
expected = "0.1"
1166-
assert ctx["body"][0][1]["display_value"] == expected
1167-
assert ctx["body"][1][1]["display_value"] == "1.1234"
1168-
assert ctx["body"][0][2]["display_value"] == "12.34%"
1169-
1170-
raw_11 = "1.1234"
1171-
ctx = df.style.format("{:0.1f}", subset=pd.IndexSlice[0, :])._translate()
1193+
raw_11 = "1.123400"
11721194
assert ctx["body"][0][1]["display_value"] == expected
11731195
assert ctx["body"][1][1]["display_value"] == raw_11
1196+
assert ctx["body"][0][2]["display_value"] == "12.34%"
11741197

11751198
ctx = df.style.format("{:0.1f}", subset=pd.IndexSlice[0, :])._translate()
11761199
assert ctx["body"][0][1]["display_value"] == expected
11771200
assert ctx["body"][1][1]["display_value"] == raw_11
11781201

11791202
ctx = df.style.format("{:0.1f}", subset=pd.IndexSlice["a"])._translate()
11801203
assert ctx["body"][0][1]["display_value"] == expected
1181-
assert ctx["body"][0][2]["display_value"] == "0.1234"
1204+
assert ctx["body"][0][2]["display_value"] == "0.123400"
11821205

11831206
ctx = df.style.format("{:0.1f}", subset=pd.IndexSlice[0, "a"])._translate()
11841207
assert ctx["body"][0][1]["display_value"] == expected
@@ -1189,8 +1212,8 @@ def test_display_subset(self):
11891212
)._translate()
11901213
assert ctx["body"][0][1]["display_value"] == expected
11911214
assert ctx["body"][1][1]["display_value"] == "1.1"
1192-
assert ctx["body"][0][2]["display_value"] == "0.1234"
1193-
assert ctx["body"][1][2]["display_value"] == "1.1234"
1215+
assert ctx["body"][0][2]["display_value"] == "0.123400"
1216+
assert ctx["body"][1][2]["display_value"] == raw_11
11941217

11951218
def test_display_dict(self):
11961219
df = pd.DataFrame([[0.1234, 0.1234], [1.1234, 1.1234]], columns=["a", "b"])

pandas/tests/reshape/merge/test_merge.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1437,6 +1437,17 @@ def test_merge_on_ints_floats(self, int_vals, float_vals, exp_vals):
14371437
result = B.merge(A, left_on="Y", right_on="X")
14381438
assert_frame_equal(result, expected[["Y", "X"]])
14391439

1440+
def test_merge_key_dtype_cast(self):
1441+
# GH 17044
1442+
df1 = DataFrame({"key": [1.0, 2.0], "v1": [10, 20]}, columns=["key", "v1"])
1443+
df2 = DataFrame({"key": [2], "v2": [200]}, columns=["key", "v2"])
1444+
result = df1.merge(df2, on="key", how="left")
1445+
expected = DataFrame(
1446+
{"key": [1.0, 2.0], "v1": [10, 20], "v2": [np.nan, 200.0]},
1447+
columns=["key", "v1", "v2"],
1448+
)
1449+
tm.assert_frame_equal(result, expected)
1450+
14401451
def test_merge_on_ints_floats_warning(self):
14411452
# GH 16572
14421453
# merge will produce a warning when merging on int and

pandas/tests/reshape/test_reshape.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -608,6 +608,23 @@ def test_get_dummies_all_sparse(self):
608608
)
609609
tm.assert_frame_equal(result, expected)
610610

611+
@pytest.mark.parametrize("values", ["baz"])
612+
def test_get_dummies_with_string_values(self, values):
613+
# issue #28383
614+
df = pd.DataFrame(
615+
{
616+
"bar": [1, 2, 3, 4, 5, 6],
617+
"foo": ["one", "one", "one", "two", "two", "two"],
618+
"baz": ["A", "B", "C", "A", "B", "C"],
619+
"zoo": ["x", "y", "z", "q", "w", "t"],
620+
}
621+
)
622+
623+
msg = "Input must be a list-like for parameter `columns`"
624+
625+
with pytest.raises(TypeError, match=msg):
626+
pd.get_dummies(df, columns=values)
627+
611628

612629
class TestCategoricalReshape:
613630
def test_reshaping_multi_index_categorical(self):

pandas/tests/tseries/offsets/test_offsets.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from datetime import date, datetime, time as dt_time, timedelta
2+
from typing import Type
23

34
import numpy as np
45
import pytest
@@ -92,7 +93,7 @@ def test_to_M8():
9293

9394

9495
class Base:
95-
_offset = None
96+
_offset = None # type: Type[DateOffset]
9697
d = Timestamp(datetime(2008, 1, 2))
9798

9899
timezones = [

0 commit comments

Comments
 (0)