Merge remote-tracking branch 'upstream/master' into krey-master

WillAyd · WillAyd · commit 964580bdaef2 · 2019-10-21T20:56:31.000-07:00
diff --git a/asv_bench/benchmarks/join_merge.py b/asv_bench/benchmarks/join_merge.py
@@ -273,10 +273,10 @@ def time_merge_ordered(self):
 
 
 class MergeAsof:
-    params = [["backward", "forward", "nearest"]]
-    param_names = ["direction"]
+    params = [["backward", "forward", "nearest"], [None, 5]]
+    param_names = ["direction", "tolerance"]
 
-    def setup(self, direction):
+    def setup(self, direction, tolerance):
         one_count = 200000
         two_count = 1000000
 
@@ -303,6 +303,9 @@ def setup(self, direction):
         df1["time32"] = np.int32(df1.time)
         df2["time32"] = np.int32(df2.time)
 
+        df1["timeu64"] = np.uint64(df1.time)
+        df2["timeu64"] = np.uint64(df2.time)
+
         self.df1a = df1[["time", "value1"]]
         self.df2a = df2[["time", "value2"]]
         self.df1b = df1[["time", "key", "value1"]]
@@ -313,22 +316,52 @@ def setup(self, direction):
         self.df2d = df2[["time32", "value2"]]
         self.df1e = df1[["time", "key", "key2", "value1"]]
         self.df2e = df2[["time", "key", "key2", "value2"]]
+        self.df1f = df1[["timeu64", "value1"]]
+        self.df2f = df2[["timeu64", "value2"]]
+
+    def time_on_int(self, direction, tolerance):
+        merge_asof(
+            self.df1a, self.df2a, on="time", direction=direction, tolerance=tolerance
+        )
 
-    def time_on_int(self, direction):
-        merge_asof(self.df1a, self.df2a, on="time", direction=direction)
+    def time_on_int32(self, direction, tolerance):
+        merge_asof(
+            self.df1d, self.df2d, on="time32", direction=direction, tolerance=tolerance
+        )
 
-    def time_on_int32(self, direction):
-        merge_asof(self.df1d, self.df2d, on="time32", direction=direction)
+    def time_on_uint64(self, direction, tolerance):
+        merge_asof(
+            self.df1f, self.df2f, on="timeu64", direction=direction, tolerance=tolerance
+        )
 
-    def time_by_object(self, direction):
-        merge_asof(self.df1b, self.df2b, on="time", by="key", direction=direction)
+    def time_by_object(self, direction, tolerance):
+        merge_asof(
+            self.df1b,
+            self.df2b,
+            on="time",
+            by="key",
+            direction=direction,
+            tolerance=tolerance,
+        )
 
-    def time_by_int(self, direction):
-        merge_asof(self.df1c, self.df2c, on="time", by="key2", direction=direction)
+    def time_by_int(self, direction, tolerance):
+        merge_asof(
+            self.df1c,
+            self.df2c,
+            on="time",
+            by="key2",
+            direction=direction,
+            tolerance=tolerance,
+        )
 
-    def time_multiby(self, direction):
+    def time_multiby(self, direction, tolerance):
         merge_asof(
-            self.df1e, self.df2e, on="time", by=["key", "key2"], direction=direction
+            self.df1e,
+            self.df2e,
+            on="time",
+            by=["key", "key2"],
+            direction=direction,
+            tolerance=tolerance,
         )
 
 
diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst
@@ -355,7 +355,8 @@ I/O
 - Bug in :func:`read_hdf` closing stores that it didn't open when Exceptions are raised (:issue:`28699`)
 - Bug in :meth:`DataFrame.read_json` where using ``orient="index"`` would not maintain the order (:issue:`28557`)
 - Bug in :meth:`DataFrame.to_html` where the length of the ``formatters`` argument was not verified (:issue:`28469`)
-- Bug in :meth:`DataFrame.to_clipboard` which did not work reliably in ipython (:issue:`22707`)
+- Bug in :class:`pandas.io.formats.style.Styler` formatting for floating values not displaying decimals correctly (:issue:`13257`)
+- Bug in :meth:`DataFrame.to_clipboard` which did not work reliably in ipython (:issue:`22707`)
 
 Plotting
 ^^^^^^^^
@@ -373,8 +374,9 @@ Plotting
 Groupby/resample/rolling
 ^^^^^^^^^^^^^^^^^^^^^^^^
 
--
 - Bug in :meth:`DataFrame.rolling` not allowing for rolling over datetimes when ``axis=1`` (:issue: `28192`)
+- Bug in :meth:`DataFrame.rolling` not allowing rolling over multi-index levels (:issue:`15584`).
+- Bug in :meth:`DataFrame.rolling` not allowing rolling on monotonic decreasing time indexes (:issue:`19248`).
 - Bug in :meth:`DataFrame.groupby` not offering selection by column name when ``axis=1`` (:issue:`27614`)
 - Bug in :meth:`DataFrameGroupby.agg` not able to use lambda function with named aggregation (:issue:`27519`)
 - Bug in :meth:`DataFrame.groupby` losing column name information when grouping by a categorical column (:issue:`28787`)
@@ -388,6 +390,7 @@ Reshaping
 - Bug in :func:`merge`, did not append suffixes correctly with MultiIndex (:issue:`28518`)
 - :func:`qcut` and :func:`cut` now handle boolean input (:issue:`20303`)
 - Fix to ensure all int dtypes can be used in :func:`merge_asof` when using a tolerance value. Previously every non-int64 type would raise an erroneous ``MergeError`` (:issue:`28870`).
+- Better error message in :func:`get_dummies` when ``columns`` isn't a list-like value (:issue:`28383`)
 
 Sparse
 ^^^^^^
diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py
@@ -864,6 +864,8 @@ def get_dummies(
         # determine columns being encoded
         if columns is None:
             data_to_encode = data.select_dtypes(include=dtypes_to_encode)
+        elif not is_list_like(columns):
+            raise TypeError("Input must be a list-like for parameter `columns`")
         else:
             data_to_encode = data[columns]
 
diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py
@@ -70,7 +70,7 @@ def __init__(
         center: Optional[bool] = False,
         win_type: Optional[str] = None,
         axis: Axis = 0,
-        on: Optional[str] = None,
+        on: Optional[Union[str, Index]] = None,
         closed: Optional[str] = None,
         **kwargs
     ):
@@ -126,7 +126,7 @@ def _create_blocks(self):
         obj = self._selected_obj
 
         # filter out the on from the object
-        if self.on is not None:
+        if self.on is not None and not isinstance(self.on, Index):
             if obj.ndim == 2:
                 obj = obj.reindex(columns=obj.columns.difference([self.on]), copy=False)
         blocks = obj._to_dict_of_blocks(copy=False).values()
@@ -637,10 +637,10 @@ class Window(_Window):
         Provide a window type. If ``None``, all points are evenly weighted.
         See the notes below for further information.
     on : str, optional
-        For a DataFrame, a datetime-like column on which to calculate the rolling
-        window, rather than the DataFrame's index. Provided integer column is
-        ignored and excluded from result since an integer index is not used to
-        calculate the rolling window.
+        For a DataFrame, a datetime-like column or MultiIndex level on which
+        to calculate the rolling window, rather than the DataFrame's index.
+        Provided integer column is ignored and excluded from result since
+        an integer index is not used to calculate the rolling window.
     axis : int or str, default 0
     closed : str, default None
         Make the interval closed on the 'right', 'left', 'both' or
@@ -1651,18 +1651,19 @@ def is_datetimelike(self):
 
     @cache_readonly
     def _on(self):
-
         if self.on is None:
             if self.axis == 0:
                 return self.obj.index
             elif self.axis == 1:
                 return self.obj.columns
+        elif isinstance(self.on, Index):
+            return self.on
         elif isinstance(self.obj, ABCDataFrame) and self.on in self.obj.columns:
             return Index(self.obj[self.on])
         else:
             raise ValueError(
                 "invalid on specified as {0}, "
-                "must be a column (if DataFrame) "
+                "must be a column (of DataFrame), an Index "
                 "or None".format(self.on)
             )
 
@@ -1706,10 +1707,12 @@ def validate(self):
 
     def _validate_monotonic(self):
         """
-        Validate on is_monotonic.
+        Validate monotonic (increasing or decreasing).
         """
-        if not self._on.is_monotonic:
-            formatted = self.on or "index"
+        if not (self._on.is_monotonic_increasing or self._on.is_monotonic_decreasing):
+            formatted = self.on
+            if self.on is None:
+                formatted = "index"
             raise ValueError("{0} must be monotonic".format(formatted))
 
     def _validate_freq(self):
diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py
@@ -156,7 +156,8 @@ def __init__(
 
         def default_display_func(x):
             if is_float(x):
-                return "{:>.{precision}g}".format(x, precision=self.precision)
+                display_format = "{0:.{precision}f}".format(x, precision=self.precision)
+                return display_format
             else:
                 return x
 
diff --git a/pandas/tests/io/formats/test_style.py b/pandas/tests/io/formats/test_style.py
@@ -1157,28 +1157,51 @@ def test_display_format_raises(self):
         with pytest.raises(TypeError):
             df.style.format(True)
 
+    def test_display_set_precision(self):
+        # Issue #13257
+        df = pd.DataFrame(data=[[1.0, 2.0090], [3.2121, 4.566]], columns=["a", "b"])
+        s = Styler(df)
+
+        ctx = s.set_precision(1)._translate()
+
+        assert s.precision == 1
+        assert ctx["body"][0][1]["display_value"] == "1.0"
+        assert ctx["body"][0][2]["display_value"] == "2.0"
+        assert ctx["body"][1][1]["display_value"] == "3.2"
+        assert ctx["body"][1][2]["display_value"] == "4.6"
+
+        ctx = s.set_precision(2)._translate()
+        assert s.precision == 2
+        assert ctx["body"][0][1]["display_value"] == "1.00"
+        assert ctx["body"][0][2]["display_value"] == "2.01"
+        assert ctx["body"][1][1]["display_value"] == "3.21"
+        assert ctx["body"][1][2]["display_value"] == "4.57"
+
+        ctx = s.set_precision(3)._translate()
+        assert s.precision == 3
+        assert ctx["body"][0][1]["display_value"] == "1.000"
+        assert ctx["body"][0][2]["display_value"] == "2.009"
+        assert ctx["body"][1][1]["display_value"] == "3.212"
+        assert ctx["body"][1][2]["display_value"] == "4.566"
+
     def test_display_subset(self):
         df = pd.DataFrame([[0.1234, 0.1234], [1.1234, 1.1234]], columns=["a", "b"])
         ctx = df.style.format(
             {"a": "{:0.1f}", "b": "{0:.2%}"}, subset=pd.IndexSlice[0, :]
         )._translate()
         expected = "0.1"
-        assert ctx["body"][0][1]["display_value"] == expected
-        assert ctx["body"][1][1]["display_value"] == "1.1234"
-        assert ctx["body"][0][2]["display_value"] == "12.34%"
-
-        raw_11 = "1.1234"
-        ctx = df.style.format("{:0.1f}", subset=pd.IndexSlice[0, :])._translate()
+        raw_11 = "1.123400"
         assert ctx["body"][0][1]["display_value"] == expected
         assert ctx["body"][1][1]["display_value"] == raw_11
+        assert ctx["body"][0][2]["display_value"] == "12.34%"
 
         ctx = df.style.format("{:0.1f}", subset=pd.IndexSlice[0, :])._translate()
         assert ctx["body"][0][1]["display_value"] == expected
         assert ctx["body"][1][1]["display_value"] == raw_11
 
         ctx = df.style.format("{:0.1f}", subset=pd.IndexSlice["a"])._translate()
         assert ctx["body"][0][1]["display_value"] == expected
-        assert ctx["body"][0][2]["display_value"] == "0.1234"
+        assert ctx["body"][0][2]["display_value"] == "0.123400"
 
         ctx = df.style.format("{:0.1f}", subset=pd.IndexSlice[0, "a"])._translate()
         assert ctx["body"][0][1]["display_value"] == expected
@@ -1189,8 +1212,8 @@ def test_display_subset(self):
         )._translate()
         assert ctx["body"][0][1]["display_value"] == expected
         assert ctx["body"][1][1]["display_value"] == "1.1"
-        assert ctx["body"][0][2]["display_value"] == "0.1234"
-        assert ctx["body"][1][2]["display_value"] == "1.1234"
+        assert ctx["body"][0][2]["display_value"] == "0.123400"
+        assert ctx["body"][1][2]["display_value"] == raw_11
 
     def test_display_dict(self):
         df = pd.DataFrame([[0.1234, 0.1234], [1.1234, 1.1234]], columns=["a", "b"])
diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py
@@ -1437,6 +1437,17 @@ def test_merge_on_ints_floats(self, int_vals, float_vals, exp_vals):
         result = B.merge(A, left_on="Y", right_on="X")
         assert_frame_equal(result, expected[["Y", "X"]])
 
+    def test_merge_key_dtype_cast(self):
+        # GH 17044
+        df1 = DataFrame({"key": [1.0, 2.0], "v1": [10, 20]}, columns=["key", "v1"])
+        df2 = DataFrame({"key": [2], "v2": [200]}, columns=["key", "v2"])
+        result = df1.merge(df2, on="key", how="left")
+        expected = DataFrame(
+            {"key": [1.0, 2.0], "v1": [10, 20], "v2": [np.nan, 200.0]},
+            columns=["key", "v1", "v2"],
+        )
+        tm.assert_frame_equal(result, expected)
+
     def test_merge_on_ints_floats_warning(self):
         # GH 16572
         # merge will produce a warning when merging on int and
diff --git a/pandas/tests/reshape/test_reshape.py b/pandas/tests/reshape/test_reshape.py
@@ -608,6 +608,23 @@ def test_get_dummies_all_sparse(self):
         )
         tm.assert_frame_equal(result, expected)
 
+    @pytest.mark.parametrize("values", ["baz"])
+    def test_get_dummies_with_string_values(self, values):
+        # issue #28383
+        df = pd.DataFrame(
+            {
+                "bar": [1, 2, 3, 4, 5, 6],
+                "foo": ["one", "one", "one", "two", "two", "two"],
+                "baz": ["A", "B", "C", "A", "B", "C"],
+                "zoo": ["x", "y", "z", "q", "w", "t"],
+            }
+        )
+
+        msg = "Input must be a list-like for parameter `columns`"
+
+        with pytest.raises(TypeError, match=msg):
+            pd.get_dummies(df, columns=values)
+
 
 class TestCategoricalReshape:
     def test_reshaping_multi_index_categorical(self):
diff --git a/pandas/tests/tseries/offsets/test_offsets.py b/pandas/tests/tseries/offsets/test_offsets.py
@@ -1,4 +1,5 @@
 from datetime import date, datetime, time as dt_time, timedelta
+from typing import Type
 
 import numpy as np
 import pytest
@@ -92,7 +93,7 @@ def test_to_M8():
 
 
 class Base:
-    _offset = None
+    _offset = None  # type: Type[DateOffset]
     d = Timestamp(datetime(2008, 1, 2))
 
     timezones = [
diff --git a/pandas/tests/window/test_timeseries_window.py b/pandas/tests/window/test_timeseries_window.py
diff --git a/setup.cfg b/setup.cfg