Skip to content

API: consistently raise TypeError for invalid-typed fill_value #37733

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Nov 13, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions doc/source/whatsnew/v1.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -322,6 +322,10 @@ Other API changes
^^^^^^^^^^^^^^^^^

- Sorting in descending order is now stable for :meth:`Series.sort_values` and :meth:`Index.sort_values` for DateTime-like :class:`Index` subclasses. This will affect sort order when sorting :class:`DataFrame` on multiple columns, sorting with a key function that produces duplicates, or requesting the sorting index when using :meth:`Index.sort_values`. When using :meth:`Series.value_counts`, count of missing values is no longer the last in the list of duplicate counts, and its position corresponds to the position in the original :class:`Series`. When using :meth:`Index.sort_values` for DateTime-like :class:`Index` subclasses, NaTs ignored the ``na_position`` argument and were sorted to the beggining. Now they respect ``na_position``, the default being ``last``, same as other :class:`Index` subclasses. (:issue:`35992`)
- Passing an invalid ``fill_value`` to :meth:`Categorical.take`, :meth:`DatetimeArray.take`, :meth:`TimedeltaArray.take`, :meth:`PeriodArray.take` now raises ``TypeError`` instead of ``ValueError`` (:issue:`37733`)
- Passing an invalid ``fill_value`` to :meth:`Series.shift` with a ``CategoricalDtype`` now raises ``TypeError`` instead of ``ValueError`` (:issue:`37733`)
- Passing an invalid value to :meth:`IntervalIndex.insert` or :meth:`CategoricalIndex.insert` now raises a ``TypeError`` instead of a ``ValueError`` (:issue:`37733`)
- Attempting to reindex a :class:`Series` with a :class:`CategoricalIndex` with an invalid ``fill_value`` now raises ``TypeError`` instead of ``ValueError`` (:issue:`37733`)

.. ---------------------------------------------------------------------------

Expand Down
4 changes: 2 additions & 2 deletions pandas/core/arrays/_mixins.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ def take(
def _validate_fill_value(self, fill_value):
"""
If a fill_value is passed to `take` convert it to a representation
suitable for self._ndarray, raising ValueError if this is not possible.
suitable for self._ndarray, raising TypeError if this is not possible.

Parameters
----------
Expand All @@ -86,7 +86,7 @@ def _validate_fill_value(self, fill_value):

Raises
------
ValueError
TypeError
"""
raise AbstractMethodError(self)

Expand Down
7 changes: 3 additions & 4 deletions pandas/core/arrays/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -1190,7 +1190,7 @@ def _validate_searchsorted_value(self, value):
def _validate_fill_value(self, fill_value):
"""
Convert a user-facing fill_value to a representation to use with our
underlying ndarray, raising ValueError if this is not possible.
underlying ndarray, raising TypeError if this is not possible.

Parameters
----------
Expand All @@ -1202,15 +1202,15 @@ def _validate_fill_value(self, fill_value):

Raises
------
ValueError
TypeError
"""

if is_valid_nat_for_dtype(fill_value, self.categories.dtype):
fill_value = -1
elif fill_value in self.categories:
fill_value = self._unbox_scalar(fill_value)
else:
raise ValueError(
raise TypeError(
f"'fill_value={fill_value}' is not present "
"in this Categorical's categories"
)
Expand Down Expand Up @@ -1659,7 +1659,6 @@ def fillna(self, value=None, method=None, limit=None):
# We get ndarray or Categorical if called via Series.fillna,
# where it will unwrap another aligned Series before getting here
codes[mask] = new_codes[mask]

else:
codes[mask] = new_codes

Expand Down
16 changes: 3 additions & 13 deletions pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -462,7 +462,7 @@ def _validate_comparison_value(self, other):
def _validate_fill_value(self, fill_value):
"""
If a fill_value is passed to `take` convert it to an i8 representation,
raising ValueError if this is not possible.
raising TypeError if this is not possible.

Parameters
----------
Expand All @@ -474,19 +474,9 @@ def _validate_fill_value(self, fill_value):

Raises
------
ValueError
TypeError
"""
msg = (
f"'fill_value' should be a {self._scalar_type}. "
f"Got '{str(fill_value)}'."
)
try:
return self._validate_scalar(fill_value)
except TypeError as err:
if "Cannot compare tz-naive and tz-aware" in str(err):
# tzawareness-compat
raise
raise ValueError(msg) from err
return self._validate_scalar(fill_value)

def _validate_shift_value(self, fill_value):
# TODO(2.0): once this deprecation is enforced, use _validate_fill_value
Expand Down
15 changes: 2 additions & 13 deletions pandas/core/arrays/interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -649,7 +649,7 @@ def fillna(self, value=None, method=None, limit=None):
if limit is not None:
raise TypeError("limit is not supported for IntervalArray.")

value_left, value_right = self._validate_fillna_value(value)
value_left, value_right = self._validate_fill_value(value)

left = self.left.fillna(value=value_left)
right = self.right.fillna(value=value_right)
Expand Down Expand Up @@ -870,25 +870,14 @@ def _validate_scalar(self, value):
# GH#18295
left = right = value
else:
raise ValueError(
raise TypeError(
"can only insert Interval objects and NA into an IntervalArray"
)
return left, right

def _validate_fill_value(self, value):
return self._validate_scalar(value)

def _validate_fillna_value(self, value):
# This mirrors Datetimelike._validate_fill_value
try:
return self._validate_scalar(value)
except ValueError as err:
msg = (
"'IntervalArray.fillna' only supports filling with a "
f"scalar 'pandas.Interval'. Got a '{type(value).__name__}' instead."
)
raise TypeError(msg) from err

def _validate_setitem_value(self, value):
needs_float_conversion = False

Expand Down
7 changes: 6 additions & 1 deletion pandas/core/indexes/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -3660,7 +3660,12 @@ def insert(self, loc: int, item):
# must insert at end otherwise you have to recompute all the
# other codes
lev_loc = len(level)
level = level.insert(lev_loc, k)
try:
level = level.insert(lev_loc, k)
except TypeError:
# TODO: Should this be done inside insert?
# TODO: smarter casting rules?
level = level.astype(object).insert(lev_loc, k)
else:
lev_loc = level.get_loc(k)

Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/arrays/categorical/test_take.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ def test_take_fill_value_new_raises(self):
# https://github.com/pandas-dev/pandas/issues/23296
cat = Categorical(["a", "b", "c"])
xpr = r"'fill_value=d' is not present in this Categorical's categories"
with pytest.raises(ValueError, match=xpr):
with pytest.raises(TypeError, match=xpr):
cat.take([0, 1, -1], fill_value="d", allow_fill=True)

def test_take_nd_deprecated(self):
Expand Down
39 changes: 17 additions & 22 deletions pandas/tests/arrays/test_datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,8 +145,8 @@ def test_take_fill_raises(self, fill_value):

arr = self.array_cls._simple_new(data, freq="D")

msg = f"'fill_value' should be a {self.dtype}. Got '{fill_value}'"
with pytest.raises(ValueError, match=msg):
msg = f"value should be a '{arr._scalar_type.__name__}' or 'NaT'. Got"
with pytest.raises(TypeError, match=msg):
arr.take([0, 1], allow_fill=True, fill_value=fill_value)

def test_take_fill(self):
Expand All @@ -169,8 +169,8 @@ def test_take_fill_str(self, arr1d):
expected = arr1d[[-1, 1]]
tm.assert_equal(result, expected)

msg = r"'fill_value' should be a <.*>\. Got 'foo'"
with pytest.raises(ValueError, match=msg):
msg = f"value should be a '{arr1d._scalar_type.__name__}' or 'NaT'. Got"
with pytest.raises(TypeError, match=msg):
arr1d.take([-1, 1], allow_fill=True, fill_value="foo")

def test_concat_same_type(self):
Expand Down Expand Up @@ -745,13 +745,12 @@ def test_take_fill_valid(self, arr1d):
result = arr.take([-1, 1], allow_fill=True, fill_value=now)
assert result[0] == now

msg = f"'fill_value' should be a {self.dtype}. Got '0 days 00:00:00'."
with pytest.raises(ValueError, match=msg):
msg = f"value should be a '{arr1d._scalar_type.__name__}' or 'NaT'. Got"
with pytest.raises(TypeError, match=msg):
# fill_value Timedelta invalid
arr.take([-1, 1], allow_fill=True, fill_value=now - now)

msg = f"'fill_value' should be a {self.dtype}. Got '2014Q1'."
with pytest.raises(ValueError, match=msg):
with pytest.raises(TypeError, match=msg):
# fill_value Period invalid
arr.take([-1, 1], allow_fill=True, fill_value=pd.Period("2014Q1"))

Expand All @@ -763,14 +762,13 @@ def test_take_fill_valid(self, arr1d):
arr.take([-1, 1], allow_fill=True, fill_value=now)

value = pd.NaT.value
msg = f"'fill_value' should be a {self.dtype}. Got '{value}'."
with pytest.raises(ValueError, match=msg):
msg = f"value should be a '{arr1d._scalar_type.__name__}' or 'NaT'. Got"
with pytest.raises(TypeError, match=msg):
# require NaT, not iNaT, as it could be confused with an integer
arr.take([-1, 1], allow_fill=True, fill_value=value)

value = np.timedelta64("NaT", "ns")
msg = f"'fill_value' should be a {self.dtype}. Got '{str(value)}'."
with pytest.raises(ValueError, match=msg):
with pytest.raises(TypeError, match=msg):
# require appropriate-dtype if we have a NA value
arr.take([-1, 1], allow_fill=True, fill_value=value)

Expand Down Expand Up @@ -932,20 +930,18 @@ def test_take_fill_valid(self, timedelta_index):

now = pd.Timestamp.now()
value = now
msg = f"'fill_value' should be a {self.dtype}. Got '{value}'."
with pytest.raises(ValueError, match=msg):
msg = f"value should be a '{arr._scalar_type.__name__}' or 'NaT'. Got"
with pytest.raises(TypeError, match=msg):
# fill_value Timestamp invalid
arr.take([0, 1], allow_fill=True, fill_value=value)

value = now.to_period("D")
msg = f"'fill_value' should be a {self.dtype}. Got '{value}'."
with pytest.raises(ValueError, match=msg):
with pytest.raises(TypeError, match=msg):
# fill_value Period invalid
arr.take([0, 1], allow_fill=True, fill_value=value)

value = np.datetime64("NaT", "ns")
msg = f"'fill_value' should be a {self.dtype}. Got '{str(value)}'."
with pytest.raises(ValueError, match=msg):
with pytest.raises(TypeError, match=msg):
# require appropriate-dtype if we have a NA value
arr.take([-1, 1], allow_fill=True, fill_value=value)

Expand Down Expand Up @@ -981,14 +977,13 @@ def test_take_fill_valid(self, arr1d):
arr = arr1d

value = pd.NaT.value
msg = f"'fill_value' should be a {self.dtype}. Got '{value}'."
with pytest.raises(ValueError, match=msg):
msg = f"value should be a '{arr1d._scalar_type.__name__}' or 'NaT'. Got"
with pytest.raises(TypeError, match=msg):
# require NaT, not iNaT, as it could be confused with an integer
arr.take([-1, 1], allow_fill=True, fill_value=value)

value = np.timedelta64("NaT", "ns")
msg = f"'fill_value' should be a {self.dtype}. Got '{str(value)}'."
with pytest.raises(ValueError, match=msg):
with pytest.raises(TypeError, match=msg):
# require appropriate-dtype if we have a NA value
arr.take([-1, 1], allow_fill=True, fill_value=value)

Expand Down
3 changes: 2 additions & 1 deletion pandas/tests/arrays/test_period.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,8 @@ def test_take_raises():
with pytest.raises(IncompatibleFrequency, match="freq"):
arr.take([0, -1], allow_fill=True, fill_value=pd.Period("2000", freq="W"))

with pytest.raises(ValueError, match="foo"):
msg = "value should be a 'Period' or 'NaT'. Got 'str' instead"
with pytest.raises(TypeError, match=msg):
arr.take([0, -1], allow_fill=True, fill_value="foo")


Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/extension/test_interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,8 +136,8 @@ def test_fillna_limit_backfill(self):
def test_fillna_series(self):
pass

def test_non_scalar_raises(self, data_missing):
msg = "Got a 'list' instead."
def test_fillna_non_scalar_raises(self, data_missing):
msg = "can only insert Interval objects and NA into an IntervalArray"
with pytest.raises(TypeError, match=msg):
data_missing.fillna([1, 1])

Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/frame/test_stack_unstack.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,7 @@ def test_unstack_fill_frame_categorical(self):

# Fill with non-category results in a ValueError
msg = r"'fill_value=d' is not present in"
with pytest.raises(ValueError, match=msg):
with pytest.raises(TypeError, match=msg):
data.unstack(fill_value="d")

# Fill with category value replaces missing values as expected
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/indexes/categorical/test_category.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ def test_insert(self):

# invalid
msg = "'fill_value=d' is not present in this Categorical's categories"
with pytest.raises(ValueError, match=msg):
with pytest.raises(TypeError, match=msg):
ci.insert(0, "d")

# GH 18295 (test missing)
Expand All @@ -184,7 +184,7 @@ def test_insert(self):
def test_insert_na_mismatched_dtype(self):
ci = CategoricalIndex([0, 1, 1])
msg = "'fill_value=NaT' is not present in this Categorical's categories"
with pytest.raises(ValueError, match=msg):
with pytest.raises(TypeError, match=msg):
ci.insert(0, pd.NaT)

def test_delete(self):
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/indexes/categorical/test_reindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,5 +57,5 @@ def test_reindex_missing_category(self):
# GH: 18185
ser = Series([1, 2, 3, 1], dtype="category")
msg = "'fill_value=-1' is not present in this Categorical's categories"
with pytest.raises(ValueError, match=msg):
with pytest.raises(TypeError, match=msg):
ser.reindex([1, 2, 3, 4, 5], fill_value=-1)
4 changes: 2 additions & 2 deletions pandas/tests/indexes/interval/test_interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,7 @@ def test_insert(self, data):

# invalid type
msg = "can only insert Interval objects and NA into an IntervalArray"
with pytest.raises(ValueError, match=msg):
with pytest.raises(TypeError, match=msg):
data.insert(1, "foo")

# invalid closed
Expand All @@ -213,7 +213,7 @@ def test_insert(self, data):
if data.left.dtype.kind not in ["m", "M"]:
# trying to insert pd.NaT into a numeric-dtyped Index should cast/raise
msg = "can only insert Interval objects and NA into an IntervalArray"
with pytest.raises(ValueError, match=msg):
with pytest.raises(TypeError, match=msg):
result = data.insert(1, pd.NaT)
else:
result = data.insert(1, pd.NaT)
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/indexing/test_categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,9 +60,9 @@ def test_loc_scalar(self):
df.loc["d"] = 10

msg = "'fill_value=d' is not present in this Categorical's categories"
with pytest.raises(ValueError, match=msg):
with pytest.raises(TypeError, match=msg):
df.loc["d", "A"] = 10
with pytest.raises(ValueError, match=msg):
with pytest.raises(TypeError, match=msg):
df.loc["d", "C"] = 10

with pytest.raises(KeyError, match="^1$"):
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/series/methods/test_shift.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@ def test_shift_categorical_fill_value(self):

# check for incorrect fill_value
msg = "'fill_value=f' is not present in this Categorical's categories"
with pytest.raises(ValueError, match=msg):
with pytest.raises(TypeError, match=msg):
ts.shift(1, fill_value="f")

def test_shift_dst(self):
Expand Down