Remove the check_untyped_defs=False mypy override for pandas.core.arrays.categorical and fix the resulting type errors (closes #28669)

simonjayhawkins · simonjayhawkins · commit 8298620a3a2e · 2019-09-28T23:42:54.000+01:00
diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
@@ -428,7 +428,9 @@ def _values_for_argsort(self) -> np.ndarray:
         # Note: this is used in `ExtensionArray.argsort`.
         return np.array(self)
 
-    def argsort(self, ascending=True, kind="quicksort", *args, **kwargs):
+    def argsort(
+        self, ascending: bool = True, kind: str = "quicksort", *args, **kwargs
+    ) -> np.ndarray:
         """
         Return the indices that would sort this array.
 
@@ -444,7 +446,7 @@ def argsort(self, ascending=True, kind="quicksort", *args, **kwargs):
 
         Returns
         -------
-        index_array : ndarray
+        ndarray
             Array of indices that sort ``self``. If NaN values are contained,
             NaN values are placed at the end.
 
diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
@@ -3,7 +3,7 @@
 import operator
 from shutil import get_terminal_size
 import textwrap
-from typing import Type, Union, cast
+from typing import Optional, Type, Union, cast
 from warnings import warn
 
 import numpy as np
@@ -368,6 +368,7 @@ def __init__(
                 values = _convert_to_list_like(values)
 
                 # By convention, empty lists result in object dtype:
+                sanitize_dtype: Optional[str]
                 if len(values) == 0:
                     sanitize_dtype = "object"
                 else:
@@ -670,6 +671,7 @@ def from_codes(cls, codes, categories=None, ordered=None, dtype=None):
         dtype = CategoricalDtype._from_values_or_dtype(
             categories=categories, ordered=ordered, dtype=dtype
         )
+        msg: Optional[str]
         if dtype.categories is None:
             msg = (
                 "The categories must be provided in 'categories' or "
@@ -1115,7 +1117,7 @@ def remove_categories(self, removals, inplace=False):
             removals = [removals]
 
         removal_set = set(list(removals))
-        not_included = removal_set - set(self.dtype.categories)
+        not_included = list(removal_set - set(self.dtype.categories))
         new_categories = [c for c in self.dtype.categories if c not in removal_set]
 
         # GH 10156
@@ -1561,7 +1563,9 @@ def check_for_ordered(self, op):
     def _values_for_argsort(self):
         return self._codes.copy()
 
-    def argsort(self, ascending=True, kind="quicksort", *args, **kwargs):
+    def argsort(
+        self, ascending: bool = True, kind: str = "quicksort", *args, **kwargs
+    ) -> np.ndarray:
         """
         Return the indices that would sort the Categorical.
 
@@ -1612,7 +1616,14 @@ def argsort(self, ascending=True, kind="quicksort", *args, **kwargs):
         >>> cat.argsort()
         array([2, 0, 1])
         """
-        return super().argsort(ascending=ascending, kind=kind, *args, **kwargs)
+        # https://github.com/python/mypy/issues/2582
+        # error: "argsort" of "ExtensionArray" gets multiple values for keyword
+        #  argument "ascending"  [misc]
+        # error: "argsort" of "ExtensionArray" gets multiple values for keyword
+        #  argument "kind"  [misc]
+        return super().argsort(  # type: ignore[misc]
+            ascending=ascending, kind=kind, *args, **kwargs
+        )
 
     def sort_values(self, inplace=False, ascending=True, na_position="last"):
         """
@@ -2193,8 +2204,8 @@ def _reverse_indexer(self):
             self.codes.astype("int64"), categories.size
         )
         counts = counts.cumsum()
-        result = (r[start:end] for start, end in zip(counts, counts[1:]))
-        result = dict(zip(categories, result))
+        result_ = (r[start:end] for start, end in zip(counts, counts[1:]))
+        result = dict(zip(categories, result_))
         return result
 
     # reduction ops #
diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py
@@ -235,7 +235,9 @@ def lexsort_indexer(keys, orders=None, na_position="last"):
     return indexer_from_factorized(labels, shape)
 
 
-def nargsort(items, kind="quicksort", ascending=True, na_position="last"):
+def nargsort(
+    items, kind: str = "quicksort", ascending: bool = True, na_position: str = "last"
+) -> np.ndarray:
     """
     This is intended to be a drop-in replacement for np.argsort which
     handles NaNs. It adds ascending and na_position parameters.
diff --git a/pandas/tests/arrays/categorical/test_api.py b/pandas/tests/arrays/categorical/test_api.py
@@ -1,3 +1,5 @@
+import re
+
 import numpy as np
 import pytest
 
@@ -340,9 +342,13 @@ def test_remove_categories(self):
         assert res is None
 
         # removal is not in categories
-        with pytest.raises(ValueError):
+        msg = re.escape("removals must all be in old categories: ['c']")
+        with pytest.raises(ValueError, match=msg):
             cat.remove_categories(["c"])
 
+        with pytest.raises(ValueError, match=msg):
+            cat.remove_categories(["c", np.nan])
+
     def test_remove_unused_categories(self):
         c = Categorical(["a", "b", "c", "d", "a"], categories=["a", "b", "c", "d", "e"])
         exp_categories_all = Index(["a", "b", "c", "d", "e"])
diff --git a/setup.cfg b/setup.cfg
@@ -173,9 +173,6 @@ check_untyped_defs=False
 [mypy-pandas._version]
 check_untyped_defs=False
 
-[mypy-pandas.core.arrays.categorical]
-check_untyped_defs=False
-
 [mypy-pandas.core.arrays.interval]
 check_untyped_defs=False