Fill in column a bit (#141)

MarcoGorelli · rgommers · web-flow · commit 705908137c64 · 2023-04-27T13:50:18.000+02:00
* add some missing Column methods

* update isnan to only be about nan

* fixup return types of reductions

* skipna -> skip_nulls

* make fillnan changes only to the column page

* Specify more precisely what dtypes are supported

* Fix Myst rendering issue

* Bug: `median` behaves like `mean`, not like `min`

* Remove rendering of `__hash__`

---------

Co-authored-by: MarcoGorelli &lt;&gt;
Co-authored-by: Ralf Gommers &lt;ralf.gommers@gmail.com&gt;
diff --git a/spec/API_specification/dataframe_api/column_object.py b/spec/API_specification/dataframe_api/column_object.py
@@ -2,7 +2,7 @@
 
 from typing import Sequence
 
-from ._types import dtype
+from ._types import Scalar, dtype
 
 
 __all__ = ['Column']
@@ -35,3 +35,352 @@ def from_sequence(cls, sequence: Sequence[object], dtype: dtype) -> Column:
         -------
         Column
         """
+
+    def __eq__(self, other: Column | Scalar) -> Column:
+        """
+        Compare for equality.
+
+        Parameters
+        ----------
+        other : Column or Scalar
+            If Column, must have same length.
+            "Scalar" here is defined implicitly by what scalar types are allowed
+            for the operation by the underlying dtypes.
+
+        Returns
+        -------
+        Column
+        """
+
+    def __ne__(self, other: Column | Scalar) -> Column:
+        """
+        Compare for non-equality.
+
+        Parameters
+        ----------
+        other : Column or Scalar
+            If Column, must have same length.
+            "Scalar" here is defined implicitly by what scalar types are allowed
+            for the operation by the underlying dtypes.
+
+        Returns
+        -------
+        Column
+        """
+
+    def __ge__(self, other: Column | Scalar) -> Column:
+        """
+        Compare for "greater than or equal to" `other`.
+
+        Parameters
+        ----------
+        other : Column or Scalar
+            If Column, must have same length.
+            "Scalar" here is defined implicitly by what scalar types are allowed
+            for the operation by the underlying dtypes.
+
+        Returns
+        -------
+        Column
+        """
+
+    def __gt__(self, other: Column | Scalar) -> Column:
+        """
+        Compare for "greater than" `other`.
+
+        Parameters
+        ----------
+        other : Column or Scalar
+            If Column, must have same length.
+            "Scalar" here is defined implicitly by what scalar types are allowed
+            for the operation by the underlying dtypes.
+
+        Returns
+        -------
+        Column
+        """
+
+    def __le__(self, other: Column | Scalar) -> Column:
+        """
+        Compare for "less than or equal to" `other`.
+
+        Parameters
+        ----------
+        other : Column or Scalar
+            If Column, must have same length.
+            "Scalar" here is defined implicitly by what scalar types are allowed
+            for the operation by the underlying dtypes.
+
+        Returns
+        -------
+        Column
+        """
+
+    def __lt__(self, other: Column | Scalar) -> Column:
+        """
+        Compare for "less than" `other`.
+
+        Parameters
+        ----------
+        other : Column or Scalar
+            If Column, must have same length.
+            "Scalar" here is defined implicitly by what scalar types are allowed
+            for the operation by the underlying dtypes.
+
+        Returns
+        -------
+        Column
+        """
+
+    def __and__(self, other: Column | Scalar) -> Column:
+        """
+        Add `other` column or scalar to this column.
+
+        Parameters
+        ----------
+        other : Column or Scalar
+            If Column, must have same length.
+            "Scalar" here is defined implicitly by what scalar types are allowed
+            for the operation by the underlying dtypes.
+
+        Returns
+        -------
+        Column
+        """
+
+    def __sub__(self, other: Column | Scalar) -> Column:
+        """
+        Subtract `other` column or scalar from this column.
+
+        Parameters
+        ----------
+        other : Column or Scalar
+            If Column, must have same length.
+            "Scalar" here is defined implicitly by what scalar types are allowed
+            for the operation by the underlying dtypes.
+
+        Returns
+        -------
+        Column
+        """
+
+    def __mul__(self, other: Column | Scalar) -> Column:
+        """
+        Multiply `other` column or scalar with this column.
+
+        Parameters
+        ----------
+        other : Column or Scalar
+            If Column, must have same length.
+            "Scalar" here is defined implicitly by what scalar types are allowed
+            for the operation by the underlying dtypes.
+
+        Returns
+        -------
+        Column
+        """
+
+    def __truediv__(self, other: Column | Scalar) -> Column:
+        """
+        Divide this column by `other` column or scalar. True division, returns floats.
+
+        Parameters
+        ----------
+        other : Column or Scalar
+            If Column, must have same length.
+            "Scalar" here is defined implicitly by what scalar types are allowed
+            for the operation by the underlying dtypes.
+
+        Returns
+        -------
+        Column
+        """
+
+    def __floordiv__(self, other: Column | Scalar) -> Column:
+        """
+        Floor-divide this column by `other` column or scalar.
+
+        Parameters
+        ----------
+        other : Column or Scalar
+            If Column, must have same length.
+            "Scalar" here is defined implicitly by what scalar types are allowed
+            for the operation by the underlying dtypes.
+
+        Returns
+        -------
+        Column
+        """
+
+    def __pow__(self, other: Column | Scalar) -> Column:
+        """
+        Raise this column to the power of `other`.
+
+        Parameters
+        ----------
+        other : Column or Scalar
+            If Column, must have same length.
+            "Scalar" here is defined implicitly by what scalar types are allowed
+            for the operation by the underlying dtypes.
+
+        Returns
+        -------
+        Column
+        """
+
+    def __mod__(self, other: Column | Scalar) -> Column:
+        """
+        Returns modulus of this column by `other` (`%` operator).
+
+        Parameters
+        ----------
+        other : Column or Scalar
+            If Column, must have same length.
+            "Scalar" here is defined implicitly by what scalar types are allowed
+            for the operation by the underlying dtypes.
+
+        Returns
+        -------
+        Column
+        """
+
+    def __divmod__(self, other: Column | Scalar) -> tuple[Column, Column]:
+        """
+        Return quotient and remainder of integer division. See `divmod` builtin function.
+
+        Parameters
+        ----------
+        other : Column or Scalar
+            If Column, must have same length.
+            "Scalar" here is defined implicitly by what scalar types are allowed
+            for the operation by the underlying dtypes.
+
+        Returns
+        -------
+        tuple[Column, Column]
+        """
+
+    def __invert__(self) -> Column:
+        """
+        Invert truthiness of (boolean) elements.
+
+        Raises
+        ------
+        ValueError
+            If the column is not boolean.
+        """
+
+    def any(self, skip_nulls: bool = True) -> bool:
+        """
+        Reduction returns a bool.
+
+        Raises
+        ------
+        ValueError
+            If column is not boolean.
+        """
+
+    def all(self, skip_nulls: bool = True) -> bool:
+        """
+        Reduction returns a bool.
+
+        Raises
+        ------
+        ValueError
+            If column is not boolean.
+        """
+
+    def min(self, skip_nulls: bool = True) -> dtype:
+        """
+        Reduction returns a scalar. Any data type that supports comparisons
+        must be supported. The returned value has the same dtype as the column.
+        """
+
+    def max(self, skip_nulls: bool = True) -> dtype:
+        """
+        Reduction returns a scalar. Any data type that supports comparisons
+        must be supported. The returned value has the same dtype as the column.
+        """
+
+    def sum(self, skip_nulls: bool = True) -> dtype:
+        """
+        Reduction returns a scalar. Must be supported for numerical and
+        datetime data types. The returned value has the same dtype as the
+        column.
+        """
+
+    def prod(self, skip_nulls: bool = True) -> dtype:
+        """
+        Reduction returns a scalar. Must be supported for numerical data types.
+        The returned value has the same dtype as the column.
+        """
+
+    def median(self, skip_nulls: bool = True) -> dtype:
+        """
+        Reduction returns a scalar. Must be supported for numerical and
+        datetime data types. Returns a float for numerical data types, and
+        datetime (with the appropriate timedelta format string) for datetime
+        dtypes.
+        """
+
+    def mean(self, skip_nulls: bool = True) -> dtype:
+        """
+        Reduction returns a scalar. Must be supported for numerical and
+        datetime data types. Returns a float for numerical data types, and
+        datetime (with the appropriate timedelta format string) for datetime
+        dtypes.
+        """
+
+    def std(self, skip_nulls: bool = True) -> dtype:
+        """
+        Reduction returns a scalar. Must be supported for numerical and
+        datetime data types. Returns a float for numerical data types, and
+        datetime (with the appropriate timedelta format string) for datetime
+        dtypes.
+        """
+
+    def var(self, skip_nulls: bool = True) -> dtype:
+        """
+        Reduction returns a scalar. Must be supported for numerical and
+        datetime data types. Returns a float for numerical data types, and
+        datetime (with the appropriate timedelta format string) for datetime
+        dtypes.
+        """
+
+    def isnull(self) -> Column:
+        """
+        Check for 'missing' or 'null' entries.
+
+        Returns
+        -------
+        Column
+
+        See also
+        --------
+        isnan
+
+        Notes
+        -----
+        Does *not* include NaN-like entries.
+        May optionally include 'NaT' values (if present in an implementation),
+        but note that the Standard makes no guarantees about them.
+        """
+
+    def isnan(self) -> Column:
+        """
+        Check for nan entries.
+
+        Returns
+        -------
+        Column
+
+        See also
+        --------
+        isnull
+
+        Notes
+        -----
+        This only checks for 'NaN'.
+        Does *not* include 'missing' or 'null' entries.
+        In particular, does not check for `np.timedelta64('NaT')`.
+        """
diff --git a/spec/API_specification/dataframe_api/dataframe_object.py b/spec/API_specification/dataframe_api/dataframe_object.py
@@ -265,7 +265,7 @@ def sorted_indices(
             If a sequence, it must be the same length as `keys`,
             and determines the direction with which to use each
             key to sort by.
-        nulls_position : {'first', 'last'}
+        nulls_position : ``{'first', 'last'}``
             Whether null values should be placed at the beginning
             or at the end of the result.
             Note that the position of NaNs is unspecified and may
@@ -565,7 +565,7 @@ def all(self, skip_nulls: bool = True) -> DataFrame:
         """
         ...
     
-    def any_rowwise(self, skipna: bool = True) -> Column:
+    def any_rowwise(self, skip_nulls: bool = True) -> Column:
         """
         Reduction returns a Column.
 
@@ -579,7 +579,7 @@ def any_rowwise(self, skipna: bool = True) -> Column:
         """
         ...
 
-    def all_rowwise(self, skipna: bool = True) -> Column:
+    def all_rowwise(self, skip_nulls: bool = True) -> Column:
         """
         Reduction returns a Column.
 
diff --git a/spec/conf.py b/spec/conf.py
@@ -51,7 +51,7 @@
     'members':          True,
     'special-members':  True,
     'undoc-members':    True,
-    'exclude-members': '__annotations__, __dict__,__weakref__,__module__',
+    'exclude-members': '__annotations__, __dict__,__weakref__,__module__,__hash__',
 }
 add_module_names = False
 napoleon_numpy_docstring = True

Original file line number	Diff line number	Diff line change
`@@ -51,7 +51,7 @@`
`51`	`51`	`'members': True,`
`52`	`52`	`'special-members': True,`
`53`	`53`	`'undoc-members': True,`
`54`		`- 'exclude-members': '__annotations__, __dict__,__weakref__,__module__',`
	`54`	`+ 'exclude-members': '__annotations__, __dict__,__weakref__,__module__,__hash__',`
`55`	`55`	`}`
`56`	`56`	`add_module_names = False`
`57`	`57`	`napoleon_numpy_docstring = True`