Skip to content

Commit 7059081

Browse files
Fill in column a bit (#141)
* add some missing Column methods * update isnan to only be about nan * fixup return types of reductions * skipna -> skip_nulls * make fillnan changes only to the column page * Specify more precisely what dtypes are supported * Fix Myst rendering issue * Bug: `median` behaves like `mean`, not like `min` * Remove rendering of `__hash__` --------- Co-authored-by: MarcoGorelli <> Co-authored-by: Ralf Gommers <[email protected]>
1 parent c1b1ab1 commit 7059081

File tree

3 files changed

+354
-5
lines changed

3 files changed

+354
-5
lines changed

spec/API_specification/dataframe_api/column_object.py

Lines changed: 350 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
from typing import Sequence
44

5-
from ._types import dtype
5+
from ._types import Scalar, dtype
66

77

88
__all__ = ['Column']
@@ -35,3 +35,352 @@ def from_sequence(cls, sequence: Sequence[object], dtype: dtype) -> Column:
3535
-------
3636
Column
3737
"""
38+
39+
def __eq__(self, other: Column | Scalar) -> Column:
40+
"""
41+
Compare for equality.
42+
43+
Parameters
44+
----------
45+
other : Column or Scalar
46+
If Column, must have same length.
47+
"Scalar" here is defined implicitly by what scalar types are allowed
48+
for the operation by the underlying dtypes.
49+
50+
Returns
51+
-------
52+
Column
53+
"""
54+
55+
def __ne__(self, other: Column | Scalar) -> Column:
56+
"""
57+
Compare for non-equality.
58+
59+
Parameters
60+
----------
61+
other : Column or Scalar
62+
If Column, must have same length.
63+
"Scalar" here is defined implicitly by what scalar types are allowed
64+
for the operation by the underlying dtypes.
65+
66+
Returns
67+
-------
68+
Column
69+
"""
70+
71+
def __ge__(self, other: Column | Scalar) -> Column:
72+
"""
73+
Compare for "greater than or equal to" `other`.
74+
75+
Parameters
76+
----------
77+
other : Column or Scalar
78+
If Column, must have same length.
79+
"Scalar" here is defined implicitly by what scalar types are allowed
80+
for the operation by the underlying dtypes.
81+
82+
Returns
83+
-------
84+
Column
85+
"""
86+
87+
def __gt__(self, other: Column | Scalar) -> Column:
88+
"""
89+
Compare for "greater than" `other`.
90+
91+
Parameters
92+
----------
93+
other : Column or Scalar
94+
If Column, must have same length.
95+
"Scalar" here is defined implicitly by what scalar types are allowed
96+
for the operation by the underlying dtypes.
97+
98+
Returns
99+
-------
100+
Column
101+
"""
102+
103+
def __le__(self, other: Column | Scalar) -> Column:
104+
"""
105+
Compare for "less than or equal to" `other`.
106+
107+
Parameters
108+
----------
109+
other : Column or Scalar
110+
If Column, must have same length.
111+
"Scalar" here is defined implicitly by what scalar types are allowed
112+
for the operation by the underlying dtypes.
113+
114+
Returns
115+
-------
116+
Column
117+
"""
118+
119+
def __lt__(self, other: Column | Scalar) -> Column:
120+
"""
121+
Compare for "less than" `other`.
122+
123+
Parameters
124+
----------
125+
other : Column or Scalar
126+
If Column, must have same length.
127+
"Scalar" here is defined implicitly by what scalar types are allowed
128+
for the operation by the underlying dtypes.
129+
130+
Returns
131+
-------
132+
Column
133+
"""
134+
135+
def __and__(self, other: Column | Scalar) -> Column:
136+
"""
137+
Add `other` column or scalar to this column.
138+
139+
Parameters
140+
----------
141+
other : Column or Scalar
142+
If Column, must have same length.
143+
"Scalar" here is defined implicitly by what scalar types are allowed
144+
for the operation by the underlying dtypes.
145+
146+
Returns
147+
-------
148+
Column
149+
"""
150+
151+
def __sub__(self, other: Column | Scalar) -> Column:
152+
"""
153+
Subtract `other` column or scalar from this column.
154+
155+
Parameters
156+
----------
157+
other : Column or Scalar
158+
If Column, must have same length.
159+
"Scalar" here is defined implicitly by what scalar types are allowed
160+
for the operation by the underlying dtypes.
161+
162+
Returns
163+
-------
164+
Column
165+
"""
166+
167+
def __mul__(self, other: Column | Scalar) -> Column:
168+
"""
169+
Multiply this column by `other` column or scalar.
170+
171+
Parameters
172+
----------
173+
other : Column or Scalar
174+
If Column, must have same length.
175+
"Scalar" here is defined implicitly by what scalar types are allowed
176+
for the operation by the underlying dtypes.
177+
178+
Returns
179+
-------
180+
Column
181+
"""
182+
183+
def __truediv__(self, other: Column | Scalar) -> Column:
184+
"""
185+
Divide this column by `other` column or scalar. True division, returns floats.
186+
187+
Parameters
188+
----------
189+
other : Column or Scalar
190+
If Column, must have same length.
191+
"Scalar" here is defined implicitly by what scalar types are allowed
192+
for the operation by the underlying dtypes.
193+
194+
Returns
195+
-------
196+
Column
197+
"""
198+
199+
def __floordiv__(self, other: Column | Scalar) -> Column:
200+
"""
201+
Floor-divide this column by `other` column or scalar.
202+
203+
Parameters
204+
----------
205+
other : Column or Scalar
206+
If Column, must have same length.
207+
"Scalar" here is defined implicitly by what scalar types are allowed
208+
for the operation by the underlying dtypes.
209+
210+
Returns
211+
-------
212+
Column
213+
"""
214+
215+
def __pow__(self, other: Column | Scalar) -> Column:
216+
"""
217+
Raise this column to the power of `other`.
218+
219+
Parameters
220+
----------
221+
other : Column or Scalar
222+
If Column, must have same length.
223+
"Scalar" here is defined implicitly by what scalar types are allowed
224+
for the operation by the underlying dtypes.
225+
226+
Returns
227+
-------
228+
Column
229+
"""
230+
231+
def __mod__(self, other: Column | Scalar) -> Column:
232+
"""
233+
Returns modulus of this column by `other` (`%` operator).
234+
235+
Parameters
236+
----------
237+
other : Column or Scalar
238+
If Column, must have same length.
239+
"Scalar" here is defined implicitly by what scalar types are allowed
240+
for the operation by the underlying dtypes.
241+
242+
Returns
243+
-------
244+
Column
245+
"""
246+
247+
def __divmod__(self, other: Column | Scalar) -> tuple[Column, Column]:
248+
"""
249+
Return quotient and remainder of integer division. See `divmod` builtin function.
250+
251+
Parameters
252+
----------
253+
other : Column or Scalar
254+
If Column, must have same length.
255+
"Scalar" here is defined implicitly by what scalar types are allowed
256+
for the operation by the underlying dtypes.
257+
258+
Returns
259+
-------
260+
tuple[Column, Column]
261+
"""
262+
263+
def __invert__(self) -> Column:
264+
"""
265+
Invert truthiness of (boolean) elements.
266+
267+
Raises
268+
------
269+
ValueError
270+
If the column is not boolean.
271+
"""
272+
273+
def any(self, skip_nulls: bool = True) -> bool:
274+
"""
275+
Reduction returns a bool.
276+
277+
Raises
278+
------
279+
ValueError
280+
If column is not boolean.
281+
"""
282+
283+
def all(self, skip_nulls: bool = True) -> bool:
284+
"""
285+
Reduction returns a bool.
286+
287+
Raises
288+
------
289+
ValueError
290+
If column is not boolean.
291+
"""
292+
293+
def min(self, skip_nulls: bool = True) -> dtype:
294+
"""
295+
Reduction returns a scalar. Any data type that supports comparisons
296+
must be supported. The returned value has the same dtype as the column.
297+
"""
298+
299+
def max(self, skip_nulls: bool = True) -> dtype:
300+
"""
301+
Reduction returns a scalar. Any data type that supports comparisons
302+
must be supported. The returned value has the same dtype as the column.
303+
"""
304+
305+
def sum(self, skip_nulls: bool = True) -> dtype:
306+
"""
307+
Reduction returns a scalar. Must be supported for numerical and
308+
datetime data types. The returned value has the same dtype as the
309+
column.
310+
"""
311+
312+
def prod(self, skip_nulls: bool = True) -> dtype:
313+
"""
314+
Reduction returns a scalar. Must be supported for numerical data types.
315+
The returned value has the same dtype as the column.
316+
"""
317+
318+
def median(self, skip_nulls: bool = True) -> dtype:
319+
"""
320+
Reduction returns a scalar. Must be supported for numerical and
321+
datetime data types. Returns a float for numerical data types, and
322+
datetime (with the appropriate timedelta format string) for datetime
323+
dtypes.
324+
"""
325+
326+
def mean(self, skip_nulls: bool = True) -> dtype:
327+
"""
328+
Reduction returns a scalar. Must be supported for numerical and
329+
datetime data types. Returns a float for numerical data types, and
330+
datetime (with the appropriate timedelta format string) for datetime
331+
dtypes.
332+
"""
333+
334+
def std(self, skip_nulls: bool = True) -> dtype:
335+
"""
336+
Reduction returns a scalar. Must be supported for numerical and
337+
datetime data types. Returns a float for numerical data types, and
338+
datetime (with the appropriate timedelta format string) for datetime
339+
dtypes.
340+
"""
341+
342+
def var(self, skip_nulls: bool = True) -> dtype:
343+
"""
344+
Reduction returns a scalar. Must be supported for numerical and
345+
datetime data types. Returns a float for numerical data types, and
346+
datetime (with the appropriate timedelta format string) for datetime
347+
dtypes.
348+
"""
349+
350+
def isnull(self) -> Column:
351+
"""
352+
Check for 'missing' or 'null' entries.
353+
354+
Returns
355+
-------
356+
Column
357+
358+
See also
359+
--------
360+
isnan
361+
362+
Notes
363+
-----
364+
Does *not* include NaN-like entries.
365+
May optionally include 'NaT' values (if present in an implementation),
366+
but note that the Standard makes no guarantees about them.
367+
"""
368+
369+
def isnan(self) -> Column:
370+
"""
371+
Check for nan entries.
372+
373+
Returns
374+
-------
375+
Column
376+
377+
See also
378+
--------
379+
isnull
380+
381+
Notes
382+
-----
383+
This only checks for 'NaN'.
384+
Does *not* include 'missing' or 'null' entries.
385+
In particular, does not check for `np.timedelta64('NaT')`.
386+
"""

spec/API_specification/dataframe_api/dataframe_object.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -265,7 +265,7 @@ def sorted_indices(
265265
If a sequence, it must be the same length as `keys`,
266266
and determines the direction with which to use each
267267
key to sort by.
268-
nulls_position : {'first', 'last'}
268+
nulls_position : ``{'first', 'last'}``
269269
Whether null values should be placed at the beginning
270270
or at the end of the result.
271271
Note that the position of NaNs is unspecified and may
@@ -565,7 +565,7 @@ def all(self, skip_nulls: bool = True) -> DataFrame:
565565
"""
566566
...
567567

568-
def any_rowwise(self, skipna: bool = True) -> Column:
568+
def any_rowwise(self, skip_nulls: bool = True) -> Column:
569569
"""
570570
Reduction returns a Column.
571571
@@ -579,7 +579,7 @@ def any_rowwise(self, skipna: bool = True) -> Column:
579579
"""
580580
...
581581

582-
def all_rowwise(self, skipna: bool = True) -> Column:
582+
def all_rowwise(self, skip_nulls: bool = True) -> Column:
583583
"""
584584
Reduction returns a Column.
585585

spec/conf.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@
5151
'members': True,
5252
'special-members': True,
5353
'undoc-members': True,
54-
'exclude-members': '__annotations__, __dict__,__weakref__,__module__',
54+
'exclude-members': '__annotations__, __dict__,__weakref__,__module__,__hash__',
5555
}
5656
add_module_names = False
5757
napoleon_numpy_docstring = True

0 commit comments

Comments
 (0)