Skip to content

REF: Fuse all the types #23022

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 15 commits into from
Oct 17, 2018
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 14 additions & 18 deletions pandas/_libs/algos_common_helper.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -16,33 +16,29 @@ WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in

{{py:

# name, c_type, dest_type, dest_dtype
dtypes = [('float64', 'float64_t', 'float64_t', 'np.float64'),
('float32', 'float32_t', 'float32_t', 'np.float32'),
('int8', 'int8_t', 'float32_t', 'np.float32'),
('int16', 'int16_t', 'float32_t', 'np.float32'),
('int32', 'int32_t', 'float64_t', 'np.float64'),
('int64', 'int64_t', 'float64_t', 'np.float64')]
# name, c_type, dest_type
dtypes = [('float64', 'float64_t', 'float64_t'),
('float32', 'float32_t', 'float32_t'),
('int8', 'int8_t', 'float32_t'),
('int16', 'int16_t', 'float32_t'),
('int32', 'int32_t', 'float64_t'),
('int64', 'int64_t', 'float64_t')]

def get_dispatch(dtypes):

for name, c_type, dest_type, dest_dtype, in dtypes:

dest_type2 = dest_type
dest_type = dest_type.replace('_t', '')

yield name, c_type, dest_type, dest_type2, dest_dtype
for name, c_type, dest_type, in dtypes:
yield name, c_type, dest_type

}}

{{for name, c_type, dest_type, dest_type2, dest_dtype
{{for name, c_type, dest_type
in get_dispatch(dtypes)}}


@cython.boundscheck(False)
@cython.wraparound(False)
def diff_2d_{{name}}(ndarray[{{c_type}}, ndim=2] arr,
ndarray[{{dest_type2}}, ndim=2] out,
ndarray[{{dest_type}}, ndim=2] out,
Py_ssize_t periods, int axis):
cdef:
Py_ssize_t i, j, sx, sy
Expand Down Expand Up @@ -84,9 +80,9 @@ def diff_2d_{{name}}(ndarray[{{c_type}}, ndim=2] arr,
out[i, j] = arr[i, j] - arr[i, j - periods]


def put2d_{{name}}_{{dest_type}}(ndarray[{{c_type}}, ndim=2, cast=True] values,
ndarray[int64_t] indexer, Py_ssize_t loc,
ndarray[{{dest_type2}}] out):
def put2d_{{name}}_{{dest_type[:-2]}}(ndarray[{{c_type}}, ndim=2, cast=True] values,
ndarray[int64_t] indexer, Py_ssize_t loc,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a bit obtuse; can you make the slice more explicit?

ndarray[{{dest_type}}] out):
cdef:
Py_ssize_t i, j, k

Expand Down
102 changes: 66 additions & 36 deletions pandas/_libs/groupby_helper.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -600,7 +600,7 @@ def group_rank_{{name}}(ndarray[float64_t, ndim=2] out,

{{py:

# name, c_type, dest_type2, nan_val
# name, dest_type2, nan_val, inf_val
dtypes = [('float64', 'float64_t', 'NAN', 'np.inf'),
('float32', 'float32_t', 'NAN', 'np.inf'),
('int64', 'int64_t', 'iNaT', '_int64_max')]
Expand Down Expand Up @@ -725,24 +725,37 @@ def group_min_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
out[i, j] = minx[i, j]



{{endfor}}


ctypedef fused groupby_t:
float64_t
float32_t
int64_t


@cython.boundscheck(False)
@cython.wraparound(False)
def group_cummin_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
ndarray[{{dest_type2}}, ndim=2] values,
ndarray[int64_t] labels,
bint is_datetimelike):
def group_cummin(ndarray[groupby_t, ndim=2] out,
ndarray[groupby_t, ndim=2] values,
ndarray[int64_t] labels,
bint is_datetimelike):
"""
Only transforms on axis=0
"""
cdef:
Py_ssize_t i, j, N, K, size
{{dest_type2}} val, mval
ndarray[{{dest_type2}}, ndim=2] accum
groupby_t val, mval
ndarray[groupby_t, ndim=2] accum
int64_t lab

N, K = (<object> values).shape
accum = np.empty_like(values)
accum.fill({{inf_val}})
if groupby_t is int64_t:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you make this more generic? E.g., what if we expand this to other int types?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Presumably. The MO with these PRs is to keep the logic unchanged.

I think there is also a cost in compile-time.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a small compile-time cost (actually maybe nothing, as Cython is pretty smart), but I suppose we can handle it later.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fair enough. Easy to implement if/when it's actually needed.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

see some comments above

accum.fill(_int64_max)
else:
accum.fill(np.inf)

with nogil:
for i in range(N):
Expand All @@ -754,37 +767,48 @@ def group_cummin_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
val = values[i, j]

# val = nan
{{if name == 'int64'}}
if is_datetimelike and val == {{nan_val}}:
out[i, j] = {{nan_val}}
if groupby_t is int64_t:
if is_datetimelike and val == iNaT:
out[i, j] = iNaT
else:
mval = accum[lab, j]
if val < mval:
accum[lab, j] = mval = val
out[i, j] = mval
else:
{{else}}
if val == val:
{{endif}}
mval = accum[lab, j]
if val < mval:
accum[lab, j] = mval = val
out[i, j] = mval
if val == val:
mval = accum[lab, j]
if val < mval:
accum[lab, j] = mval = val
out[i, j] = mval


group_cummin_float64 = group_cummin["float64_t"]
group_cummin_float32 = group_cummin["float32_t"]
group_cummin_int64 = group_cummin["int64_t"]


@cython.boundscheck(False)
@cython.wraparound(False)
def group_cummax_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
ndarray[{{dest_type2}}, ndim=2] values,
ndarray[int64_t] labels,
bint is_datetimelike):
def group_cummax(ndarray[groupby_t, ndim=2] out,
ndarray[groupby_t, ndim=2] values,
ndarray[int64_t] labels,
bint is_datetimelike):
"""
Only transforms on axis=0
"""
cdef:
Py_ssize_t i, j, N, K, size
{{dest_type2}} val, mval
ndarray[{{dest_type2}}, ndim=2] accum
groupby_t val, mval
ndarray[groupby_t, ndim=2] accum
int64_t lab

N, K = (<object> values).shape
accum = np.empty_like(values)
accum.fill(-{{inf_val}})
if groupby_t is int64_t:
accum.fill(-_int64_max)
else:
accum.fill(-np.inf)

with nogil:
for i in range(N):
Expand All @@ -795,16 +819,22 @@ def group_cummax_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
for j in range(K):
val = values[i, j]

{{if name == 'int64'}}
if is_datetimelike and val == {{nan_val}}:
out[i, j] = {{nan_val}}
if groupby_t is int64_t:
if is_datetimelike and val == iNaT:
out[i, j] = iNaT
else:
mval = accum[lab, j]
if val > mval:
accum[lab, j] = mval = val
out[i, j] = mval
else:
{{else}}
if val == val:
{{endif}}
mval = accum[lab, j]
if val > mval:
accum[lab, j] = mval = val
out[i, j] = mval
if val == val:
mval = accum[lab, j]
if val > mval:
accum[lab, j] = mval = val
out[i, j] = mval

{{endfor}}

group_cummax_float64 = group_cummax["float64_t"]
group_cummax_float32 = group_cummax["float32_t"]
group_cummax_int64 = group_cummax["int64_t"]
86 changes: 29 additions & 57 deletions pandas/_libs/sparse_op_helper.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,12 @@ WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in
# Sparse op
#----------------------------------------------------------------------

{{py:

# dtype, float_group
dtypes = [('float64', True), ('int64', False)]

}}
ctypedef fused sparse_t:
float64_t
int64_t

{{for dtype, float_group in dtypes}}

{{if float_group}}

cdef inline {{dtype}}_t __div_{{dtype}}({{dtype}}_t a, {{dtype}}_t b):
cdef inline float64_t __div(sparse_t a, sparse_t b):
if b == 0:
if a > 0:
return INF
Expand All @@ -30,63 +24,41 @@ cdef inline {{dtype}}_t __div_{{dtype}}({{dtype}}_t a, {{dtype}}_t b):
else:
return float(a) / b

cdef inline {{dtype}}_t __truediv_{{dtype}}({{dtype}}_t a, {{dtype}}_t b):
return __div_{{dtype}}(a, b)

cdef inline {{dtype}}_t __floordiv_{{dtype}}({{dtype}}_t a, {{dtype}}_t b):
if b == 0:
# numpy >= 1.11 returns NaN
# for a // 0, rather than +-inf
if _np_version_under1p11:
if a > 0:
return INF
elif a < 0:
return -INF
return NaN
else:
return a // b
cdef inline float64_t __truediv(sparse_t a, sparse_t b):
return __div(a, b)

cdef inline {{dtype}}_t __mod_{{dtype}}({{dtype}}_t a, {{dtype}}_t b):
if b == 0:
return NaN
else:
return a % b

{{else}}

cdef inline float64_t __div_{{dtype}}({{dtype}}_t a, {{dtype}}_t b):
cdef inline sparse_t __mod(sparse_t a, sparse_t b):
if b == 0:
if a > 0:
return INF
elif a < 0:
return -INF
else:
if sparse_t is float64_t:
return NaN
else:
return 0
else:
return float(a) / b
return a % b

cdef inline float64_t __truediv_{{dtype}}({{dtype}}_t a, {{dtype}}_t b):
return __div_{{dtype}}(a, b)

cdef inline {{dtype}}_t __floordiv_{{dtype}}({{dtype}}_t a, {{dtype}}_t b):
cdef inline sparse_t __floordiv(sparse_t a, sparse_t b):
if b == 0:
return 0
if sparse_t is float64_t:
# numpy >= 1.11 returns NaN
# for a // 0, rather than +-inf
if _np_version_under1p11:
if a > 0:
return INF
elif a < 0:
return -INF
return NaN
else:
return 0
else:
return a // b

cdef inline {{dtype}}_t __mod_{{dtype}}({{dtype}}_t a, {{dtype}}_t b):
if b == 0:
return 0
else:
return a % b

{{endif}}

{{endfor}}

#----------------------------------------------------------------------
# ----------------------------------------------------------------------
# sparse array op
#----------------------------------------------------------------------
# ----------------------------------------------------------------------

{{py:

Expand All @@ -106,10 +78,10 @@ def get_op(tup):
ops_dict = {'add': '{0} + {1}',
'sub': '{0} - {1}',
'mul': '{0} * {1}',
'div': '__div_{2}({0}, {1})',
'mod': '__mod_{2}({0}, {1})',
'truediv': '__truediv_{2}({0}, {1})',
'floordiv': '__floordiv_{2}({0}, {1})',
'div': '__div({0}, {1})',
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These names are odd.

Maybe just call them div and so on.

'mod': '__mod({0}, {1})',
'truediv': '__truediv({0}, {1})',
'floordiv': '__floordiv({0}, {1})',
'pow': '{0} ** {1}',
'eq': '{0} == {1}',
'ne': '{0} != {1}',
Expand Down
15 changes: 8 additions & 7 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -1153,7 +1153,7 @@ def check_int_bool(self, inplace):
inplace=inplace, limit=limit,
fill_value=fill_value,
coerce=coerce,
downcast=downcast, mgr=mgr)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Edits here are unrelated, should be removed from this PR.

downcast=downcast)
# try an interp method
try:
m = missing.clean_interp_method(method, **kwargs)
Expand All @@ -1169,13 +1169,14 @@ def check_int_bool(self, inplace):
limit_direction=limit_direction,
limit_area=limit_area,
fill_value=fill_value, inplace=inplace,
downcast=downcast, mgr=mgr, **kwargs)
downcast=downcast, **kwargs)

raise ValueError("invalid method '{0}' to interpolate.".format(method))
raise ValueError("invalid method '{method}' to interpolate."
.format(method=method))

def _interpolate_with_fill(self, method='pad', axis=0, inplace=False,
limit=None, fill_value=None, coerce=False,
downcast=None, mgr=None):
downcast=None):
""" fillna but using the interpolate machinery """

inplace = validate_bool_kwarg(inplace, 'inplace')
Expand All @@ -1202,7 +1203,7 @@ def _interpolate_with_fill(self, method='pad', axis=0, inplace=False,
def _interpolate(self, method=None, index=None, values=None,
fill_value=None, axis=0, limit=None,
limit_direction='forward', limit_area=None,
inplace=False, downcast=None, mgr=None, **kwargs):
inplace=False, downcast=None, **kwargs):
""" interpolate using scipy wrappers """

inplace = validate_bool_kwarg(inplace, 'inplace')
Expand All @@ -1219,8 +1220,8 @@ def _interpolate(self, method=None, index=None, values=None,

if method in ('krogh', 'piecewise_polynomial', 'pchip'):
if not index.is_monotonic:
raise ValueError("{0} interpolation requires that the "
"index be monotonic.".format(method))
raise ValueError("{method} interpolation requires that the "
"index be monotonic.".format(method=method))
# process 1-d slices in the axis direction

def func(x):
Expand Down