Skip to content

REF: Fuse all the types #23022

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 15 commits on Oct 17, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 14 additions & 17 deletions pandas/_libs/algos_common_helper.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -16,33 +16,30 @@ WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in

{{py:

# name, c_type, dest_type, dest_dtype
dtypes = [('float64', 'float64_t', 'float64_t', 'np.float64'),
('float32', 'float32_t', 'float32_t', 'np.float32'),
('int8', 'int8_t', 'float32_t', 'np.float32'),
('int16', 'int16_t', 'float32_t', 'np.float32'),
('int32', 'int32_t', 'float64_t', 'np.float64'),
('int64', 'int64_t', 'float64_t', 'np.float64')]
# name, c_type, dest_type
dtypes = [('float64', 'float64_t', 'float64_t'),
('float32', 'float32_t', 'float32_t'),
('int8', 'int8_t', 'float32_t'),
('int16', 'int16_t', 'float32_t'),
('int32', 'int32_t', 'float64_t'),
('int64', 'int64_t', 'float64_t')]

def get_dispatch(dtypes):

for name, c_type, dest_type, dest_dtype, in dtypes:

dest_type2 = dest_type
dest_type = dest_type.replace('_t', '')

yield name, c_type, dest_type, dest_type2, dest_dtype
for name, c_type, dest_type, in dtypes:
dest_name = dest_type[:-2] # i.e. strip "_t"
yield name, c_type, dest_type, dest_name

}}

{{for name, c_type, dest_type, dest_type2, dest_dtype
{{for name, c_type, dest_type, dest_name
in get_dispatch(dtypes)}}


@cython.boundscheck(False)
@cython.wraparound(False)
def diff_2d_{{name}}(ndarray[{{c_type}}, ndim=2] arr,
ndarray[{{dest_type2}}, ndim=2] out,
ndarray[{{dest_type}}, ndim=2] out,
Py_ssize_t periods, int axis):
cdef:
Py_ssize_t i, j, sx, sy
Expand Down Expand Up @@ -84,9 +81,9 @@ def diff_2d_{{name}}(ndarray[{{c_type}}, ndim=2] arr,
out[i, j] = arr[i, j] - arr[i, j - periods]


def put2d_{{name}}_{{dest_type}}(ndarray[{{c_type}}, ndim=2, cast=True] values,
def put2d_{{name}}_{{dest_name}}(ndarray[{{c_type}}, ndim=2, cast=True] values,
ndarray[int64_t] indexer, Py_ssize_t loc,
ndarray[{{dest_type2}}] out):
ndarray[{{dest_type}}] out):
cdef:
Py_ssize_t i, j, k

Expand Down
57 changes: 21 additions & 36 deletions pandas/_libs/algos_rank_helper.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -131,45 +131,20 @@ def rank_1d_{{dtype}}(object in_arr, ties_method='average',
argsorted = _as.astype('i8')

{{if dtype == 'object'}}
for i in range(n):
sum_ranks += i + 1
dups += 1
isnan = sorted_mask[i]
val = util.get_value_at(sorted_data, i)

if isnan and keep_na:
ranks[argsorted[i]] = nan
continue
count += 1.0

if (i == n - 1 or
are_diff(util.get_value_at(sorted_data, i + 1), val) or
i == non_na_idx):
if tiebreak == TIEBREAK_AVERAGE:
for j in range(i - dups + 1, i + 1):
ranks[argsorted[j]] = sum_ranks / dups
elif tiebreak == TIEBREAK_MIN:
for j in range(i - dups + 1, i + 1):
ranks[argsorted[j]] = i - dups + 2
elif tiebreak == TIEBREAK_MAX:
for j in range(i - dups + 1, i + 1):
ranks[argsorted[j]] = i + 1
elif tiebreak == TIEBREAK_FIRST:
raise ValueError('first not supported for non-numeric data')
elif tiebreak == TIEBREAK_FIRST_DESCENDING:
for j in range(i - dups + 1, i + 1):
ranks[argsorted[j]] = 2 * i - j - dups + 2
elif tiebreak == TIEBREAK_DENSE:
total_tie_count += 1
for j in range(i - dups + 1, i + 1):
ranks[argsorted[j]] = total_tie_count
sum_ranks = dups = 0
if True:
{{else}}
with nogil:
{{endif}}
# TODO: why does the 2d version not have a nogil block?
for i in range(n):
sum_ranks += i + 1
dups += 1

{{if dtype == 'object'}}
val = util.get_value_at(sorted_data, i)
{{else}}
val = sorted_data[i]
{{endif}}

{{if dtype != 'uint64'}}
isnan = sorted_mask[i]
Expand All @@ -180,8 +155,14 @@ def rank_1d_{{dtype}}(object in_arr, ties_method='average',

count += 1.0

if (i == n - 1 or sorted_data[i + 1] != val or
i == non_na_idx):
{{if dtype == 'object'}}
if (i == n - 1 or
are_diff(util.get_value_at(sorted_data, i + 1), val) or
i == non_na_idx):
{{else}}
if (i == n - 1 or sorted_data[i + 1] != val or i == non_na_idx):
{{endif}}

if tiebreak == TIEBREAK_AVERAGE:
for j in range(i - dups + 1, i + 1):
ranks[argsorted[j]] = sum_ranks / dups
Expand All @@ -192,8 +173,13 @@ def rank_1d_{{dtype}}(object in_arr, ties_method='average',
for j in range(i - dups + 1, i + 1):
ranks[argsorted[j]] = i + 1
elif tiebreak == TIEBREAK_FIRST:
{{if dtype == 'object'}}
raise ValueError('first not supported for '
'non-numeric data')
{{else}}
for j in range(i - dups + 1, i + 1):
ranks[argsorted[j]] = j + 1
{{endif}}
elif tiebreak == TIEBREAK_FIRST_DESCENDING:
for j in range(i - dups + 1, i + 1):
ranks[argsorted[j]] = 2 * i - j - dups + 2
Expand All @@ -202,7 +188,6 @@ def rank_1d_{{dtype}}(object in_arr, ties_method='average',
for j in range(i - dups + 1, i + 1):
ranks[argsorted[j]] = total_tie_count
sum_ranks = dups = 0
{{endif}}
if pct:
if tiebreak == TIEBREAK_DENSE:
return ranks / total_tie_count
Expand Down
Loading