Skip to content

FIX: add support for desc order when ranking infs with nans #19538 #20091

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Mar 30, 2018
6 changes: 3 additions & 3 deletions pandas/_libs/algos_rank_helper.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ def rank_1d_{{dtype}}(object in_arr, ties_method='average', ascending=True,

sorted_data = values.take(_as)
sorted_mask = mask.take(_as)
_indices = order[1].take(_as).nonzero()[0]
_indices = np.diff(sorted_mask).nonzero()[0]
non_na_idx = _indices[0] if len(_indices) > 0 else -1
argsorted = _as.astype('i8')

Expand All @@ -153,7 +153,7 @@ def rank_1d_{{dtype}}(object in_arr, ties_method='average', ascending=True,

if (i == n - 1 or
are_diff(util.get_value_at(sorted_data, i + 1), val) or
i == non_na_idx - 1):
i == non_na_idx):
if tiebreak == TIEBREAK_AVERAGE:
for j in range(i - dups + 1, i + 1):
ranks[argsorted[j]] = sum_ranks / dups
Expand Down Expand Up @@ -190,7 +190,7 @@ def rank_1d_{{dtype}}(object in_arr, ties_method='average', ascending=True,
count += 1.0

if (i == n - 1 or sorted_data[i + 1] != val or
i == non_na_idx - 1):
i == non_na_idx):
if tiebreak == TIEBREAK_AVERAGE:
for j in range(i - dups + 1, i + 1):
ranks[argsorted[j]] = sum_ranks / dups
Expand Down
17 changes: 11 additions & 6 deletions pandas/tests/series/test_rank.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from pandas.tests.series.common import TestData
from pandas._libs.tslib import iNaT
from pandas._libs.algos import Infinity, NegInfinity
from itertools import chain


class TestSeriesRank(TestData):
Expand Down Expand Up @@ -263,8 +264,11 @@ def test_rank_tie_methods_on_infs_nans(self):
chunk = 3
disabled = set([('object', 'first')])

def _check(s, expected, method='average', na_option='keep'):
result = s.rank(method=method, na_option=na_option)
def _check(s, expected, method='average', na_option='keep',
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you parametrize this test?

ascending=True):
expected = list(chain.from_iterable(expected))
result = s.rank(method=method, na_option=na_option,
ascending=ascending)
tm.assert_series_equal(result, Series(expected, dtype='float64'))

exp_ranks = {
Expand All @@ -283,12 +287,13 @@ def _check(s, expected, method='average', na_option='keep'):
if (dtype, method) in disabled:
continue
if na_opt == 'top':
order = ranks[1] + ranks[0] + ranks[2]
order = [ranks[1], ranks[0], ranks[2]]
elif na_opt == 'bottom':
order = ranks[0] + ranks[2] + ranks[1]
order = [ranks[0], ranks[2], ranks[1]]
else:
order = ranks[0] + [np.nan] * chunk + ranks[1]
_check(iseries, order, method, na_opt)
order = [ranks[0], [np.nan] * chunk, ranks[1]]
_check(iseries, order, method, na_opt, True)
_check(iseries, order[::-1], method, na_opt, False)

def test_rank_methods_series(self):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you change this to use the @td.skip_if_no_scipy decorator instead?

pytest.importorskip('scipy.stats.special')
Expand Down