@@ -931,6 +931,32 @@ ctypedef fused rank_t:
931
931
int64_t
932
932
933
933
934
cdef rank_t get_rank_nan_fill_val(bint rank_nans_highest, rank_t[:] _=None):
    """
    Pick the placeholder that stands in for missing values before sorting.

    Parameters
    ----------
    rank_nans_highest : bint
        True to get the fill value used when missing values should end up
        at the top of the sort order, False for the value used when they
        should end up at the bottom.
    _ : rank_t[:], optional
        Never read. It is present only so that the fused type ``rank_t``
        can be specialized.

    Returns
    -------
    rank_t
        ``Infinity()`` / ``NegInfinity()`` for object, ``util.INT64_MAX`` /
        ``NPY_NAT`` for int64, ``util.UINT64_MAX`` / ``0`` for uint64, and
        ``np.inf`` / ``-np.inf`` for every other specialization (the first
        value of each pair is returned when ``rank_nans_highest`` is True).
    """
    # Dispatch on the fused type first, then on the requested direction.
    if rank_t is object:
        if rank_nans_highest:
            return Infinity()
        else:
            return NegInfinity()
    elif rank_t is int64_t:
        if rank_nans_highest:
            return util.INT64_MAX
        else:
            return NPY_NAT
    elif rank_t is uint64_t:
        if rank_nans_highest:
            return util.UINT64_MAX
        else:
            return 0
    else:
        if rank_nans_highest:
            return np.inf
        else:
            return -np.inf
958
+
959
+
934
960
@ cython.wraparound (False )
935
961
@ cython.boundscheck (False )
936
962
def rank_1d (
@@ -980,7 +1006,7 @@ def rank_1d(
980
1006
ndarray[rank_t, ndim= 1 ] masked_vals
981
1007
rank_t[:] masked_vals_memview
982
1008
uint8_t[:] mask
983
- bint keep_na, check_labels, check_mask
1009
+ bint keep_na, nans_rank_highest, check_labels, check_mask
984
1010
rank_t nan_fill_val
985
1011
986
1012
tiebreak = tiebreakers[ties_method]
@@ -1026,27 +1052,12 @@ def rank_1d(
1026
1052
# If descending, fill with highest value since descending
1027
1053
# will flip the ordering to still end up with lowest rank.
1028
1054
# Symmetric logic applies to `na_option == 'bottom'`
1029
- if ascending ^ (na_option == ' top' ):
1030
- if rank_t is object :
1031
- nan_fill_val = Infinity()
1032
- elif rank_t is int64_t:
1033
- nan_fill_val = util.INT64_MAX
1034
- elif rank_t is uint64_t:
1035
- nan_fill_val = util.UINT64_MAX
1036
- else :
1037
- nan_fill_val = np.inf
1055
+ nans_rank_highest = ascending ^ (na_option == ' top' )
1056
+ nan_fill_val = get_rank_nan_fill_val[rank_t](nans_rank_highest)
1057
+ if nans_rank_highest:
1038
1058
order = (masked_vals, mask, labels)
1039
1059
else :
1040
- if rank_t is object :
1041
- nan_fill_val = NegInfinity()
1042
- elif rank_t is int64_t:
1043
- nan_fill_val = NPY_NAT
1044
- elif rank_t is uint64_t:
1045
- nan_fill_val = 0
1046
- else :
1047
- nan_fill_val = - np.inf
1048
-
1049
- order = (masked_vals, ~ (np.array(mask, copy = False )), labels)
1060
+ order = (masked_vals, ~ (np.asarray(mask)), labels)
1050
1061
1051
1062
np.putmask(masked_vals, mask, nan_fill_val)
1052
1063
# putmask doesn't accept a memoryview, so we assign as a separate step
@@ -1073,14 +1084,11 @@ def rank_1d(
1073
1084
check_mask,
1074
1085
check_labels,
1075
1086
keep_na,
1087
+ pct,
1076
1088
N,
1077
1089
)
1078
- if pct:
1079
- for i in range (N):
1080
- if grp_sizes[i] != 0 :
1081
- out[i] = out[i] / grp_sizes[i]
1082
1090
1083
- return np.array (out)
1091
+ return np.asarray (out)
1084
1092
1085
1093
1086
1094
@ cython.wraparound (False )
@@ -1097,6 +1105,7 @@ cdef void rank_sorted_1d(
1097
1105
bint check_mask,
1098
1106
bint check_labels,
1099
1107
bint keep_na,
1108
+ bint pct,
1100
1109
Py_ssize_t N,
1101
1110
) nogil:
1102
1111
"""
@@ -1108,7 +1117,7 @@ cdef void rank_sorted_1d(
1108
1117
out : float64_t[::1]
1109
1118
Array to store computed ranks
1110
1119
grp_sizes : int64_t[::1]
1111
- Array to store group counts.
1120
+ Array to store group counts, only used if pct=True
1112
1121
labels : See rank_1d.__doc__
1113
1122
sort_indexer : intp_t[:]
1114
1123
Array of indices which sorts masked_vals
@@ -1118,12 +1127,14 @@ cdef void rank_sorted_1d(
1118
1127
Array where entries are True if the value is missing, False otherwise
1119
1128
tiebreak : TiebreakEnumType
1120
1129
See rank_1d.__doc__ for the different modes
1121
- check_mask : bint
1130
+ check_mask : bool
1122
1131
If False, assumes the mask is all False to skip mask indexing
1123
- check_labels : bint
1132
+ check_labels : bool
1124
1133
If False, assumes all labels are the same to skip group handling logic
1125
- keep_na : bint
1134
+ keep_na : bool
1126
1135
Whether or not to keep nulls
1136
+ pct : bool
1137
+ Compute percentage rank of data within each group
1127
1138
N : Py_ssize_t
1128
1139
The number of elements to rank. Note: it is not always true that
1129
1140
N == len(out) or N == len(masked_vals) (see `nancorr_spearman` usage for why)
@@ -1342,6 +1353,11 @@ cdef void rank_sorted_1d(
1342
1353
grp_start = i + 1
1343
1354
grp_vals_seen = 1
1344
1355
1356
+ if pct:
1357
+ for i in range (N):
1358
+ if grp_sizes[i] != 0 :
1359
+ out[i] = out[i] / grp_sizes[i]
1360
+
1345
1361
1346
1362
def rank_2d (
1347
1363
ndarray[rank_t , ndim = 2 ] in_arr,
@@ -1362,11 +1378,11 @@ def rank_2d(
1362
1378
ndarray[rank_t, ndim= 2 ] values
1363
1379
ndarray[intp_t, ndim= 2 ] argsort_indexer
1364
1380
ndarray[uint8_t, ndim= 2 ] mask
1365
- rank_t val, nan_value
1381
+ rank_t val, nan_fill_val
1366
1382
float64_t count, sum_ranks = 0.0
1367
1383
int tiebreak = 0
1368
1384
int64_t idx
1369
- bint check_mask, condition, keep_na
1385
+ bint check_mask, condition, keep_na, nans_rank_highest
1370
1386
1371
1387
tiebreak = tiebreakers[ties_method]
1372
1388
@@ -1384,27 +1400,9 @@ def rank_2d(
1384
1400
if values.dtype != np.object_:
1385
1401
values = values.astype(' O' )
1386
1402
1403
+ nans_rank_highest = ascending ^ (na_option == ' top' )
1387
1404
if check_mask:
1388
- if ascending ^ (na_option == ' top' ):
1389
- if rank_t is object :
1390
- nan_value = Infinity()
1391
- elif rank_t is float64_t:
1392
- nan_value = np.inf
1393
-
1394
- # int64 and datetimelike
1395
- else :
1396
- nan_value = util.INT64_MAX
1397
-
1398
- else :
1399
- if rank_t is object :
1400
- nan_value = NegInfinity()
1401
- elif rank_t is float64_t:
1402
- nan_value = - np.inf
1403
-
1404
- # int64 and datetimelike
1405
- else :
1406
- nan_value = NPY_NAT
1407
-
1405
+ nan_fill_val = get_rank_nan_fill_val[rank_t](nans_rank_highest)
1408
1406
if rank_t is object :
1409
1407
mask = missing.isnaobj2d(values)
1410
1408
elif rank_t is float64_t:
@@ -1414,7 +1412,7 @@ def rank_2d(
1414
1412
else :
1415
1413
mask = values == NPY_NAT
1416
1414
1417
- np.putmask(values, mask, nan_value )
1415
+ np.putmask(values, mask, nan_fill_val )
1418
1416
else :
1419
1417
mask = np.zeros_like(values, dtype = bool )
1420
1418
0 commit comments