pandas-dev · jreback · Sep 14, 2021 · Aug 31, 2021 · Aug 31, 2021 · Aug 31, 2021
diff --git a/asv_bench/benchmarks/rolling.py b/asv_bench/benchmarks/rolling.py
@@ -180,6 +180,26 @@ def time_quantile(self, constructor, window, dtype, percentile, interpolation):
         self.roll.quantile(percentile, interpolation=interpolation)
 
 
+class Rank:
+    params = (
+        ["DataFrame", "Series"],
+        [10, 1000],
+        ["int", "float"],
+        [True, False],
+        [True, False],
+        ["min", "max", "average"],
+    )
+    param_names = ["constructor", "window", "dtype", "percentile", "ascending", "method"]
+
+    def setup(self, constructor, window, dtype, percentile, ascending, method):
+        N = 10 ** 5
+        arr = np.random.random(N).astype(dtype)
+        self.roll = getattr(pd, constructor)(arr).rolling(window)
+
+    def time_rank(self, constructor, window, dtype, percentile, ascending, method):
+        self.roll.rank(pct=percentile, ascending=ascending, method=method)
+
+
 class PeakMemFixedWindowMinMax:
 
     params = ["min", "max"]

diff --git a/pandas/_libs/src/skiplist.h b/pandas/_libs/src/skiplist.h
@@ -180,10 +180,28 @@ PANDAS_INLINE double skiplist_get(skiplist_t *skp, int i, int *ret) {
     return node->value;
 }
 
+PANDAS_INLINE int skiplist_min_rank(skiplist_t *skp, double value) {
+    node_t *node;
+    int level, rank = 0;
+
+    node = skp->head;
+    for (level = skp->maxlevels - 1; level >= 0; --level) {
+        while (_node_cmp(node->next[level], value) > 0) {
+            rank += node->width[level];
+            node = node->next[level];
+        }
+    }
+
+    return rank + 1;
+}
+
+// Returns the rank of the inserted element. When there are duplicates, `rank` is the highest of
+// the group, i.e. the 'max' method of
+// https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.rank.html
 PANDAS_INLINE int skiplist_insert(skiplist_t *skp, double value) {
     node_t *node, *prevnode, *newnode, *next_at_level;
     int *steps_at_level;
-    int size, steps, level;
+    int size, steps, level, rank = 0;
     node_t **chain;
 
     chain = skp->tmp_chain;
@@ -197,6 +215,7 @@ PANDAS_INLINE int skiplist_insert(skiplist_t *skp, double value) {
         next_at_level = node->next[level];
         while (_node_cmp(next_at_level, value) >= 0) {
             steps_at_level[level] += node->width[level];
+            rank += node->width[level];
             node = next_at_level;
             next_at_level = node->next[level];
         }
@@ -230,7 +249,7 @@ PANDAS_INLINE int skiplist_insert(skiplist_t *skp, double value) {
 
     ++(skp->size);
 
-    return 1;
+    return rank + 1;
 }
 
 PANDAS_INLINE int skiplist_remove(skiplist_t *skp, double value) {

diff --git a/pandas/_libs/window/aggregations.pyi b/pandas/_libs/window/aggregations.pyi
@@ -63,6 +63,15 @@ def roll_quantile(
     quantile: float,  # float64_t
     interpolation: Literal["linear", "lower", "higher", "nearest", "midpoint"],
 ) -> np.ndarray: ...  # np.ndarray[float]
+def roll_rank(
+    values: np.ndarray,
+    start: np.ndarray,
+    end: np.ndarray,
+    minp: int,
+    percentile: bool,
+    method: Literal["average", "min", "max"],
+    ascending: bool,
+) -> np.ndarray: ...  # np.ndarray[float]
 def roll_apply(
     obj: object,
     start: np.ndarray,  # np.ndarray[np.int64]

diff --git a/pandas/_libs/window/aggregations.pyx b/pandas/_libs/window/aggregations.pyx
@@ -50,6 +50,8 @@ cdef extern from "../src/skiplist.h":
     double skiplist_get(skiplist_t*, int, int*) nogil
     int skiplist_insert(skiplist_t*, double) nogil
     int skiplist_remove(skiplist_t*, double) nogil
+    int skiplist_rank(skiplist_t*, double) nogil
+    int skiplist_min_rank(skiplist_t*, double) nogil
 
 cdef:
     float32_t MINfloat32 = np.NINF
@@ -795,7 +797,7 @@ def roll_median_c(const float64_t[:] values, ndarray[int64_t] start,
                     val = values[j]
                     if notnan(val):
                         nobs += 1
-                        err = skiplist_insert(sl, val) != 1
+                        err = skiplist_insert(sl, val) == -1
                         if err:
                             break
 
@@ -806,7 +808,7 @@ def roll_median_c(const float64_t[:] values, ndarray[int64_t] start,
                     val = values[j]
                     if notnan(val):
                         nobs += 1
-                        err = skiplist_insert(sl, val) != 1
+                        err = skiplist_insert(sl, val) == -1
                         if err:
                             break
 
@@ -1139,6 +1141,111 @@ def roll_quantile(const float64_t[:] values, ndarray[int64_t] start,
     return output
 
 
+cdef enum RankType:
+    AVERAGE,
+    MIN,
+    MAX,
+
+
+rank_types = {
+    'average': AVERAGE,
+    'min': MIN,
+    'max': MAX,
+}
+
+
+def roll_rank(const float64_t[:] values, ndarray[int64_t] start,
+              ndarray[int64_t] end, int64_t minp, bint percentile, str method, bint ascending) -> np.ndarray:
+    """
+    O(N log(window)) implementation using skip list
+
+    derived from roll_quantile
+    """
+    cdef:
+        Py_ssize_t i, j, s, e, N = len(values), idx
+        float64_t rank_min = 0, rank = 0
+        int64_t nobs = 0, win
+        float64_t val
+        skiplist_t *skiplist
+        float64_t[::1] output = None
-        float64_t[::1] output = None
+        float64_t[::1] output
-        float64_t[::1] output = None
+        float64_t[::1] output
+        RankType rank_type
+
+    try:
+        rank_type = rank_types[method]
+    except KeyError:
+        raise ValueError(f"Method '{method}' is not supported")
+
+    is_monotonic_increasing_bounds = is_monotonic_increasing_start_end_bounds(
+        start, end
+    )
+    # we use the Fixed/Variable Indexer here as the
+    # actual skiplist ops outweigh any window computation costs
+    output = np.empty(N, dtype=np.float64)
+
+    win = (end - start).max()
+    if win == 0:
+        output[:] = NaN
+        return np.asarray(output)
+    skiplist = skiplist_init(<int>win)
+    if skiplist == NULL:
+        raise MemoryError("skiplist_init failed")
+
+    with nogil:
+        for i in range(N):
+            s = start[i]
+            e = end[i]
+
+            if i == 0 or not is_monotonic_increasing_bounds:
+                if not is_monotonic_increasing_bounds:
+                    nobs = 0
+                    skiplist_destroy(skiplist)
+                    skiplist = skiplist_init(<int>win)
+
+                # setup
+                for j in range(s, e):
+                    val = values[j] if ascending else -values[j]
+                    if notnan(val):
+                        nobs += 1
+                        rank = skiplist_insert(skiplist, val)
+                        if rank == -1:
+                            raise MemoryError("skiplist_insert failed")
+                        if rank_type == AVERAGE:
+                            rank_min = skiplist_min_rank(skiplist, val)
+                            rank = ((rank * (rank + 1) / 2) - ((rank_min - 1) * rank_min / 2)) / (rank - rank_min + 1)
+                        elif rank_type == MIN:
+                            rank = skiplist_min_rank(skiplist, val)
+
+            else:
+                # calculate deletes
+                for j in range(start[i - 1], s):
+                    val = values[j] if ascending else -values[j]
+                    if notnan(val):
+                        skiplist_remove(skiplist, val)
+                        nobs -= 1
+
+                # calculate adds
+                for j in range(end[i - 1], e):
+                    val = values[j] if ascending else -values[j]
+                    if notnan(val):
+                        nobs += 1
+                        rank = skiplist_insert(skiplist, val)
+                        if rank == -1:
+                            raise MemoryError("skiplist_insert failed")
+                        if rank_type == AVERAGE:
+                            rank_min = skiplist_min_rank(skiplist, val)
+                            rank = ((rank * (rank + 1) / 2) - ((rank_min - 1) * rank_min / 2)) / (rank - rank_min + 1)
+                        elif rank_type == MIN:
+                            rank = skiplist_min_rank(skiplist, val)
+            if nobs >= minp:
+                output[i] = <float64_t>(rank) / nobs if percentile else rank
+            else:
+                output[i] = NaN
+
+    skiplist_destroy(skiplist)
+
+    return np.asarray(output)
+
+
 def roll_apply(object obj,
                ndarray[int64_t] start, ndarray[int64_t] end,
                int64_t minp,

diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py
@@ -1409,6 +1409,16 @@ def quantile(self, quantile: float, interpolation: str = "linear", **kwargs):
 
         return self._apply(window_func, name="quantile", **kwargs)
 
+    def rank(self, pct: bool = False, method: str = "average", ascending: bool = True, **kwargs):
+        window_func = partial(
+            window_aggregations.roll_rank,
+            percentile=pct,
+            method=method,
+            ascending=ascending,
+        )
+
+        return self._apply(window_func, name="rank", **kwargs)
+
     def cov(
         self,
         other: DataFrame | Series | None = None,

diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py
@@ -1500,3 +1500,23 @@ def test_rolling_numeric_dtypes():
         dtype="float64",
     )
     tm.assert_frame_equal(result, expected)
+
+
+@pytest.mark.parametrize("window", [1, 3, 10, 50, 1000])
+@pytest.mark.parametrize("method", ["min", "max", "average"])
+@pytest.mark.parametrize("pct", [True, False])
+@pytest.mark.parametrize("ascending", [True, False])
+@pytest.mark.parametrize("test_data", ["default", "duplicates", "nans"])
+def test_rank(window, method, pct, ascending, test_data):
+    length = 1000
+    if test_data == "default":
+        ser = Series(data=np.random.rand(length))
+    elif test_data == "duplicates":
+        ser = Series(data=np.random.choice(3, length))
+    elif test_data == "nans":
+        ser = Series(data=np.random.choice([1.0, 0.25, 0.75, np.nan], length))
+
+    expected = ser.rolling(window).apply(lambda x: x.rank(method=method, pct=pct, ascending=ascending).iloc[-1])
+    result = ser.rolling(window).rank(method=method, pct=pct, ascending=ascending)
+
+    tm.assert_series_equal(result, expected)