
PERF: optimized median func when bottleneck not present #16509

Merged: 18 commits, Jan 22, 2018
Changes from 15 commits
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v0.23.0.txt
@@ -379,6 +379,8 @@ Performance Improvements
- Improved performance of :func:`IntervalIndex.symmetric_difference()` (:issue:`18475`)
- Improved performance of ``DatetimeIndex`` and ``Series`` arithmetic operations with Business-Month and Business-Quarter frequencies (:issue:`18489`)
- :func:`Series` / :func:`DataFrame` tab completion limits to 100 values, for better performance. (:issue:`18587`)
- Improved performance of :func:`DataFrame.median` with ``axis=1`` when bottleneck is not installed (:issue:`16468`)


.. _whatsnew_0230.docs:

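To make the whatsnew entry concrete, here is a minimal benchmark sketch; the frame shape and repeat count are arbitrary illustrative choices, not numbers taken from this PR.

import numpy as np
import pandas as pd
from timeit import timeit

# Illustrative only: any moderately wide float frame will do.
df = pd.DataFrame(np.random.randn(10000, 100))

# Without bottleneck installed, the skipna median along axis=1 now goes
# through np.nanmedian on the whole block instead of a per-row helper.
elapsed = timeit(lambda: df.median(axis=1), number=20)
print("median(axis=1), 20 runs: %.3fs" % elapsed)
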
13 changes: 0 additions & 13 deletions pandas/_libs/algos.pxd

This file was deleted.

51 changes: 1 addition & 50 deletions pandas/_libs/algos.pyx
@@ -8,8 +8,6 @@ from cython cimport Py_ssize_t

np.import_array()

cdef float64_t FP_ERR = 1e-13

cimport util

from libc.stdlib cimport malloc, free
@@ -24,6 +22,7 @@ from numpy cimport (ndarray,
double_t)


cdef float64_t FP_ERR = 1e-13
cdef double NaN = <double> np.NaN
cdef double nan = NaN

@@ -166,54 +165,6 @@ def groupsort_indexer(ndarray[int64_t] index, Py_ssize_t ngroups):
return result, counts


@cython.boundscheck(False)
@cython.wraparound(False)
cpdef numeric kth_smallest(numeric[:] a, Py_ssize_t k) nogil:
cdef:
Py_ssize_t i, j, l, m, n = a.shape[0]
numeric x

with nogil:
l = 0
m = n - 1

while l < m:
x = a[k]
i = l
j = m

while 1:
while a[i] < x: i += 1
while x < a[j]: j -= 1
if i <= j:
swap(&a[i], &a[j])
i += 1; j -= 1

if i > j: break

if j < k: l = i
if k < i: m = j
return a[k]


cpdef numeric median(numeric[:] arr):
"""
A faster median
"""
cdef Py_ssize_t n = arr.size

if n == 0:
return np.NaN

arr = arr.copy()

if n % 2:
return kth_smallest(arr, n // 2)
else:
return (kth_smallest(arr, n // 2) +
kth_smallest(arr, n // 2 - 1)) / 2


# ----------------------------------------------------------------------
# Pairwise correlation/covariance

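For readers following the deletion above: the removed ``median`` relied on Wirth-style quickselect via ``kth_smallest``. Below is a rough pure-Python sketch of the same selection idea, purely illustrative and not the deleted Cython code; after this PR the per-row work is handled by np.nanmedian in nanops.py instead.

import numpy as np

def kth_smallest(a, k):
    # In-place Hoare-style partitioning around the value at position k,
    # mirroring the deleted Cython helper; operates on a float copy.
    a = np.asarray(a, dtype=float).copy()
    lo, hi = 0, len(a) - 1
    while lo < hi:
        x = a[k]
        i, j = lo, hi
        while True:
            while a[i] < x:
                i += 1
            while x < a[j]:
                j -= 1
            if i <= j:
                a[i], a[j] = a[j], a[i]
                i += 1
                j -= 1
            if i > j:
                break
        if j < k:
            lo = i
        if k < i:
            hi = j
    return a[k]

def quickselect_median(arr):
    # Median of a 1-D sequence via one or two selections, NaN for empty input.
    n = len(arr)
    if n == 0:
        return np.nan
    if n % 2:
        return kth_smallest(arr, n // 2)
    return (kth_smallest(arr, n // 2) + kth_smallest(arr, n // 2 - 1)) / 2
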
11 changes: 10 additions & 1 deletion pandas/_libs/groupby.pyx
@@ -16,7 +16,6 @@ from numpy cimport (ndarray,
from libc.stdlib cimport malloc, free

from util cimport numeric, get_nat
from algos cimport swap
from algos import take_2d_axis1_float64_float64, groupsort_indexer

cdef int64_t iNaT = get_nat()
@@ -25,6 +24,16 @@ cdef double NaN = <double> np.NaN
cdef double nan = NaN


cdef inline Py_ssize_t swap(numeric *a, numeric *b) nogil:
cdef numeric t

# cython doesn't allow pointer dereference so use array syntax
t = a[0]
a[0] = b[0]
b[0] = t
return 0


# TODO: aggregate multiple columns in single pass
# ----------------------------------------------------------------------
# first, nth, last
16 changes: 10 additions & 6 deletions pandas/core/nanops.py
@@ -6,7 +6,7 @@

import numpy as np
from pandas import compat
from pandas._libs import tslib, algos, lib
from pandas._libs import tslib, lib
from pandas.core.dtypes.common import (
_get_dtype,
is_float, is_scalar,
@@ -370,14 +370,13 @@ def nanmean(values, axis=None, skipna=True):
@bottleneck_switch()
def nanmedian(values, axis=None, skipna=True):

values, mask, dtype, dtype_max = _get_values(values, skipna)

def get_median(x):
[Review comment from a contributor] and this will break in numpy < 1.9, you need to do this conditionally.

mask = notna(x)
if not skipna and not mask.all():
return np.nan
return algos.median(com._values_from_object(x[mask]))
return np.nanmedian(x[mask])

values, mask, dtype, dtype_max = _get_values(values, skipna)
if not is_float_dtype(values):
values = values.astype('f8')
values[mask] = np.nan
@@ -389,10 +388,15 @@ def get_median(x):

# an array from a frame
if values.ndim > 1:

# there's a non-empty array to apply over otherwise numpy raises
if notempty:
return _wrap_results(
np.apply_along_axis(get_median, axis, values), dtype)
if not skipna:
return _wrap_results(
np.apply_along_axis(get_median, axis, values), dtype)

# fastpath for the skipna case
return _wrap_results(np.nanmedian(values, axis), dtype)

# must return the correct shape, but median is not defined for the
# empty set so return nans of shape "everything but the passed axis"
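The review comment above flags that np.nanmedian only exists on NumPy >= 1.9, so the fastpath would need a conditional on older NumPy. A hedged sketch of one possible guard; the helper name is illustrative, not necessarily what was eventually merged:

import numpy as np

_HAS_NANMEDIAN = hasattr(np, 'nanmedian')  # np.nanmedian was added in NumPy 1.9

def _median_along_axis(values, axis, get_median):
    # values: float ndarray with NaNs already written into masked positions
    if _HAS_NANMEDIAN:
        # fast path: reduce whole axes at once
        return np.nanmedian(values, axis)
    # fallback for NumPy < 1.9: apply the row-wise helper, as before this PR
    return np.apply_along_axis(get_median, axis, values)
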
4 changes: 2 additions & 2 deletions setup.py
@@ -460,11 +460,11 @@ def pxd(name):
ext_data = {
'_libs.algos': {
'pyxfile': '_libs/algos',
'pxdfiles': ['_libs/src/util', '_libs/algos', '_libs/hashtable'],
'pxdfiles': ['_libs/src/util', '_libs/hashtable'],
'depends': _pxi_dep['algos']},
'_libs.groupby': {
'pyxfile': '_libs/groupby',
'pxdfiles': ['_libs/src/util', '_libs/algos'],
'pxdfiles': ['_libs/src/util'],
'depends': _pxi_dep['groupby']},
'_libs.hashing': {
'pyxfile': '_libs/hashing'},