Closed
Description
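Floats smaller than 1e-10 all seem to receive the same (tied) rank from `Series.rank()`, which is incorrect; the expected ranks are 1 through 10, as `scipy.stats.rankdata` returns for the same data: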
In [1]: import pandas
In [3]: import numpy
In [4]: series = pandas.Series([1e-100, 1e-25, 1e-20, 1e-15, 1e-10,
1e-5, 1e-4, 1e-3, 1e-2, 1e-1])
In [5]: series
Out[5]:
0 1.000000e-100
1 1.000000e-25
2 1.000000e-20
3 1.000000e-15
4 1.000000e-10
5 1.000000e-05
6 1.000000e-04
7 1.000000e-03
8 1.000000e-02
9 1.000000e-01
dtype: float64
In [6]: series.rank()
Out[6]:
0 2.5
1 2.5
2 2.5
3 2.5
4 5.0
5 6.0
6 7.0
7 8.0
8 9.0
9 10.0
dtype: float64
In [7]: from scipy import stats
In [8]: stats.rankdata(series)
Out[8]: array([ 1., 2., 3., 4., 5., 6., 7., 8., 9., 10.])
In [13]: pandas.show_versions()
INSTALLED VERSIONS
------------------
commit: None
python: 2.7.3.final.0
python-bits: 64
OS: Darwin
OS-release: 10.8.0
machine: x86_64
processor: i386
byteorder: little
LC_ALL: None
LANG: en_US.UTF-8
pandas: 0.13.1
Cython: 0.19.1
numpy: 1.8.0
scipy: 0.12.0.dev-1d5c886
statsmodels: 0.5.0
IPython: 1.2.1
sphinx: 1.2.2
patsy: 0.2.0
scikits.timeseries: None
dateutil: 2.2
pytz: 2013b
bottleneck: None
tables: None
numexpr: None
matplotlib: 1.2.0
openpyxl: 1.5.7
xlrd: 0.7.1
xlwt: None
xlsxwriter: None
sqlalchemy: 0.6.6
lxml: None
bs4: None
html5lib: None
bq: None
apiclient: None