Skip to content

Commit 2e46565

Browse files
committed
TST: add systematic tests for nlargest/nsmallest
1 parent 41e8ac4 commit 2e46565

File tree

1 file changed

+85
-90
lines changed

1 file changed

+85
-90
lines changed

pandas/tests/frame/test_analytics.py

Lines changed: 85 additions & 90 deletions
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,12 @@
77
import sys
88
import pytest
99

10+
from string import ascii_lowercase
1011
from numpy import nan
1112
from numpy.random import randn
1213
import numpy as np
1314

14-
from pandas.compat import lrange
15+
from pandas.compat import lrange, product
1516
from pandas import (compat, isnull, notnull, DataFrame, Series,
1617
MultiIndex, date_range, Timestamp)
1718
import pandas as pd
@@ -1119,95 +1120,6 @@ def __nonzero__(self):
11191120
self.assertTrue(r0.all())
11201121
self.assertTrue(r1.all())
11211122

1122-
# ----------------------------------------------------------------------
1123-
# Top / bottom
1124-
1125-
def test_nlargest(self):
    """``nlargest(5, 'a')`` must equal a descending sort on 'a' + ``head(5)``."""
    # GH10393
    from string import ascii_lowercase

    frame = pd.DataFrame({'a': np.random.permutation(10),
                          'b': list(ascii_lowercase[:10])})

    top_rows = frame.nlargest(5, 'a')
    by_sorting = frame.sort_values('a', ascending=False).head(5)

    tm.assert_frame_equal(top_rows, by_sorting)
1133-
1134-
def test_nlargest_multiple_columns(self):
    """``nlargest`` over ['a', 'b'] must equal the matching descending sort."""
    from string import ascii_lowercase

    keys = ['a', 'b']
    frame = pd.DataFrame({'a': np.random.permutation(10),
                          'b': list(ascii_lowercase[:10]),
                          'c': np.random.permutation(10).astype('float64')})

    top_rows = frame.nlargest(5, keys)
    by_sorting = frame.sort_values(keys, ascending=False).head(5)

    tm.assert_frame_equal(top_rows, by_sorting)
1142-
1143-
def test_nlargest_nsmallest_identical_values(self):
    """When every value in 'a' is tied, the first three rows come back."""
    # GH15297
    frame = pd.DataFrame({'a': [1] * 5, 'b': [1, 2, 3, 4, 5]})

    # nlargest: compared against a frame built with an explicit index.
    largest = frame.nlargest(3, 'a')
    want_largest = pd.DataFrame({'a': [1] * 3, 'b': [1, 2, 3]},
                                index=[0, 1, 2])
    tm.assert_frame_equal(largest, want_largest)

    # nsmallest: compared against a frame built with the default index.
    smallest = frame.nsmallest(3, 'a')
    want_smallest = pd.DataFrame({'a': [1] * 3, 'b': [1, 2, 3]})
    tm.assert_frame_equal(smallest, want_smallest)
1156-
1157-
def test_nsmallest(self):
    """``nsmallest(5, 'a')`` must equal an ascending sort on 'a' + ``head(5)``."""
    from string import ascii_lowercase

    frame = pd.DataFrame({'a': np.random.permutation(10),
                          'b': list(ascii_lowercase[:10])})

    bottom_rows = frame.nsmallest(5, 'a')
    by_sorting = frame.sort_values('a').head(5)

    tm.assert_frame_equal(bottom_rows, by_sorting)
1164-
1165-
def test_nsmallest_multiple_columns(self):
    """``nsmallest`` over ['a', 'c'] must equal the matching ascending sort."""
    from string import ascii_lowercase

    keys = ['a', 'c']
    frame = pd.DataFrame({'a': np.random.permutation(10),
                          'b': list(ascii_lowercase[:10]),
                          'c': np.random.permutation(10).astype('float64')})

    bottom_rows = frame.nsmallest(5, keys)
    by_sorting = frame.sort_values(keys).head(5)

    tm.assert_frame_equal(bottom_rows, by_sorting)
1173-
1174-
def test_nsmallest_nlargest_duplicate_index(self):
    """nsmallest/nlargest on a frame whose index labels repeat.

    Each selection is compared with ``sort_values(...).head(n)`` on the
    same columns and direction.
    """
    # GH 13412
    frame = pd.DataFrame({'a': [1, 2, 3, 4, 4],
                          'b': [1, 1, 1, 1, 1],
                          'c': [0, 1, 2, 5, 4]},
                         index=[0, 0, 1, 1, 1])

    # (selector name, ranking columns, sort direction of the reference),
    # run in this order.
    scenarios = [
        ('nsmallest', ['a', 'b', 'c'], True),
        ('nlargest', ['a', 'b', 'c'], False),
        ('nlargest', ['c', 'b', 'a'], False),
        ('nsmallest', ['c', 'b', 'a'], True),
    ]
    for selector, keys, ascending in scenarios:
        picked = getattr(frame, selector)(4, keys)
        reference = frame.sort_values(keys, ascending=ascending).head(4)
        tm.assert_frame_equal(picked, reference)

    # Test all duplicates still returns df of size n
    picked = frame.nsmallest(2, 'b')
    reference = frame.sort_values('b').head(2)
    tm.assert_frame_equal(picked, reference)
1201-
1202-
def test_nsmallest_nlargest_duplicate_multi_index(self):
    """nsmallest on a two-level index in which every row has the same label."""
    keys = ['a', 'b', 'c']
    repeated_index = [[0, 0, 0, 0, 0], [1, 1, 1, 1, 1]]
    frame = pd.DataFrame({'a': [1, 2, 3, 3, 3],
                          'b': [1, 1, 1, 1, 1],
                          'c': [0, 1, 2, 5, 4]},
                         index=repeated_index)

    picked = frame.nsmallest(4, keys)
    reference = frame.sort_values(keys).head(4)

    tm.assert_frame_equal(picked, reference)
1210-
12111123
# ----------------------------------------------------------------------
12121124
# Isin
12131125

@@ -1987,3 +1899,86 @@ def test_dot(self):
19871899

19881900
with tm.assertRaisesRegexp(ValueError, 'aligned'):
19891901
df.dot(df2)
1902+
1903+
1904+
@pytest.fixture
def df_duplicates():
    """Five-row integer frame with repeated values and repeated index labels."""
    columns = {'a': [1, 2, 3, 4, 4],
               'b': [1, 1, 1, 1, 1],
               'c': [0, 1, 2, 5, 4]}
    return pd.DataFrame(columns, index=[0, 0, 1, 1, 1])
1910+
1911+
1912+
@pytest.fixture
def df_strings():
    """Ten-row frame: shuffled ints ('a'), letters ('b'), shuffled floats ('c')."""
    # 'a' is drawn before 'c', matching the order of the random calls.
    shuffled_ints = np.random.permutation(10)
    letters = list(ascii_lowercase[:10])
    shuffled_floats = np.random.permutation(10).astype('float64')
    return pd.DataFrame({'a': shuffled_ints,
                         'b': letters,
                         'c': shuffled_floats})
1917+
1918+
1919+
class TestNLargestSmallest(object):
    """Tests for ``DataFrame.nsmallest`` and ``DataFrame.nlargest``.

    The parametrized tests compare each selection with
    ``sort_values(order).head(n)`` in the matching direction.
    """

    # ----------------------------------------------------------------------
    # Top / bottom
    @pytest.mark.parametrize(
        'n, order',
        product(range(1, 11),
                [['a', 'b', 'c'],
                 ['c', 'b', 'a'],
                 ['a'],
                 ['c'],
                 ['a', 'b'],
                 ['c', 'b']]))
    def test_n(self, df_strings, n, order):
        """Every ``n`` from 1 to 10 and each column ordering on ``df_strings``."""
        # GH10393
        frame = df_strings

        smallest = frame.nsmallest(n, order)
        ascending_head = frame.sort_values(order).head(n)
        tm.assert_frame_equal(smallest, ascending_head)

        largest = frame.nlargest(n, order)
        descending_head = frame.sort_values(order, ascending=False).head(n)
        tm.assert_frame_equal(largest, descending_head)

    def test_n_error(self, df_strings):
        """Ranking on column 'b' by itself is expected to raise TypeError."""
        # b alone raises a TypeError
        for selector in (df_strings.nsmallest, df_strings.nlargest):
            with pytest.raises(TypeError):
                selector(1, 'b')

    def test_n_identical_values(self):
        """When every value in 'a' is tied, the first three rows come back."""
        # GH15297
        frame = pd.DataFrame({'a': [1] * 5, 'b': [1, 2, 3, 4, 5]})

        # nlargest: compared against a frame built with an explicit index.
        largest = frame.nlargest(3, 'a')
        want_largest = pd.DataFrame({'a': [1] * 3, 'b': [1, 2, 3]},
                                    index=[0, 1, 2])
        tm.assert_frame_equal(largest, want_largest)

        # nsmallest: compared against a frame built with the default index.
        smallest = frame.nsmallest(3, 'a')
        want_smallest = pd.DataFrame({'a': [1] * 3, 'b': [1, 2, 3]})
        tm.assert_frame_equal(smallest, want_smallest)

    @pytest.mark.parametrize(
        'n, order',
        product([1, 2, 3, 4, 5],
                [['a', 'b', 'c'],
                 ['c', 'b', 'a'],
                 ['a'],
                 ['b'],
                 ['a', 'b'],
                 ['c', 'b']]))
    def test_n_duplicate_index(self, df_duplicates, n, order):
        """Every ``n`` from 1 to 5 and each column ordering on ``df_duplicates``."""
        # GH 13412
        frame = df_duplicates

        smallest = frame.nsmallest(n, order)
        ascending_head = frame.sort_values(order).head(n)
        tm.assert_frame_equal(smallest, ascending_head)

        largest = frame.nlargest(n, order)
        descending_head = frame.sort_values(order, ascending=False).head(n)
        tm.assert_frame_equal(largest, descending_head)

0 commit comments

Comments
 (0)