Skip to content

TST: Clean up tests of DataFrame.sort_{index,values} #13496

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Jul 11, 2016
8 changes: 6 additions & 2 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,12 @@
# ---------------------------------------------------------------------
# Docstring templates

_shared_doc_kwargs = dict(axes='index, columns', klass='DataFrame',
axes_single_arg="{0, 1, 'index', 'columns'}")
_shared_doc_kwargs = dict(
axes='index, columns', klass='DataFrame',
axes_single_arg="{0, 1, 'index', 'columns'}",
optional_by="""
by : str or list of str
Name or list of names which refer to the axis items.""")

_numeric_only_doc = """numeric_only : boolean, default None
Include only float, int, boolean data. If None, will attempt to use
Expand Down
32 changes: 17 additions & 15 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,13 @@
# goal is to be able to define the docs close to function, while still being
# able to share
_shared_docs = dict()
_shared_doc_kwargs = dict(axes='keywords for axes', klass='NDFrame',
axes_single_arg='int or labels for object',
args_transpose='axes to permute (int or label for'
' object)')
_shared_doc_kwargs = dict(
axes='keywords for axes', klass='NDFrame',
axes_single_arg='int or labels for object',
args_transpose='axes to permute (int or label for object)',
optional_by="""
by : str or list of str
Name or list of names which refer to the axis items.""")


def is_dictlike(x):
Expand Down Expand Up @@ -1956,21 +1959,20 @@ def add_suffix(self, suffix):
.. versionadded:: 0.17.0

Parameters
----------
by : string name or list of names which refer to the axis items
axis : %(axes)s to direct sorting
ascending : bool or list of bool
----------%(optional_by)s
axis : %(axes)s to direct sorting, default 0
ascending : bool or list of bool, default True
Sort ascending vs. descending. Specify list for multiple sort
orders. If this is a list of bools, must match the length of
the by.
inplace : bool
inplace : bool, default False
if True, perform operation in-place
kind : {`quicksort`, `mergesort`, `heapsort`}
kind : {'quicksort', 'mergesort', 'heapsort'}, default 'quicksort'
Choice of sorting algorithm. See also ndarray.np.sort for more
information. `mergesort` is the only stable algorithm. For
DataFrames, this option is only applied when sorting on a single
column or label.
na_position : {'first', 'last'}
na_position : {'first', 'last'}, default 'last'
`first` puts NaNs at the beginning, `last` puts NaNs at the end

Returns
Expand All @@ -1992,16 +1994,16 @@ def sort_values(self, by, axis=0, ascending=True, inplace=False,
if not None, sort on values in specified index level(s)
ascending : boolean, default True
Sort ascending vs. descending
inplace : bool
inplace : bool, default False
if True, perform operation in-place
kind : {`quicksort`, `mergesort`, `heapsort`}
kind : {'quicksort', 'mergesort', 'heapsort'}, default 'quicksort'
Choice of sorting algorithm. See also ndarray.np.sort for more
information. `mergesort` is the only stable algorithm. For
DataFrames, this option is only applied when sorting on a single
column or label.
na_position : {'first', 'last'}
na_position : {'first', 'last'}, default 'last'
`first` puts NaNs at the beginning, `last` puts NaNs at the end
sort_remaining : bool
sort_remaining : bool, default True
if true and sorting by level and index is multilevel, sort by other
levels too (in order) after sorting by specified level

Expand Down
3 changes: 2 additions & 1 deletion pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,8 @@
axes='index', klass='Series', axes_single_arg="{0, 'index'}",
inplace="""inplace : boolean, default False
If True, performs operation inplace and returns None.""",
duplicated='Series')
duplicated='Series',
optional_by='')


def _coerce_method(converter):
Expand Down
116 changes: 55 additions & 61 deletions pandas/tests/frame/test_sorting.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,75 +21,68 @@ class TestDataFrameSorting(tm.TestCase, TestData):

_multiprocess_can_split_ = True

def test_sort_values(self):
# API for 9816
def test_sort_index(self):
# GH13496

# sort_index
frame = DataFrame(np.arange(16).reshape(4, 4), index=[1, 2, 3, 4],
Copy link
Contributor

@jreback jreback Jul 9, 2016

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add this issue number (the PR number) as a comment so that future readers can reference it.

columns=['A', 'B', 'C', 'D'])

# 9816 deprecated
with tm.assert_produces_warning(FutureWarning):
frame.sort(columns='A')
with tm.assert_produces_warning(FutureWarning):
frame.sort()

# axis=0 : sort rows by index labels
unordered = frame.ix[[3, 2, 4, 1]]
expected = unordered.sort_index()

result = unordered.sort_index(axis=0)
expected = frame
assert_frame_equal(result, expected)

unordered = frame.ix[:, [2, 1, 3, 0]]
expected = unordered.sort_index(axis=1)
result = unordered.sort_index(ascending=False)
expected = frame[::-1]
assert_frame_equal(result, expected)

# axis=1 : sort columns by column names
unordered = frame.ix[:, [2, 1, 3, 0]]
result = unordered.sort_index(axis=1)
assert_frame_equal(result, expected)
assert_frame_equal(result, frame)

result = unordered.sort_index(axis=1, ascending=False)
expected = frame.ix[:, ::-1]
assert_frame_equal(result, expected)

# sortlevel
mi = MultiIndex.from_tuples([[1, 1, 3], [1, 1, 1]], names=list('ABC'))
def test_sort_index_multiindex(self):
# GH13496

# sort rows by specified level of multi-index
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would move the multi-index part into a second test (i.e., split long tests into several smaller ones).

mi = MultiIndex.from_tuples([[2, 1, 3], [1, 1, 1]], names=list('ABC'))
df = DataFrame([[1, 2], [3, 4]], mi)

result = df.sort_index(level='A', sort_remaining=False)
expected = df.sortlevel('A', sort_remaining=False)
assert_frame_equal(result, expected)

# sort columns by specified level of multi-index
df = df.T
result = df.sort_index(level='A', axis=1, sort_remaining=False)
expected = df.sortlevel('A', axis=1, sort_remaining=False)
assert_frame_equal(result, expected)

# MI sort, but no by
# MI sort, but no level: sort_remaining has no effect
mi = MultiIndex.from_tuples([[1, 1, 3], [1, 1, 1]], names=list('ABC'))
df = DataFrame([[1, 2], [3, 4]], mi)
result = df.sort_index(sort_remaining=False)
expected = df.sort_index()
assert_frame_equal(result, expected)

def test_sort_index(self):
def test_sort(self):
frame = DataFrame(np.arange(16).reshape(4, 4), index=[1, 2, 3, 4],
columns=['A', 'B', 'C', 'D'])

# axis=0
unordered = frame.ix[[3, 2, 4, 1]]
sorted_df = unordered.sort_index(axis=0)
expected = frame
assert_frame_equal(sorted_df, expected)

sorted_df = unordered.sort_index(ascending=False)
expected = frame[::-1]
assert_frame_equal(sorted_df, expected)

# axis=1
unordered = frame.ix[:, ['D', 'B', 'C', 'A']]
sorted_df = unordered.sort_index(axis=1)
expected = frame
assert_frame_equal(sorted_df, expected)
# 9816 deprecated
with tm.assert_produces_warning(FutureWarning):
frame.sort(columns='A')
with tm.assert_produces_warning(FutureWarning):
frame.sort()

sorted_df = unordered.sort_index(axis=1, ascending=False)
expected = frame.ix[:, ::-1]
assert_frame_equal(sorted_df, expected)
def test_sort_values(self):
frame = DataFrame([[1, 1, 2], [3, 1, 0], [4, 5, 6]],
index=[1, 2, 3], columns=list('ABC'))

# by column
sorted_df = frame.sort_values(by='A')
Expand All @@ -109,16 +102,17 @@ def test_sort_index(self):
sorted_df = frame.sort_values(by=['A'], ascending=[False])
assert_frame_equal(sorted_df, expected)

# check for now
sorted_df = frame.sort_values(by='A')
assert_frame_equal(sorted_df, expected[::-1])
expected = frame.sort_values(by='A')
# multiple bys
sorted_df = frame.sort_values(by=['B', 'C'])
expected = frame.loc[[2, 1, 3]]
assert_frame_equal(sorted_df, expected)

expected = frame.sort_values(by=['A', 'B'], ascending=False)
sorted_df = frame.sort_values(by=['A', 'B'])
sorted_df = frame.sort_values(by=['B', 'C'], ascending=False)
assert_frame_equal(sorted_df, expected[::-1])

sorted_df = frame.sort_values(by=['B', 'A'], ascending=[True, False])
assert_frame_equal(sorted_df, expected)

self.assertRaises(ValueError, lambda: frame.sort_values(
by=['A', 'B'], axis=2, inplace=True))

Expand All @@ -130,6 +124,25 @@ def test_sort_index(self):
with assertRaisesRegexp(ValueError, msg):
frame.sort_values(by=['A', 'B'], axis=0, ascending=[True] * 5)

def test_sort_values_inplace(self):
frame = DataFrame(np.random.randn(4, 4), index=[1, 2, 3, 4],
columns=['A', 'B', 'C', 'D'])

sorted_df = frame.copy()
sorted_df.sort_values(by='A', inplace=True)
expected = frame.sort_values(by='A')
assert_frame_equal(sorted_df, expected)

sorted_df = frame.copy()
sorted_df.sort_values(by='A', ascending=False, inplace=True)
expected = frame.sort_values(by='A', ascending=False)
assert_frame_equal(sorted_df, expected)

sorted_df = frame.copy()
sorted_df.sort_values(by=['A', 'B'], ascending=False, inplace=True)
expected = frame.sort_values(by=['A', 'B'], ascending=False)
assert_frame_equal(sorted_df, expected)

def test_sort_index_categorical_index(self):

df = (DataFrame({'A': np.arange(6, dtype='int64'),
Expand Down Expand Up @@ -361,25 +374,6 @@ def test_sort_index_different_sortorder(self):
result = idf['C'].sort_index(ascending=[1, 0])
assert_series_equal(result, expected['C'])

def test_sort_inplace(self):
frame = DataFrame(np.random.randn(4, 4), index=[1, 2, 3, 4],
columns=['A', 'B', 'C', 'D'])

sorted_df = frame.copy()
sorted_df.sort_values(by='A', inplace=True)
expected = frame.sort_values(by='A')
assert_frame_equal(sorted_df, expected)

sorted_df = frame.copy()
sorted_df.sort_values(by='A', ascending=False, inplace=True)
expected = frame.sort_values(by='A', ascending=False)
assert_frame_equal(sorted_df, expected)

sorted_df = frame.copy()
sorted_df.sort_values(by=['A', 'B'], ascending=False, inplace=True)
expected = frame.sort_values(by=['A', 'B'], ascending=False)
assert_frame_equal(sorted_df, expected)

def test_sort_index_duplicates(self):

# with 9816, these are all translated to .sort_values
Expand Down
Loading