|
5 | 5 | from numpy.random import randn
|
6 | 6 |
|
7 | 7 | from datetime import datetime
|
8 |
| -from pandas.compat import StringIO, iteritems, PY2 |
| 8 | +from pandas.compat import StringIO, iteritems |
9 | 9 | import pandas as pd
|
10 | 10 | from pandas import (DataFrame, concat,
|
11 | 11 | read_csv, isna, Series, date_range,
|
@@ -852,8 +852,9 @@ def test_append_dtype_coerce(self):
|
852 | 852 | dt.datetime(2013, 1, 2, 0, 0),
|
853 | 853 | dt.datetime(2013, 1, 3, 0, 0),
|
854 | 854 | dt.datetime(2013, 1, 4, 0, 0)],
|
855 |
| - name='start_time')], axis=1) |
856 |
| - result = df1.append(df2, ignore_index=True) |
| 855 | + name='start_time')], |
| 856 | + axis=1, sort=True) |
| 857 | + result = df1.append(df2, ignore_index=True, sort=True) |
857 | 858 | assert_frame_equal(result, expected)
|
858 | 859 |
|
859 | 860 | def test_append_missing_column_proper_upcast(self):
|
@@ -1011,7 +1012,8 @@ def test_concat_ignore_index(self):
|
1011 | 1012 | frame1.index = Index(["x", "y", "z"])
|
1012 | 1013 | frame2.index = Index(["x", "y", "q"])
|
1013 | 1014 |
|
1014 |
| - v1 = concat([frame1, frame2], axis=1, ignore_index=True) |
| 1015 | + v1 = concat([frame1, frame2], axis=1, |
| 1016 | + ignore_index=True, sort=True) |
1015 | 1017 |
|
1016 | 1018 | nan = np.nan
|
1017 | 1019 | expected = DataFrame([[nan, nan, nan, 4.3],
|
@@ -1463,7 +1465,7 @@ def test_concat_series_axis1(self):
|
1463 | 1465 | # must reindex, #2603
|
1464 | 1466 | s = Series(randn(3), index=['c', 'a', 'b'], name='A')
|
1465 | 1467 | s2 = Series(randn(4), index=['d', 'a', 'b', 'c'], name='B')
|
1466 |
| - result = concat([s, s2], axis=1) |
| 1468 | + result = concat([s, s2], axis=1, sort=True) |
1467 | 1469 | expected = DataFrame({'A': s, 'B': s2})
|
1468 | 1470 | assert_frame_equal(result, expected)
|
1469 | 1471 |
|
@@ -2070,8 +2072,6 @@ def test_concat_order(self):
|
2070 | 2072 | for i in range(100)]
|
2071 | 2073 | result = pd.concat(dfs).columns
|
2072 | 2074 | expected = dfs[0].columns
|
2073 |
| - if PY2: |
2074 |
| - expected = expected.sort_values() |
2075 | 2075 | tm.assert_index_equal(result, expected)
|
2076 | 2076 |
|
2077 | 2077 | def test_concat_datetime_timezone(self):
|
@@ -2155,3 +2155,24 @@ def test_concat_empty_and_non_empty_series_regression():
|
2155 | 2155 | expected = s1
|
2156 | 2156 | result = pd.concat([s1, s2])
|
2157 | 2157 | tm.assert_series_equal(result, expected)
|
| 2158 | + |
| 2159 | + |
def test_concat_preserve_column_order_differing_columns():
    """Regression test for GH 4588: new columns introduced by ``concat``.

    Two single-row frames share column 'C' but each has one column the
    other lacks. The concatenated frame's columns must come out as
    ``['C', 'A', 'Z']``.
    """
    left = pd.DataFrame(data=[[1, 2]], columns=['C', 'A'])
    right = pd.DataFrame(data=[[5, 6]], columns=['C', 'Z'])

    combined_columns = pd.concat([left, right]).columns.tolist()

    assert combined_columns == ['C', 'A', 'Z']
| 2167 | + |
| 2168 | + |
def test_concat_preserve_column_order_uneven_data():
    """Regression test for GH 4588: concat with uneven data.

    A frame is built by assigning columns one at a time in the order
    'b', 'c', 'a', then concatenated with a shorter frame that only has
    column 'a'. The result's columns must come out as ``['b', 'c', 'a']``.
    """
    wide = pd.DataFrame()
    # Assign the columns individually, in this order, onto the empty frame.
    for label in ('b', 'c', 'a'):
        wide[label] = [1, 2, 3]

    narrow = pd.DataFrame({'a': [4, 5]})

    stacked = pd.concat([wide, narrow])
    assert stacked.columns.tolist() == ['b', 'c', 'a']
0 commit comments