Skip to content

Commit 0835997

Browse files
committed
merge with master yet again
2 parents 0e60770 + f06b969 commit 0835997

File tree

295 files changed

+13297
-10440
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

295 files changed

+13297
-10440
lines changed

.circleci/config.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,5 +34,5 @@ jobs:
3434
command: |
3535
export PATH="$MINICONDA_DIR/bin:$PATH"
3636
source activate pandas-dev
37-
echo "pytest --strict --durations=10 --color=no --junitxml=$CIRCLE_TEST_REPORTS/reports/junit.xml --skip-slow --skip-network pandas"
38-
pytest --strict --durations=10 --color=no --junitxml=$CIRCLE_TEST_REPORTS/reports/junit.xml --skip-slow --skip-network pandas
37+
echo "pytest -m \"not slow and not network\" --strict --durations=10 --color=no --junitxml=$CIRCLE_TEST_REPORTS/reports/junit.xml pandas"
38+
pytest -m "not slow and not network" --strict --durations=10 --color=no --junitxml=$CIRCLE_TEST_REPORTS/reports/junit.xml pandas

.travis.yml

Lines changed: 10 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -34,28 +34,28 @@ matrix:
3434
include:
3535
- dist: trusty
3636
env:
37-
- JOB="3.7" ENV_FILE="ci/deps/travis-37.yaml" TEST_ARGS="--skip-slow --skip-network"
37+
- JOB="3.7" ENV_FILE="ci/deps/travis-37.yaml" PATTERN="not slow and not network"
3838

3939
- dist: trusty
4040
env:
41-
- JOB="2.7, locale, slow, old NumPy" ENV_FILE="ci/deps/travis-27-locale.yaml" LOCALE_OVERRIDE="zh_CN.UTF-8" SLOW=true
41+
- JOB="2.7, locale, slow, old NumPy" ENV_FILE="ci/deps/travis-27-locale.yaml" LOCALE_OVERRIDE="zh_CN.UTF-8" PATTERN="slow"
4242
addons:
4343
apt:
4444
packages:
4545
- language-pack-zh-hans
4646
- dist: trusty
4747
env:
48-
- JOB="2.7" ENV_FILE="ci/deps/travis-27.yaml" TEST_ARGS="--skip-slow"
48+
- JOB="2.7" ENV_FILE="ci/deps/travis-27.yaml" PATTERN="not slow"
4949
addons:
5050
apt:
5151
packages:
5252
- python-gtk2
5353
- dist: trusty
5454
env:
55-
- JOB="3.6, lint, coverage" ENV_FILE="ci/deps/travis-36.yaml" TEST_ARGS="--skip-slow --skip-network" PANDAS_TESTING_MODE="deprecate" COVERAGE=true LINT=true
55+
- JOB="3.6, coverage" ENV_FILE="ci/deps/travis-36.yaml" PATTERN="not slow and not network" PANDAS_TESTING_MODE="deprecate" COVERAGE=true
5656
- dist: trusty
5757
env:
58-
- JOB="3.7, NumPy dev" ENV_FILE="ci/deps/travis-37-numpydev.yaml" TEST_ARGS="--skip-slow --skip-network -W error" PANDAS_TESTING_MODE="deprecate"
58+
- JOB="3.7, NumPy dev" ENV_FILE="ci/deps/travis-37-numpydev.yaml" PATTERN="not slow and not network" TEST_ARGS="-W error" PANDAS_TESTING_MODE="deprecate"
5959
addons:
6060
apt:
6161
packages:
@@ -64,7 +64,7 @@ matrix:
6464
# In allow_failures
6565
- dist: trusty
6666
env:
67-
- JOB="3.6, slow" ENV_FILE="ci/deps/travis-36-slow.yaml" SLOW=true
67+
- JOB="3.6, slow" ENV_FILE="ci/deps/travis-36-slow.yaml" PATTERN="slow"
6868

6969
# In allow_failures
7070
- dist: trusty
@@ -73,7 +73,7 @@ matrix:
7373
allow_failures:
7474
- dist: trusty
7575
env:
76-
- JOB="3.6, slow" ENV_FILE="ci/deps/travis-36-slow.yaml" SLOW=true
76+
- JOB="3.6, slow" ENV_FILE="ci/deps/travis-36-slow.yaml" PATTERN="slow"
7777
- dist: trusty
7878
env:
7979
- JOB="3.6, doc" ENV_FILE="ci/deps/travis-36-doc.yaml" DOC=true
@@ -107,20 +107,15 @@ script:
107107
- echo "script start"
108108
- source activate pandas-dev
109109
- ci/run_build_docs.sh
110-
- ci/script_single.sh
111-
- ci/script_multi.sh
112-
- ci/code_checks.sh
113-
114-
after_success:
115-
- ci/upload_coverage.sh
110+
- ci/run_tests.sh
116111

117112
after_script:
118113
- echo "after_script start"
119114
- source activate pandas-dev && pushd /tmp && python -c "import pandas; pandas.show_versions();" && popd
120115
- if [ -e test-data-single.xml ]; then
121-
ci/print_skipped.py test-data-single.xml;
116+
ci/print_skipped.py test-data-single.xml;
122117
fi
123118
- if [ -e test-data-multiple.xml ]; then
124-
ci/print_skipped.py test-data-multiple.xml;
119+
ci/print_skipped.py test-data-multiple.xml;
125120
fi
126121
- echo "after_script done"

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -171,7 +171,7 @@ pip install pandas
171171
```
172172

173173
## Dependencies
174-
- [NumPy](https://www.numpy.org): 1.9.0 or higher
174+
- [NumPy](https://www.numpy.org): 1.12.0 or higher
175175
- [python-dateutil](https://labix.org/python-dateutil): 2.5.0 or higher
176176
- [pytz](https://pythonhosted.org/pytz): 2011k or higher
177177

asv_bench/benchmarks/binary_ops.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,8 @@ def setup(self):
5252
np.iinfo(np.int16).max,
5353
size=(N, N)))
5454

55+
self.s = Series(np.random.randn(N))
56+
5557
# Division
5658

5759
def time_frame_float_div(self):
@@ -74,6 +76,17 @@ def time_frame_int_mod(self):
7476
def time_frame_float_mod(self):
7577
self.df % self.df2
7678

79+
# Dot product
80+
81+
def time_frame_dot(self):
82+
self.df.dot(self.df2)
83+
84+
def time_series_dot(self):
85+
self.s.dot(self.s)
86+
87+
def time_frame_series_dot(self):
88+
self.df.dot(self.s)
89+
7790

7891
class Timeseries(object):
7992

asv_bench/benchmarks/categoricals.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,8 @@ def setup(self):
4646
self.values_some_nan = list(np.tile(self.categories + [np.nan], N))
4747
self.values_all_nan = [np.nan] * len(self.values)
4848
self.values_all_int8 = np.ones(N, 'int8')
49+
self.categorical = pd.Categorical(self.values, self.categories)
50+
self.series = pd.Series(self.categorical)
4951

5052
def time_regular(self):
5153
pd.Categorical(self.values, self.categories)
@@ -68,6 +70,12 @@ def time_all_nan(self):
6870
def time_from_codes_all_int8(self):
6971
pd.Categorical.from_codes(self.values_all_int8, self.categories)
7072

73+
def time_existing_categorical(self):
74+
pd.Categorical(self.categorical)
75+
76+
def time_existing_series(self):
77+
pd.Categorical(self.series)
78+
7179

7280
class ValueCounts(object):
7381

asv_bench/benchmarks/frame_methods.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,36 @@ def time_reindex_upcast(self):
6969
self.df2.reindex(np.random.permutation(range(1200)))
7070

7171

72+
class Rename(object):
73+
74+
def setup(self):
75+
N = 10**3
76+
self.df = DataFrame(np.random.randn(N * 10, N))
77+
self.idx = np.arange(4 * N, 7 * N)
78+
self.dict_idx = {k: k for k in self.idx}
79+
self.df2 = DataFrame(
80+
{c: {0: np.random.randint(0, 2, N).astype(np.bool_),
81+
1: np.random.randint(0, N, N).astype(np.int16),
82+
2: np.random.randint(0, N, N).astype(np.int32),
83+
3: np.random.randint(0, N, N).astype(np.int64)}
84+
[np.random.randint(0, 4)] for c in range(N)})
85+
86+
def time_rename_single(self):
87+
self.df.rename({0: 0})
88+
89+
def time_rename_axis0(self):
90+
self.df.rename(self.dict_idx)
91+
92+
def time_rename_axis1(self):
93+
self.df.rename(columns=self.dict_idx)
94+
95+
def time_rename_both_axes(self):
96+
self.df.rename(index=self.dict_idx, columns=self.dict_idx)
97+
98+
def time_dict_rename_both_axes(self):
99+
self.df.rename(index=self.dict_idx, columns=self.dict_idx)
100+
101+
72102
class Iteration(object):
73103

74104
def setup(self):

asv_bench/benchmarks/plotting.py

Lines changed: 42 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -8,17 +8,48 @@
88
matplotlib.use('Agg')
99

1010

11-
class Plotting(object):
12-
13-
def setup(self):
14-
self.s = Series(np.random.randn(1000000))
15-
self.df = DataFrame({'col': self.s})
16-
17-
def time_series_plot(self):
18-
self.s.plot()
19-
20-
def time_frame_plot(self):
21-
self.df.plot()
11+
class SeriesPlotting(object):
12+
params = [['line', 'bar', 'area', 'barh', 'hist', 'kde', 'pie']]
13+
param_names = ['kind']
14+
15+
def setup(self, kind):
16+
if kind in ['bar', 'barh', 'pie']:
17+
n = 100
18+
elif kind in ['kde']:
19+
n = 10000
20+
else:
21+
n = 1000000
22+
23+
self.s = Series(np.random.randn(n))
24+
if kind in ['area', 'pie']:
25+
self.s = self.s.abs()
26+
27+
def time_series_plot(self, kind):
28+
self.s.plot(kind=kind)
29+
30+
31+
class FramePlotting(object):
32+
params = [['line', 'bar', 'area', 'barh', 'hist', 'kde', 'pie', 'scatter',
33+
'hexbin']]
34+
param_names = ['kind']
35+
36+
def setup(self, kind):
37+
if kind in ['bar', 'barh', 'pie']:
38+
n = 100
39+
elif kind in ['kde', 'scatter', 'hexbin']:
40+
n = 10000
41+
else:
42+
n = 1000000
43+
44+
self.x = Series(np.random.randn(n))
45+
self.y = Series(np.random.randn(n))
46+
if kind in ['area', 'pie']:
47+
self.x = self.x.abs()
48+
self.y = self.y.abs()
49+
self.df = DataFrame({'x': self.x, 'y': self.y})
50+
51+
def time_frame_plot(self, kind):
52+
self.df.plot(x='x', y='y', kind=kind)
2253

2354

2455
class TimeseriesPlotting(object):

asv_bench/benchmarks/reshape.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,4 +146,42 @@ def time_get_dummies_1d_sparse(self):
146146
pd.get_dummies(self.s, sparse=True)
147147

148148

149+
class Cut(object):
150+
params = [[4, 10, 1000]]
151+
param_names = ['bins']
152+
153+
def setup(self, bins):
154+
N = 10**5
155+
self.int_series = pd.Series(np.arange(N).repeat(5))
156+
self.float_series = pd.Series(np.random.randn(N).repeat(5))
157+
self.timedelta_series = pd.Series(np.random.randint(N, size=N),
158+
dtype='timedelta64[ns]')
159+
self.datetime_series = pd.Series(np.random.randint(N, size=N),
160+
dtype='datetime64[ns]')
161+
162+
def time_cut_int(self, bins):
163+
pd.cut(self.int_series, bins)
164+
165+
def time_cut_float(self, bins):
166+
pd.cut(self.float_series, bins)
167+
168+
def time_cut_timedelta(self, bins):
169+
pd.cut(self.timedelta_series, bins)
170+
171+
def time_cut_datetime(self, bins):
172+
pd.cut(self.datetime_series, bins)
173+
174+
def time_qcut_int(self, bins):
175+
pd.qcut(self.int_series, bins)
176+
177+
def time_qcut_float(self, bins):
178+
pd.qcut(self.float_series, bins)
179+
180+
def time_qcut_timedelta(self, bins):
181+
pd.qcut(self.timedelta_series, bins)
182+
183+
def time_qcut_datetime(self, bins):
184+
pd.qcut(self.datetime_series, bins)
185+
186+
149187
from .pandas_vb_common import setup # noqa: F401

asv_bench/benchmarks/rolling.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,42 @@ def time_rolling(self, constructor, window, dtype, method):
2121
getattr(self.roll, method)()
2222

2323

24+
class ExpandingMethods(object):
25+
26+
sample_time = 0.2
27+
params = (['DataFrame', 'Series'],
28+
['int', 'float'],
29+
['median', 'mean', 'max', 'min', 'std', 'count', 'skew', 'kurt',
30+
'sum'])
31+
param_names = ['constructor', 'dtype', 'method']
32+
33+
def setup(self, constructor, dtype, method):
34+
N = 10**5
35+
arr = (100 * np.random.random(N)).astype(dtype)
36+
self.expanding = getattr(pd, constructor)(arr).expanding()
37+
38+
def time_expanding(self, constructor, dtype, method):
39+
getattr(self.expanding, method)()
40+
41+
42+
class EWMMethods(object):
43+
44+
sample_time = 0.2
45+
params = (['DataFrame', 'Series'],
46+
[10, 1000],
47+
['int', 'float'],
48+
['mean', 'std'])
49+
param_names = ['constructor', 'window', 'dtype', 'method']
50+
51+
def setup(self, constructor, window, dtype, method):
52+
N = 10**5
53+
arr = (100 * np.random.random(N)).astype(dtype)
54+
self.ewm = getattr(pd, constructor)(arr).ewm(halflife=window)
55+
56+
def time_ewm(self, constructor, window, dtype, method):
57+
getattr(self.ewm, method)()
58+
59+
2460
class VariableWindowMethods(Methods):
2561
sample_time = 0.2
2662
params = (['DataFrame', 'Series'],

asv_bench/benchmarks/stat_ops.py

Lines changed: 32 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -96,14 +96,42 @@ def time_average_old(self, constructor, pct):
9696

9797
class Correlation(object):
9898

99-
params = ['spearman', 'kendall', 'pearson']
100-
param_names = ['method']
99+
params = [['spearman', 'kendall', 'pearson'], [True, False]]
100+
param_names = ['method', 'use_bottleneck']
101101

102-
def setup(self, method):
102+
def setup(self, method, use_bottleneck):
103+
try:
104+
pd.options.compute.use_bottleneck = use_bottleneck
105+
except TypeError:
106+
from pandas.core import nanops
107+
nanops._USE_BOTTLENECK = use_bottleneck
103108
self.df = pd.DataFrame(np.random.randn(1000, 30))
109+
self.s = pd.Series(np.random.randn(1000))
110+
self.s2 = pd.Series(np.random.randn(1000))
104111

105-
def time_corr(self, method):
112+
def time_corr(self, method, use_bottleneck):
106113
self.df.corr(method=method)
107114

115+
def time_corr_series(self, method, use_bottleneck):
116+
self.s.corr(self.s2, method=method)
117+
118+
119+
class Covariance(object):
120+
121+
params = [[True, False]]
122+
param_names = ['use_bottleneck']
123+
124+
def setup(self, use_bottleneck):
125+
try:
126+
pd.options.compute.use_bottleneck = use_bottleneck
127+
except TypeError:
128+
from pandas.core import nanops
129+
nanops._USE_BOTTLENECK = use_bottleneck
130+
self.s = pd.Series(np.random.randn(100000))
131+
self.s2 = pd.Series(np.random.randn(100000))
132+
133+
def time_cov_series(self, use_bottleneck):
134+
self.s.cov(self.s2)
135+
108136

109137
from .pandas_vb_common import setup # noqa: F401

0 commit comments

Comments
 (0)