Skip to content

Commit 0cf7d9c

Browse files
committed
Merge branch 'master' into refactor/describe
2 parents b88cd6b + 4a08c02 commit 0cf7d9c

File tree

407 files changed

+14226
-12184
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

407 files changed

+14226
-12184
lines changed

.github/workflows/ci.yml

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -37,12 +37,6 @@ jobs:
3737
ci/code_checks.sh lint
3838
if: always()
3939

40-
- name: Dependencies consistency
41-
run: |
42-
source activate pandas-dev
43-
ci/code_checks.sh dependencies
44-
if: always()
45-
4640
- name: Checks on imported code
4741
run: |
4842
source activate pandas-dev

.pre-commit-config.yaml

Lines changed: 35 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -9,22 +9,23 @@ repos:
99
- id: flake8
1010
additional_dependencies: [flake8-comprehensions>=3.1.0]
1111
- id: flake8
12-
name: flake8-pyx
13-
files: \.(pyx|pxd)$
14-
types:
15-
- file
12+
name: flake8 (cython)
13+
types: [cython]
1614
args: [--append-config=flake8/cython.cfg]
1715
- id: flake8
18-
name: flake8-pxd
16+
name: flake8 (cython template)
1917
files: \.pxi\.in$
2018
types:
2119
- file
2220
args: [--append-config=flake8/cython-template.cfg]
2321
- repo: https://github.com/PyCQA/isort
24-
rev: 5.2.2
22+
rev: 5.6.3
2523
hooks:
2624
- id: isort
27-
exclude: ^pandas/__init__\.py$|^pandas/core/api\.py$
25+
name: isort (python)
26+
- id: isort
27+
name: isort (cython)
28+
types: [cython]
2829
- repo: https://github.com/asottile/pyupgrade
2930
rev: v2.7.2
3031
hooks:
@@ -39,10 +40,33 @@ repos:
3940
- id: pip_to_conda
4041
name: Generate pip dependency from conda
4142
description: This hook checks if the conda environment.yml and requirements-dev.txt are equal
42-
language: system
43+
language: python
4344
entry: python -m scripts.generate_pip_deps_from_conda
4445
files: ^(environment.yml|requirements-dev.txt)$
4546
pass_filenames: false
47+
additional_dependencies: [pyyaml]
48+
- id: flake8-rst
49+
name: flake8-rst
50+
description: Run flake8 on code snippets in docstrings or RST files
51+
language: python
52+
entry: flake8-rst
53+
types: [rst]
54+
args: [--filename=*.rst]
55+
additional_dependencies: [flake8-rst==0.7.0, flake8==3.7.9]
56+
- id: incorrect-sphinx-directives
57+
name: Check for incorrect Sphinx directives
58+
language: pygrep
59+
entry: >-
60+
\.\. (autosummary|contents|currentmodule|deprecated
61+
|function|image|important|include|ipython|literalinclude
62+
|math|module|note|raw|seealso|toctree|versionadded
63+
|versionchanged|warning):[^:]
64+
files: \.(py|pyx|rst)$
65+
- id: incorrect-code-directives
66+
name: Check for incorrect code block or IPython directives
67+
language: pygrep
68+
entry: (\.\. code-block ::|\.\. ipython ::)
69+
files: \.(py|pyx|rst)$
4670
- repo: https://github.com/asottile/yesqa
4771
rev: v1.2.2
4872
hooks:
@@ -51,4 +75,6 @@ repos:
5175
rev: v3.2.0
5276
hooks:
5377
- id: end-of-file-fixer
54-
exclude: '.html$|^LICENSES/|.csv$|.txt$|.svg$|.py$'
78+
exclude: ^LICENSES/|\.(html|csv|txt|svg|py)$
79+
- id: trailing-whitespace
80+
exclude: \.(html|svg)$

asv_bench/benchmarks/groupby.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -358,6 +358,26 @@ def time_category_size(self):
358358
self.draws.groupby(self.cats).size()
359359

360360

361+
class FillNA:
362+
def setup(self):
363+
N = 100
364+
self.df = DataFrame(
365+
{"group": [1] * N + [2] * N, "value": [np.nan, 1.0] * N}
366+
).set_index("group")
367+
368+
def time_df_ffill(self):
369+
self.df.groupby("group").fillna(method="ffill")
370+
371+
def time_df_bfill(self):
372+
self.df.groupby("group").fillna(method="bfill")
373+
374+
def time_srs_ffill(self):
375+
self.df.groupby("group")["value"].fillna(method="ffill")
376+
377+
def time_srs_bfill(self):
378+
self.df.groupby("group")["value"].fillna(method="bfill")
379+
380+
361381
class GroupByMethods:
362382

363383
param_names = ["dtype", "method", "application"]

asv_bench/benchmarks/io/pickle.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,5 +24,11 @@ def time_read_pickle(self):
2424
def time_write_pickle(self):
2525
self.df.to_pickle(self.fname)
2626

27+
def peakmem_read_pickle(self):
28+
read_pickle(self.fname)
29+
30+
def peakmem_write_pickle(self):
31+
self.df.to_pickle(self.fname)
32+
2733

2834
from ..pandas_vb_common import setup # noqa: F401 isort:skip

asv_bench/benchmarks/rolling.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,12 +76,21 @@ class ExpandingMethods:
7676

7777
def setup(self, constructor, dtype, method):
7878
N = 10 ** 5
79+
N_groupby = 100
7980
arr = (100 * np.random.random(N)).astype(dtype)
8081
self.expanding = getattr(pd, constructor)(arr).expanding()
82+
self.expanding_groupby = (
83+
pd.DataFrame({"A": arr[:N_groupby], "B": range(N_groupby)})
84+
.groupby("B")
85+
.expanding()
86+
)
8187

8288
def time_expanding(self, constructor, dtype, method):
8389
getattr(self.expanding, method)()
8490

91+
def time_expanding_groupby(self, constructor, dtype, method):
92+
getattr(self.expanding_groupby, method)()
93+
8594

8695
class EWMMethods:
8796

asv_bench/benchmarks/timeseries.py

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,14 @@
33
import dateutil
44
import numpy as np
55

6-
from pandas import DataFrame, Series, date_range, period_range, to_datetime
6+
from pandas import (
7+
DataFrame,
8+
Series,
9+
date_range,
10+
period_range,
11+
timedelta_range,
12+
to_datetime,
13+
)
714

815
from pandas.tseries.frequencies import infer_freq
916

@@ -121,12 +128,15 @@ def time_convert(self):
121128

122129
class Iteration:
123130

124-
params = [date_range, period_range]
131+
params = [date_range, period_range, timedelta_range]
125132
param_names = ["time_index"]
126133

127134
def setup(self, time_index):
128135
N = 10 ** 6
129-
self.idx = time_index(start="20140101", freq="T", periods=N)
136+
if time_index is timedelta_range:
137+
self.idx = time_index(start=0, freq="T", periods=N)
138+
else:
139+
self.idx = time_index(start="20140101", freq="T", periods=N)
130140
self.exit = 10000
131141

132142
def time_iter(self, time_index):

ci/code_checks.sh

Lines changed: 16 additions & 93 deletions
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,10 @@
1515
# $ ./ci/code_checks.sh code # checks on imported code
1616
# $ ./ci/code_checks.sh doctests # run doctests
1717
# $ ./ci/code_checks.sh docstrings # validate docstring errors
18-
# $ ./ci/code_checks.sh dependencies # check that dependencies are consistent
1918
# $ ./ci/code_checks.sh typing # run static type analysis
2019

21-
[[ -z "$1" || "$1" == "lint" || "$1" == "patterns" || "$1" == "code" || "$1" == "doctests" || "$1" == "docstrings" || "$1" == "dependencies" || "$1" == "typing" ]] || \
22-
{ echo "Unknown command $1. Usage: $0 [lint|patterns|code|doctests|docstrings|dependencies|typing]"; exit 9999; }
20+
[[ -z "$1" || "$1" == "lint" || "$1" == "patterns" || "$1" == "code" || "$1" == "doctests" || "$1" == "docstrings" || "$1" == "typing" ]] || \
21+
{ echo "Unknown command $1. Usage: $0 [lint|patterns|code|doctests|docstrings|typing]"; exit 9999; }
2322

2423
BASE_DIR="$(dirname $0)/.."
2524
RET=0
@@ -38,6 +37,12 @@ function invgrep {
3837
return $((! $EXIT_STATUS))
3938
}
4039

40+
function check_namespace {
41+
local -r CLASS="${1}"
42+
grep -R -l --include "*.py" " ${CLASS}(" pandas/tests | xargs grep -n "pd\.${CLASS}("
43+
test $? -gt 0
44+
}
45+
4146
if [[ "$GITHUB_ACTIONS" == "true" ]]; then
4247
FLAKE8_FORMAT="##[error]%(path)s:%(row)s:%(col)s:%(code)s:%(text)s"
4348
INVGREP_PREPEND="##[error]"
@@ -48,38 +53,6 @@ fi
4853
### LINTING ###
4954
if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then
5055

51-
echo "black --version"
52-
black --version
53-
54-
MSG='Checking black formatting' ; echo $MSG
55-
black . --check
56-
RET=$(($RET + $?)) ; echo $MSG "DONE"
57-
58-
# `setup.cfg` contains the list of error codes that are being ignored in flake8
59-
60-
echo "flake8 --version"
61-
flake8 --version
62-
63-
# pandas/_libs/src is C code, so no need to search there.
64-
MSG='Linting .py code' ; echo $MSG
65-
flake8 --format="$FLAKE8_FORMAT" .
66-
RET=$(($RET + $?)) ; echo $MSG "DONE"
67-
68-
MSG='Linting .pyx and .pxd code' ; echo $MSG
69-
flake8 --format="$FLAKE8_FORMAT" pandas --append-config=flake8/cython.cfg
70-
RET=$(($RET + $?)) ; echo $MSG "DONE"
71-
72-
MSG='Linting .pxi.in' ; echo $MSG
73-
flake8 --format="$FLAKE8_FORMAT" pandas/_libs --append-config=flake8/cython-template.cfg
74-
RET=$(($RET + $?)) ; echo $MSG "DONE"
75-
76-
echo "flake8-rst --version"
77-
flake8-rst --version
78-
79-
MSG='Linting code-blocks in .rst documentation' ; echo $MSG
80-
flake8-rst doc/source --filename=*.rst --format="$FLAKE8_FORMAT"
81-
RET=$(($RET + $?)) ; echo $MSG "DONE"
82-
8356
# Check that cython casting is of the form `<type>obj` as opposed to `<type> obj`;
8457
# it doesn't make a difference, but we want to be internally consistent.
8558
# Note: this grep pattern is (intended to be) equivalent to the python
@@ -132,19 +105,6 @@ if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then
132105
fi
133106
RET=$(($RET + $?)) ; echo $MSG "DONE"
134107

135-
echo "isort --version-number"
136-
isort --version-number
137-
138-
# Imports - Check formatting using isort see setup.cfg for settings
139-
MSG='Check import format using isort' ; echo $MSG
140-
ISORT_CMD="isort --quiet --check-only pandas asv_bench scripts web"
141-
if [[ "$GITHUB_ACTIONS" == "true" ]]; then
142-
eval $ISORT_CMD | awk '{print "##[error]" $0}'; RET=$(($RET + ${PIPESTATUS[0]}))
143-
else
144-
eval $ISORT_CMD
145-
fi
146-
RET=$(($RET + $?)) ; echo $MSG "DONE"
147-
148108
fi
149109

150110
### PATTERNS ###
@@ -187,14 +147,6 @@ if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then
187147
invgrep -r -E --include '*.py' "[[:space:]] pytest.raises" pandas/tests/
188148
RET=$(($RET + $?)) ; echo $MSG "DONE"
189149

190-
MSG='Check for python2-style file encodings' ; echo $MSG
191-
invgrep -R --include="*.py" --include="*.pyx" -E "# -\*- coding: utf-8 -\*-" pandas scripts
192-
RET=$(($RET + $?)) ; echo $MSG "DONE"
193-
194-
MSG='Check for python2-style super usage' ; echo $MSG
195-
invgrep -R --include="*.py" -E "super\(\w*, (self|cls)\)" pandas
196-
RET=$(($RET + $?)) ; echo $MSG "DONE"
197-
198150
MSG='Check for use of builtin filter function' ; echo $MSG
199151
invgrep -R --include="*.py" -P '(?<!def)[\(\s]filter\(' pandas
200152
RET=$(($RET + $?)) ; echo $MSG "DONE"
@@ -213,35 +165,15 @@ if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then
213165
invgrep -R --include="*.py" --include="*.pyx" -E "(DEPRECATED|DEPRECATE|Deprecated)(:|,|\.)" pandas
214166
RET=$(($RET + $?)) ; echo $MSG "DONE"
215167

216-
MSG='Check for python2 new-style classes and for empty parentheses' ; echo $MSG
217-
invgrep -R --include="*.py" --include="*.pyx" -E "class\s\S*\((object)?\):" pandas asv_bench/benchmarks scripts
218-
RET=$(($RET + $?)) ; echo $MSG "DONE"
219-
220168
MSG='Check for backticks incorrectly rendering because of missing spaces' ; echo $MSG
221169
invgrep -R --include="*.rst" -E "[a-zA-Z0-9]\`\`?[a-zA-Z0-9]" doc/source/
222170
RET=$(($RET + $?)) ; echo $MSG "DONE"
223171

224-
MSG='Check for incorrect sphinx directives' ; echo $MSG
225-
invgrep -R --include="*.py" --include="*.pyx" --include="*.rst" -E "\.\. (autosummary|contents|currentmodule|deprecated|function|image|important|include|ipython|literalinclude|math|module|note|raw|seealso|toctree|versionadded|versionchanged|warning):[^:]" ./pandas ./doc/source
226-
RET=$(($RET + $?)) ; echo $MSG "DONE"
227-
228172
# Check for the following code in testing: `unittest.mock`, `mock.Mock()` or `mock.patch`
229173
MSG='Check that unittest.mock is not used (pytest builtin monkeypatch fixture should be used instead)' ; echo $MSG
230174
invgrep -r -E --include '*.py' '(unittest(\.| import )mock|mock\.Mock\(\)|mock\.patch)' pandas/tests/
231175
RET=$(($RET + $?)) ; echo $MSG "DONE"
232176

233-
MSG='Check for wrong space after code-block directive and before colon (".. code-block ::" instead of ".. code-block::")' ; echo $MSG
234-
invgrep -R --include="*.rst" ".. code-block ::" doc/source
235-
RET=$(($RET + $?)) ; echo $MSG "DONE"
236-
237-
MSG='Check for wrong space after ipython directive and before colon (".. ipython ::" instead of ".. ipython::")' ; echo $MSG
238-
invgrep -R --include="*.rst" ".. ipython ::" doc/source
239-
RET=$(($RET + $?)) ; echo $MSG "DONE"
240-
241-
MSG='Check for extra blank lines after the class definition' ; echo $MSG
242-
invgrep -R --include="*.py" --include="*.pyx" -E 'class.*:\n\n( )+"""' .
243-
RET=$(($RET + $?)) ; echo $MSG "DONE"
244-
245177
MSG='Check for use of {foo!r} instead of {repr(foo)}' ; echo $MSG
246178
invgrep -R --include=*.{py,pyx} '!r}' pandas
247179
RET=$(($RET + $?)) ; echo $MSG "DONE"
@@ -266,15 +198,16 @@ if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then
266198
invgrep -R --include=*.{py,pyx} '\.__class__' pandas
267199
RET=$(($RET + $?)) ; echo $MSG "DONE"
268200

269-
MSG='Check for use of xrange instead of range' ; echo $MSG
270-
invgrep -R --include=*.{py,pyx} 'xrange' pandas
201+
MSG='Check code for instances of os.remove' ; echo $MSG
202+
invgrep -R --include="*.py*" --exclude "common.py" --exclude "test_writers.py" --exclude "test_store.py" -E "os\.remove" pandas/tests/
271203
RET=$(($RET + $?)) ; echo $MSG "DONE"
272204

273-
MSG='Check that no file in the repo contains trailing whitespaces' ; echo $MSG
274-
INVGREP_APPEND=" <- trailing whitespaces found"
275-
invgrep -RI --exclude=\*.{svg,c,cpp,html,js} --exclude-dir=env "\s$" *
276-
RET=$(($RET + $?)) ; echo $MSG "DONE"
277-
unset INVGREP_APPEND
205+
MSG='Check for inconsistent use of pandas namespace in tests' ; echo $MSG
206+
check_namespace "Series"
207+
RET=$(($RET + $?))
208+
check_namespace "DataFrame"
209+
RET=$(($RET + $?))
210+
echo $MSG "DONE"
278211
fi
279212

280213
### CODE ###
@@ -395,15 +328,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
395328

396329
fi
397330

398-
### DEPENDENCIES ###
399-
if [[ -z "$CHECK" || "$CHECK" == "dependencies" ]]; then
400-
401-
MSG='Check that requirements-dev.txt has been generated from environment.yml' ; echo $MSG
402-
$BASE_DIR/scripts/generate_pip_deps_from_conda.py --compare --azure
403-
RET=$(($RET + $?)) ; echo $MSG "DONE"
404-
405-
fi
406-
407331
### TYPING ###
408332
if [[ -z "$CHECK" || "$CHECK" == "typing" ]]; then
409333

@@ -415,5 +339,4 @@ if [[ -z "$CHECK" || "$CHECK" == "typing" ]]; then
415339
RET=$(($RET + $?)) ; echo $MSG "DONE"
416340
fi
417341

418-
419342
exit $RET

ci/deps/travis-37-cov.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,15 +32,15 @@ dependencies:
3232
- google-cloud-bigquery>=1.27.2 # GH 36436
3333
- psycopg2
3434
- pyarrow>=0.15.0
35-
- pymysql=0.7.11
35+
- pymysql<0.10.0 # temporary pin, GH 36465
3636
- pytables
3737
- python-snappy
3838
- python-dateutil
3939
- pytz
4040
- s3fs>=0.4.0
4141
- scikit-learn
4242
- scipy
43-
- sqlalchemy=1.3.0
43+
- sqlalchemy
4444
- statsmodels
4545
- xarray
4646
- xlrd

0 commit comments

Comments
 (0)