Skip to content

Commit 6cc04ee

Browse files
authored
Merge branch 'master' into readjson-lines-chunks
2 parents 71e8a6b + 6a6faf5 commit 6cc04ee

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

78 files changed

+1305
-529
lines changed

.devcontainer.json

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,9 @@
1717
"python.linting.pylintEnabled": false,
1818
"python.linting.mypyEnabled": true,
1919
"python.testing.pytestEnabled": true,
20-
"python.testing.cwd": "pandas/tests"
20+
"python.testing.pytestArgs": [
21+
"pandas"
22+
]
2123
},
2224

2325
// Add the IDs of extensions you want installed when the container is created in the array below.

asv_bench/benchmarks/groupby.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616

1717
from .pandas_vb_common import tm
1818

19-
method_blacklist = {
19+
method_blocklist = {
2020
"object": {
2121
"median",
2222
"prod",
@@ -403,7 +403,7 @@ class GroupByMethods:
403403
]
404404

405405
def setup(self, dtype, method, application):
406-
if method in method_blacklist.get(dtype, {}):
406+
if method in method_blocklist.get(dtype, {}):
407407
raise NotImplementedError # skip benchmark
408408
ngroups = 1000
409409
size = ngroups * 2

ci/code_checks.sh

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -248,19 +248,19 @@ fi
248248
### CODE ###
249249
if [[ -z "$CHECK" || "$CHECK" == "code" ]]; then
250250

251-
MSG='Check import. No warnings, and blacklist some optional dependencies' ; echo $MSG
251+
MSG='Check import. No warnings, and blocklist some optional dependencies' ; echo $MSG
252252
python -W error -c "
253253
import sys
254254
import pandas
255255
256-
blacklist = {'bs4', 'gcsfs', 'html5lib', 'http', 'ipython', 'jinja2', 'hypothesis',
256+
blocklist = {'bs4', 'gcsfs', 'html5lib', 'http', 'ipython', 'jinja2', 'hypothesis',
257257
'lxml', 'matplotlib', 'numexpr', 'openpyxl', 'py', 'pytest', 's3fs', 'scipy',
258258
'tables', 'urllib.request', 'xlrd', 'xlsxwriter', 'xlwt'}
259259
260260
# GH#28227 for some of these check for top-level modules, while others are
261261
# more specific (e.g. urllib.request)
262262
import_mods = set(m.split('.')[0] for m in sys.modules) | set(sys.modules)
263-
mods = blacklist & import_mods
263+
mods = blocklist & import_mods
264264
if mods:
265265
sys.stderr.write('err: pandas should not import: {}\n'.format(', '.join(mods)))
266266
sys.exit(len(mods))

ci/deps/azure-36-locale.yaml

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@ dependencies:
1515

1616
# pandas dependencies
1717
- beautifulsoup4
18-
- gcsfs
1918
- html5lib
2019
- ipython
2120
- jinja2
@@ -31,7 +30,6 @@ dependencies:
3130
- pytables
3231
- python-dateutil
3332
- pytz
34-
- s3fs
3533
- scipy
3634
- xarray
3735
- xlrd

ci/deps/azure-37-locale.yaml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@ dependencies:
2727
- pytables
2828
- python-dateutil
2929
- pytz
30-
- s3fs
3130
- scipy
3231
- xarray
3332
- xlrd

ci/deps/azure-windows-37.yaml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,8 @@ dependencies:
1515
# pandas dependencies
1616
- beautifulsoup4
1717
- bottleneck
18-
- gcsfs
18+
- fsspec>=0.7.4
19+
- gcsfs>=0.6.0
1920
- html5lib
2021
- jinja2
2122
- lxml
@@ -28,7 +29,7 @@ dependencies:
2829
- pytables
2930
- python-dateutil
3031
- pytz
31-
- s3fs
32+
- s3fs>=0.4.0
3233
- scipy
3334
- sqlalchemy
3435
- xlrd

ci/deps/travis-36-cov.yaml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,8 @@ dependencies:
1818
- cython>=0.29.16
1919
- dask
2020
- fastparquet>=0.3.2
21-
- gcsfs
21+
- fsspec>=0.7.4
22+
- gcsfs>=0.6.0
2223
- geopandas
2324
- html5lib
2425
- matplotlib
@@ -35,7 +36,7 @@ dependencies:
3536
- pytables
3637
- python-snappy
3738
- pytz
38-
- s3fs
39+
- s3fs>=0.4.0
3940
- scikit-learn
4041
- scipy
4142
- sqlalchemy

ci/deps/travis-36-locale.yaml

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@ dependencies:
1616
- blosc=1.14.3
1717
- python-blosc
1818
- fastparquet=0.3.2
19-
- gcsfs=0.2.2
2019
- html5lib
2120
- ipython
2221
- jinja2
@@ -33,7 +32,6 @@ dependencies:
3332
- pytables
3433
- python-dateutil
3534
- pytz
36-
- s3fs=0.3.0
3735
- scipy
3836
- sqlalchemy=1.1.4
3937
- xarray=0.10

ci/deps/travis-36-slow.yaml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ dependencies:
1313

1414
# pandas dependencies
1515
- beautifulsoup4
16+
- fsspec>=0.7.4
1617
- html5lib
1718
- lxml
1819
- matplotlib
@@ -25,7 +26,7 @@ dependencies:
2526
- pytables
2627
- python-dateutil
2728
- pytz
28-
- s3fs
29+
- s3fs>=0.4.0
2930
- scipy
3031
- sqlalchemy
3132
- xlrd

ci/deps/travis-37.yaml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,13 @@ dependencies:
1313

1414
# pandas dependencies
1515
- botocore>=1.11
16+
- fsspec>=0.7.4
1617
- numpy
1718
- python-dateutil
1819
- nomkl
1920
- pyarrow
2021
- pytz
21-
- s3fs
22+
- s3fs>=0.4.0
2223
- tabulate
2324
- pyreadstat
2425
- pip

doc/source/getting_started/install.rst

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -267,8 +267,9 @@ SQLAlchemy 1.1.4 SQL support for databases other tha
267267
SciPy 0.19.0 Miscellaneous statistical functions
268268
XLsxWriter 0.9.8 Excel writing
269269
blosc Compression for HDF5
270+
fsspec 0.7.4 Handling files aside from local and HTTP
270271
fastparquet 0.3.2 Parquet reading / writing
271-
gcsfs 0.2.2 Google Cloud Storage access
272+
gcsfs 0.6.0 Google Cloud Storage access
272273
html5lib HTML parser for read_html (see :ref:`note <optional_html>`)
273274
lxml 3.8.0 HTML parser for read_html (see :ref:`note <optional_html>`)
274275
matplotlib 2.2.2 Visualization
@@ -282,7 +283,7 @@ pyreadstat SPSS files (.sav) reading
282283
pytables 3.4.3 HDF5 reading / writing
283284
pyxlsb 1.0.6 Reading for xlsb files
284285
qtpy Clipboard I/O
285-
s3fs 0.3.0 Amazon S3 access
286+
s3fs 0.4.0 Amazon S3 access
286287
tabulate 0.8.3 Printing in Markdown-friendly format (see `tabulate`_)
287288
xarray 0.8.2 pandas-like API for N-dimensional data
288289
xclip Clipboard I/O on linux

doc/source/user_guide/advanced.rst

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -260,7 +260,9 @@ You don't have to specify all levels of the ``MultiIndex`` by passing only the
260260
first elements of the tuple. For example, you can use "partial" indexing to
261261
get all elements with ``bar`` in the first level as follows:
262262

263-
df.loc['bar']
263+
.. ipython:: python
264+
265+
df.loc['bar']
264266
265267
This is a shortcut for the slightly more verbose notation ``df.loc[('bar',),]`` (equivalent
266268
to ``df.loc['bar',]`` in this example).

doc/source/whatsnew/v0.14.1.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,7 @@ Enhancements
131131

132132
- Implemented ``sem`` (standard error of the mean) operation for ``Series``,
133133
``DataFrame``, ``Panel``, and ``Groupby`` (:issue:`6897`)
134-
- Add ``nlargest`` and ``nsmallest`` to the ``Series`` ``groupby`` whitelist,
134+
- Add ``nlargest`` and ``nsmallest`` to the ``Series`` ``groupby`` allowlist,
135135
which means you can now use these methods on a ``SeriesGroupBy`` object
136136
(:issue:`7053`).
137137
- All offsets ``apply``, ``rollforward`` and ``rollback`` can now handle ``np.datetime64``, previously results in ``ApplyTypeError`` (:issue:`7452`)

doc/source/whatsnew/v1.1.0.rst

Lines changed: 27 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -245,6 +245,22 @@ If needed you can adjust the bins with the argument ``offset`` (a Timedelta) tha
245245

246246
For a full example, see: :ref:`timeseries.adjust-the-start-of-the-bins`.
247247

248+
fsspec now used for filesystem handling
249+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
250+
251+
For reading and writing to filesystems other than local and reading from HTTP(S),
252+
the optional dependency ``fsspec`` will be used to dispatch operations (:issue:`33452`).
253+
This will give unchanged
254+
functionality for S3 and GCS storage, which were already supported, but also add
255+
support for several other storage implementations such as `Azure Datalake and Blob`_,
256+
SSH, FTP, dropbox and github. For docs and capabilities, see the `fsspec docs`_.
257+
258+
The existing capability to interface with S3 and GCS will be unaffected by this
259+
change, as ``fsspec`` will still bring in the same packages as before.
260+
261+
.. _Azure Datalake and Blob: https://github.com/dask/adlfs
262+
263+
.. _fsspec docs: https://filesystem-spec.readthedocs.io/en/latest/
248264

249265
.. _whatsnew_110.enhancements.other:
250266

@@ -292,12 +308,15 @@ Other enhancements
292308
- :meth:`DataFrame.to_csv` and :meth:`Series.to_csv` now accept an ``errors`` argument (:issue:`22610`)
293309
- :meth:`groupby.transform` now allows ``func`` to be ``pad``, ``backfill`` and ``cumcount`` (:issue:`31269`).
294310
- :meth:`~pandas.io.json.read_json` now accepts an ``nrows`` parameter (:issue:`33916`).
311+
- :meth:`DataFrame.hist`, :meth:`Series.hist`, :meth:`core.groupby.DataFrameGroupBy.hist`, and :meth:`core.groupby.SeriesGroupBy.hist` have gained the ``legend`` argument. Set to True to show a legend in the histogram. (:issue:`6279`)
295312
- :func:`concat` and :meth:`~DataFrame.append` now preserve extension dtypes, for example
296313
combining a nullable integer column with a numpy integer column will no longer
297314
result in object dtype but preserve the integer dtype (:issue:`33607`, :issue:`34339`).
298315
- :meth:`~pandas.io.gbq.read_gbq` now allows disabling the progress bar (:issue:`33360`).
299316
- :meth:`~pandas.io.gbq.read_gbq` now supports the ``max_results`` kwarg from ``pandas-gbq`` (:issue:`34639`).
317+
- :meth:`DataFrame.cov` and :meth:`Series.cov` now support a new parameter ``ddof`` to support delta degrees of freedom as in the corresponding numpy methods (:issue:`34611`).
300318
- :meth:`DataFrame.to_html` and :meth:`DataFrame.to_string`'s ``col_space`` parameter now accepts a list or dict to change only some specific columns' width (:issue:`28917`).
319+
- :meth:`DataFrame.to_excel` can now also write OpenOffice spreadsheet (.ods) files (:issue:`27222`)
301320

302321
.. ---------------------------------------------------------------------------
303322
@@ -700,7 +719,9 @@ Optional libraries below the lowest tested version may still work, but are not c
700719
+-----------------+-----------------+---------+
701720
| fastparquet | 0.3.2 | |
702721
+-----------------+-----------------+---------+
703-
| gcsfs | 0.2.2 | |
722+
| fsspec | 0.7.4 | |
723+
+-----------------+-----------------+---------+
724+
| gcsfs | 0.6.0 | X |
704725
+-----------------+-----------------+---------+
705726
| lxml | 3.8.0 | |
706727
+-----------------+-----------------+---------+
@@ -716,7 +737,7 @@ Optional libraries below the lowest tested version may still work, but are not c
716737
+-----------------+-----------------+---------+
717738
| pytables | 3.4.3 | X |
718739
+-----------------+-----------------+---------+
719-
| s3fs | 0.3.0 | |
740+
| s3fs | 0.4.0 | X |
720741
+-----------------+-----------------+---------+
721742
| scipy | 1.2.0 | X |
722743
+-----------------+-----------------+---------+
@@ -933,6 +954,7 @@ Indexing
933954
- Bug in :meth:`Series.at` when used with a :class:`MultiIndex` would raise an exception on valid inputs (:issue:`26989`)
934955
- Bug in :meth:`DataFrame.loc` with dictionary of values changes columns with dtype of ``int`` to ``float`` (:issue:`34573`)
935956
- Bug in :meth:`Series.loc` when used with a :class:`MultiIndex` would raise an IndexingError when accessing a None value (:issue:`34318`)
957+
- Bug in :meth:`DataFrame.reset_index` and :meth:`Series.reset_index` would not preserve data types on an empty :class:`DataFrame` or :class:`Series` with a :class:`MultiIndex` (:issue:`19602`)
936958

937959
Missing
938960
^^^^^^^
@@ -997,7 +1019,7 @@ I/O
9971019
- Bug in :meth:`~SQLDatabase.execute` was raising a ``ProgrammingError`` for some DB-API drivers when the SQL statement contained the `%` character and no parameters were present (:issue:`34211`)
9981020
- Bug in :meth:`~pandas.io.stata.StataReader` which resulted in categorical variables with different dtypes when reading data using an iterator. (:issue:`31544`)
9991021
- :meth:`HDFStore.keys` now has an optional `include` parameter that allows the retrieval of all native HDF5 table names (:issue:`29916`)
1000-
- :meth:`read_json` now could read line-delimited JSON from file url. (:issue:`27135`)
1022+
- Bug in :meth:`read_excel` where 0.0 values were removed when reading ODS files (:issue:`27222`)
10011023

10021024
Plotting
10031025
^^^^^^^^
@@ -1008,6 +1030,7 @@ Plotting
10081030
- Bug in :meth:`DataFrame.hist` where the order of ``column`` argument was ignored (:issue:`29235`)
10091031
- Bug in :meth:`DataFrame.plot.scatter` where, when adding multiple plots with different ``cmap``, colorbars always used the first ``cmap`` (:issue:`33389`)
10101032
- Bug in :meth:`DataFrame.plot.scatter` was adding a colorbar to the plot even if the argument `c` was assigned to a column containing color names (:issue:`34316`)
1033+
- Bug in :meth:`pandas.plotting.bootstrap_plot` was causing cluttered axes and overlapping labels (:issue:`34905`)
10111034

10121035
Groupby/resample/rolling
10131036
^^^^^^^^^^^^^^^^^^^^^^^^
@@ -1100,6 +1123,7 @@ Other
11001123
- :class:`IntegerArray` now implements the ``sum`` operation (:issue:`33172`)
11011124
- Bug in :class:`Tick` comparisons raising ``TypeError`` when comparing against timedelta-like objects (:issue:`34088`)
11021125
- Bug in :class:`Tick` multiplication raising ``TypeError`` when multiplying by a float (:issue:`34486`)
1126+
- Passing a `set` as `names` argument to :func:`pandas.read_csv`, :func:`pandas.read_table`, or :func:`pandas.read_fwf` will raise ``ValueError: Names should be an ordered collection.`` (:issue:`34946`)
11031127

11041128
.. ---------------------------------------------------------------------------
11051129

environment.yml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,9 @@ dependencies:
9898

9999
- pyqt>=5.9.2 # pandas.read_clipboard
100100
- pytables>=3.4.3 # pandas.read_hdf, DataFrame.to_hdf
101-
- s3fs # pandas.read_csv... when using 's3://...' path
101+
- s3fs>=0.4.0 # file IO when using 's3://...' path
102+
- fsspec>=0.7.4 # for generic remote file operations
103+
- gcsfs>=0.6.0 # file IO when using 'gcs://...' path
102104
- sqlalchemy # pandas.read_sql, DataFrame.to_sql
103105
- xarray # DataFrame.to_xarray
104106
- cftime # Needed for downstream xarray.CFTimeIndex test

pandas/_libs/window/aggregations.pyx

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1377,17 +1377,11 @@ def roll_generic_fixed(object obj,
13771377
output[i] = NaN
13781378

13791379
# remaining full-length windows
1380-
buf = <float64_t *>arr.data
1381-
bufarr = np.empty(win, dtype=float)
1382-
oldbuf = <float64_t *>bufarr.data
1383-
for i in range((win - offset), (N - offset)):
1384-
buf = buf + 1
1385-
bufarr.data = <char *>buf
1380+
for j, i in enumerate(range((win - offset), (N - offset)), 1):
13861381
if counts[i] >= minp:
1387-
output[i] = func(bufarr, *args, **kwargs)
1382+
output[i] = func(arr[j:j + win], *args, **kwargs)
13881383
else:
13891384
output[i] = NaN
1390-
bufarr.data = <char *>oldbuf
13911385

13921386
# truncated windows at the end
13931387
for i in range(int_max(N - offset, 0), N):

pandas/compat/_optional.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,9 @@
88
VERSIONS = {
99
"bs4": "4.6.0",
1010
"bottleneck": "1.2.1",
11+
"fsspec": "0.7.4",
1112
"fastparquet": "0.3.2",
12-
"gcsfs": "0.2.2",
13+
"gcsfs": "0.6.0",
1314
"lxml.etree": "3.8.0",
1415
"matplotlib": "2.2.2",
1516
"numexpr": "2.6.2",
@@ -20,7 +21,7 @@
2021
"pytables": "3.4.3",
2122
"pytest": "5.0.1",
2223
"pyxlsb": "1.0.6",
23-
"s3fs": "0.3.0",
24+
"s3fs": "0.4.0",
2425
"scipy": "1.2.0",
2526
"sqlalchemy": "1.1.4",
2627
"tables": "3.4.3",

pandas/core/common.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -404,7 +404,7 @@ def random_state(state=None):
404404
If receives `None`, returns np.random.
405405
If receives anything else, raises an informative ValueError.
406406
407-
..versionchanged:: 1.1.0
407+
.. versionchanged:: 1.1.0
408408
409409
array-like and BitGenerator (for NumPy>=1.18) object now passed to
410410
np.random.RandomState() as seed

pandas/core/config_init.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -553,6 +553,7 @@ def use_inf_as_na_cb(key):
553553
_xls_options = ["xlwt"]
554554
_xlsm_options = ["openpyxl"]
555555
_xlsx_options = ["openpyxl", "xlsxwriter"]
556+
_ods_options = ["odf"]
556557

557558

558559
with cf.config_prefix("io.excel.xls"):
@@ -581,6 +582,15 @@ def use_inf_as_na_cb(key):
581582
)
582583

583584

585+
with cf.config_prefix("io.excel.ods"):
586+
cf.register_option(
587+
"writer",
588+
"auto",
589+
writer_engine_doc.format(ext="ods", others=", ".join(_ods_options)),
590+
validator=str,
591+
)
592+
593+
584594
# Set up the io.parquet specific configuration.
585595
parquet_engine_doc = """
586596
: string

0 commit comments

Comments
 (0)