Skip to content

Commit 1240f1e

Browse files
committed
WIP gcsfs
1 parent bb095a6 commit 1240f1e

18 files changed

+60
-5
lines changed

ci/check_imports.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
blacklist = {
77
'bs4',
8+
'gcsfs',
89
'html5lib',
910
'ipython',
1011
'jinja2'

ci/requirements-2.7.run

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,3 +18,4 @@ patsy
1818
pymysql=0.6.3
1919
jinja2=2.8
2020
xarray=0.8.0
21+
gcsfs

ci/requirements-2.7_WIN.run

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,3 +16,4 @@ bottleneck
1616
html5lib
1717
beautifulsoup4
1818
jinja2=2.8
19+
gcsfs

ci/requirements-3.5.run

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,3 +18,4 @@ psycopg2
1818
s3fs
1919
beautifulsoup4
2020
ipython
21+
gcsfs

ci/requirements-3.5_OSX.run

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,3 +14,4 @@ bottleneck
1414
xarray
1515
s3fs
1616
beautifulsoup4
17+
gcsfs

ci/requirements-3.6.run

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,3 +23,4 @@ beautifulsoup4
2323
s3fs
2424
xarray
2525
ipython
26+
gcsfs

ci/requirements-3.6_LOCALE.run

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,3 +20,4 @@ beautifulsoup4
2020
s3fs
2121
xarray
2222
ipython
23+
gcsfs

ci/requirements-3.6_LOCALE_SLOW.run

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,3 +20,4 @@ beautifulsoup4
2020
s3fs
2121
xarray
2222
ipython
23+
gcsfs

ci/requirements-3.6_SLOW.run

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,3 +17,4 @@ psycopg2
1717
pymysql
1818
html5lib
1919
beautifulsoup4
20+
gcsfs

ci/requirements-optional-conda.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ blosc
33
bottleneck
44
fastparquet
55
feather-format
6+
gcsfs
67
html5lib
78
ipython
89
ipykernel

ci/requirements-optional-pip.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
beautifulsoup4>=4.2.1
44
blosc
55
bottleneck
6+
gcsfs
67
fastparquet
78
feather-format
89
html5lib
@@ -26,4 +27,4 @@ sqlalchemy
2627
xarray
2728
xlrd
2829
xlsxwriter
29-
xlwt
30+
xlwt

doc/source/install.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -275,6 +275,7 @@ Optional Dependencies
275275

276276
* `Jinja2 <http://jinja.pocoo.org/>`__: Template engine for conditional HTML formatting.
277277
* `s3fs <http://s3fs.readthedocs.io/>`__: necessary for Amazon S3 access (s3fs >= 0.0.7).
278+
* `gcsfs <http://gcsfs.readthedocs.io/>`__: necessary for Google Cloud Storage access (gcsfs >= 0.6.0).
278279
* `blosc <https://pypi.python.org/pypi/blosc>`__: for msgpack compression using ``blosc``
279280
* One of
280281
`qtpy <https://github.com/spyder-ide/qtpy>`__ (requires PyQt or PySide),

doc/source/whatsnew/v0.23.0.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -441,6 +441,7 @@ Other Enhancements
441441
- Updated ``to_gbq`` and ``read_gbq`` signature and documentation to reflect changes from
442442
the Pandas-GBQ library version 0.4.0. Adds intersphinx mapping to Pandas-GBQ
443443
library. (:issue:`20564`)
444+
- Added support for reading from Google Cloud Storage via the ``gcsfs`` library (:issue:`19454`)
444445

445446
.. _whatsnew_0230.api_breaking:
446447

pandas/io/common.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,14 @@ def is_s3_url(url):
169169
return False
170170

171171

172+
def is_gcs_url(url):
    """Check whether ``url`` uses a Google Cloud Storage scheme.

    Parameters
    ----------
    url : object
        Candidate URL. Normally a string, but any object is accepted.

    Returns
    -------
    bool
        True if the parsed scheme is ``gcs`` or ``gs``; False otherwise,
        including when ``url`` cannot be parsed at all.
    """
    try:
        return parse_url(url).scheme in ('gcs', 'gs')
    except Exception:
        # An input that cannot be parsed is simply not a GCS URL.
        # Deliberately not a bare ``except``: KeyboardInterrupt and
        # SystemExit must still propagate to the caller.
        return False
178+
179+
172180
def get_filepath_or_buffer(filepath_or_buffer, encoding=None,
173181
compression=None, mode=None):
174182
"""
@@ -208,6 +216,13 @@ def get_filepath_or_buffer(filepath_or_buffer, encoding=None,
208216
compression=compression,
209217
mode=mode)
210218

219+
if is_gcs_url(filepath_or_buffer):
220+
from pandas.io import gcs
221+
return gcs.get_filepath_or_buffer(filepath_or_buffer,
222+
encoding=encoding,
223+
compression=compression,
224+
mode=mode)
225+
211226
if isinstance(filepath_or_buffer, (compat.string_types,
212227
compat.binary_type,
213228
mmap.mmap)):

pandas/io/excel.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@
4646
io : string, path object (pathlib.Path or py._path.local.LocalPath),
4747
file-like object, pandas ExcelFile, or xlrd workbook.
4848
The string could be a URL. Valid URL schemes include http, ftp, s3,
49-
and file. For file URLs, a host is expected. For instance, a local
49+
gcs, and file. For file URLs, a host is expected. For instance, a local
5050
file could be file://localhost/path/to/workbook.xlsx
5151
sheet_name : string, int, mixed list of strings/ints, or None, default 0
5252

pandas/io/gcs.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
""" GCS support for remote file interactivity """
2+
from pandas import compat
3+
try:
4+
from google.auth.exceptions import GoogleAuthError
5+
import gcsfs
6+
except ImportError:
7+
raise ImportError("The gcsfs library is required to handle GCS files")
8+
9+
10+
def get_filepath_or_buffer(filepath_or_buffer, encoding=None,
                           compression=None, mode=None):
    """Open a GCS URL through ``gcsfs`` and return it as a file object.

    The default ``GCSFileSystem()`` is tried first; if that fails with a
    missing-file, Google auth or gcsfs HTML error, the open is retried
    with an anonymous (``token='anon'``) filesystem.

    Parameters
    ----------
    filepath_or_buffer : str
        GCS location handed to ``GCSFileSystem.open``.
    encoding : str, optional
        Accepted for signature compatibility; not used by this function.
    compression : str, optional
        Passed through unchanged in the returned tuple.
    mode : str, optional
        File mode for ``GCSFileSystem.open``; defaults to ``'rb'``.

    Returns
    -------
    tuple
        ``(file_object, None, compression, True)``.
        NOTE(review): the ``None`` and ``True`` slots presumably mean
        "encoding" and "caller should close the handle", mirroring
        ``pandas.io.common.get_filepath_or_buffer`` -- confirm.

    Raises
    ------
    Exception
        Whatever the anonymous retry raises, or any error from the first
        attempt that is not one of the three handled types.
    """
    if mode is None:
        mode = 'rb'

    # Silence the gcsfs logger for the duration of the attempt, remembering
    # the previous state so it is restored no matter how we leave.
    gcsfs_logger_disabled = gcsfs.core.logger.disabled
    gcsfs.core.logger.disabled = True
    try:
        # The filesystem is created inside the guarded region: if the
        # constructor itself fails, the logger must still be re-enabled
        # and the anonymous fallback must still get its chance.
        fs = gcsfs.GCSFileSystem()
        filepath_or_buffer = fs.open(filepath_or_buffer, mode)
    except (compat.FileNotFoundError, GoogleAuthError,
            gcsfs.utils.HtmlError):
        # Retry without credentials.
        fs = gcsfs.GCSFileSystem(token='anon')
        filepath_or_buffer = fs.open(filepath_or_buffer, mode)
    finally:
        gcsfs.core.logger.disabled = gcsfs_logger_disabled
    return filepath_or_buffer, None, compression, True

pandas/io/json/json.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -231,9 +231,9 @@ def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True,
231231
Parameters
232232
----------
233233
path_or_buf : a valid JSON string or file-like, default: None
234-
The string could be a URL. Valid URL schemes include http, ftp, s3, and
235-
file. For file URLs, a host is expected. For instance, a local file
236-
could be ``file://localhost/path/to/table.json``
234+
The string could be a URL. Valid URL schemes include http, ftp, s3,
235+
gcs, and file. For file URLs, a host is expected. For instance, a local
236+
file could be ``file://localhost/path/to/table.json``
237237
238238
orient : string,
239239
Indication of expected JSON string format.

pandas/util/_print_versions.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,7 @@ def show_versions(as_json=False):
9696
("fastparquet", lambda mod: mod.__version__),
9797
("pandas_gbq", lambda mod: mod.__version__),
9898
("pandas_datareader", lambda mod: mod.__version__),
99+
("gcsfs", lambda mod: mod.__version__),
99100
]
100101

101102
deps_blob = list()

0 commit comments

Comments
 (0)