-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
[ArrayManager] Enable read_parquet to not create 2D blocks when using ArrayManager #40303
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 6 commits
16b8a05
1fea5ef
0fe4b1e
5a070b9
6f085f2
ecf1163
d05e9f5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,6 +9,8 @@ | |
import numpy as np | ||
import pytest | ||
|
||
from pandas._config import get_option | ||
|
||
from pandas.compat import ( | ||
PY38, | ||
is_platform_windows, | ||
|
@@ -41,20 +43,21 @@ | |
_HAVE_FASTPARQUET = False | ||
|
||
|
||
pytestmark = [ | ||
pytest.mark.filterwarnings("ignore:RangeIndex.* is deprecated:DeprecationWarning"), | ||
# TODO(ArrayManager) fastparquet / pyarrow rely on BlockManager internals | ||
td.skip_array_manager_not_yet_implemented, | ||
] | ||
pytestmark = pytest.mark.filterwarnings( | ||
"ignore:RangeIndex.* is deprecated:DeprecationWarning" | ||
) | ||
|
||
|
||
# TODO(ArrayManager) fastparquet relies on BlockManager internals | ||
|
||
# setup engines & skips | ||
@pytest.fixture( | ||
params=[ | ||
pytest.param( | ||
"fastparquet", | ||
marks=pytest.mark.skipif( | ||
not _HAVE_FASTPARQUET, reason="fastparquet is not installed" | ||
not _HAVE_FASTPARQUET or get_option("mode.data_manager") == "array", | ||
reason="fastparquet is not installed or ArrayManager is used", | ||
Review comment: Is this a "for now" or an "ever"?
Reply: If your question is "will ArrayManager be supported with the fastparquet engine", that's probably a question for the fastparquet package (and since this is only optional for now, there is still time to discuss that with them).
Reply: OK, so not actionable on our end, thanks. |
||
), | ||
), | ||
pytest.param( | ||
|
@@ -80,6 +83,8 @@ def pa(): | |
def fp():
    """Return the ``"fastparquet"`` engine name, skipping when it is unusable.

    The requesting test is skipped when fastparquet is not installed, or
    when the ``mode.data_manager`` option is set to ``"array"``.
    """
    if not _HAVE_FASTPARQUET:
        pytest.skip("fastparquet is not installed")

    # pytest.skip raises, so reaching this point means fastparquet is available.
    data_manager = get_option("mode.data_manager")
    if data_manager == "array":
        pytest.skip("ArrayManager is not supported with fastparquet")

    return "fastparquet"
|
||
|
||
|
@@ -923,6 +928,18 @@ def test_filter_row_groups(self, pa): | |
) | ||
assert len(result) == 1 | ||
|
||
def test_read_parquet_manager(self, pa, using_array_manager):
    """Round-trip a frame through parquet and check the resulting manager type."""
    # ensure that read_parquet honors the pandas.options.mode.data_manager option
    frame = pd.DataFrame(np.random.randn(10, 3), columns=["A", "B", "C"])

    with tm.ensure_clean() as path:
        frame.to_parquet(path, pa)
        roundtripped = read_parquet(path, pa)

    # Resolve only the manager class relevant to the active mode.
    if using_array_manager:
        expected_manager = pd.core.internals.ArrayManager
    else:
        expected_manager = pd.core.internals.BlockManager
    assert isinstance(roundtripped._mgr, expected_manager)
|
||
|
||
class TestParquetFastParquet(Base): | ||
def test_basic(self, fp, df_full): | ||
|
Uh oh!
There was an error while loading. Please reload this page.