Skip to content

ENH: Add if_sheet_exists parameter to ExcelWriter #40231

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 21 commits into from
Apr 22, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.3.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,7 @@ Other enhancements
- :func:`to_numeric` now supports downcasting of nullable ``ExtensionDtype`` objects (:issue:`33013`)
- Add support for dict-like names in :class:`MultiIndex.set_names` and :class:`MultiIndex.rename` (:issue:`20421`)
- :func:`pandas.read_excel` can now auto detect .xlsb files (:issue:`35416`)
- :class:`pandas.ExcelWriter` now accepts an ``if_sheet_exists`` parameter to control the behaviour of append mode when writing to existing sheets (:issue:`40230`)
- :meth:`.Rolling.sum`, :meth:`.Expanding.sum`, :meth:`.Rolling.mean`, :meth:`.Expanding.mean`, :meth:`.Rolling.median`, :meth:`.Expanding.median`, :meth:`.Rolling.max`, :meth:`.Expanding.max`, :meth:`.Rolling.min`, and :meth:`.Expanding.min` now support ``Numba`` execution with the ``engine`` keyword (:issue:`38895`)
- :meth:`DataFrame.apply` can now accept NumPy unary operators as strings, e.g. ``df.apply("sqrt")``, which was already the case for :meth:`Series.apply` (:issue:`39116`)
- :meth:`DataFrame.apply` can now accept non-callable DataFrame properties as strings, e.g. ``df.apply("size")``, which was already the case for :meth:`Series.apply` (:issue:`39116`)
Expand Down
22 changes: 22 additions & 0 deletions pandas/io/excel/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -664,6 +664,15 @@ class ExcelWriter(metaclass=abc.ABCMeta):
be parsed by ``fsspec``, e.g., starting "s3://", "gcs://".

.. versionadded:: 1.2.0
if_sheet_exists : {'error', 'new', 'replace'}, default 'error'
How to behave when trying to write to a sheet that already
exists (append mode only).

* error: raise a ValueError.
* new: Create a new sheet, with a name determined by the engine.
* replace: Delete the contents of the sheet before writing to it.

.. versionadded:: 1.3.0
engine_kwargs : dict, optional
Keyword arguments to be passed into the engine.

Expand Down Expand Up @@ -760,6 +769,7 @@ def __new__(
datetime_format=None,
mode: str = "w",
storage_options: StorageOptions = None,
if_sheet_exists: str | None = None,
engine_kwargs: dict | None = None,
**kwargs,
):
Expand Down Expand Up @@ -861,6 +871,7 @@ def __init__(
datetime_format=None,
mode: str = "w",
storage_options: StorageOptions = None,
if_sheet_exists: str | None = None,
engine_kwargs: dict | None = None,
**kwargs,
):
Expand Down Expand Up @@ -896,6 +907,17 @@ def __init__(

self.mode = mode

if if_sheet_exists not in [None, "error", "new", "replace"]:
raise ValueError(
f"'{if_sheet_exists}' is not valid for if_sheet_exists. "
"Valid options are 'error', 'new' and 'replace'."
)
if if_sheet_exists and "r+" not in mode:
raise ValueError("if_sheet_exists is only valid in append mode (mode='a')")
if if_sheet_exists is None:
if_sheet_exists = "error"
self.if_sheet_exists = if_sheet_exists

def __fspath__(self):
return getattr(self.handles.handle, "name", "")

Expand Down
2 changes: 2 additions & 0 deletions pandas/io/excel/_odswriter.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ def __init__(
datetime_format=None,
mode: str = "w",
storage_options: StorageOptions = None,
if_sheet_exists: Optional[str] = None,
engine_kwargs: Optional[Dict[str, Any]] = None,
):
from odf.opendocument import OpenDocumentSpreadsheet
Expand All @@ -41,6 +42,7 @@ def __init__(
path,
mode=mode,
storage_options=storage_options,
if_sheet_exists=if_sheet_exists,
engine_kwargs=engine_kwargs,
)

Expand Down
26 changes: 24 additions & 2 deletions pandas/io/excel/_openpyxl.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ def __init__(
datetime_format=None,
mode: str = "w",
storage_options: StorageOptions = None,
if_sheet_exists: str | None = None,
engine_kwargs: dict[str, Any] | None = None,
):
# Use the openpyxl module as the Excel writer.
Expand All @@ -46,6 +47,7 @@ def __init__(
path,
mode=mode,
storage_options=storage_options,
if_sheet_exists=if_sheet_exists,
engine_kwargs=engine_kwargs,
)

Expand All @@ -56,6 +58,8 @@ def __init__(

self.book = load_workbook(self.handles.handle)
self.handles.handle.seek(0)
self.sheets = {name: self.book[name] for name in self.book.sheetnames}

else:
# Create workbook object with default optimized_write=True.
self.book = Workbook()
Expand Down Expand Up @@ -414,8 +418,26 @@ def write_cells(

_style_cache: dict[str, dict[str, Serialisable]] = {}

if sheet_name in self.sheets:
wks = self.sheets[sheet_name]
if sheet_name in self.sheets and self.if_sheet_exists != "new":
if "r+" in self.mode:
if self.if_sheet_exists == "replace":
old_wks = self.sheets[sheet_name]
target_index = self.book.index(old_wks)
del self.book[sheet_name]
wks = self.book.create_sheet(sheet_name, target_index)
self.sheets[sheet_name] = wks
elif self.if_sheet_exists == "error":
raise ValueError(
f"Sheet '{sheet_name}' already exists and "
f"if_sheet_exists is set to 'error'."
)
else:
raise ValueError(
f"'{self.if_sheet_exists}' is not valid for if_sheet_exists. "
"Valid options are 'error', 'new' and 'replace'."
)
else:
wks = self.sheets[sheet_name]
else:
wks = self.book.create_sheet()
wks.title = sheet_name
Expand Down
2 changes: 2 additions & 0 deletions pandas/io/excel/_xlsxwriter.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,7 @@ def __init__(
datetime_format=None,
mode: str = "w",
storage_options: StorageOptions = None,
if_sheet_exists: Optional[str] = None,
engine_kwargs: Optional[Dict[str, Any]] = None,
):
# Use the xlsxwriter module as the Excel writer.
Expand All @@ -194,6 +195,7 @@ def __init__(
datetime_format=datetime_format,
mode=mode,
storage_options=storage_options,
if_sheet_exists=if_sheet_exists,
engine_kwargs=engine_kwargs,
)

Expand Down
2 changes: 2 additions & 0 deletions pandas/io/excel/_xlwt.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ def __init__(
encoding=None,
mode: str = "w",
storage_options: StorageOptions = None,
if_sheet_exists: Optional[str] = None,
engine_kwargs: Optional[Dict[str, Any]] = None,
):
# Use the xlwt module as the Excel writer.
Expand All @@ -40,6 +41,7 @@ def __init__(
path,
mode=mode,
storage_options=storage_options,
if_sheet_exists=if_sheet_exists,
engine_kwargs=engine_kwargs,
)

Expand Down
65 changes: 64 additions & 1 deletion pandas/tests/io/excel/test_openpyxl.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from pathlib import Path
import re

import numpy as np
import pytest
Expand Down Expand Up @@ -109,6 +110,66 @@ def test_write_append_mode(ext, mode, expected):
assert wb2.worksheets[index]["A1"].value == cell_value


@pytest.mark.parametrize(
"if_sheet_exists,num_sheets,expected",
[
("new", 2, ["apple", "banana"]),
("replace", 1, ["pear"]),
],
)
def test_if_sheet_exists_append_modes(ext, if_sheet_exists, num_sheets, expected):
# GH 40230
df1 = DataFrame({"fruit": ["apple", "banana"]})
df2 = DataFrame({"fruit": ["pear"]})

with tm.ensure_clean(ext) as f:
df1.to_excel(f, engine="openpyxl", sheet_name="foo", index=False)
with ExcelWriter(
f, engine="openpyxl", mode="a", if_sheet_exists=if_sheet_exists
) as writer:
df2.to_excel(writer, sheet_name="foo", index=False)

wb = openpyxl.load_workbook(f)
assert len(wb.sheetnames) == num_sheets
assert wb.sheetnames[0] == "foo"
result = pd.read_excel(wb, "foo", engine="openpyxl")
assert list(result["fruit"]) == expected
if len(wb.sheetnames) == 2:
result = pd.read_excel(wb, wb.sheetnames[1], engine="openpyxl")
tm.assert_frame_equal(result, df2)
wb.close()


@pytest.mark.parametrize(
"if_sheet_exists,msg",
[
(
"invalid",
"'invalid' is not valid for if_sheet_exists. Valid options "
"are 'error', 'new' and 'replace'.",
),
(
"error",
"Sheet 'foo' already exists and if_sheet_exists is set to 'error'.",
),
(
None,
"Sheet 'foo' already exists and if_sheet_exists is set to 'error'.",
),
],
)
def test_if_sheet_exists_raises(ext, if_sheet_exists, msg):
# GH 40230
df = DataFrame({"fruit": ["pear"]})
with tm.ensure_clean(ext) as f:
with pytest.raises(ValueError, match=re.escape(msg)):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what about other engines?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Openpyxl is the only engine which supports append mode currently, and this option only affects append mode

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok, so we completely ignore on other engines is fine. is there a reason to raise / warn in that case if its not None?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As it's written at the moment passing if_sheet_exists in write mode will raise an error with all engines (mostly as a feedback mechanism for the user)

df.to_excel(f, "foo", engine="openpyxl")
with ExcelWriter(
f, engine="openpyxl", mode="a", if_sheet_exists=if_sheet_exists
) as writer:
df.to_excel(writer, sheet_name="foo")


def test_to_excel_with_openpyxl_engine(ext):
# GH 29854
with tm.ensure_clean(ext) as filename:
Expand Down Expand Up @@ -175,7 +236,9 @@ def test_append_mode_file(ext):
with tm.ensure_clean(ext) as f:
df.to_excel(f, engine="openpyxl")

with ExcelWriter(f, mode="a", engine="openpyxl") as writer:
with ExcelWriter(
f, mode="a", engine="openpyxl", if_sheet_exists="new"
) as writer:
df.to_excel(writer)

# make sure that zip files are not concatenated by making sure that
Expand Down
8 changes: 8 additions & 0 deletions pandas/tests/io/excel/test_writers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1325,6 +1325,14 @@ def test_excel_duplicate_columns_with_names(self, path):
expected = DataFrame([[0, 10, 0], [1, 11, 1]], columns=["A", "B", "A.1"])
tm.assert_frame_equal(result, expected)

def test_if_sheet_exists_raises(self, ext):
# GH 40230
msg = "if_sheet_exists is only valid in append mode (mode='a')"

with tm.ensure_clean(ext) as f:
with pytest.raises(ValueError, match=re.escape(msg)):
ExcelWriter(f, if_sheet_exists="replace")


class TestExcelWriterEngineTests:
@pytest.mark.parametrize(
Expand Down