BUG: to_xml raising for pd.NA (#45116)

phofl · web-flow · commit 58b6e0627868 · 2021-12-30T09:10:51.000-05:00
diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst
@@ -856,6 +856,7 @@ I/O
 - Bug in :func:`to_csv` always coercing datetime columns with different formats to the same format (:issue:`21734`)
 - :meth:`DataFrame.to_csv` and :meth:`Series.to_csv` with ``compression`` set to ``'zip'`` no longer create a zip file containing a file ending with ".zip". Instead, they try to infer the inner file name more smartly. (:issue:`39465`)
 - Bug in :func:`read_csv` where reading a mixed column of booleans and missing values to a float type results in the missing values becoming 1.0 rather than NaN (:issue:`42808`, :issue:`34120`)
+- Bug in :meth:`DataFrame.to_xml` raising an error for ``pd.NA`` with extension array dtypes (:issue:`43903`)
 - Bug in :func:`read_csv` when passing simultaneously a parser in ``date_parser`` and ``parse_dates=False``, the parsing was still called (:issue:`44366`)
 - Bug in :func:`read_csv` not setting name of :class:`MultiIndex` columns correctly when ``index_col`` is not the first column (:issue:`38549`)
 - Bug in :func:`read_csv` silently ignoring errors when failing to create a memory-mapped file (:issue:`44766`)
diff --git a/pandas/io/formats/xml.py b/pandas/io/formats/xml.py
@@ -18,6 +18,7 @@
 from pandas.util._decorators import doc
 
 from pandas.core.dtypes.common import is_list_like
+from pandas.core.dtypes.missing import isna
 
 from pandas.core.frame import DataFrame
 from pandas.core.shared_docs import _shared_docs
@@ -571,9 +572,7 @@ def build_elems(self) -> None:
             elem_name = f"{self.prefix_uri}{flat_col}"
             try:
                 val = (
-                    None
-                    if self.d[col] in [None, ""] or self.d[col] != self.d[col]
-                    else str(self.d[col])
+                    None if isna(self.d[col]) or self.d[col] == "" else str(self.d[col])
                 )
                 SubElement(self.elem_row, elem_name).text = val
             except KeyError:
diff --git a/pandas/tests/io/xml/__init__.py b/pandas/tests/io/xml/__init__.py
diff --git a/pandas/tests/io/xml/test_to_xml.py b/pandas/tests/io/xml/test_to_xml.py
@@ -12,6 +12,7 @@
 import pandas.util._test_decorators as td
 
 from pandas import (
+    NA,
     DataFrame,
     Index,
 )
@@ -1307,15 +1308,25 @@ def test_filename_and_suffix_comp(parser, compression_only):
     assert geom_xml == output.strip()
 
 
+@td.skip_if_no("lxml")
+def test_ea_dtypes(any_numeric_ea_dtype):
+    # GH#43903
+    expected = """<?xml version='1.0' encoding='utf-8'?>
+<data>
+  <row>
+    <index>0</index>
+    <a/>
+  </row>
+</data>"""
+    df = DataFrame({"a": [NA]}).astype(any_numeric_ea_dtype)
+    result = df.to_xml()
+    assert result.strip() == expected
+
+
 def test_unsuported_compression(datapath, parser):
     with pytest.raises(ValueError, match="Unrecognized compression type"):
         with tm.ensure_clean() as path:
-            # Argument "compression" to "to_xml" of "DataFrame" has incompatible type
-            # "Literal['7z']"; expected "Union[Literal['infer'], Literal['gzip'],
-            # Literal['bz2'], Literal['zip'], Literal['xz'], Dict[str, Any], None]"
-            geom_df.to_xml(
-                path, parser=parser, compression="7z"  # type: ignore[arg-type]
-            )
+            geom_df.to_xml(path, parser=parser, compression="7z")
 
 
 # STORAGE OPTIONS
diff --git a/pandas/tests/io/xml/test_xml.py b/pandas/tests/io/xml/test_xml.py
@@ -684,9 +684,7 @@ def test_names_option_wrong_type(datapath, parser):
     filename = datapath("io", "data", "xml", "books.xml")
 
     with pytest.raises(TypeError, match=("is not a valid type for names")):
-        read_xml(
-            filename, names="Col1, Col2, Col3", parser=parser  # type: ignore[arg-type]
-        )
+        read_xml(filename, names="Col1, Col2, Col3", parser=parser)
 
 
 # ENCODING
@@ -1056,10 +1054,7 @@ def test_wrong_compression(parser, compression, compression_only):
 def test_unsuported_compression(datapath, parser):
     with pytest.raises(ValueError, match="Unrecognized compression type"):
         with tm.ensure_clean() as path:
-            # error: Argument "compression" to "read_xml" has incompatible type
-            # "Literal['7z']"; expected "Union[Literal['infer'], Literal['gzip'],
-            # Literal['bz2'], Literal['zip'], Literal['xz'], Dict[str, Any], None]"
-            read_xml(path, parser=parser, compression="7z")  # type: ignore[arg-type]
+            read_xml(path, parser=parser, compression="7z")
 
 
 # STORAGE OPTIONS