pandas-dev · mroeschke · Jun 21, 2022 · Jun 18, 2022 · Jun 18, 2022 · Jun 18, 2022
diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst
@@ -874,6 +874,7 @@ I/O
 - Bug in :class:`StataWriter` where value labels were always written with default encoding (:issue:`46750`)
 - Bug in :class:`StataWriterUTF8` where some valid characters were removed from variable names (:issue:`47276`)
 - Bug in :meth:`DataFrame.to_excel` when writing an empty dataframe with :class:`MultiIndex` (:issue:`19543`)
+- Bug in :func:`read_xml` with ``iterparse`` where elements and attributes sharing the same name were collapsed into one column instead of being distinguished by ``names`` (:issue:`47343`)
 
 Period
 ^^^^^^

diff --git a/pandas/io/xml.py b/pandas/io/xml.py
@@ -413,11 +413,21 @@ def _iterparse_nodes(self) -> list[dict[str, str | None]]:
                     row = {}
 
             if row is not None:
-                for col in self.iterparse[row_node]:
-                    if curr_elem == col:
-                        row[col] = elem.text.strip() if elem.text else None
-                    if col in elem.attrib:
-                        row[col] = elem.attrib[col]
+                if self.names:
+                    for col, nm in zip(self.iterparse[row_node], self.names):
+                        if curr_elem == col:
+                            elem_val = elem.text.strip() if elem.text else None
+                            if elem_val not in row.values() and nm not in row:
+                                row[nm] = elem_val
+                        if col in elem.attrib:
+                            if elem.attrib[col] not in row.values() and nm not in row:
+                                row[nm] = elem.attrib[col]
+                else:
+                    for col in self.iterparse[row_node]:
+                        if curr_elem == col:
+                            row[col] = elem.text.strip() if elem.text else None
+                        if col in elem.attrib:
+                            row[col] = elem.attrib[col]
 
             if event == "end":
                 if curr_elem == row_node and row is not None:
@@ -661,11 +671,21 @@ def _iterparse_nodes(self) -> list[dict[str, str | None]]:
                     row = {}
 
             if row is not None:
-                for col in self.iterparse[row_node]:
-                    if curr_elem == col:
-                        row[col] = elem.text.strip() if elem.text else None
-                    if col in elem.attrib:
-                        row[col] = elem.attrib[col]
+                if self.names:
+                    for col, nm in zip(self.iterparse[row_node], self.names):
+                        if curr_elem == col:
+                            elem_val = elem.text.strip() if elem.text else None
+                            if elem_val not in row.values() and nm not in row:
+                                row[nm] = elem_val
+                        if col in elem.attrib:
+                            if elem.attrib[col] not in row.values() and nm not in row:
+                                row[nm] = elem.attrib[col]
+                else:
+                    for col in self.iterparse[row_node]:
+                        if curr_elem == col:
+                            row[col] = elem.text.strip() if elem.text else None
+                        if col in elem.attrib:
+                            row[col] = elem.attrib[col]
 
             if event == "end":
                 if curr_elem == row_node and row is not None:
@@ -1020,7 +1040,8 @@ def read_xml(
 
     names :  list-like, optional
         Column names for DataFrame of parsed XML data. Use this parameter to
-        rename original element names and distinguish same named elements.
+        rename original element names and distinguish same named elements and
+        attributes.
 
     dtype : Type name or dict of column -> type, optional
         Data type for data or columns. E.g. {{'a': np.float64, 'b': np.int32,

diff --git a/pandas/tests/io/xml/test_xml.py b/pandas/tests/io/xml/test_xml.py
@@ -789,6 +789,41 @@ def test_names_option_output(datapath, parser):
     tm.assert_frame_equal(df_iter, df_expected)
 
 
+def test_repeat_names(parser):  # GH 47343
+    xml = """\
+<shapes>
+  <shape type="2D">
+    <name>circle</name>
+    <type>curved</type>
+  </shape>
+  <shape type="3D">
+    <name>sphere</name>
+    <type>curved</type>
+  </shape>
+</shapes>"""
+    df_xpath = read_xml(  # whole-document parse selected by xpath
+        xml, xpath=".//shape", parser=parser, names=["type_dim", "shape", "type_edge"]
+    )
+
+    df_iter = read_xml_iterparse(  # same document via the iterparse option
+        xml,
+        parser=parser,
+        iterparse={"shape": ["type", "name", "type"]},  # "type" listed twice
+        names=["type_dim", "shape", "type_edge"],  # one name per listed entry
+    )
+
+    df_expected = DataFrame(  # attribute "type" and element "type" kept apart
+        {
+            "type_dim": ["2D", "3D"],
+            "shape": ["circle", "sphere"],
+            "type_edge": ["curved", "curved"],
+        }
+    )
+
+    tm.assert_frame_equal(df_xpath, df_expected)  # both results must match
+    tm.assert_frame_equal(df_iter, df_expected)
+
+
 def test_names_option_wrong_length(datapath, parser):
     filename = datapath("io", "data", "xml", "books.xml")