Closed
Description
import pandas as pd
import plotly.express as px
df = pd.DataFrame(dict(x=[0, 1], y=[1, 10], z=[0.1, 0.8], money=[100, 200]))
df2 = pd.DataFrame(dict(time=[23, 26], money=[100, 200]))
fig = px.scatter(df, x="z", y=df2.money, size=df.y)
With pandas 2.2.3:
Traceback (most recent call last):
File "/home/marcogorelli/scratch/.venv/lib/python3.12/site-packages/marimo/_runtime/executor.py", line 157, in execute_cell
exec(cell.body, glbls)
Cell marimo://trying_plotly.py#cell=cell-0, line 5, in <module>
fig = px.scatter(df, x="z", y=df2.money, size=df.y)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/marcogorelli/scratch/.venv/lib/python3.12/site-packages/plotly/express/_chart_types.py", line 66, in scatter
return make_figure(args=locals(), constructor=go.Scatter)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/marcogorelli/scratch/.venv/lib/python3.12/site-packages/plotly/express/_core.py", line 2117, in make_figure
args = build_dataframe(args, constructor)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/marcogorelli/scratch/.venv/lib/python3.12/site-packages/plotly/express/_core.py", line 1513, in build_dataframe
df_output, wide_id_vars = process_args_into_dataframe(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/marcogorelli/scratch/.venv/lib/python3.12/site-packages/plotly/express/_core.py", line 1271, in process_args_into_dataframe
col_name = _check_name_not_reserved(field, reserved_names)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/marcogorelli/scratch/.venv/lib/python3.12/site-packages/plotly/express/_core.py", line 1006, in _check_name_not_reserved
raise NameError(
NameError: A name conflict was encountered for argument 'y'. A column or index with name 'y' is ambiguous.
With the latest pandas nightly (installable with `pip install --pre --extra-index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple pandas`), it just plots, without raising.
The difference is due to pandas no longer caching the result of `__getitem__` for columns:
In pandas 3.0+:
In [1]: import pandas as pd
In [2]: df = pd.DataFrame({'a': [1,2,3], 'b': [4,5,6]})
In [3]: df['a'] is df['a']
Out[3]: False
In pandas 2.2.3:
In [1]: import pandas as pd
In [2]: df = pd.DataFrame({'a': [1,2,3], 'b': [4,5,6]})
In [3]: df['a'] is df['a']
Out[3]: True