Skip to content

pd.merge_asof not working when merging TZ-aware index+series  #29864

Closed
@smallstepman

Description

@smallstepman

Hi!

I don't know how to solve the following issue. Can you please take a look? What am I doing wrong?

Problem description

# Reproducer: pd.merge_asof between a tz-aware DatetimeIndex (left) and a
# tz-aware datetime column (right).
import io
import pandas as pd


# Left frame: one value column ("xyz"); the timestamps end up as the index
# (its dtype is printed below to confirm).
data_1 = io.StringIO("""
                           xyz  
from_date                                                       
2019-10-01 00:30:00+00:00  0.9
2019-10-01 01:00:00+00:00  0.8
2019-10-01 01:30:00+00:00  0.7
2019-10-01 02:00:00+00:00  0.6""")
# Fields are separated by runs of two or more whitespace characters so that
# the single space inside each timestamp is not treated as a delimiter.
# The pattern is a raw string: "\s" is not a valid escape in a normal string
# literal and triggers an invalid-escape warning on current Python versions.
df = pd.read_csv(data_1, sep=r'\s{2,}', engine='python')
# Convert the index labels to tz-aware (UTC) timestamps.
df.index = pd.to_datetime(df.index, utc=True)


# Right frame: the timestamps live in an ordinary column ("from_date").
data_2 = io.StringIO("""
                from_date         abc
2019-10-01 00:00:00+00:00        2.46
2019-10-01 00:30:00+00:00        2.46
2019-10-01 01:00:00+00:00        2.46
2019-10-01 01:30:00+00:00        2.46
2019-10-01 02:00:00+00:00        2.19
""")
df2 = pd.read_csv(data_2, sep=r'\s{2,}', engine='python')
# Convert the key column to tz-aware (UTC) timestamps.
df2['from_date'] = pd.to_datetime(df2['from_date'], utc=True)


# Diagnostics: show that both merge keys report the same dtype before merging.
print(f"pandas version: {pd.__version__}")
print(f"df index dtype: {df.index.dtype}")
print(f"df2 dt column dtype: {df2['from_date'].dtype}")
print("check", df.index.dtype == df2.from_date.dtype )
# Merge the left index against the right column. On pandas 0.25.3 this call is
# reported to raise MergeError ("incompatible merge keys") even though the
# dtype check above prints True.
pd.merge_asof(left=df, right=df2, left_index=True, right_on=['from_date'])

Output

pandas version: 0.25.3
df index dtype: datetime64[ns, UTC]
df2 dt column dtype: datetime64[ns, UTC]
check True
---------------------------------------------------------------------------
MergeError                                Traceback (most recent call last)
<ipython-input-82-bdb9feab8f76> in <module>
     28 print(f"df2 dt column dtype: {df2['from_date'].dtype}")
     29 print("check", df.index.dtype == df2.from_date.dtype )
---> 30 pd.merge_asof(left=df, right=df2, left_index=True, right_on=['from_date'], direction='nearest')

c:\users\asd\lib\site-packages\pandas\core\reshape\merge.py in merge_asof(left, right, on, left_on, right_on, left_index, right_index, by, left_by, right_by, suffixes, tolerance, allow_exact_matches, direction)
    537         tolerance=tolerance,
    538         allow_exact_matches=allow_exact_matches,
--> 539         direction=direction,
    540     )
    541     return op.get_result()

c:\users\asd\lib\site-packages\pandas\core\reshape\merge.py in __init__(self, left, right, on, left_on, right_on, left_index, right_index, by, left_by, right_by, axis, suffixes, copy, fill_method, how, tolerance, allow_exact_matches, direction)
   1552             how=how,
   1553             suffixes=suffixes,
-> 1554             fill_method=fill_method,
   1555         )
   1556 

c:\users\asd\lib\site-packages\pandas\core\reshape\merge.py in __init__(self, left, right, on, left_on, right_on, left_index, right_index, axis, suffixes, copy, fill_method, how)
   1442             how=how,
   1443             suffixes=suffixes,
-> 1444             sort=True,  # factorize sorts
   1445         )
   1446 

c:\users\asd\lib\site-packages\pandas\core\reshape\merge.py in __init__(self, left, right, how, on, left_on, right_on, axis, left_index, right_index, sort, suffixes, copy, indicator, validate)
    624             self.right_join_keys,
    625             self.join_names,
--> 626         ) = self._get_merge_keys()
    627 
    628         # validate the merge keys dtypes. We may need to coerce

c:\users\asd\lib\site-packages\pandas\core\reshape\merge.py in _get_merge_keys(self)
   1636                         )
   1637                     )
-> 1638                 raise MergeError(msg)
   1639 
   1640         # validate tolerance; must be a Timedelta if we have a DTI

MergeError: incompatible merge keys [0] dtype('<M8[ns]') and datetime64[ns, UTC], must be the same type

Expected Output

merged dataframes

Metadata

Metadata

Assignees

No one assigned

    Labels

    Reshaping (Concat, Merge/Join, Stack/Unstack, Explode), Timezones (Timezone data dtype)

    Type

    No type

    Projects

    No projects

    Milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions