-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
ENH: make "closed" part of IntervalDtype #38394
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 7 commits
747926e
25d5925
15613e6
7bd2db6
10c0225
2a82a78
1b93617
f2bb5a1
ded6c31
9816690
aae7d84
b262bdc
c2efb79
5ef58db
bf86746
70c7de6
517c8f1
922ce1a
82ee698
e783761
6e47156
367feee
b10b0bf
370a843
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -15,6 +15,7 @@ | |
Union, | ||
cast, | ||
) | ||
import warnings | ||
|
||
import numpy as np | ||
import pytz | ||
|
@@ -1011,28 +1012,60 @@ class IntervalDtype(PandasExtensionDtype): | |
str = "|O08" | ||
base = np.dtype("O") | ||
num = 103 | ||
_metadata = ("subtype",) | ||
_match = re.compile(r"(I|i)nterval\[(?P<subtype>.+)\]") | ||
_metadata = ( | ||
"subtype", | ||
"closed", | ||
) | ||
_match = re.compile( | ||
r"(I|i)nterval\[(?P<subtype>[^,]+)(, (?P<closed>(right|left|both|neither)))?\]" | ||
) | ||
_cache: Dict[str_type, PandasExtensionDtype] = {} | ||
|
||
def __new__(cls, subtype=None): | ||
def __new__(cls, subtype=None, closed: Optional[str_type] = None): | ||
from pandas.core.dtypes.common import is_string_dtype, pandas_dtype | ||
|
||
if closed is not None and closed not in {"right", "left", "both", "neither"}: | ||
raise ValueError("closed must be one of 'right', 'left', 'both', 'neither'") | ||
|
||
if isinstance(subtype, IntervalDtype): | ||
if closed is not None and closed != subtype.closed: | ||
jreback marked this conversation as resolved.
Show resolved
Hide resolved
|
||
raise ValueError( | ||
"dtype.closed and 'closed' do not match. " | ||
"Try IntervalDtype(dtype.subtype, closed) instead." | ||
) | ||
return subtype | ||
elif subtype is None: | ||
# we are called as an empty constructor | ||
# generally for pickle compat | ||
u = object.__new__(cls) | ||
u._subtype = None | ||
u._closed = closed | ||
return u | ||
elif isinstance(subtype, str) and subtype.lower() == "interval": | ||
subtype = None | ||
else: | ||
if isinstance(subtype, str): | ||
m = cls._match.search(subtype) | ||
if m is not None: | ||
subtype = m.group("subtype") | ||
gd = m.groupdict() | ||
subtype = gd["subtype"] | ||
if gd.get("closed", None) is not None: | ||
closed = gd["closed"] | ||
elif closed is not None: | ||
# user passed eg. IntervalDtype("interval[int64]", "left") | ||
pass | ||
else: | ||
warnings.warn( | ||
"Constructing an IntervalDtype from a string without " | ||
"specifying 'closed' is deprecated and will raise in " | ||
"a future version. " | ||
f"Use e.g. 'interval[{subtype}, left]'. " | ||
"Defaulting to closed='right'.", | ||
FutureWarning, | ||
stacklevel=2, | ||
) | ||
# default to "right" | ||
closed = "right" | ||
|
||
try: | ||
subtype = pandas_dtype(subtype) | ||
|
@@ -1047,14 +1080,32 @@ def __new__(cls, subtype=None): | |
) | ||
raise TypeError(msg) | ||
|
||
if closed is None and subtype is not None: | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. If we have this one, do we need to warn on L1058? (i.e., which one is getting hit?) There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can you respond to this? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. They are both hit separately. |
||
warnings.warn( | ||
"Constructing an IntervalDtype without " | ||
"specifying 'closed' is deprecated and will raise in " | ||
"a future version. " | ||
"Use e.g. IntervalDtype(np.int64, 'left'). " | ||
"Defaulting to closed='right'.", | ||
FutureWarning, | ||
stacklevel=2, | ||
) | ||
closed = "right" | ||
|
||
key = str(subtype) + str(closed) | ||
try: | ||
return cls._cache[str(subtype)] | ||
return cls._cache[key] | ||
except KeyError: | ||
u = object.__new__(cls) | ||
u._subtype = subtype | ||
cls._cache[str(subtype)] = u | ||
u._closed = closed | ||
cls._cache[key] = u | ||
return u | ||
|
||
@property | ||
def closed(self): | ||
return self._closed | ||
|
||
@property | ||
def subtype(self): | ||
""" | ||
|
@@ -1104,7 +1155,7 @@ def type(self): | |
def __str__(self) -> str_type: | ||
if self.subtype is None: | ||
return "interval" | ||
return f"interval[{self.subtype}]" | ||
return f"interval[{self.subtype}, {self.closed}]" | ||
|
||
def __hash__(self) -> int: | ||
# make myself hashable | ||
|
@@ -1118,6 +1169,8 @@ def __eq__(self, other: Any) -> bool: | |
elif self.subtype is None or other.subtype is None: | ||
# None should match any subtype | ||
return True | ||
elif self.closed != other.closed: | ||
return False | ||
else: | ||
from pandas.core.dtypes.common import is_dtype_equal | ||
|
||
|
@@ -1128,6 +1181,15 @@ def __setstate__(self, state): | |
# PandasExtensionDtype superclass and uses the public properties to | ||
# pickle -> need to set the settable private ones here (see GH26067) | ||
self._subtype = state["subtype"] | ||
# backward-compat older pickles won't have "closed" key | ||
self._closed = state.pop("closed", None) | ||
if self._closed is None: | ||
warnings.warn( | ||
"Unpickled legacy IntervalDtype does not specify 'closed' " | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is there a test for this? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes, we have tests for all of the newly added warning/raising cases in the IntervalDtype constructor. (Just added a missing consistency check: https://github.com/pandas-dev/pandas/pull/38394/files#diff-f99ef42afad339d00e36197a60ccc76d74c6a94c30e05aef69d18e0ec4b10d4eR1055) There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Whoops, I don't have a test for this one. Will update. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Huh, this is definitely reached, but tm.assert_produces_warning isn't seeing anything. |
||
"attribute. Set dtype._closed to one of 'left', 'right', 'both', " | ||
"'neither' before using this IntervalDtype object.", | ||
UserWarning, | ||
) | ||
|
||
@classmethod | ||
def is_dtype(cls, dtype: object) -> bool: | ||
|
Uh oh!
There was an error while loading. Please reload this page.