-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
[READY] Improved performance of `Period`'s default formatter (`period_format`)
#51459
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 3 commits
2504a90
c1bbfd6
3693b30
3c4efea
7ccb423
e4a58e7
714d518
04fbf6b
279114d
35ec182
28d8b5d
a69aeca
705ff81
08d8d5e
c1ff5fb
5b0acc8
f4cef3a
4cefded
99b82f9
3df0666
576e475
d55e47f
316d2b7
e62867e
2cd568c
a07d89e
8ad4827
03d9778
8d21053
13acb29
4fd20ad
5d4bfc1
77b8b7a
24a6b63
a309388
17eedeb
14b7489
e1a81ef
faf97eb
f6ced94
3da1e4b
55d180e
ea9fc47
4becd71
74cbabf
b3d5963
1301795
61ce96e
6890776
e14c383
ac68ab9
1a05c22
d3c19d0
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -32,10 +32,13 @@ def time_frame_date_formatting_custom(self, obs): | |
def time_frame_datetime_to_str(self, obs): | ||
self.data["dt"].astype(str) | ||
|
||
def time_frame_datetime_formatting_default_date_only(self, obs): | ||
def time_frame_datetime_formatting_default(self, obs): | ||
self.data["dt"].dt.strftime(date_format=None) | ||
|
||
def time_frame_datetime_formatting_default_explicit_date_only(self, obs): | ||
self.data["dt"].dt.strftime(date_format="%Y-%m-%d") | ||
|
||
def time_frame_datetime_formatting_default(self, obs): | ||
def time_frame_datetime_formatting_default_explicit(self, obs): | ||
self.data["dt"].dt.strftime(date_format="%Y-%m-%d %H:%M:%S") | ||
|
||
def time_frame_datetime_formatting_default_with_float(self, obs): | ||
|
@@ -45,6 +48,44 @@ def time_frame_datetime_formatting_custom(self, obs): | |
self.data["dt"].dt.strftime(date_format="%Y-%m-%d --- %H:%M:%S") | ||
|
||
|
||
class PeriodStrftime:
    """asv benchmarks for turning a column of periods into strings."""

    timeout = 1500
    params = ([1000, 10000], ["D", "H"])
    param_names = ["obs", "fq"]

    def setup(self, obs, fq):
        """Build a frame holding ``obs`` consecutive periods of frequency ``fq``."""
        periods = pd.period_range(start="2000-01-01", periods=obs, freq=fq)
        # One random draw, repeated on every row.
        filler = [np.random.uniform()] * obs
        self.data = pd.DataFrame({"p": periods, "r": filler})

    def time_frame_period_to_str(self, obs, fq):
        """Convert the period column with ``astype(str)``."""
        period_col = self.data["p"]
        period_col.astype(str)

    def time_frame_period_formatting_default(self, obs, fq):
        """Format the period column with ``date_format=None``.

        As opposed to datetimes, periods have many default formats, and which
        one applies depends on the period characteristics. There is therefore
        almost no chance to reach the same level of performance when a
        'default' format string is explicitly provided by the user. See
        time_frame_datetime_formatting_default_explicit in the datetime
        benchmarks.
        """
        period_col = self.data["p"]
        period_col.dt.strftime(date_format=None)

    def time_frame_period_formatting_index_default(self, obs, fq):
        """Format the periods through ``Index.format()`` after indexing on them."""
        period_index = self.data.set_index("p").index
        period_index.format()

    def time_frame_period_formatting_custom(self, obs, fq):
        """Format the period column with a custom date format."""
        period_col = self.data["p"]
        period_col.dt.strftime(date_format="%Y-%m-%d --- %H:%M:%S")

    def time_frame_period_formatting_iso8601_strftime_Z(self, obs, fq):
        """Format the period column as ISO 8601 with a literal ``Z`` suffix."""
        period_col = self.data["p"]
        period_col.dt.strftime(date_format="%Y-%m-%dT%H:%M:%SZ")

    def time_frame_period_formatting_iso8601_strftime_offset(self, obs, fq):
        """Format as ISO 8601 with ``%z``.

        Not optimized yet as %z is not supported by `convert_strftime_format`.
        """
        period_col = self.data["p"]
        period_col.dt.strftime(date_format="%Y-%m-%dT%H:%M:%S%z")
Review comment: Why do these not show up in the asv results you posted?
Reply: Good point, it seems that I forgot them in the copy-paste. I'll try to rerun asv partially to get them.
Reply (edit): Actually, I had removed all asvs that were expected not to show improvements (custom date formats). Here are all ASVs for
|
||
|
||
|
||
class BusinessHourStrftime: | ||
timeout = 1500 | ||
params = [1000, 10000] | ||
|
@@ -62,3 +103,26 @@ def time_frame_offset_str(self, obs): | |
|
||
def time_frame_offset_repr(self, obs): | ||
self.data["off"].apply(repr) | ||
|
||
|
||
if __name__ == "__main__":
    # A __main__ to easily debug this script: every benchmark of every class
    # is run once for each combination of its parameters, without asv.
    from itertools import product

    for cls in (DatetimeStrftime, PeriodStrftime, BusinessHourStrftime):
        all_p_values = cls.params
        if len(cls.param_names) == 1:
            # Single-parameter classes declare ``params`` as a flat list of
            # values; wrap it so it pairs up with the single parameter name.
            all_p_values = (all_p_values,)
        all_params = dict(zip(cls.param_names, all_p_values))

        for case in product(*all_params.values()):
            p_dict = dict(zip(all_params, case))
            print(f"{cls.__name__} - {p_dict}")
            o = cls()
            o.setup(**p_dict)
            for m_name, m in cls.__dict__.items():
                # Only run the benchmarks themselves. ``setup`` is callable as
                # well and would otherwise be executed a second time per case.
                if callable(m) and m_name.startswith("time_"):
                    print(m_name)
                    m(o, **p_dict)
smarie marked this conversation as resolved.
Show resolved
Hide resolved
|
Original file line number | Diff line number | Diff line change | ||
---|---|---|---|---|
|
@@ -1152,46 +1152,89 @@ cdef int64_t period_ordinal_to_dt64(int64_t ordinal, int freq) except? -1: | |||
|
||||
|
||||
cdef str period_format(int64_t value, int freq, object fmt=None):
    """
    Format a period ordinal as a string.

    Parameters
    ----------
    value : int64_t
        Period ordinal. ``NPY_NAT`` is rendered as ``"NaT"``.
    freq : int
        Frequency code of the period.
    fmt : str, bytes or None, default None
        Format to apply. A ``str`` is encoded with the current locale before
        being handed to ``_period_strftime``. When None, the default
        representation of the frequency is produced by
        ``_period_default_format``.

    Returns
    -------
    str
    """
    if value == NPY_NAT:
        return "NaT"

    if fmt is None:
        # Default representation: built directly by the dedicated formatter
        return _period_default_format(value, freq)

    if isinstance(fmt, str):
        # Encode using current locale, in case fmt contains non-utf8 chars
        fmt = <bytes>util.string_encode_locale(fmt)

    return _period_strftime(value, freq, fmt)
|
||||
cdef str _period_default_format(int64_t value, int freq):
    """
    Render a period ordinal in the default layout of its frequency group.

    The string is assembled directly from the broken-down date fields with
    string formatting, which is faster than a strftime-style format.

    Parameters
    ----------
    value : int64_t
        Period ordinal. ``period_format`` handles ``NPY_NAT`` before
        delegating here.
    freq : int
        Frequency code of the period.

    Returns
    -------
    str

    Raises
    ------
    ValueError
        If the frequency group of ``freq`` is not one of the groups below.
    """
    cdef:
        int freq_group, quarter
        npy_datetimestruct dts

    # Break the ordinal down into date/time fields.
    get_date_info(value, freq, &dts)

    # The layout is selected by the frequency group.
    freq_group = get_freq_group(freq)

    if freq_group == FR_ANN:
        # equivalent of b"%Y"
        return f"{dts.year}"

    if freq_group == FR_QTR:
        # equivalent of b"%FQ%q"
        # get_yq returns the quarter and updates dts.year in place.
        # NOTE(review): the original comment calls the updated year the
        # "fiscal year (?)"; a reviewer was confused by it and the author
        # asked to confirm that wording against get_yq's documentation.
        quarter = get_yq(value, freq, &dts)
        return f"{dts.year}Q{quarter}"

    if freq_group == FR_MTH:
        # equivalent of b"%Y-%m"
        return f"{dts.year}-{dts.month:02d}"

    if freq_group == FR_WK:
        # Weekly periods are shown as start_date/end_date, each formatted
        # as a daily period through period_format.
        first_day = period_asfreq(value, freq, FR_DAY, 0)
        last_day = period_asfreq(value, freq, FR_DAY, 1)
        return f"{period_format(first_day, FR_DAY)}/{period_format(last_day, FR_DAY)}"

    if freq_group in (FR_BUS, FR_DAY):
        # equivalent of b"%Y-%m-%d"
        return f"{dts.year}-{dts.month:02d}-{dts.day:02d}"

    if freq_group == FR_HR:
        # equivalent of b"%Y-%m-%d %H:00"
        return f"{dts.year}-{dts.month:02d}-{dts.day:02d} {dts.hour:02d}:00"

    if freq_group == FR_MIN:
        # equivalent of b"%Y-%m-%d %H:%M"
        return (
            f"{dts.year}-{dts.month:02d}-{dts.day:02d} "
            f"{dts.hour:02d}:{dts.min:02d}"
        )

    if freq_group == FR_SEC:
        # equivalent of b"%Y-%m-%d %H:%M:%S"
        return (
            f"{dts.year}-{dts.month:02d}-{dts.day:02d} "
            f"{dts.hour:02d}:{dts.min:02d}:{dts.sec:02d}"
        )

    if freq_group == FR_MS:
        # equivalent of b"%Y-%m-%d %H:%M:%S.%l"
        return (
            f"{dts.year}-{dts.month:02d}-{dts.day:02d} "
            f"{dts.hour:02d}:{dts.min:02d}:{dts.sec:02d}"
            f".{(dts.us // 1_000):03d}"
        )

    if freq_group == FR_US:
        # equivalent of b"%Y-%m-%d %H:%M:%S.%u"
        return (
            f"{dts.year}-{dts.month:02d}-{dts.day:02d} "
            f"{dts.hour:02d}:{dts.min:02d}:{dts.sec:02d}"
            f".{(dts.us):06d}"
        )

    if freq_group == FR_NS:
        # equivalent of b"%Y-%m-%d %H:%M:%S.%n"
        return (
            f"{dts.year}-{dts.month:02d}-{dts.day:02d} "
            f"{dts.hour:02d}:{dts.min:02d}:{dts.sec:02d}"
            f".{((dts.us * 1000) + (dts.ps // 1000)):09d}"
        )

    raise ValueError(f"Unknown freq: {freq}")
|
||||
|
||||
cdef list extra_fmts = [(b"%q", b"^`AB`^"), | ||||
|
Uh oh!
There was an error while loading. Please reload this page.