Skip to content

CLN: ASV FromDictwithTimestamp #18527

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
91 changes: 19 additions & 72 deletions asv_bench/benchmarks/frame_ctor.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,9 @@
import pandas.util.testing as tm
from pandas import DataFrame, Series, MultiIndex, Timestamp, date_range
try:
from pandas.tseries import offsets
except:
from pandas.tseries.offsets import Nano, Hour
except ImportError:
# For compatibility with older versions
from pandas.core.datetools import * # noqa

from .pandas_vb_common import setup # noqa
Expand All @@ -24,16 +25,16 @@ def setup(self):
self.data2 = {i: {j: float(j) for j in range(100)}
for i in range(2000)}

def time_frame_ctor_list_of_dict(self):
def time_list_of_dict(self):
DataFrame(self.dict_list)

def time_frame_ctor_nested_dict(self):
def time_nested_dict(self):
DataFrame(self.data)

def time_series_ctor_from_dict(self):
def time_dict(self):
Series(self.some_dict)

def time_frame_ctor_nested_dict_int64(self):
def time_nested_dict_int64(self):
# nested dict, integer indexes, regression described in #621
DataFrame(self.data2)

Expand All @@ -46,78 +47,24 @@ def setup(self):
mi = MultiIndex.from_product([range(100), range(100)])
self.s = Series(np.random.randn(10000), index=mi)

def time_frame_from_mi_series(self):
def time_mi_series(self):
DataFrame(self.s)

# ----------------------------------------------------------------------
# From dict with DatetimeIndex with all offsets

# dynamically generate benchmarks for every offset
#
# get_period_count & get_index_for_offset are there because blindly taking each
# offset times 1000 can easily go out of Timestamp bounds and raise errors.
class FromDictwithTimestamp(object):

goal_time = 0.2
params = [Nano(1), Hour(1)]
param_names = ['offset']

def get_period_count(start_date, off):
ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days
if (ten_offsets_in_days == 0):
return 1000
else:
periods = 9 * (Timestamp.max - start_date).days // ten_offsets_in_days
return min(periods, 1000)


def get_index_for_offset(off):
start_date = Timestamp('1/1/1900')
return date_range(start_date,
periods=get_period_count(start_date, off),
freq=off)


all_offsets = offsets.__all__
# extra cases
for off in ['FY5253', 'FY5253Quarter']:
all_offsets.pop(all_offsets.index(off))
all_offsets.extend([off + '_1', off + '_2'])


class FromDictwithTimestampOffsets(object):

params = [all_offsets, [1, 2]]
param_names = ['offset', 'n_steps']

offset_kwargs = {'WeekOfMonth': {'weekday': 1, 'week': 1},
'LastWeekOfMonth': {'weekday': 1, 'week': 1},
'FY5253': {'startingMonth': 1, 'weekday': 1},
'FY5253Quarter': {'qtr_with_extra_week': 1,
'startingMonth': 1,
'weekday': 1}}

offset_extra_cases = {'FY5253': {'variation': ['nearest', 'last']},
'FY5253Quarter': {'variation': ['nearest', 'last']}}

def setup(self, offset, n_steps):
def setup(self, offset):
N = 10**3
np.random.seed(1234)
extra = False
if offset.endswith("_", None, -1):
extra = int(offset[-1])
offset = offset[:-2]

kwargs = {}
if offset in self.offset_kwargs:
kwargs = self.offset_kwargs[offset]

if extra:
extras = self.offset_extra_cases[offset]
for extra_arg in extras:
kwargs[extra_arg] = extras[extra_arg][extra - 1]

offset = getattr(offsets, offset)
self.idx = get_index_for_offset(offset(n_steps, **kwargs))
self.df = DataFrame(np.random.randn(len(self.idx), 10), index=self.idx)
self.d = self.df.to_dict()

def time_frame_ctor(self, offset, n_steps):
idx = date_range(Timestamp('1/1/1900'), freq=offset, periods=N)
df = DataFrame(np.random.randn(N, 10), index=idx)
self.d = df.to_dict()

def time_dict_with_timestamp_offsets(self, offset):
DataFrame(self.d)


Expand Down