Closed
Description
With df = pd.DataFrame({'a': [1, 2], 'b': [3, 4]}), my expectation is that:
df.agg([func])
df.agg({'a': func, 'b': func})
behave the same. However, the former allows partial failure when a TypeError is raised, whereas the latter does not. The same discrepancy occurs with groupby.
While I'd like to focus this issue on just agg with list/dict-likes, the results for every combination of Series/DataFrame, agg/transform, and list/dict arguments, with functions that raise ValueError or TypeError, are below.
Results
Series <class 'ValueError'>
.agg with dict: raised ValueError()
.agg with list: raised ValueError()
.transform with dict: partial failure allowed
.transform with list: partial failure allowed
groupby(...).agg with dict: raised SpecificationError('nested renamer is not supported')
groupby(...).agg with list: raised ValueError()
groupby(...).transform with dict: raised TypeError("unhashable type: 'dict'")
groupby(...).transform with list: raised TypeError("unhashable type: 'list'")
Frame <class 'ValueError'>
.agg with dict: raised ValueError()
.agg with list: raised ValueError()
.transform with dict: partial failure allowed
.transform with list: partial failure allowed
groupby(...).agg with dict: raised ValueError()
groupby(...).agg with list: raised ValueError()
groupby(...).transform with dict: raised TypeError("unhashable type: 'dict'")
groupby(...).transform with list: raised TypeError("unhashable type: 'list'")
Series <class 'TypeError'>
.agg with dict: raised TypeError()
.agg with list: partial failure allowed
.transform with dict: partial failure allowed
.transform with list: partial failure allowed
groupby(...).agg with dict: raised SpecificationError('nested renamer is not supported')
groupby(...).agg with list: raised TypeError()
groupby(...).transform with dict: raised TypeError("unhashable type: 'dict'")
groupby(...).transform with list: raised TypeError("unhashable type: 'list'")
Frame <class 'TypeError'>
.agg with dict: raised TypeError('DataFrame constructor called with incompatible data and dtype: ')
.agg with list: partial failure allowed
.transform with dict: partial failure allowed
.transform with list: partial failure allowed
groupby(...).agg with dict: raised TypeError()
groupby(...).agg with list: partial failure allowed
groupby(...).transform with dict: raised TypeError("unhashable type: 'dict'")
groupby(...).transform with list: raised TypeError("unhashable type: 'list'")
Code
def run_series(err_klass):
    """Print how Series agg/transform react to a function that raises.

    For every combination of plain Series vs. ``groupby(level=0)``,
    ``agg`` vs. ``transform``, and dict vs. list argument, call the method
    with one function that always raises ``err_klass`` and one that
    succeeds, then print whether the call completed ("partial failure
    allowed") or which exception it raised.

    Parameters
    ----------
    err_klass : type
        Exception class raised by the failing function
        (the script calls this with ``ValueError`` and ``TypeError``).

    Returns
    -------
    None
        The report is written to stdout, followed by a blank line.
    """
    # Imported locally because the snippet as posted shows no import lines.
    import itertools as it

    import pandas as pd

    def failop(x):
        raise err_klass

    def passop_agg(x):
        return x.sum()

    def passop_transform(x):
        return x

    ser = pd.Series([1, 2])
    print('Series', err_klass)
    # Order of the product: (use groupby?, method name, list argument?).
    args = it.product((False, True), ('agg', 'transform'), (False, True))
    for use_gb, method, is_list in args:
        obj = ser.groupby(level=0) if use_gb else ser
        op = passop_transform if method == 'transform' else passop_agg
        func = [failop, op] if is_list else {'a': failop, 'b': op}
        try:
            getattr(obj, method)(func)
        except Exception as err:
            # Deliberately broad: the point is to report whatever is raised.
            result = f'raised {err!r}'
        else:
            result = 'partial failure allowed'
        print(
            f' {"groupby(...)" if use_gb else ""}.{method} '
            f'{"with list" if is_list else "with dict"}: {result}'
        )
    print()
def run_frame(err_klass):
    """Print how DataFrame agg/transform react to a function that raises.

    For every combination of plain DataFrame vs. ``groupby(level=0)``,
    ``agg`` vs. ``transform``, and dict vs. list argument, call the method
    with a function that raises ``err_klass`` whenever the data it receives
    sums to less than 100, then print whether the call completed ("partial
    failure allowed") or which exception it raised.

    Parameters
    ----------
    err_klass : type
        Exception class raised by the conditionally failing function
        (the script calls this with ``ValueError`` and ``TypeError``).

    Returns
    -------
    None
        The report is written to stdout, followed by a blank line.
    """
    # Imported locally because the snippet as posted shows no import lines.
    import itertools as it

    import pandas as pd

    def op_agg(x):
        if x.sum().sum() < 100:
            raise err_klass
        return x.sum()

    def op_transform(x):
        if x.sum().sum() < 100:
            raise err_klass
        return x

    # Column 'a' sums to 3 (below the 100 threshold) and column 'b' sums
    # to 300 (above it), so the same function can fail on one column and
    # succeed on the other.
    df = pd.DataFrame({'a': [1, 2], 'b': [100, 200]})
    print('Frame', err_klass)
    # Order of the product: (use groupby?, method name, list argument?).
    args = it.product((False, True), ('agg', 'transform'), (False, True))
    for use_gb, method, is_list in args:
        obj = df.groupby(level=0) if use_gb else df
        op = op_transform if method == 'transform' else op_agg
        func = [op] if is_list else {'a': op, 'b': op}
        try:
            getattr(obj, method)(func)
        except Exception as err:
            # Deliberately broad: the point is to report whatever is raised.
            result = f'raised {err!r}'
        else:
            result = 'partial failure allowed'
        print(
            f' {"groupby(...)" if use_gb else ""}.{method} '
            f'{"with list" if is_list else "with dict"}: {result}'
        )
    print()
# Produce the Series report and then the Frame report for each exception
# type, ValueError first and TypeError second.
for exc_type in (ValueError, TypeError):
    for report in (run_series, run_frame):
        report(exc_type)