BUG: fix tzaware dataframe transpose bug #26825
Changes from all commits: c9130f8, 908465a, 2b89d35, 7bcdf16, f5759e6, 3419983, 528015e, 508f8ae, c64d31f, 6bd1a0a, baacaaf, c23edcc, b559753, e39370c, 00b31e4, 0a9a886, e88bc00, 3c49874, 5c38a76, 657aa0c, be106cc, 820c4e4, 8b2372e
pandas/core/groupby/generic.py
@@ -21,10 +21,12 @@
 from pandas.errors import AbstractMethodError
 from pandas.util._decorators import Appender, Substitution

-from pandas.core.dtypes.cast import maybe_downcast_to_dtype
+from pandas.core.dtypes.cast import (
+    maybe_convert_objects, maybe_downcast_to_dtype)
 from pandas.core.dtypes.common import (
     ensure_int64, ensure_platform_int, is_bool, is_datetimelike,
-    is_integer_dtype, is_interval_dtype, is_numeric_dtype, is_scalar)
+    is_integer_dtype, is_interval_dtype, is_numeric_dtype, is_object_dtype,
+    is_scalar)
 from pandas.core.dtypes.missing import isna, notna

 from pandas._typing import FrameOrSeries
@@ -334,7 +336,6 @@ def _decide_output_index(self, output, labels):

     def _wrap_applied_output(self, keys, values, not_indexed_same=False):
         from pandas.core.index import _all_indexes_same
-        from pandas.core.tools.numeric import to_numeric

         if len(keys) == 0:
             return DataFrame(index=keys)
@@ -406,7 +407,6 @@ def first_not_none(values):
             # provide a reduction (Frame -> Series) if groups are
             # unique
             if self.squeeze:
-
                 # assign the name to this series
                 if singular_series:
                     values[0].name = keys[0]
@@ -481,14 +481,7 @@ def first_not_none(values):
                 # as we are stacking can easily have object dtypes here
                 so = self._selected_obj
                 if so.ndim == 2 and so.dtypes.apply(is_datetimelike).any():
-                    result = result.apply(
-                        lambda x: to_numeric(x, errors='ignore'))
-                    date_cols = self._selected_obj.select_dtypes(
-                        include=['datetime', 'timedelta']).columns
-                    date_cols = date_cols.intersection(result.columns)
-                    result[date_cols] = (result[date_cols]
-                                         ._convert(datetime=True,
-                                                   coerce=True))
+                    result = _recast_datetimelike_result(result)
                 else:
                     result = result._convert(datetime=True)

@@ -1710,3 +1703,35 @@ def _normalize_keyword_aggregation(kwargs):
         order.append((column,
                       com.get_callable_name(aggfunc) or aggfunc))
     return aggspec, columns, order
+
+
+def _recast_datetimelike_result(result: DataFrame) -> DataFrame:
+    """
+    If we have date/time like in the original, then coerce dates
+    as we are stacking can easily have object dtypes here.
+
+    Parameters
+    ----------
+    result : DataFrame
+
+    Returns
+    -------
+    DataFrame
+
+    Notes
+    -----
+    - Assumes Groupby._selected_obj has ndim==2 and at least one
+      datetimelike column
+    """
+    result = result.copy()
+
+    obj_cols = [idx for idx in range(len(result.columns))
+                if is_object_dtype(result.dtypes[idx])]
+
+    # See GH#26285
+    for n in obj_cols:
+        converted = maybe_convert_objects(result.iloc[:, n].values,
+                                          convert_numeric=False)
+
+        result.iloc[:, n] = converted
+    return result

Review thread on the docstring note "Assumes Groupby._selected_obj has ndim==2 and at least one datetimelike column":
Reviewer: this note doesn't seem relevant as you are passing in a frame, right?
Author: That wasn't obvious to me, because we're talking about the dimensions of two separate objects. Are they necessarily the same?
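As a rough illustration (not part of the PR), here is a minimal sketch of the groupby-apply path the new helper targets; the frame, column names, and printed dtypes are made up for illustration, and the exact output depends on the pandas version (see GH#26285):

    import pandas as pd

    df = pd.DataFrame({
        "key": ["a", "a", "b"],
        "when": pd.to_datetime(["2019-01-01", "2019-01-02", "2019-01-03"]),
        "value": [1.0, 2.0, 3.0],
    })

    # Applying a frame-returning function stacks the per-group results; the
    # stacked intermediate can hold the datetime values as object dtype,
    # which _recast_datetimelike_result re-infers back to datetime64.
    result = df.groupby("key").apply(lambda g: g[["when", "value"]])
    print(result.dtypes)  # "when" should come back as datetime64[ns], not object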
pandas/core/internals/construction.py
@@ -159,9 +159,28 @@ def init_ndarray(values, index, columns, dtype=None, copy=False):
     # on the entire block; this is to convert if we have datetimelike's
     # embedded in an object type
     if dtype is None and is_object_dtype(values):
-        values = maybe_infer_to_datetimelike(values)
-
-    return create_block_manager_from_blocks([values], [columns, index])
+        if values.ndim == 2 and values.shape[0] != 1:
+            # transpose and separate blocks
+
+            dvals_list = [maybe_infer_to_datetimelike(row) for row in values]
+            for n in range(len(dvals_list)):
+                if isinstance(dvals_list[n], np.ndarray):
+                    dvals_list[n] = dvals_list[n].reshape(1, -1)
+
+            from pandas.core.internals.blocks import make_block
+
+            # TODO: What about re-joining object columns?
+            block_values = [make_block(dvals_list[n], placement=[n])
+                            for n in range(len(dvals_list))]
+
+        else:
+            datelike_vals = maybe_infer_to_datetimelike(values)
+            block_values = [datelike_vals]
+    else:
+        block_values = [values]
+
+    return create_block_manager_from_blocks(block_values, [columns, index])


 def init_dict(data, index, columns, dtype=None):

Review thread on "if values.ndim == 2 and values.shape[0] != 1:":
Reviewer: this is much more messy, can we change something else to make this nicer?
Author: Not really. I'm looking into the other places where maybe_infer_to_datetimelike is used to see if some of this can go into that. We could separate this whole block into a dedicated function. But one way or another we need to bite the bullet.
Reviewer: so the inside of the list loop should be in pandas.core.dtypes.cast, no? (obviously up until you make the blocks themselves)
Author: I'd like to leave this for the next pass, when I'm taking a more systematic look at maybe_infer_to_datetimelike.

Review thread on "# TODO: What about re-joining object columns?":
Reviewer: pls reuse the block creation routines below
Author: attempts so far have broken everything. do you have a particular routine in mind?
Reviewer: what I mean is you can remove the create_block_manager_from_blocks and let it fall through to 184 with, I think, a very small change, e.g.
Of course, pls use a longer name than dvals.
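As a rough illustration (not part of the PR), a minimal sketch of the tz-aware transpose round trip named in the PR title; the column names and timezone are made up, and whether the tz-aware dtype survives the round trip depends on the pandas version in use (at the time of this PR, transpose went through DataFrame construction from a 2-D object ndarray, i.e. the init_ndarray path above):

    import pandas as pd

    dti = pd.date_range("2019-01-01", periods=3, tz="US/Eastern")
    df = pd.DataFrame({"A": dti, "B": dti})

    # Inferring each row of the object ndarray separately is what lets the
    # tz-aware dtype be reconstructed in the transposed result instead of
    # every column staying as plain object values.
    print(df.T.dtypes)    # ideally datetime64[ns, US/Eastern] for every column
    print(df.T.T.dtypes)  # and the round trip should match df.dtypes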