Skip to content

Commit cb04f60

Browse files
authored
Merge branch 'pandas-dev:main' into main
2 parents 8011856 + 627d1b6 commit cb04f60

File tree

13 files changed

+169
-227
lines changed

13 files changed

+169
-227
lines changed

asv_bench/benchmarks/io/excel.py

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -42,9 +42,8 @@ def setup(self, engine):
4242
def time_write_excel(self, engine):
4343
bio = BytesIO()
4444
bio.seek(0)
45-
writer = ExcelWriter(bio, engine=engine)
46-
self.df.to_excel(writer, sheet_name="Sheet1")
47-
writer.save()
45+
with ExcelWriter(bio, engine=engine) as writer:
46+
self.df.to_excel(writer, sheet_name="Sheet1")
4847

4948

5049
class WriteExcelStyled:
@@ -57,13 +56,12 @@ def setup(self, engine):
5756
def time_write_excel_style(self, engine):
5857
bio = BytesIO()
5958
bio.seek(0)
60-
writer = ExcelWriter(bio, engine=engine)
61-
df_style = self.df.style
62-
df_style.applymap(lambda x: "border: red 1px solid;")
63-
df_style.applymap(lambda x: "color: blue")
64-
df_style.applymap(lambda x: "border-color: green black", subset=["float1"])
65-
df_style.to_excel(writer, sheet_name="Sheet1")
66-
writer.save()
59+
with ExcelWriter(bio, engine=engine) as writer:
60+
df_style = self.df.style
61+
df_style.applymap(lambda x: "border: red 1px solid;")
62+
df_style.applymap(lambda x: "color: blue")
63+
df_style.applymap(lambda x: "border-color: green black", subset=["float1"])
64+
df_style.to_excel(writer, sheet_name="Sheet1")
6765

6866

6967
class ReadExcel:

doc/source/whatsnew/v2.0.0.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@ Other enhancements
8585
- :func:`timedelta_range` now supports a ``unit`` keyword ("s", "ms", "us", or "ns") to specify the desired resolution of the output index (:issue:`49824`)
8686
- :meth:`DataFrame.to_json` now supports a ``mode`` keyword with supported inputs ``'w'`` and ``'a'``. The default is ``'w'``; ``'a'`` can be used when ``lines=True`` and ``orient='records'`` to append record-oriented JSON lines to an existing JSON file (:issue:`35849`)
8787
- Added ``name`` parameter to :meth:`IntervalIndex.from_breaks`, :meth:`IntervalIndex.from_arrays` and :meth:`IntervalIndex.from_tuples` (:issue:`48911`)
88+
- Improved error message for :func:`merge_asof` when join-columns were duplicated (:issue:`50102`)
8889
- Added :meth:`Index.infer_objects` analogous to :meth:`Series.infer_objects` (:issue:`50034`)
8990
- Added ``copy`` parameter to :meth:`Series.infer_objects` and :meth:`DataFrame.infer_objects`, passing ``False`` will avoid making copies for series or columns that are already non-object or where no better dtype can be inferred (:issue:`50096`)
9091
- :meth:`DataFrame.plot.hist` now recognizes ``xlabel`` and ``ylabel`` arguments (:issue:`49793`)
@@ -676,6 +677,8 @@ Removal of prior version deprecations/changes
676677
- Changed default of ``numeric_only`` to ``False`` in :class:`.Resampler` methods (:issue:`47177`)
677678
- Using the method :meth:`DataFrameGroupBy.transform` with a callable that returns DataFrames will align to the input's index (:issue:`47244`)
678679
- When providing a list of columns of length one to :meth:`DataFrame.groupby`, the keys that are returned by iterating over the resulting :class:`DataFrameGroupBy` object will now be tuples of length one (:issue:`47761`)
680+
- Removed deprecated methods :meth:`ExcelWriter.write_cells`, :meth:`ExcelWriter.save`, :meth:`ExcelWriter.cur_sheet`, :meth:`ExcelWriter.handles`, :meth:`ExcelWriter.path` (:issue:`45795`)
681+
- The :class:`ExcelWriter` attribute ``book`` can no longer be set; it is still available to be accessed and mutated (:issue:`48943`)
679682
-
680683

681684
.. ---------------------------------------------------------------------------
@@ -790,6 +793,7 @@ Indexing
790793
- Bug in :meth:`DataFrame.loc` raising ``ValueError`` with ``bool`` indexer and :class:`MultiIndex` (:issue:`47687`)
791794
- Bug in :meth:`DataFrame.__setitem__` raising ``ValueError`` when right hand side is :class:`DataFrame` with :class:`MultiIndex` columns (:issue:`49121`)
792795
- Bug in :meth:`DataFrame.reindex` casting dtype to ``object`` when :class:`DataFrame` has single extension array column when re-indexing ``columns`` and ``index`` (:issue:`48190`)
796+
- Bug in :meth:`DataFrame.iloc` raising ``IndexError`` when indexer is a :class:`Series` with numeric extension array dtype (:issue:`49521`)
793797
- Bug in :func:`~DataFrame.describe` where formatting percentiles in the resulting index showed more decimals than needed (:issue:`46362`)
794798
- Bug in :meth:`DataFrame.compare` not recognizing differences when comparing ``NA`` with a value in nullable dtypes (:issue:`48939`)
795799
-

pandas/core/groupby/generic.py

Lines changed: 100 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -427,7 +427,51 @@ def _aggregate_named(self, func, *args, **kwargs):
427427

428428
return result
429429

430-
@Substitution(klass="Series")
430+
__examples_series_doc = dedent(
431+
"""
432+
>>> ser = pd.Series(
433+
... [390.0, 350.0, 30.0, 20.0],
434+
... index=["Falcon", "Falcon", "Parrot", "Parrot"],
435+
... name="Max Speed")
436+
>>> grouped = ser.groupby([1, 1, 2, 2])
437+
>>> grouped.transform(lambda x: (x - x.mean()) / x.std())
438+
Falcon 0.707107
439+
Falcon -0.707107
440+
Parrot 0.707107
441+
Parrot -0.707107
442+
Name: Max Speed, dtype: float64
443+
444+
Broadcast result of the transformation
445+
446+
>>> grouped.transform(lambda x: x.max() - x.min())
447+
Falcon 40.0
448+
Falcon 40.0
449+
Parrot 10.0
450+
Parrot 10.0
451+
Name: Max Speed, dtype: float64
452+
453+
>>> grouped.transform("mean")
454+
Falcon 370.0
455+
Falcon 370.0
456+
Parrot 25.0
457+
Parrot 25.0
458+
Name: Max Speed, dtype: float64
459+
460+
.. versionchanged:: 1.3.0
461+
462+
The resulting dtype will reflect the return value of the passed ``func``,
463+
for example:
464+
465+
>>> grouped.transform(lambda x: x.astype(int).max())
466+
Falcon 390
467+
Falcon 390
468+
Parrot 30
469+
Parrot 30
470+
Name: Max Speed, dtype: int64
471+
"""
472+
)
473+
474+
@Substitution(klass="Series", example=__examples_series_doc)
431475
@Appender(_transform_template)
432476
def transform(self, func, *args, engine=None, engine_kwargs=None, **kwargs):
433477
return self._transform(
@@ -1407,7 +1451,61 @@ def _transform_general(self, func, *args, **kwargs):
14071451
concatenated = concatenated.reindex(concat_index, axis=other_axis, copy=False)
14081452
return self._set_result_index_ordered(concatenated)
14091453

1410-
@Substitution(klass="DataFrame")
1454+
__examples_dataframe_doc = dedent(
1455+
"""
1456+
>>> df = pd.DataFrame({'A' : ['foo', 'bar', 'foo', 'bar',
1457+
... 'foo', 'bar'],
1458+
... 'B' : ['one', 'one', 'two', 'three',
1459+
... 'two', 'two'],
1460+
... 'C' : [1, 5, 5, 2, 5, 5],
1461+
... 'D' : [2.0, 5., 8., 1., 2., 9.]})
1462+
>>> grouped = df.groupby('A')[['C', 'D']]
1463+
>>> grouped.transform(lambda x: (x - x.mean()) / x.std())
1464+
C D
1465+
0 -1.154701 -0.577350
1466+
1 0.577350 0.000000
1467+
2 0.577350 1.154701
1468+
3 -1.154701 -1.000000
1469+
4 0.577350 -0.577350
1470+
5 0.577350 1.000000
1471+
1472+
Broadcast result of the transformation
1473+
1474+
>>> grouped.transform(lambda x: x.max() - x.min())
1475+
C D
1476+
0 4.0 6.0
1477+
1 3.0 8.0
1478+
2 4.0 6.0
1479+
3 3.0 8.0
1480+
4 4.0 6.0
1481+
5 3.0 8.0
1482+
1483+
>>> grouped.transform("mean")
1484+
C D
1485+
0 3.666667 4.0
1486+
1 4.000000 5.0
1487+
2 3.666667 4.0
1488+
3 4.000000 5.0
1489+
4 3.666667 4.0
1490+
5 4.000000 5.0
1491+
1492+
.. versionchanged:: 1.3.0
1493+
1494+
The resulting dtype will reflect the return value of the passed ``func``,
1495+
for example:
1496+
1497+
>>> grouped.transform(lambda x: x.astype(int).max())
1498+
C D
1499+
0 5 8
1500+
1 5 9
1501+
2 5 8
1502+
3 5 9
1503+
4 5 8
1504+
5 5 9
1505+
"""
1506+
)
1507+
1508+
@Substitution(klass="DataFrame", example=__examples_dataframe_doc)
14111509
@Appender(_transform_template)
14121510
def transform(self, func, *args, engine=None, engine_kwargs=None, **kwargs):
14131511
return self._transform(

pandas/core/groupby/groupby.py

Lines changed: 10 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -402,15 +402,22 @@ class providing the base-class of operations.
402402
f : function, str
403403
Function to apply to each group. See the Notes section below for requirements.
404404
405-
Can also accept a Numba JIT function with
406-
``engine='numba'`` specified.
405+
Accepted inputs are:
406+
407+
- String
408+
- Python function
409+
- Numba JIT function with ``engine='numba'`` specified.
407410
411+
Only passing a single function is supported with this engine.
408412
If the ``'numba'`` engine is chosen, the function must be
409413
a user defined function with ``values`` and ``index`` as the
410414
first and second arguments respectively in the function signature.
411415
Each group's index will be passed to the user defined function
412416
and optionally available for use.
413417
418+
If a string is chosen, then it needs to be the name
419+
of the groupby method you want to use.
420+
414421
.. versionchanged:: 1.1.0
415422
*args
416423
Positional arguments to pass to func.
@@ -480,48 +487,7 @@ class providing the base-class of operations.
480487
481488
Examples
482489
--------
483-
484-
>>> df = pd.DataFrame({'A' : ['foo', 'bar', 'foo', 'bar',
485-
... 'foo', 'bar'],
486-
... 'B' : ['one', 'one', 'two', 'three',
487-
... 'two', 'two'],
488-
... 'C' : [1, 5, 5, 2, 5, 5],
489-
... 'D' : [2.0, 5., 8., 1., 2., 9.]})
490-
>>> grouped = df.groupby('A')[['C', 'D']]
491-
>>> grouped.transform(lambda x: (x - x.mean()) / x.std())
492-
C D
493-
0 -1.154701 -0.577350
494-
1 0.577350 0.000000
495-
2 0.577350 1.154701
496-
3 -1.154701 -1.000000
497-
4 0.577350 -0.577350
498-
5 0.577350 1.000000
499-
500-
Broadcast result of the transformation
501-
502-
>>> grouped.transform(lambda x: x.max() - x.min())
503-
C D
504-
0 4.0 6.0
505-
1 3.0 8.0
506-
2 4.0 6.0
507-
3 3.0 8.0
508-
4 4.0 6.0
509-
5 3.0 8.0
510-
511-
.. versionchanged:: 1.3.0
512-
513-
The resulting dtype will reflect the return value of the passed ``func``,
514-
for example:
515-
516-
>>> grouped.transform(lambda x: x.astype(int).max())
517-
C D
518-
0 5 8
519-
1 5 9
520-
2 5 8
521-
3 5 9
522-
4 5 8
523-
5 5 9
524-
"""
490+
%(example)s"""
525491

526492
_agg_template = """
527493
Aggregate using one or more operations over the specified axis.

pandas/core/indexing.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1481,7 +1481,12 @@ def _validate_key(self, key, axis: AxisInt):
14811481
# so don't treat a tuple as a valid indexer
14821482
raise IndexingError("Too many indexers")
14831483
elif is_list_like_indexer(key):
1484-
arr = np.array(key)
1484+
if isinstance(key, ABCSeries):
1485+
arr = key._values
1486+
elif is_array_like(key):
1487+
arr = key
1488+
else:
1489+
arr = np.array(key)
14851490
len_axis = len(self.obj._get_axis(axis))
14861491

14871492
# check that the key has a numeric dtype

pandas/core/reshape/merge.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1933,7 +1933,7 @@ def _validate_left_right_on(self, left_on, right_on):
19331933
lo_dtype = left_on_0.dtype
19341934
else:
19351935
lo_dtype = (
1936-
self.left[left_on_0].dtype
1936+
self.left._get_label_or_level_values(left_on_0).dtype
19371937
if left_on_0 in self.left.columns
19381938
else self.left.index.get_level_values(left_on_0)
19391939
)
@@ -1946,7 +1946,7 @@ def _validate_left_right_on(self, left_on, right_on):
19461946
ro_dtype = right_on_0.dtype
19471947
else:
19481948
ro_dtype = (
1949-
self.right[right_on_0].dtype
1949+
self.right._get_label_or_level_values(right_on_0).dtype
19501950
if right_on_0 in self.right.columns
19511951
else self.right.index.get_level_values(right_on_0)
19521952
)

0 commit comments

Comments
 (0)