Skip to content

Commit a7b1439

Browse files
author
MomIsBestFriend
committed
Merge remote-tracking branch 'upstream/master' into STY-repr-batch-3
2 parents 81c5aa5 + 150cae9 commit a7b1439

File tree

12 files changed

+108
-260
lines changed

12 files changed

+108
-260
lines changed

doc/redirects.csv

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -636,7 +636,6 @@ generated/pandas.Index.equals,../reference/api/pandas.Index.equals
636636
generated/pandas.Index.factorize,../reference/api/pandas.Index.factorize
637637
generated/pandas.Index.fillna,../reference/api/pandas.Index.fillna
638638
generated/pandas.Index.format,../reference/api/pandas.Index.format
639-
generated/pandas.Index.get_duplicates,../reference/api/pandas.Index.get_duplicates
640639
generated/pandas.Index.get_indexer_for,../reference/api/pandas.Index.get_indexer_for
641640
generated/pandas.Index.get_indexer,../reference/api/pandas.Index.get_indexer
642641
generated/pandas.Index.get_indexer_non_unique,../reference/api/pandas.Index.get_indexer_non_unique

doc/source/reference/indexing.rst

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,6 @@ Selecting
152152
Index.asof
153153
Index.asof_locs
154154
Index.contains
155-
Index.get_duplicates
156155
Index.get_indexer
157156
Index.get_indexer_for
158157
Index.get_indexer_non_unique

doc/source/whatsnew/v1.0.0.rst

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -506,6 +506,9 @@ or ``matplotlib.Axes.plot``. See :ref:`plotting.formatters` for more.
506506
- Removed the previously deprecated :meth:`Series.get_value`, :meth:`Series.set_value`, :meth:`DataFrame.get_value`, :meth:`DataFrame.set_value` (:issue:`17739`)
507507
- Changed the default value of `inplace` in :meth:`DataFrame.set_index` and :meth:`Series.set_axis`. It now defaults to ``False`` (:issue:`27600`)
508508
- Removed the previously deprecated :attr:`Series.cat.categorical`, :attr:`Series.cat.index`, :attr:`Series.cat.name` (:issue:`24751`)
509+
- Removed the previously deprecated ``time_rule`` keyword from (non-public) :func:`offsets.generate_range`, which has been moved to :func:`core.arrays._ranges.generate_range` (:issue:`24157`)
510+
- :meth:`DataFrame.loc` or :meth:`Series.loc` with listlike indexers and missing labels will no longer reindex (:issue:`17295`)
511+
- :meth:`DataFrame.to_excel` and :meth:`Series.to_excel` with non-existent columns will no longer reindex (:issue:`17295`)
509512
- Removed the previously deprecated "by" keyword from :meth:`DataFrame.sort_index`, use :meth:`DataFrame.sort_values` instead (:issue:`10726`)
510513
- Removed support for nested renaming in :meth:`DataFrame.aggregate`, :meth:`Series.aggregate`, :meth:`DataFrameGroupBy.aggregate`, :meth:`SeriesGroupBy.aggregate`, :meth:`Rolling.aggregate` (:issue:`18529`)
511514
- Passing ``datetime64`` data to :class:`TimedeltaIndex` or ``timedelta64`` data to ``DatetimeIndex`` now raises ``TypeError`` (:issue:`23539`, :issue:`23937`)
@@ -548,7 +551,7 @@ or ``matplotlib.Axes.plot``. See :ref:`plotting.formatters` for more.
548551
- Removed the previously deprecated properties :attr:`DataFrame.is_copy`, :attr:`Series.is_copy` (:issue:`18812`)
549552
- Removed the previously deprecated :meth:`DataFrame.get_ftype_counts`, :meth:`Series.get_ftype_counts` (:issue:`18243`)
550553
- Removed the previously deprecated :meth:`DataFrame.ftypes`, :meth:`Series.ftypes`, :meth:`Series.ftype` (:issue:`26744`)
551-
- Removed the previously deprecated :meth:`Index.get_duplicated`, use ``idx[idx.duplicated()].unique()`` instead (:issue:`20239`)
554+
- Removed the previously deprecated :meth:`Index.get_duplicates`, use ``idx[idx.duplicated()].unique()`` instead (:issue:`20239`)
552555
- Removed the previously deprecated :meth:`Series.clip_upper`, :meth:`Series.clip_lower`, :meth:`DataFrame.clip_upper`, :meth:`DataFrame.clip_lower` (:issue:`24203`)
553556
- Removed the ability to alter :attr:`DatetimeIndex.freq`, :attr:`TimedeltaIndex.freq`, or :attr:`PeriodIndex.freq` (:issue:`20772`)
554557
- Removed the previously deprecated :attr:`DatetimeIndex.offset` (:issue:`20730`)

pandas/core/indexes/base.py

Lines changed: 0 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -2138,68 +2138,6 @@ def duplicated(self, keep="first"):
21382138
"""
21392139
return super().duplicated(keep=keep)
21402140

2141-
def get_duplicates(self):
2142-
"""
2143-
Extract duplicated index elements.
2144-
2145-
.. deprecated:: 0.23.0
2146-
Use idx[idx.duplicated()].unique() instead
2147-
2148-
Returns a sorted list of index elements which appear more than once in
2149-
the index.
2150-
2151-
Returns
2152-
-------
2153-
array-like
2154-
List of duplicated indexes.
2155-
2156-
See Also
2157-
--------
2158-
Index.duplicated : Return boolean array denoting duplicates.
2159-
Index.drop_duplicates : Return Index with duplicates removed.
2160-
2161-
Examples
2162-
--------
2163-
2164-
Works on different Index of types.
2165-
2166-
>>> pd.Index([1, 2, 2, 3, 3, 3, 4]).get_duplicates() # doctest: +SKIP
2167-
[2, 3]
2168-
2169-
Note that for a DatetimeIndex, it does not return a list but a new
2170-
DatetimeIndex:
2171-
2172-
>>> dates = pd.to_datetime(['2018-01-01', '2018-01-02', '2018-01-03',
2173-
... '2018-01-03', '2018-01-04', '2018-01-04'],
2174-
... format='%Y-%m-%d')
2175-
>>> pd.Index(dates).get_duplicates() # doctest: +SKIP
2176-
DatetimeIndex(['2018-01-03', '2018-01-04'],
2177-
dtype='datetime64[ns]', freq=None)
2178-
2179-
Sorts duplicated elements even when indexes are unordered.
2180-
2181-
>>> pd.Index([1, 2, 3, 2, 3, 4, 3]).get_duplicates() # doctest: +SKIP
2182-
[2, 3]
2183-
2184-
Return empty array-like structure when all elements are unique.
2185-
2186-
>>> pd.Index([1, 2, 3, 4]).get_duplicates() # doctest: +SKIP
2187-
[]
2188-
>>> dates = pd.to_datetime(['2018-01-01', '2018-01-02', '2018-01-03'],
2189-
... format='%Y-%m-%d')
2190-
>>> pd.Index(dates).get_duplicates() # doctest: +SKIP
2191-
DatetimeIndex([], dtype='datetime64[ns]', freq=None)
2192-
"""
2193-
warnings.warn(
2194-
"'get_duplicates' is deprecated and will be removed in "
2195-
"a future release. You can use "
2196-
"idx[idx.duplicated()].unique() instead",
2197-
FutureWarning,
2198-
stacklevel=2,
2199-
)
2200-
2201-
return self[self.duplicated()].unique()
2202-
22032141
def _get_unique_index(self, dropna=False):
22042142
"""
22052143
Returns an index containing unique values.

pandas/io/pytables.py

Lines changed: 34 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -361,9 +361,6 @@ def read_hdf(
361361
>>> df.to_hdf('./store.h5', 'data')
362362
>>> reread = pd.read_hdf('./store.h5')
363363
"""
364-
assert not kwargs, kwargs
365-
# NB: in principle more kwargs could be passed to HDFStore, but in
366-
# tests none are.
367364

368365
if mode not in ["r", "r+", "a"]:
369366
raise ValueError(
@@ -500,13 +497,14 @@ class HDFStore:
500497
"""
501498

502499
_handle: Optional["File"]
500+
_mode: str
503501
_complevel: int
504502
_fletcher32: bool
505503

506504
def __init__(
507505
self,
508506
path,
509-
mode=None,
507+
mode: str = "a",
510508
complevel: Optional[int] = None,
511509
complib=None,
512510
fletcher32: bool = False,
@@ -837,16 +835,24 @@ def select_as_coordinates(
837835
raise TypeError("can only read_coordinates with a table")
838836
return tbl.read_coordinates(where=where, start=start, stop=stop)
839837

840-
def select_column(self, key: str, column: str, **kwargs):
838+
def select_column(
839+
self,
840+
key: str,
841+
column: str,
842+
start: Optional[int] = None,
843+
stop: Optional[int] = None,
844+
):
841845
"""
842846
return a single column from the table. This is generally only useful to
843847
select an indexable
844848
845849
Parameters
846850
----------
847851
key : str
848-
column: str
852+
column : str
849853
The column of interest.
854+
start : int or None, default None
855+
stop : int or None, default None
850856
851857
Raises
852858
------
@@ -859,7 +865,7 @@ def select_column(self, key: str, column: str, **kwargs):
859865
tbl = self.get_storer(key)
860866
if not isinstance(tbl, Table):
861867
raise TypeError("can only read_column with a table")
862-
return tbl.read_column(column=column, **kwargs)
868+
return tbl.read_column(column=column, start=start, stop=stop)
863869

864870
def select_as_multiple(
865871
self,
@@ -2582,9 +2588,9 @@ class Fixed:
25822588
25832589
Parameters
25842590
----------
2585-
2586-
parent : my parent HDFStore
2587-
group : the group node where the table resides
2591+
parent : HDFStore
2592+
group : Node
2593+
The group node where the table resides.
25882594
"""
25892595

25902596
pandas_kind: str
@@ -2871,7 +2877,7 @@ def read_index(
28712877
return self.read_multi_index(key, start=start, stop=stop)
28722878
elif variety == "regular":
28732879
node = getattr(self.group, key)
2874-
_, index = self.read_index_node(node, start=start, stop=stop)
2880+
index = self.read_index_node(node, start=start, stop=stop)
28752881
return index
28762882
else: # pragma: no cover
28772883
raise TypeError(f"unrecognized index variety: {variety}")
@@ -2931,13 +2937,13 @@ def read_multi_index(
29312937

29322938
levels = []
29332939
codes = []
2934-
names = []
2940+
names: List[Optional[Hashable]] = []
29352941
for i in range(nlevels):
29362942
level_key = f"{key}_level{i}"
29372943
node = getattr(self.group, level_key)
2938-
name, lev = self.read_index_node(node, start=start, stop=stop)
2944+
lev = self.read_index_node(node, start=start, stop=stop)
29392945
levels.append(lev)
2940-
names.append(name)
2946+
names.append(lev.name)
29412947

29422948
label_key = f"{key}_label{i}"
29432949
level_codes = self.read_array(label_key, start=start, stop=stop)
@@ -2949,7 +2955,7 @@ def read_multi_index(
29492955

29502956
def read_index_node(
29512957
self, node: "Node", start: Optional[int] = None, stop: Optional[int] = None
2952-
):
2958+
) -> Index:
29532959
data = node[start:stop]
29542960
# If the index was an empty array write_array_empty() will
29552961
# have written a sentinel. Here we replace it with the original.
@@ -2997,7 +3003,7 @@ def read_index_node(
29973003

29983004
index.name = name
29993005

3000-
return name, index
3006+
return index
30013007

30023008
def write_array_empty(self, key: str, value):
30033009
""" write a 0-len array """
@@ -3131,7 +3137,6 @@ def write(self, obj, **kwargs):
31313137

31323138
class BlockManagerFixed(GenericFixed):
31333139
attributes = ["ndim", "nblocks"]
3134-
is_shape_reversed = False
31353140

31363141
nblocks: int
31373142

@@ -3158,10 +3163,6 @@ def shape(self):
31583163

31593164
shape.append(items)
31603165

3161-
# hacky - this works for frames, but is reversed for panels
3162-
if self.is_shape_reversed:
3163-
shape = shape[::-1]
3164-
31653166
return shape
31663167
except AttributeError:
31673168
return None
@@ -3259,7 +3260,6 @@ class Table(Fixed):
32593260
table_type: str
32603261
levels = 1
32613262
is_table = True
3262-
is_shape_reversed = False
32633263

32643264
index_axes: List[IndexCol]
32653265
non_index_axes: List[Tuple[int, Any]]
@@ -3302,7 +3302,7 @@ def __repr__(self) -> str:
33023302
f"ncols->{self.ncols},indexers->[{jindex_axes}]{dc})"
33033303
)
33043304

3305-
def __getitem__(self, c):
3305+
def __getitem__(self, c: str):
33063306
""" return the axis for c """
33073307
for a in self.axes:
33083308
if c == a.name:
@@ -3345,10 +3345,6 @@ def is_multi_index(self) -> bool:
33453345
"""the levels attribute is 1 or a list in the case of a multi-index"""
33463346
return isinstance(self.levels, list)
33473347

3348-
def validate_metadata(self, existing):
3349-
""" create / validate metadata """
3350-
self.metadata = [c.name for c in self.values_axes if c.metadata is not None]
3351-
33523348
def validate_multiindex(self, obj):
33533349
"""validate that we can store the multi-index; reset and return the
33543350
new object
@@ -3651,8 +3647,8 @@ def read_axes(
36513647
Parameters
36523648
----------
36533649
where : ???
3654-
start: int or None, default None
3655-
stop: int or None, default None
3650+
start : int or None, default None
3651+
stop : int or None, default None
36563652
36573653
Returns
36583654
-------
@@ -3946,7 +3942,7 @@ def get_blk_items(mgr, blocks):
39463942
self.validate_min_itemsize(min_itemsize)
39473943

39483944
# validate our metadata
3949-
self.validate_metadata(existing_table)
3945+
self.metadata = [c.name for c in self.values_axes if c.metadata is not None]
39503946

39513947
# validate the axes if we have an existing table
39523948
if validate:
@@ -4122,7 +4118,13 @@ class WORMTable(Table):
41224118

41234119
table_type = "worm"
41244120

4125-
def read(self, **kwargs):
4121+
def read(
4122+
self,
4123+
where=None,
4124+
columns=None,
4125+
start: Optional[int] = None,
4126+
stop: Optional[int] = None,
4127+
):
41264128
""" read the indices and the indexing array, calculate offset rows and
41274129
return """
41284130
raise NotImplementedError("WORMTable needs to implement read")
@@ -4479,8 +4481,7 @@ def write(self, obj, data_columns=None, **kwargs):
44794481
""" we are going to write this as a frame table """
44804482
if not isinstance(obj, DataFrame):
44814483
name = obj.name or "values"
4482-
obj = DataFrame({name: obj}, index=obj.index)
4483-
obj.columns = [name]
4484+
obj = obj.to_frame(name)
44844485
return super().write(obj=obj, data_columns=obj.columns.tolist(), **kwargs)
44854486

44864487
def read(

0 commit comments

Comments
 (0)