Skip to content

Commit 0f565fc

Browse files
committed
Merge branch 'master' of https://github.com/pandas-dev/pandas into inline_cleanup
2 parents 6953dbd + f797c1d commit 0f565fc

File tree

115 files changed

+2612
-970
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

115 files changed

+2612
-970
lines changed

appveyor.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,12 +74,18 @@ install:
7474
# create our env
7575
# quote the version spec so cmd.exe does not treat ">" as an output redirection
- cmd: conda create -n pandas python=%PYTHON_VERSION% cython "pytest>=3.1.0" pytest-xdist
7676
- cmd: activate pandas
77+
- cmd: pip install moto
7778
- SET REQ=ci\requirements-%PYTHON_VERSION%_WIN.run
7879
- cmd: echo "installing requirements from %REQ%"
7980
- cmd: conda install -n pandas --file=%REQ%
8081
- cmd: conda list -n pandas
8182
- cmd: echo "installing requirements from %REQ% - done"
8283

84+
# add some pip only reqs to the env
85+
- SET REQ=ci\requirements-%PYTHON_VERSION%_WIN.pip
86+
- cmd: echo "installing requirements from %REQ%"
87+
- cmd: pip install -Ur %REQ%
88+
8389
# build em using the local source checkout in the correct windows env
8490
- cmd: '%CMD_IN_ENV% python setup.py build_ext --inplace'
8591

asv_bench/benchmarks/categoricals.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,9 @@ def time_value_counts_dropna(self):
6767
def time_rendering(self):
6868
str(self.sel)
6969

70+
def time_set_categories(self):
71+
self.ts.cat.set_categories(self.ts.cat.categories[::2])
72+
7073

7174
class Categoricals3(object):
7275
goal_time = 0.2

asv_bench/benchmarks/index_object.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -199,3 +199,23 @@ def time_datetime_level_values_full(self):
199199

200200
def time_datetime_level_values_sliced(self):
201201
self.mi[:10].values
202+
203+
204+
class Range(object):
205+
goal_time = 0.2
206+
207+
def setup(self):
208+
self.idx_inc = RangeIndex(start=0, stop=10**7, step=3)
209+
self.idx_dec = RangeIndex(start=10**7, stop=-1, step=-3)
210+
211+
def time_max(self):
212+
self.idx_inc.max()
213+
214+
def time_max_trivial(self):
215+
self.idx_dec.max()
216+
217+
def time_min(self):
218+
self.idx_dec.min()
219+
220+
def time_min_trivial(self):
221+
self.idx_inc.min()

asv_bench/benchmarks/period.py

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,65 @@ def time_value_counts_pindex(self):
7878
self.i.value_counts()
7979

8080

81+
class Properties(object):
82+
def setup(self):
83+
self.per = Period('2017-09-06 08:28', freq='min')
84+
85+
def time_year(self):
86+
self.per.year
87+
88+
def time_month(self):
89+
self.per.month
90+
91+
def time_day(self):
92+
self.per.day
93+
94+
def time_hour(self):
95+
self.per.hour
96+
97+
def time_minute(self):
98+
self.per.minute
99+
100+
def time_second(self):
101+
self.per.second
102+
103+
def time_is_leap_year(self):
104+
self.per.is_leap_year
105+
106+
def time_quarter(self):
107+
self.per.quarter
108+
109+
def time_qyear(self):
110+
self.per.qyear
111+
112+
def time_week(self):
113+
self.per.week
114+
115+
def time_daysinmonth(self):
116+
self.per.daysinmonth
117+
118+
def time_dayofweek(self):
119+
self.per.dayofweek
120+
121+
def time_dayofyear(self):
122+
self.per.dayofyear
123+
124+
def time_start_time(self):
125+
self.per.start_time
126+
127+
def time_end_time(self):
128+
self.per.end_time
129+
130+
def time_to_timestamp(self):
    """Time ``Period.to_timestamp`` on the Period built in ``setup``."""
    # `self` is required: the method is called on an instance and reads
    # the ``per`` attribute assigned in ``setup``.
    self.per.to_timestamp()
132+
133+
def time_now(self):
    """Time ``Period.now`` using the frequency of the Period built in ``setup``."""
    # `self` is required: the method is called on an instance and reads
    # the ``per`` attribute assigned in ``setup``.
    # ``Period.now`` needs a frequency to build the new Period; reuse the
    # one from ``self.per`` so the benchmark stays in sync with ``setup``.
    # NOTE(review): confirm against the pandas version being benchmarked.
    self.per.now(self.per.freq)
135+
136+
def time_asfreq(self):
    """Time ``Period.asfreq('A')`` on the Period built in ``setup``."""
    # `self` is required: the method is called on an instance and reads
    # the ``per`` attribute assigned in ``setup``.
    self.per.asfreq('A')
138+
139+
81140
class period_standard_indexing(object):
82141
goal_time = 0.2
83142

asv_bench/benchmarks/timestamp.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
from .pandas_vb_common import *
22
from pandas import to_timedelta, Timestamp
3+
import pytz
4+
import datetime
35

46

57
class TimestampProperties(object):
@@ -58,3 +60,30 @@ def time_is_leap_year(self):
5860

5961
def time_microsecond(self):
6062
self.ts.microsecond
63+
64+
65+
class TimestampOps(object):
66+
goal_time = 0.2
67+
68+
def setup(self):
69+
self.ts = Timestamp('2017-08-25 08:16:14')
70+
self.ts_tz = Timestamp('2017-08-25 08:16:14', tz='US/Eastern')
71+
72+
dt = datetime.datetime(2016, 3, 27, 1)
73+
self.tzinfo = pytz.timezone('CET').localize(dt, is_dst=False).tzinfo
74+
self.ts2 = Timestamp(dt)
75+
76+
def time_replace_tz(self):
77+
self.ts.replace(tzinfo=pytz.timezone('US/Eastern'))
78+
79+
def time_replace_across_dst(self):
80+
self.ts2.replace(tzinfo=self.tzinfo)
81+
82+
def time_replace_None(self):
83+
self.ts_tz.replace(tzinfo=None)
84+
85+
def time_to_pydatetime(self):
86+
self.ts.to_pydatetime()
87+
88+
def time_to_pydatetime_tz(self):
89+
self.ts_tz.to_pydatetime()

ci/install_circle.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ time conda create -n pandas -q --file=${REQ_BUILD} || exit 1
6767
# Quote the requirement so the shell does not parse ">" as a redirection to a file named "=3.1.0".
time conda install -n pandas "pytest>=3.1.0" || exit 1
6868

6969
source activate pandas
70+
time pip install moto || exit 1
7071

7172
# build but don't install
7273
echo "[build em]"

ci/install_travis.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ if [ -e ${REQ} ]; then
104104
fi
105105

106106
# Quote the requirement so the shell does not parse ">" as a redirection to a file named "=3.1.0".
time conda install -n pandas "pytest>=3.1.0"
107-
time pip install pytest-xdist
107+
time pip install pytest-xdist moto
108108

109109
if [ "$LINT" ]; then
110110
conda install flake8

ci/requirements-2.7_WIN.pip

Whitespace-only changes.

ci/requirements-3.6_NUMPY_DEV.pip

Whitespace-only changes.

ci/requirements-3.6_WIN.pip

Whitespace-only changes.

ci/requirements-3.6_WIN.run

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@ xlrd
88
xlwt
99
scipy
1010
feather-format
11-
pyarrow
1211
numexpr
1312
pytables
1413
matplotlib

ci/requirements_dev.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,3 +5,4 @@ cython
55
pytest>=3.1.0
66
pytest-cov
77
flake8
8+
moto

doc/README.rst

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,11 @@
33
Contributing to the documentation
44
=================================
55

6-
If you're not the developer type, contributing to the documentation is still
7-
of huge value. You don't even have to be an expert on
8-
*pandas* to do so! Something as simple as rewriting small passages for clarity
6+
Whether you are someone who loves writing, teaching, or development,
7+
contributing to the documentation is of huge value. If you don't see yourself
8+
as a developer type, please don't stress and know that we want you to
9+
contribute. You don't even have to be an expert on *pandas* to do so!
10+
Something as simple as rewriting small passages for clarity
911
as you reference the docs is a simple but effective way to contribute. The
1012
next person to read that passage will be in your debt!
1113

doc/source/advanced.rst

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -625,7 +625,7 @@ Index Types
625625
We have discussed ``MultiIndex`` in the previous sections pretty extensively. ``DatetimeIndex`` and ``PeriodIndex``
626626
are shown :ref:`here <timeseries.overview>`. ``TimedeltaIndex`` are :ref:`here <timedeltas.timedeltas>`.
627627

628-
In the following sub-sections we will highlite some other index types.
628+
In the following sub-sections we will highlight some other index types.
629629

630630
.. _indexing.categoricalindex:
631631

@@ -645,7 +645,7 @@ and allows efficient indexing and storage of an index with a large number of dup
645645
df.dtypes
646646
df.B.cat.categories
647647
648-
Setting the index, will create create a ``CategoricalIndex``
648+
Setting the index will create a ``CategoricalIndex``
649649

650650
.. ipython:: python
651651
@@ -681,7 +681,7 @@ Groupby operations on the index will preserve the index nature as well
681681
Reindexing operations will return a resulting index based on the type of the passed
682682
indexer, meaning that passing a list will return a plain-old-``Index``; indexing with
683683
a ``Categorical`` will return a ``CategoricalIndex``, indexed according to the categories
684-
of the PASSED ``Categorical`` dtype. This allows one to arbitrarly index these even with
684+
of the PASSED ``Categorical`` dtype. This allows one to arbitrarily index these even with
685685
values NOT in the categories, similarly to how you can reindex ANY pandas index.
686686

687687
.. ipython :: python
@@ -722,7 +722,7 @@ Int64Index and RangeIndex
722722
Prior to 0.18.0, the ``Int64Index`` would provide the default index for all ``NDFrame`` objects.
723723
724724
``RangeIndex`` is a sub-class of ``Int64Index`` added in version 0.18.0, now providing the default index for all ``NDFrame`` objects.
725-
``RangeIndex`` is an optimized version of ``Int64Index`` that can represent a monotonic ordered set. These are analagous to python `range types <https://docs.python.org/3/library/stdtypes.html#typesseq-range>`__.
725+
``RangeIndex`` is an optimized version of ``Int64Index`` that can represent a monotonic ordered set. These are analogous to python `range types <https://docs.python.org/3/library/stdtypes.html#typesseq-range>`__.
726726
727727
.. _indexing.float64index:
728728
@@ -963,7 +963,7 @@ index can be somewhat complicated. For example, the following does not work:
963963
s.loc['c':'e'+1]
964964
965965
A very common use case is to limit a time series to start and end at two
966-
specific dates. To enable this, we made the design design to make label-based
966+
specific dates. To enable this, we made the design decision to make label-based
967967
slicing include both endpoints:
968968
969969
.. ipython:: python

doc/source/api.rst

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -218,10 +218,19 @@ Top-level dealing with datetimelike
218218
to_timedelta
219219
date_range
220220
bdate_range
221+
cdate_range
221222
period_range
222223
timedelta_range
223224
infer_freq
224225

226+
Top-level dealing with intervals
227+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
228+
229+
.. autosummary::
230+
:toctree: generated/
231+
232+
interval_range
233+
225234
Top-level evaluation
226235
~~~~~~~~~~~~~~~~~~~~
227236

@@ -1282,7 +1291,7 @@ Index
12821291
-----
12831292

12841293
**Many of these methods or variants thereof are available on the objects
1285-
that contain an index (Series/Dataframe) and those should most likely be
1294+
that contain an index (Series/DataFrame) and those should most likely be
12861295
used before calling these methods directly.**
12871296

12881297
.. autosummary::
@@ -1407,6 +1416,20 @@ Selecting
14071416
Index.slice_indexer
14081417
Index.slice_locs
14091418

1419+
.. _api.numericindex:
1420+
1421+
Numeric Index
1422+
-------------
1423+
1424+
.. autosummary::
1425+
:toctree: generated/
1426+
:template: autosummary/class_without_autosummary.rst
1427+
1428+
RangeIndex
1429+
Int64Index
1430+
UInt64Index
1431+
Float64Index
1432+
14101433
.. _api.categoricalindex:
14111434

14121435
CategoricalIndex
@@ -2016,6 +2039,7 @@ Upsampling
20162039
Resampler.backfill
20172040
Resampler.bfill
20182041
Resampler.pad
2042+
Resampler.nearest
20192043
Resampler.fillna
20202044
Resampler.asfreq
20212045
Resampler.interpolate

doc/source/basics.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -923,7 +923,7 @@ Passing a named function will yield that name for the row:
923923
Aggregating with a dict
924924
+++++++++++++++++++++++
925925

926-
Passing a dictionary of column names to a scalar or a list of scalars, to ``DataFame.agg``
926+
Passing a dictionary of column names to a scalar or a list of scalars, to ``DataFrame.agg``
927927
allows you to customize which functions are applied to which columns. Note that the results
928928
are not in any particular order, you can use an ``OrderedDict`` instead to guarantee ordering.
929929

doc/source/categorical.rst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,8 @@ Using ``.describe()`` on categorical data will produce similar output to a `Seri
146146
df.describe()
147147
df["cat"].describe()
148148
149+
.. _categorical.cat:
150+
149151
Working with categories
150152
-----------------------
151153

@@ -204,6 +206,10 @@ by using the :func:`Categorical.rename_categories` method:
204206
s.cat.categories = ["Group %s" % g for g in s.cat.categories]
205207
s
206208
s.cat.rename_categories([1,2,3])
209+
s
210+
# You can also pass a dict-like object to map the renaming
211+
s.cat.rename_categories({1: 'x', 2: 'y', 3: 'z'})
212+
s
207213
208214
.. note::
209215

doc/source/computation.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -654,7 +654,7 @@ aggregation with, outputting a DataFrame:
654654
655655
r['A'].agg([np.sum, np.mean, np.std])
656656
657-
On a widowed DataFrame, you can pass a list of functions to apply to each
657+
On a windowed DataFrame, you can pass a list of functions to apply to each
658658
column, which produces an aggregated result with a hierarchical index:
659659

660660
.. ipython:: python

doc/source/groupby.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -561,7 +561,7 @@ must be either implemented on GroupBy or available via :ref:`dispatching
561561
562562
.. note::
563563

564-
If you pass a dict to ``aggregate``, the ordering of the output colums is
564+
If you pass a dict to ``aggregate``, the ordering of the output columns is
565565
non-deterministic. If you want to be sure the output columns will be in a specific
566566
order, you can use an ``OrderedDict``. Compare the output of the following two commands:
567567

@@ -1211,7 +1211,7 @@ Groupby by Indexer to 'resample' data
12111211

12121212
Resampling produces new hypothetical samples (resamples) from already existing observed data or from a model that generates data. These new samples are similar to the pre-existing samples.
12131213

1214-
In order to resample to work on indices that are non-datetimelike , the following procedure can be utilized.
1214+
In order for resample to work on indices that are non-datetimelike, the following procedure can be utilized.
12151215

12161216
In the following examples, **df.index // 5** returns a binary array which is used to determine what gets selected for the groupby operation.
12171217

doc/source/indexing.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -714,7 +714,7 @@ Finally, one can also set a seed for ``sample``'s random number generator using
714714
Setting With Enlargement
715715
------------------------
716716

717-
The ``.loc/[]`` operations can perform enlargement when setting a non-existant key for that axis.
717+
The ``.loc/[]`` operations can perform enlargement when setting a non-existent key for that axis.
718718

719719
In the ``Series`` case this is effectively an appending operation
720720

doc/source/io.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3077,7 +3077,7 @@ Compressed pickle files
30773077

30783078
.. versionadded:: 0.20.0
30793079

3080-
:func:`read_pickle`, :meth:`DataFame.to_pickle` and :meth:`Series.to_pickle` can read
3080+
:func:`read_pickle`, :meth:`DataFrame.to_pickle` and :meth:`Series.to_pickle` can read
30813081
and write compressed pickle files. The compression types of ``gzip``, ``bz2``, ``xz`` are supported for reading and writing.
30823082
The ``zip`` file format only supports reading and must contain only one data file
30833083
to be read in.
@@ -4492,7 +4492,7 @@ Several caveats.
44924492
- The format will NOT write an ``Index``, or ``MultiIndex`` for the ``DataFrame`` and will raise an
44934493
error if a non-default one is provided. You can simply ``.reset_index(drop=True)`` in order to store the index.
44944494
- Duplicate column names and non-string column names are not supported
4495-
- Categorical dtypes are currently not-supported (for ``pyarrow``).
4495+
- Categorical dtypes can be serialized to parquet, but will de-serialize as ``object`` dtype.
44964496
- Unsupported types include ``Period`` and actual python object types. These will raise a helpful error message
44974497
on an attempt at serialization.
44984498

0 commit comments

Comments
 (0)