
0.20.2 backports 1 #16535


Merged: 44 commits, May 30, 2017
Commits (44)
7b41c7f
BUG: incorrect handling of scipy.sparse.dok formats (#16197) (#16191)
keitakurita May 11, 2017
7858082
Unblock supported compression libs in pytables (#16196)
linebp May 11, 2017
e5cf9b6
BUG: Accept list-like color with single col in plot (#16233)
TomAugspurger May 11, 2017
d538851
TST: release testing of downstream packages (#16261)
jreback May 6, 2017
1e146e4
BUG: support for "level=" when reset_index() is called with a single …
toobaz May 6, 2017
38684f3
TST: remove xfailing css tests (#16272)
jreback May 6, 2017
8f231c8
DOC: add whatsnew 0.20.2 to display (#16273)
jreback May 6, 2017
8175581
DOC: change 0.20.1 whatsnew text -> 0.20.2 (#16274)
jreback May 6, 2017
c17a3e9
DOC: fixed broken link GH16279 (#16281)
SimonBaron May 8, 2017
6f9e907
BUG: pathlib.Path in io (#16292)
chris-b1 May 12, 2017
0b77d30
BLD: depending on non-existant file in sparse (#16293)
jreback May 9, 2017
c7e3d61
COMPAT: don't force clipboard routines to be imported in main pandas …
jreback May 9, 2017
1c26a78
PERF: fix clean_index_list perf (#16295)
jreback May 9, 2017
a1ac8d1
BLD: run only multi on 2.7-build_test build (#16296)
jreback May 9, 2017
9e77690
BUG: Don't segfault to_numeric when input is empty (#16305)
gfyoung May 9, 2017
6f3a053
TST: not printing skips (#16318)
jreback May 10, 2017
8d02272
PERF: improved performance of small multiindexes (#16324)
jreback May 11, 2017
3a25bb9
BUG: Preserve data order when stacking unsorted levels (#16323) (#16325)
dsm054 May 11, 2017
2eea690
DOC: Correctly redirect to SetupTools documentations (#16333)
townie May 11, 2017
e1e68b7
BUG: Categorical comparison with unordered (#16339)
TomAugspurger May 18, 2017
c6ce9ea
PERF: improve MultiIndex get_loc performance (#16346)
jorisvandenbossche May 17, 2017
41d90dc
PERF: improved clip performance (#16364)
jreback May 16, 2017
a495669
TST: followup to #16364, catch errstate warnings (#16373)
jreback May 17, 2017
5a68602
TST: remove pandas-datareader xfail as 0.4.0 works (#16374)
jreback May 17, 2017
ace96a3
BUG: reshape fix for maybe_infer_to_datetimelike()
May 24, 2017
772e63f
BUG: wide_to_long should check for unique id vars (#16382) (#16403)
erikcs May 23, 2017
1e3141e
DOC: add google analytics to the documentation (#16412)
jorisvandenbossche May 22, 2017
61503ea
PERF: don't materialize arrays on checking in groupby (#16413)
jreback May 22, 2017
75efd8a
COMPAT: Catch warnings on tab-complete in IPy 6 (#16414)
TomAugspurger May 26, 2017
5f82c41
Error with .drop([]) on non-unique index (#16428)
lkirk May 24, 2017
aded53e
BUG: Interpolate limit=n GH16282 (#16429)
WBare May 23, 2017
e735629
BUG: handle nan values in DataFrame.update when overwrite=False (#155…
pcluo May 24, 2017
c4b53c7
15819 rolling window on empty df (#16431)
chernrick May 25, 2017
07ae2e0
BUG: Silence numpy warnings when broadcasting comparison ops (GH16378…
andrewarcher May 25, 2017
3f20852
BUG: fix isin with Series of tuples values (#16394) (#16434)
jaredsnyder May 23, 2017
1d1ef5d
BUG: Render empty DataFrame as empty HTML table w/o raising IndexErro…
JimStearns206 May 23, 2017
033a892
BUG: Don't ignore figsize in df.boxplot (#16445)
May 24, 2017
01bc872
BUG: Fix warning with c engine when skipping lines with comment (#16455)
pankajp May 24, 2017
c52f647
DOC: update make.py script (#16456)
jorisvandenbossche May 23, 2017
7b0ed01
ENH: Add to_latex() method to Series (#16180) (#16465)
bsweger May 26, 2017
ffe4429
COMPAT: feather-format 0.4.0 compat (#16475)
jreback May 24, 2017
788716f
CLN: Small linting failures (#16491)
TomAugspurger May 25, 2017
baf8b15
TST: ujson tests are not being run (#16499) (#16500)
abarber4gh May 26, 2017
9c4a033
TST: Specify HTML file encoding on PY3 (#16526)
neirbowj May 29, 2017
2 changes: 1 addition & 1 deletion .travis.yml
@@ -123,7 +123,7 @@ after_success:

after_script:
- echo "after_script start"
- source activate pandas && python -c "import pandas; pandas.show_versions();"
- source activate pandas && pushd /tmp && python -c "import pandas; pandas.show_versions();" && popd
- if [ -e /tmp/single.xml ]; then
ci/print_skipped.py /tmp/single.xml;
fi
9 changes: 9 additions & 0 deletions asv_bench/benchmarks/groupby.py
@@ -368,6 +368,11 @@ def setup(self):
self.dates = (np.datetime64('now') + self.offsets)
self.df = DataFrame({'key1': np.random.randint(0, 500, size=self.n), 'key2': np.random.randint(0, 100, size=self.n), 'value1': np.random.randn(self.n), 'value2': np.random.randn(self.n), 'value3': np.random.randn(self.n), 'dates': self.dates, })

N = 1000000
self.draws = pd.Series(np.random.randn(N))
labels = pd.Series(['foo', 'bar', 'baz', 'qux'] * (N // 4))
self.cats = labels.astype('category')

def time_groupby_multi_size(self):
self.df.groupby(['key1', 'key2']).size()

@@ -377,6 +382,10 @@ def time_groupby_dt_size(self):
def time_groupby_dt_timegrouper_size(self):
self.df.groupby(TimeGrouper(key='dates', freq='M')).size()

def time_groupby_size(self):
self.draws.groupby(self.cats).size()



#----------------------------------------------------------------------
# groupby with a variable value for ngroups
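As context for the `time_groupby_size` benchmark added above: `groupby(...).size()` simply counts the rows that fall into each group key. A pure-Python sketch of the same computation (illustrative only, not part of the diff):

```python
from collections import Counter

def groupby_size(keys):
    # count how many rows fall into each group key,
    # mirroring what Series.groupby(cats).size() reports
    return Counter(keys)

# mirrors the benchmark's repeating label pattern
labels = ['foo', 'bar', 'baz', 'qux'] * 2
sizes = groupby_size(labels)
assert sizes['foo'] == 2 and len(sizes) == 4
```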
35 changes: 31 additions & 4 deletions asv_bench/benchmarks/indexing.py
@@ -19,6 +19,9 @@ def time_getitem_list_like(self):
def time_getitem_array(self):
self.s[np.arange(10000)]

def time_getitem_lists(self):
self.s[np.arange(10000).tolist()]

def time_iloc_array(self):
self.s.iloc[np.arange(10000)]

@@ -190,9 +193,15 @@ def setup(self):
np.arange(1000)], names=['one', 'two'])

import string
self.mistring = MultiIndex.from_product(
[np.arange(1000),
np.arange(20), list(string.ascii_letters)],

self.mi_large = MultiIndex.from_product(
[np.arange(1000), np.arange(20), list(string.ascii_letters)],
names=['one', 'two', 'three'])
self.mi_med = MultiIndex.from_product(
[np.arange(1000), np.arange(10), list('A')],
names=['one', 'two', 'three'])
self.mi_small = MultiIndex.from_product(
[np.arange(100), list('A'), list('A')],
names=['one', 'two', 'three'])

def time_series_xs_mi_ix(self):
@@ -215,8 +224,26 @@ def time_multiindex_get_indexer(self):
(0, 16), (0, 17), (0, 18),
(0, 19)], dtype=object))

def time_multiindex_large_get_loc(self):
self.mi_large.get_loc((999, 19, 'Z'))

def time_multiindex_large_get_loc_warm(self):
for _ in range(1000):
self.mi_large.get_loc((999, 19, 'Z'))

def time_multiindex_med_get_loc(self):
self.mi_med.get_loc((999, 9, 'A'))

def time_multiindex_med_get_loc_warm(self):
for _ in range(1000):
self.mi_med.get_loc((999, 9, 'A'))

def time_multiindex_string_get_loc(self):
self.mistring.get_loc((999, 19, 'Z'))
self.mi_small.get_loc((99, 'A', 'A'))

def time_multiindex_small_get_loc_warm(self):
for _ in range(1000):
self.mi_small.get_loc((99, 'A', 'A'))

def time_is_monotonic(self):
self.miint.is_monotonic
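The `get_loc` benchmarks above each look up a single key tuple, e.g. `mi_large.get_loc((999, 19, 'Z'))`. For an index built with `MultiIndex.from_product`, the flat position of a key can be sketched with plain row-major arithmetic. This is an illustration of what the lookup must compute, not pandas' actual hash-based implementation:

```python
import string

def product_position(key, levels):
    """Row-major position of `key` in the cartesian product of `levels`."""
    pos = 0
    for value, level in zip(key, levels):
        pos = pos * len(level) + list(level).index(value)
    return pos

# same levels as mi_large in the benchmark setup above
levels = [range(1000), range(20), string.ascii_letters]
# (999, 19, 'Z') is the very last tuple in the product
assert product_position((999, 19, 'Z'), levels) == 1000 * 20 * 52 - 1
```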
11 changes: 11 additions & 0 deletions asv_bench/benchmarks/series_methods.py
@@ -111,6 +111,7 @@ def setup(self):
def time_series_dropna_int64(self):
self.s.dropna()


class series_dropna_datetime(object):
goal_time = 0.2

@@ -120,3 +121,13 @@ def setup(self):

def time_series_dropna_datetime(self):
self.s.dropna()


class series_clip(object):
goal_time = 0.2

def setup(self):
self.s = pd.Series(np.random.randn(50))

def time_series_clip(self):
self.s.clip(0, 1)
27 changes: 16 additions & 11 deletions ci/install_travis.sh
@@ -119,15 +119,7 @@ if [ "$COVERAGE" ]; then
fi

echo
if [ "$BUILD_TEST" ]; then

# build & install testing
echo ["Starting installation test."]
bash ci/install_release_build.sh
conda uninstall -y cython
time pip install dist/*tar.gz || exit 1

else
if [ -z "$BUILD_TEST" ]; then

# build but don't install
echo "[build em]"
@@ -163,9 +155,22 @@ fi
# w/o removing anything else
echo
echo "[removing installed pandas]"
conda remove pandas --force
conda remove pandas -y --force

if [ -z "$BUILD_TEST" ]; then
if [ "$BUILD_TEST" ]; then

# remove any installation
pip uninstall -y pandas
conda list pandas
pip list --format columns |grep pandas

# build & install testing
echo ["building release"]
bash scripts/build_dist_for_release.sh
conda uninstall -y cython
time pip install dist/*tar.gz || exit 1

else

# install our pandas
echo
2 changes: 1 addition & 1 deletion ci/requirements-3.5_OSX.sh
@@ -4,4 +4,4 @@ source activate pandas

echo "install 35_OSX"

conda install -n pandas -c conda-forge feather-format
conda install -n pandas -c conda-forge feather-format==0.3.1
20 changes: 13 additions & 7 deletions ci/script_multi.sh
@@ -19,20 +19,26 @@ export PYTHONHASHSEED=$(python -c 'import random; print(random.randint(1, 429496
echo PYTHONHASHSEED=$PYTHONHASHSEED

if [ "$BUILD_TEST" ]; then
echo "build-test"
echo "[build-test]"

echo "[env]"
pip list --format columns |grep pandas

echo "[running]"
cd /tmp
pwd
conda list pandas
echo "running"
python -c "import pandas; pandas.test(['-n 2'])"
unset PYTHONPATH
python -c 'import pandas; pandas.test(["-n 2", "--skip-slow", "--skip-network", "-r xX", "-m not single"])'

elif [ "$DOC" ]; then
echo "We are not running pytest as this is a doc-build"

elif [ "$COVERAGE" ]; then
echo pytest -s -n 2 -m "not single" --cov=pandas --cov-report xml:/tmp/cov-multiple.xml --junitxml=/tmp/multiple.xml $TEST_ARGS pandas
pytest -s -n 2 -m "not single" --cov=pandas --cov-report xml:/tmp/cov-multiple.xml --junitxml=/tmp/multiple.xml $TEST_ARGS pandas

else
echo pytest -n 2 -m "not single" --junitxml=/tmp/multiple.xml $TEST_ARGS pandas
pytest -n 2 -m "not single" --junitxml=/tmp/multiple.xml $TEST_ARGS pandas # TODO: doctest
echo pytest -n 2 -r xX -m "not single" --junitxml=/tmp/multiple.xml $TEST_ARGS pandas
pytest -n 2 -r xX -m "not single" --junitxml=/tmp/multiple.xml $TEST_ARGS pandas # TODO: doctest
fi

RET="$?"
4 changes: 2 additions & 2 deletions ci/script_single.sh
@@ -20,8 +20,8 @@ elif [ "$COVERAGE" ]; then
echo pytest -s -m "single" --cov=pandas --cov-report xml:/tmp/cov-single.xml --junitxml=/tmp/single.xml $TEST_ARGS pandas
pytest -s -m "single" --cov=pandas --cov-report xml:/tmp/cov-single.xml --junitxml=/tmp/single.xml $TEST_ARGS pandas
else
echo pytest -m "single" --junitxml=/tmp/single.xml $TEST_ARGS pandas
pytest -m "single" --junitxml=/tmp/single.xml $TEST_ARGS pandas # TODO: doctest
echo pytest -m "single" -r xX --junitxml=/tmp/single.xml $TEST_ARGS pandas
pytest -m "single" -r xX --junitxml=/tmp/single.xml $TEST_ARGS pandas # TODO: doctest
fi

RET="$?"
35 changes: 24 additions & 11 deletions doc/make.py
@@ -34,47 +34,60 @@
SPHINX_BUILD = 'sphinxbuild'


def upload_dev(user='pandas'):
def _process_user(user):
if user is None or user is False:
user = ''
else:
user = user + '@'
return user


def upload_dev(user=None):
'push a copy to the pydata dev directory'
if os.system('cd build/html; rsync -avz . {0}@pandas.pydata.org'
user = _process_user(user)
if os.system('cd build/html; rsync -avz . {0}pandas.pydata.org'
':/usr/share/nginx/pandas/pandas-docs/dev/ -essh'.format(user)):
raise SystemExit('Upload to Pydata Dev failed')


def upload_dev_pdf(user='pandas'):
def upload_dev_pdf(user=None):
'push a copy to the pydata dev directory'
if os.system('cd build/latex; scp pandas.pdf {0}@pandas.pydata.org'
user = _process_user(user)
if os.system('cd build/latex; scp pandas.pdf {0}pandas.pydata.org'
':/usr/share/nginx/pandas/pandas-docs/dev/'.format(user)):
raise SystemExit('PDF upload to Pydata Dev failed')


def upload_stable(user='pandas'):
def upload_stable(user=None):
'push a copy to the pydata stable directory'
if os.system('cd build/html; rsync -avz . {0}@pandas.pydata.org'
user = _process_user(user)
if os.system('cd build/html; rsync -avz . {0}pandas.pydata.org'
':/usr/share/nginx/pandas/pandas-docs/stable/ -essh'.format(user)):
raise SystemExit('Upload to stable failed')


def upload_stable_pdf(user='pandas'):
def upload_stable_pdf(user=None):
'push a copy to the pydata dev directory'
if os.system('cd build/latex; scp pandas.pdf {0}@pandas.pydata.org'
user = _process_user(user)
if os.system('cd build/latex; scp pandas.pdf {0}pandas.pydata.org'
':/usr/share/nginx/pandas/pandas-docs/stable/'.format(user)):
raise SystemExit('PDF upload to stable failed')


def upload_prev(ver, doc_root='./', user='pandas'):
def upload_prev(ver, doc_root='./', user=None):
'push a copy of older release to appropriate version directory'
user = _process_user(user)
local_dir = doc_root + 'build/html'
remote_dir = '/usr/share/nginx/pandas/pandas-docs/version/%s/' % ver
cmd = 'cd %s; rsync -avz . %s@pandas.pydata.org:%s -essh'
cmd = 'cd %s; rsync -avz . %spandas.pydata.org:%s -essh'
cmd = cmd % (local_dir, user, remote_dir)
print(cmd)
if os.system(cmd):
raise SystemExit(
'Upload to %s from %s failed' % (remote_dir, local_dir))

local_dir = doc_root + 'build/latex'
pdf_cmd = 'cd %s; scp pandas.pdf %s@pandas.pydata.org:%s'
pdf_cmd = 'cd %s; scp pandas.pdf %spandas.pydata.org:%s'
pdf_cmd = pdf_cmd % (local_dir, user, remote_dir)
if os.system(pdf_cmd):
raise SystemExit('Upload PDF to %s from %s failed' % (ver, doc_root))
1 change: 1 addition & 0 deletions doc/source/api.rst
@@ -724,6 +724,7 @@ Serialization / IO / Conversion
Series.to_dense
Series.to_string
Series.to_clipboard
Series.to_latex

Sparse
~~~~~~
8 changes: 8 additions & 0 deletions doc/source/categorical.rst
@@ -453,6 +453,14 @@ the original values:

np.asarray(cat) > base

When you compare two unordered categoricals with the same categories, the order is not considered:

.. ipython:: python

c1 = pd.Categorical(['a', 'b'], categories=['a', 'b'], ordered=False)
c2 = pd.Categorical(['a', 'b'], categories=['b', 'a'], ordered=False)
c1 == c2

Operations
----------

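The behavior documented in the `categorical.rst` addition above (#16339) is that unordered categoricals with the same set of categories compare by value, so a differing category order must not leak into `==`. A pure-Python sketch of that comparison, decoding each side's integer codes through its own categories (illustrative only, not pandas' implementation):

```python
def cat_eq(codes1, cats1, codes2, cats2):
    # unordered categoricals compare elementwise by *value*, so decode
    # the integer codes through each side's own categories first
    if set(cats1) != set(cats2):
        raise TypeError("Categoricals can only be compared if "
                        "'categories' are the same")
    values1 = [cats1[c] for c in codes1]
    values2 = [cats2[c] for c in codes2]
    return [a == b for a, b in zip(values1, values2)]

# c1/c2 from the doc example: same values ['a', 'b'], but the
# categories are listed in a different order on each side
assert cat_eq([0, 1], ['a', 'b'], [1, 0], ['b', 'a']) == [True, True]
```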
2 changes: 1 addition & 1 deletion doc/source/install.rst
@@ -202,7 +202,7 @@ installed), make sure you have `pytest
Dependencies
------------

* `setuptools <http://pythonhosted.org/setuptools>`__
* `setuptools <https://setuptools.readthedocs.io/en/latest/>`__
* `NumPy <http://www.numpy.org>`__: 1.7.1 or higher
* `python-dateutil <http://labix.org/python-dateutil>`__: 1.5 or higher
* `pytz <http://pytz.sourceforge.net/>`__: Needed for time zone support
2 changes: 1 addition & 1 deletion doc/source/style.ipynb
@@ -12,7 +12,7 @@
"\n",
"<span style=\"color: red\">*Provisional: This is a new feature and still under development. We'll be adding features and possibly making breaking changes in future releases. We'd love to hear your feedback.*</span>\n",
"\n",
"This document is written as a Jupyter Notebook, and can be viewed or downloaded [here](http://nbviewer.ipython.org/github/pandas-dev/pandas/blob/master/doc/source/html-styling.ipynb).\n",
"This document is written as a Jupyter Notebook, and can be viewed or downloaded [here](http://nbviewer.ipython.org/github/pandas-dev/pandas/blob/master/doc/source/style.ipynb).\n",
"\n",
"You can apply **conditional formatting**, the visual styling of a DataFrame\n",
"depending on the data within, by using the ``DataFrame.style`` property.\n",
11 changes: 11 additions & 0 deletions doc/source/themes/nature_with_gtoc/layout.html
@@ -94,4 +94,15 @@ <h3 style="margin-top: 1.5em;">{{ _('Search') }}</h3>
});
});
</script>
<script type="text/javascript">
var _gaq = _gaq || [];
_gaq.push(['_setAccount', 'UA-27880019-2']);
_gaq.push(['_trackPageview']);

(function() {
var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);
})();
</script>
{% endblock %}
2 changes: 2 additions & 0 deletions doc/source/whatsnew.rst
@@ -18,6 +18,8 @@ What's New

These are new features and improvements of note in each release.

.. include:: whatsnew/v0.20.2.txt

.. include:: whatsnew/v0.20.0.txt

.. include:: whatsnew/v0.19.2.txt