Skip to content

BUG: fix reverse comparison operations for Categorical #8706

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Nov 2, 2014
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.15.1.txt
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,7 @@ Bug Fixes
- Bug in selecting from a ``Categorical`` with ``.iloc`` (:issue:`8623`)
- Bug in groupby-transform with a Categorical (:issue:`8623`)
- Bug in duplicated/drop_duplicates with a Categorical (:issue:`8623`)
- Bug in ``Categorical`` reflected comparison operator raising if the first argument was a numpy array scalar (e.g. np.int64) (:issue:`8658`)



Expand Down
11 changes: 10 additions & 1 deletion pandas/core/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,16 @@ def f(self, other):
# In other series, the leads to False, so do that here too
ret[na_mask] = False
return ret
elif lib.isscalar(other):

# Numpy-1.9 and earlier may convert a scalar to a zerodim array during
# comparison operation when second arg has higher priority, e.g.
#
# cat[0] < cat
#
# Here ``cat[0]`` starts out as ``np.int64(1)``, but by the time it reaches
# this function it may already have been converted to ``np.array(1)``.
other = lib.item_from_zerodim(other)
if lib.isscalar(other):
if other in self.categories:
i = self.categories.get_loc(other)
return getattr(self._codes, op)(i)
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ def _check(cls, inst):
ABCSparseArray = create_pandas_abc_type("ABCSparseArray", "_subtyp",
('sparse_array', 'sparse_series'))
ABCCategorical = create_pandas_abc_type("ABCCategorical","_typ",("categorical"))

ABCPeriod = create_pandas_abc_type("ABCPeriod", "_typ", ("period",))

class _ABCGeneric(type):

Expand Down
49 changes: 48 additions & 1 deletion pandas/lib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import numpy as np

from numpy cimport *

np.import_array()

cdef extern from "numpy/arrayobject.h":
cdef enum NPY_TYPES:
Expand Down Expand Up @@ -234,8 +235,54 @@ cpdef checknull_old(object val):
else:
return util._checknull(val)

# ABCPeriod cannot be imported right away from pandas.core.common.
ABCPeriod = None
def isscalar(object val):
return np.isscalar(val) or val is None or PyDateTime_Check(val) or PyDelta_Check(val)
"""
Return True if given value is scalar.

This includes:
- numpy array scalar (e.g. np.int64)
- Python builtin numerics
- Python builtin byte arrays and strings
- None
- instances of datetime.datetime, datetime.date and datetime.time
- instances of datetime.timedelta
- instances of pandas ``Period``

"""
global ABCPeriod
if ABCPeriod is None:
from pandas.core.common import ABCPeriod as _ABCPeriod
ABCPeriod = _ABCPeriod

return (np.PyArray_IsAnyScalar(val)
# As of numpy-1.9, PyArray_IsAnyScalar misses bytearrays on Py3.
or PyBytes_Check(val)
or val is None
or PyDate_Check(val)
or PyDelta_Check(val)
or PyTime_Check(val)
or isinstance(val, ABCPeriod))


def item_from_zerodim(object val):
    """
    If the value is a zerodim array, return the item it contains.

    Any other value is handed back unchanged.  The work is delegated to
    ``util.unbox_if_zerodim``.

    Parameters
    ----------
    val : object
        Any Python object.

    Returns
    -------
    object
        The item held by ``val`` when ``val`` is a zerodim array, otherwise
        ``val`` itself.

    Examples
    --------
    >>> item_from_zerodim(1)
    1
    >>> item_from_zerodim('foobar')
    'foobar'
    >>> item_from_zerodim(np.array(1))
    1
    >>> item_from_zerodim(np.array([1]))
    array([1])

    """
    return util.unbox_if_zerodim(val)


@cython.wraparound(False)
Expand Down
15 changes: 15 additions & 0 deletions pandas/src/numpy_helper.h
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,21 @@ void set_array_not_contiguous(PyArrayObject *ao) {
}


// Unwrap a zero-dimensional ndarray into the matching numpy array scalar
// (e.g. np.int64).  Any other object is passed through as is, with its
// reference count incremented so the caller owns a reference on that path too.
PANDAS_INLINE PyObject*
unbox_if_zerodim(PyObject* arr) {
    // Not a zerodim array: return the very same object with a new reference.
    if (!PyArray_IsZeroDim(arr)) {
        Py_INCREF(arr);
        return arr;
    }

    // Zerodim array: build an array scalar from the array's single element.
    return PyArray_ToScalar(PyArray_DATA(arr), arr);
}


// PANDAS_INLINE PyObject*
// get_base_ndarray(PyObject* ap) {
// // if (!ap || (NULL == ap)) {
Expand Down
2 changes: 1 addition & 1 deletion pandas/src/util.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ cdef extern from "numpy_helper.h":
inline void transfer_object_column(char *dst, char *src, size_t stride,
size_t length)
object sarr_from_data(cnp.dtype, int length, void* data)
inline object unbox_if_zerodim(object arr)

cdef inline object get_value_at(ndarray arr, object loc):
cdef:
Expand Down Expand Up @@ -64,7 +65,6 @@ cdef inline int is_contiguous(ndarray arr):
cdef inline is_array(object o):
return cnp.PyArray_Check(o)


cdef inline bint _checknull(object val):
try:
return val is None or (cpython.PyFloat_Check(val) and val != val)
Expand Down
6 changes: 6 additions & 0 deletions pandas/tests/test_categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -917,6 +917,12 @@ def test_datetime_categorical_comparison(self):
self.assert_numpy_array_equal(dt_cat > dt_cat[0], [False, True, True])
self.assert_numpy_array_equal(dt_cat[0] < dt_cat, [False, True, True])

def test_reflected_comparison_with_scalars(self):
    """Comparing against an element of the Categorical works in both orders."""
    # GH8658
    cat = pd.Categorical([1, 2, 3])
    expected = [False, True, True]

    # Scalar on the right-hand side: regular comparison.
    self.assert_numpy_array_equal(cat > cat[0], expected)
    # Scalar on the left-hand side: reflected comparison.
    self.assert_numpy_array_equal(cat[0] < cat, expected)


class TestCategoricalAsBlock(tm.TestCase):
_multiprocess_can_split_ = True
Expand Down
72 changes: 72 additions & 0 deletions pandas/tests/test_lib.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
from datetime import datetime, timedelta, date, time

import numpy as np

import pandas as pd
from pandas.lib import isscalar, item_from_zerodim
import pandas.util.testing as tm


class TestIsscalar(tm.TestCase):
    """Checks for ``isscalar`` and ``item_from_zerodim`` from ``pandas.lib``."""

    def test_isscalar_builtin_scalars(self):
        # None, Python builtins, stdlib datetime objects and NaT are scalars.
        scalars = (
            None,
            True,
            False,
            0.,
            np.nan,
            'foobar',
            b'foobar',
            u'foobar',
            datetime(2014, 1, 1),
            date(2014, 1, 1),
            time(12, 0),
            timedelta(hours=1),
            pd.NaT,
        )
        for value in scalars:
            self.assertTrue(isscalar(value))

    def test_isscalar_builtin_nonscalars(self):
        # Builtin containers and indexing helpers are not scalars.
        nonscalars = ({}, [], [1], (), (1,), slice(None), Ellipsis)
        for value in nonscalars:
            self.assertFalse(isscalar(value))

    def test_isscalar_numpy_array_scalars(self):
        # Every numpy array scalar type counts as a scalar.
        array_scalars = (
            np.int64(1),
            np.float64(1.),
            np.int32(1),
            np.object_('foobar'),
            np.str_('foobar'),
            np.unicode_(u'foobar'),
            np.bytes_(b'foobar'),
            np.datetime64('2014-01-01'),
            np.timedelta64(1, 'h'),
        )
        for value in array_scalars:
            self.assertTrue(isscalar(value))

    def test_isscalar_numpy_zerodim_arrays(self):
        # A zerodim array is not a scalar, but the item unboxed from it is.
        zerodim_arrays = [
            np.array(1),
            np.array('foobar'),
            np.array(np.datetime64('2014-01-01')),
            np.array(np.timedelta64(1, 'h')),
        ]
        for arr in zerodim_arrays:
            self.assertFalse(isscalar(arr))
            unboxed = item_from_zerodim(arr)
            self.assertTrue(isscalar(unboxed))

    def test_isscalar_numpy_arrays(self):
        # Arrays with one or more dimensions are never scalars, even if empty.
        arrays = (np.array([]), np.array([[]]), np.matrix('1; 2'))
        for arr in arrays:
            self.assertFalse(isscalar(arr))

    def test_isscalar_pandas_scalars(self):
        # pandas' own scalar types.
        pandas_scalars = (
            pd.Timestamp('2014-01-01'),
            pd.Timedelta(hours=1),
            pd.Period('2014-01-01'),
        )
        for value in pandas_scalars:
            self.assertTrue(isscalar(value))

    def test_isscalar_pandas_containers(self):
        # pandas containers are not scalars, whether empty or populated.
        containers = (
            pd.Series(),
            pd.Series([1]),
            pd.DataFrame(),
            pd.DataFrame([[1]]),
            pd.Panel(),
            pd.Panel([[[1]]]),
            pd.Index([]),
            pd.Index([1]),
        )
        for container in containers:
            self.assertFalse(isscalar(container))
2 changes: 1 addition & 1 deletion pandas/tseries/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ class Period(PandasObject):
"""
__slots__ = ['freq', 'ordinal']
_comparables = ['name','freqstr']
_typ = 'period'

@classmethod
def _from_ordinal(cls, ordinal, freq):
Expand Down Expand Up @@ -498,7 +499,6 @@ def strftime(self, fmt):
base, mult = _gfc(self.freq)
return tslib.period_format(self.ordinal, base, fmt)


def _get_ordinals(data, freq):
f = lambda x: Period(x, freq=freq).ordinal
if isinstance(data[0], Period):
Expand Down