Skip to content

Commit b6c9233

Browse files
committed
BUG: Common NumericIndex.__new__, fixed name handling in indices
closes #12309
1 parent b722222 commit b6c9233

File tree

11 files changed

+131
-62
lines changed

11 files changed

+131
-62
lines changed

pandas/indexes/category.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,9 @@ def __new__(cls, data=None, categories=None, ordered=None, dtype=None,
4646
if fastpath:
4747
return cls._simple_new(data, name=name)
4848

49+
if name is None and hasattr(data, 'name'):
50+
name = data.name
51+
4952
if isinstance(data, com.ABCCategorical):
5053
data = cls._create_categorical(cls, data, categories, ordered)
5154
elif isinstance(data, CategoricalIndex):

pandas/indexes/numeric.py

Lines changed: 42 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,28 @@ class NumericIndex(Index):
2222
"""
2323
_is_numeric_dtype = True
2424

25+
def __new__(cls, data=None, dtype=None, copy=False, name=None,
26+
fastpath=False):
27+
28+
if fastpath:
29+
return cls._simple_new(data, name=name)
30+
31+
# isscalar, generators handled in coerce_to_ndarray
32+
data = cls._coerce_to_ndarray(data)
33+
34+
if issubclass(data.dtype.type, compat.string_types):
35+
cls._string_data_error(data)
36+
37+
if copy or not com.is_dtype_equal(data.dtype, cls._default_dtype):
38+
subarr = np.array(data, dtype=cls._default_dtype, copy=copy)
39+
cls._assert_safe_casting(data, subarr)
40+
else:
41+
subarr = data
42+
43+
if name is None and hasattr(data, 'name'):
44+
name = data.name
45+
return cls._simple_new(subarr, name=name)
46+
2547
def _maybe_cast_slice_bound(self, label, side, kind):
2648
"""
2749
This function should be overloaded in subclasses that allow non-trivial
@@ -55,6 +77,15 @@ def _convert_tolerance(self, tolerance):
5577
raise ValueError('tolerance argument for %s must be numeric: %r' %
5678
(type(self).__name__, tolerance))
5779

80+
@classmethod
81+
def _assert_safe_casting(cls, data, subarr):
82+
"""
83+
Subclasses need to override this only if the process of casting data
84+
from some accepted dtype to the internal dtype(s) bears the risk of
85+
truncation (e.g. float to int).
86+
"""
87+
pass
88+
5889

5990
class Int64Index(NumericIndex):
6091
"""
@@ -90,29 +121,7 @@ class Int64Index(NumericIndex):
90121

91122
_engine_type = _index.Int64Engine
92123

93-
def __new__(cls, data=None, dtype=None, copy=False, name=None,
94-
fastpath=False, **kwargs):
95-
96-
if fastpath:
97-
return cls._simple_new(data, name=name)
98-
99-
# isscalar, generators handled in coerce_to_ndarray
100-
data = cls._coerce_to_ndarray(data)
101-
102-
if issubclass(data.dtype.type, compat.string_types):
103-
cls._string_data_error(data)
104-
105-
elif issubclass(data.dtype.type, np.integer):
106-
dtype = np.int64
107-
subarr = np.array(data, dtype=dtype, copy=copy)
108-
else:
109-
subarr = np.array(data, dtype=np.int64, copy=copy)
110-
if len(data) > 0:
111-
if (subarr != data).any():
112-
raise TypeError('Unsafe NumPy casting to integer, you must'
113-
' explicitly cast')
114-
115-
return cls._simple_new(subarr, name=name)
124+
_default_dtype = np.int64
116125

117126
@property
118127
def inferred_type(self):
@@ -166,6 +175,15 @@ def _wrap_joined_index(self, joined, other):
166175
name = self.name if self.name == other.name else None
167176
return Int64Index(joined, name=name)
168177

178+
@classmethod
179+
def _assert_safe_casting(cls, data, subarr):
180+
"""
181+
Ensure incoming data can be represented as ints.
182+
"""
183+
if not issubclass(data.dtype.type, np.integer):
184+
if not np.array_equal(data, subarr):
185+
raise TypeError('Unsafe NumPy casting, you must '
186+
'explicitly cast')
169187

170188
Int64Index._add_numeric_methods()
171189
Int64Index._add_logical_methods()
@@ -200,39 +218,7 @@ class Float64Index(NumericIndex):
200218
_inner_indexer = _algos.inner_join_indexer_float64
201219
_outer_indexer = _algos.outer_join_indexer_float64
202220

203-
def __new__(cls, data=None, dtype=None, copy=False, name=None,
204-
fastpath=False, **kwargs):
205-
206-
if fastpath:
207-
return cls._simple_new(data, name)
208-
209-
data = cls._coerce_to_ndarray(data)
210-
211-
if issubclass(data.dtype.type, compat.string_types):
212-
cls._string_data_error(data)
213-
214-
if dtype is None:
215-
dtype = np.float64
216-
dtype = np.dtype(dtype)
217-
218-
# allow integer / object dtypes to be passed, but coerce to float64
219-
if dtype.kind in ['i', 'O', 'f']:
220-
dtype = np.float64
221-
222-
else:
223-
raise TypeError("cannot support {0} dtype in "
224-
"Float64Index".format(dtype))
225-
226-
try:
227-
subarr = np.array(data, dtype=dtype, copy=copy)
228-
except:
229-
raise TypeError('Unsafe NumPy casting, you must explicitly cast')
230-
231-
# coerce to float64 for storage
232-
if subarr.dtype != np.float64:
233-
subarr = subarr.astype(np.float64)
234-
235-
return cls._simple_new(subarr, name)
221+
_default_dtype = np.float64
236222

237223
@property
238224
def inferred_type(self):
@@ -392,6 +378,5 @@ def isin(self, values, level=None):
392378
return lib.ismember_nans(np.array(self), value_set,
393379
isnull(list(value_set)).any())
394380

395-
396381
Float64Index._add_numeric_methods()
397382
Float64Index._add_logical_methods_disabled()

pandas/tests/frame/test_block_internals.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -372,11 +372,13 @@ def test_consolidate_datetime64(self):
372372
ser_starting.index = ser_starting.values
373373
ser_starting = ser_starting.tz_localize('US/Eastern')
374374
ser_starting = ser_starting.tz_convert('UTC')
375+
ser_starting.index.name = 'starting'
375376

376377
ser_ending = df.ending
377378
ser_ending.index = ser_ending.values
378379
ser_ending = ser_ending.tz_localize('US/Eastern')
379380
ser_ending = ser_ending.tz_convert('UTC')
381+
ser_ending.index.name = 'ending'
380382

381383
df.starting = ser_starting.index
382384
df.ending = ser_ending.index

pandas/tests/indexes/common.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -205,6 +205,53 @@ def test_hash_error(self):
205205
type(ind).__name__):
206206
hash(ind)
207207

208+
def test_copy_name(self):
209+
# Check that "name" argument passed at initialization is honoured
210+
# GH12309
211+
for name, index in compat.iteritems(self.indices):
212+
if isinstance(index, MultiIndex):
213+
continue
214+
215+
first = index.__class__(index, copy=True, name='mario')
216+
second = first.__class__(first, copy=False)
217+
218+
# Even though "copy=False", we want a new object.
219+
self.assertIsNot(first, second)
220+
# Not using tm.assert_index_equal() since names differ:
221+
self.assertTrue(index.equals(first))
222+
223+
self.assertEqual(first.name, 'mario')
224+
self.assertEqual(second.name, 'mario')
225+
226+
s1 = Series(2, index=first)
227+
s2 = Series(3, index=second[:-1])
228+
if not isinstance(index, CategoricalIndex): # See GH13365
229+
s3 = s1 * s2
230+
self.assertEqual(s3.index.name, 'mario')
231+
232+
def test_ensure_copied_data(self):
233+
# Check the "copy" argument of each Index.__new__ is honoured
234+
# GH12309
235+
for name, index in compat.iteritems(self.indices):
236+
init_kwargs = {}
237+
if isinstance(index, PeriodIndex):
238+
# Needs "freq" specification:
239+
init_kwargs['freq'] = index.freq
240+
elif isinstance(index, (RangeIndex, MultiIndex, CategoricalIndex)):
241+
# RangeIndex cannot be initialized from data
242+
# MultiIndex and CategoricalIndex are tested separately
243+
continue
244+
245+
index_type = index.__class__
246+
result = index_type(index.values, copy=True, **init_kwargs)
247+
tm.assert_index_equal(index, result)
248+
tm.assert_numpy_array_equal(index.values, result.values,
249+
check_same='copy')
250+
251+
result = index_type(index.values, copy=False, **init_kwargs)
252+
tm.assert_numpy_array_equal(index.values, result.values,
253+
check_same='same')
254+
208255
def test_copy_and_deepcopy(self):
209256
from copy import copy, deepcopy
210257

pandas/tests/indexes/test_base.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,7 @@ def test_constructor_from_series(self):
172172
df['date'] = ['1-1-1990', '2-1-1990', '3-1-1990', '4-1-1990',
173173
'5-1-1990']
174174
result = DatetimeIndex(df['date'], freq='MS')
175+
expected.name = 'date'
175176
self.assert_index_equal(result, expected)
176177
self.assertEqual(df['date'].dtype, object)
177178

pandas/tests/indexes/test_category.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -507,6 +507,20 @@ def test_identical(self):
507507
self.assertTrue(ci1.identical(ci1.copy()))
508508
self.assertFalse(ci1.identical(ci2))
509509

510+
def test_ensure_copied_data(self):
511+
# Check the "copy" argument of each Index.__new__ is honoured
512+
# GH12309
513+
# Must be tested separately from other indexes because
514+
# self.values is not an ndarray
515+
_base = lambda ar : ar if ar.base is None else ar.base
516+
for index in self.indices.values():
517+
result = CategoricalIndex(index.values, copy=True)
518+
tm.assert_index_equal(index, result)
519+
self.assertIsNot(_base(index.values), _base(result.values))
520+
521+
result = CategoricalIndex(index.values, copy=False)
522+
self.assertIs(_base(index.values), _base(result.values))
523+
510524
def test_equals(self):
511525

512526
ci1 = CategoricalIndex(['a', 'b'], categories=['a', 'b'], ordered=True)

pandas/tseries/index.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -225,6 +225,9 @@ def __new__(cls, data=None,
225225
verify_integrity=True, normalize=False,
226226
closed=None, ambiguous='raise', dtype=None, **kwargs):
227227

228+
if name is None and hasattr(data, 'name'):
229+
name = data.name
230+
228231
dayfirst = kwargs.pop('dayfirst', None)
229232
yearfirst = kwargs.pop('yearfirst', None)
230233

pandas/tseries/period.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -182,6 +182,9 @@ def __new__(cls, data=None, ordinal=None, freq=None, start=None, end=None,
182182
raise ValueError('Periods must be a number, got %s' %
183183
str(periods))
184184

185+
if name is None and hasattr(data, 'name'):
186+
name = data.name
187+
185188
if data is None:
186189
if ordinal is not None:
187190
data = np.asarray(ordinal, dtype=np.int64)
@@ -190,7 +193,7 @@ def __new__(cls, data=None, ordinal=None, freq=None, start=None, end=None,
190193
freq, kwargs)
191194
else:
192195
ordinal, freq = cls._from_arraylike(data, freq, tz)
193-
data = np.array(ordinal, dtype=np.int64, copy=False)
196+
data = np.array(ordinal, dtype=np.int64, copy=copy)
194197

195198
return cls._simple_new(data, name=name, freq=freq)
196199

pandas/tseries/tdi.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -138,8 +138,9 @@ def __new__(cls, data=None, unit=None,
138138

139139
if isinstance(data, TimedeltaIndex) and freq is None and name is None:
140140
if copy:
141-
data = data.copy()
142-
return data
141+
return data.copy()
142+
else:
143+
return data._shallow_copy()
143144

144145
freq_infer = False
145146
if not isinstance(freq, DateOffset):

pandas/tseries/tests/test_timedeltas.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1739,7 +1739,7 @@ def test_join_self(self):
17391739
kinds = 'outer', 'inner', 'left', 'right'
17401740
for kind in kinds:
17411741
joined = index.join(index, how=kind)
1742-
self.assertIs(index, joined)
1742+
tm.assert_index_equal(index, joined)
17431743

17441744
def test_factorize(self):
17451745
idx1 = TimedeltaIndex(['1 day', '1 day', '2 day', '2 day', '3 day',

pandas/util/testing.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -991,7 +991,7 @@ def raise_assert_detail(obj, message, left, right):
991991

992992
def assert_numpy_array_equal(left, right, strict_nan=False,
993993
check_dtype=True, err_msg=None,
994-
obj='numpy array'):
994+
obj='numpy array', check_same=None):
995995
""" Checks that 'np.ndarray' is equivalent
996996
997997
Parameters
@@ -1007,6 +1007,8 @@ def assert_numpy_array_equal(left, right, strict_nan=False,
10071007
obj : str, default 'numpy array'
10081008
Specify object name being compared, internally used to show appropriate
10091009
assertion message
1010+
check_same : None|'copy'|'same', default None
1011+
Ensure "left" and "right" refer/do not refer to the same memory area
10101012
"""
10111013

10121014
# instance validation
@@ -1016,6 +1018,14 @@ def assert_numpy_array_equal(left, right, strict_nan=False,
10161018
assertIsInstance(left, np.ndarray, '[ndarray] ')
10171019
assertIsInstance(right, np.ndarray, '[ndarray] ')
10181020

1021+
def _get_base(obj):
1022+
return obj.base if getattr(obj, 'base', None) is not None else obj
1023+
1024+
if check_same == 'same':
1025+
assertIs(_get_base(left), _get_base(right))
1026+
elif check_same == 'copy':
1027+
assertIsNot(_get_base(left), _get_base(right))
1028+
10191029
def _raise(left, right, err_msg):
10201030
if err_msg is None:
10211031
if left.shape != right.shape:

0 commit comments

Comments
 (0)