Skip to content

Commit 20e7d89

Browse files
committed
Merge pull request #5227 from jreback/enlarge_empty
BUG: allow enlargement to work with empty objects (GH5226)
2 parents 0fb94ba + 7bbeb79 commit 20e7d89

File tree

4 files changed, with 98 additions and 11 deletions.

doc/source/release.rst

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -206,7 +206,7 @@ API Changes
206206
(:issue:`4384`, :issue:`4375`, :issue:`4372`)
207207
- ``Series.get`` with negative indexers now returns the same as ``[]`` (:issue:`4390`)
208208
- allow ``ix/loc`` for Series/DataFrame/Panel to set on any axis even when the single-key is not currently contained in
209-
the index for that axis (:issue:`2578`)
209+
the index for that axis (:issue:`2578`, :issue:`5226`)
210210
- ``at`` now will enlarge the object inplace (and return the same) (:issue:`2578`)
211211

212212
- ``HDFStore``

pandas/core/frame.py

Lines changed: 12 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1865,6 +1865,15 @@ def _setitem_frame(self, key, value):
18651865

18661866
self.where(-key, value, inplace=True)
18671867

1868+
def _ensure_valid_index(self, value):
1869+
"""
1870+
ensure that, if we don't have an index, we can create one from the passed value
1871+
"""
1872+
if not len(self.index):
1873+
if not isinstance(value, Series):
1874+
raise ValueError("cannot set a frame with no defined index and a non-series")
1875+
self._data.set_axis(1, value.index.copy(), check_axis=False)
1876+
18681877
def _set_item(self, key, value):
18691878
"""
18701879
Add series to DataFrame in specified column.
@@ -1875,6 +1884,7 @@ def _set_item(self, key, value):
18751884
Series/TimeSeries will be conformed to the DataFrame's index to
18761885
ensure homogeneity.
18771886
"""
1887+
self._ensure_valid_index(value)
18781888
value = self._sanitize_column(key, value)
18791889
NDFrame._set_item(self, key, value)
18801890

@@ -1890,6 +1900,7 @@ def insert(self, loc, column, value, allow_duplicates=False):
18901900
column : object
18911901
value : int, Series, or array-like
18921902
"""
1903+
self._ensure_valid_index(value)
18931904
value = self._sanitize_column(column, value)
18941905
self._data.insert(
18951906
loc, column, value, allow_duplicates=allow_duplicates)
@@ -1900,7 +1911,7 @@ def _sanitize_column(self, key, value):
19001911
if _is_sequence(value):
19011912
is_frame = isinstance(value, DataFrame)
19021913
if isinstance(value, Series) or is_frame:
1903-
if value.index.equals(self.index):
1914+
if value.index.equals(self.index) or not len(self.index):
19041915
# copy the values
19051916
value = value.values.copy()
19061917
else:

pandas/core/indexing.py

Lines changed: 35 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -173,9 +173,19 @@ def _setitem_with_indexer(self, indexer, value):
173173
if self.ndim > 1 and i == self.obj._info_axis_number:
174174

175175
# add the new item, and set the value
176-
new_indexer = _convert_from_missing_indexer_tuple(indexer)
176+
# must have all defined axes if we have a scalar
177+
# or a list-like on the non-info axes if we have a list-like
178+
len_non_info_axes = [ len(_ax) for _i, _ax in enumerate(self.obj.axes) if _i != i ]
179+
if any([ not l for l in len_non_info_axes ]):
180+
if not is_list_like(value):
181+
raise ValueError("cannot set a frame with no defined index and a scalar")
182+
self.obj[key] = value
183+
return self.obj
184+
177185
self.obj[key] = np.nan
178-
self.obj.loc[new_indexer] = value
186+
187+
new_indexer = _convert_from_missing_indexer_tuple(indexer, self.obj.axes)
188+
self._setitem_with_indexer(new_indexer, value)
179189
return self.obj
180190

181191
# reindex the axis
@@ -208,12 +218,21 @@ def _setitem_with_indexer(self, indexer, value):
208218
else:
209219
new_index = _safe_append_to_index(index, indexer)
210220

211-
new_values = np.concatenate([self.obj.values, [value]])
221+
# this preserves dtype of the value
222+
new_values = Series([value]).values
223+
if len(self.obj.values):
224+
new_values = np.concatenate([self.obj.values, new_values])
225+
212226
self.obj._data = self.obj._constructor(new_values, index=new_index, name=self.obj.name)
213227
self.obj._maybe_update_cacher(clear=True)
214228
return self.obj
215229

216230
elif self.ndim == 2:
231+
232+
# no columns and scalar
233+
if not len(self.obj.columns):
234+
raise ValueError("cannot set a frame with no defined columns")
235+
217236
index = self.obj._get_axis(0)
218237
labels = _safe_append_to_index(index, indexer)
219238
self.obj._data = self.obj.reindex_axis(labels,0)._data
@@ -410,16 +429,17 @@ def _align_series(self, indexer, ser):
410429
new_ix = Index([new_ix])
411430
else:
412431
new_ix = Index(new_ix.ravel())
413-
if ser.index.equals(new_ix):
432+
if ser.index.equals(new_ix) or not len(new_ix):
414433
return ser.values.copy()
434+
415435
return ser.reindex(new_ix).values
416436

417437
# 2 dims
418438
elif single_aligner and is_frame:
419439

420440
# reindex along index
421441
ax = self.obj.axes[1]
422-
if ser.index.equals(ax):
442+
if ser.index.equals(ax) or not len(ax):
423443
return ser.values.copy()
424444
return ser.reindex(ax).values
425445

@@ -819,6 +839,12 @@ def _convert_to_indexer(self, obj, axis=0, is_setter=False):
819839
# if we are setting and it's not a valid location
820840
# it's an insert, which fails by definition
821841
if is_setter:
842+
843+
# always valid
844+
if self.name == 'loc':
845+
return { 'key' : obj }
846+
847+
# a positional
822848
if obj >= len(self.obj) and not isinstance(labels, MultiIndex):
823849
raise ValueError("cannot set by positional indexing with enlargement")
824850

@@ -1307,11 +1333,11 @@ def _convert_missing_indexer(indexer):
13071333

13081334
return indexer, False
13091335

1310-
def _convert_from_missing_indexer_tuple(indexer):
1336+
def _convert_from_missing_indexer_tuple(indexer, axes):
13111337
""" create a filtered indexer that doesn't have any missing indexers """
1312-
def get_indexer(_idx):
1313-
return _idx['key'] if isinstance(_idx,dict) else _idx
1314-
return tuple([ get_indexer(_idx) for _i, _idx in enumerate(indexer) ])
1338+
def get_indexer(_i, _idx):
1339+
return axes[_i].get_loc(_idx['key']) if isinstance(_idx,dict) else _idx
1340+
return tuple([ get_indexer(_i, _idx) for _i, _idx in enumerate(indexer) ])
13151341

13161342
def _safe_append_to_index(index, key):
13171343
""" a safe append to an index, if incorrect type, then catch and recreate """

pandas/tests/test_indexing.py

Lines changed: 50 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1542,6 +1542,56 @@ def f():
15421542
df.ix[100,:] = df.ix[0]
15431543
self.assertRaises(ValueError, f)
15441544

1545+
def test_partial_set_empty(self):
1546+
1547+
# GH5226
1548+
1549+
# partially set with an empty object
1550+
# series
1551+
s = Series()
1552+
s.loc[1] = 1
1553+
assert_series_equal(s,Series([1],index=[1]))
1554+
s.loc[3] = 3
1555+
assert_series_equal(s,Series([1,3],index=[1,3]))
1556+
1557+
s = Series()
1558+
s.loc[1] = 1.
1559+
assert_series_equal(s,Series([1.],index=[1]))
1560+
s.loc[3] = 3.
1561+
assert_series_equal(s,Series([1.,3.],index=[1,3]))
1562+
1563+
s = Series()
1564+
s.loc['foo'] = 1
1565+
assert_series_equal(s,Series([1],index=['foo']))
1566+
s.loc['bar'] = 3
1567+
assert_series_equal(s,Series([1,3],index=['foo','bar']))
1568+
s.loc[3] = 4
1569+
assert_series_equal(s,Series([1,3,4],index=['foo','bar',3]))
1570+
1571+
# partially set with an empty object
1572+
# frame
1573+
df = DataFrame()
1574+
1575+
def f():
1576+
df.loc[1] = 1
1577+
self.assertRaises(ValueError, f)
1578+
def f():
1579+
df.loc[1] = Series([1],index=['foo'])
1580+
self.assertRaises(ValueError, f)
1581+
def f():
1582+
df.loc[:,1] = 1
1583+
self.assertRaises(ValueError, f)
1584+
1585+
df2 = DataFrame()
1586+
df2[1] = Series([1],index=['foo'])
1587+
df.loc[:,1] = Series([1],index=['foo'])
1588+
assert_frame_equal(df,DataFrame([[1]],index=['foo'],columns=[1]))
1589+
assert_frame_equal(df,df2)
1590+
1591+
df = DataFrame(columns=['A','B'])
1592+
df.loc[3] = [6,7]
1593+
assert_frame_equal(df,DataFrame([[6,7]],index=[3],columns=['A','B']))
1594+
15451595
def test_cache_updating(self):
15461596
# GH 4939, make sure to update the cache on setitem
15471597

0 commit comments

Comments
 (0)