Skip to content

feat: Allow iloc to support lists of negative indices #1497

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Mar 19, 2025
36 changes: 35 additions & 1 deletion bigframes/core/indexers.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import bigframes.core.guid as guid
import bigframes.core.indexes as indexes
import bigframes.core.scalar
import bigframes.core.window_spec as windows
import bigframes.dataframe
import bigframes.dtypes
import bigframes.exceptions as bfe
Expand Down Expand Up @@ -477,6 +478,19 @@ def _iloc_getitem_series_or_dataframe(
Union[bigframes.dataframe.DataFrame, bigframes.series.Series],
series_or_dataframe.iloc[0:0],
)

# Determine whether every position in the key has the same sign; a mixed-sign key needs both the non-negative and the negative offset index
if isinstance(key, (bigframes.series.Series, indexes.Index)):
# Avoid downloading the key's data just to inspect signs; conservatively treat a Series/Index key as mixed-sign
is_key_unisigned = False
else:
first_sign = key[0] >= 0
is_key_unisigned = True
for k in key:
if (k >= 0) != first_sign:
is_key_unisigned = False
break

if isinstance(series_or_dataframe, bigframes.series.Series):
original_series_name = series_or_dataframe.name
series_name = (
Expand All @@ -497,7 +511,27 @@ def _iloc_getitem_series_or_dataframe(
block = df._block
# explicitly set index to offsets, reset_index may not generate offsets in some modes
block, offsets_id = block.promote_offsets("temp_iloc_offsets_")
block = block.set_index([offsets_id])
pos_block = block.set_index([offsets_id])

if not is_key_unisigned or key[0] < 0:
neg_block, size_col_id = block.apply_window_op(
offsets_id,
ops.aggregations.SizeUnaryOp(),
window_spec=windows.rows(),
)
neg_block, neg_index_id = neg_block.apply_binary_op(
offsets_id, size_col_id, ops.SubOp()
)

neg_block = neg_block.set_index([neg_index_id]).drop_columns(
[size_col_id, offsets_id]
)

if is_key_unisigned:
block = pos_block if key[0] >= 0 else neg_block
else:
block = pos_block.concat([neg_block], how="inner")

df = bigframes.dataframe.DataFrame(block)

result = df.loc[key]
Expand Down
24 changes: 18 additions & 6 deletions tests/system/small/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -4400,9 +4400,15 @@ def test_loc_list_multiindex(scalars_dfs_maybe_ordered):
)


def test_iloc_list(scalars_df_index, scalars_pandas_df_index):
index_list = [0, 0, 0, 5, 4, 7]

@pytest.mark.parametrize(
"index_list",
[
[0, 1, 2, 3, 4, 4],
[0, 0, 0, 5, 4, 7, -2, -5, 3],
[-1, -2, -3, -4, -5, -5],
],
)
def test_iloc_list(scalars_df_index, scalars_pandas_df_index, index_list):
bf_result = scalars_df_index.iloc[index_list]
pd_result = scalars_pandas_df_index.iloc[index_list]

Expand All @@ -4412,11 +4418,17 @@ def test_iloc_list(scalars_df_index, scalars_pandas_df_index):
)


@pytest.mark.parametrize(
"index_list",
[
[0, 1, 2, 3, 4, 4],
[0, 0, 0, 5, 4, 7, -2, -5, 3],
[-1, -2, -3, -4, -5, -5],
],
)
def test_iloc_list_partial_ordering(
scalars_df_partial_ordering, scalars_pandas_df_index
scalars_df_partial_ordering, scalars_pandas_df_index, index_list
):
index_list = [0, 0, 0, 5, 4, 7]

bf_result = scalars_df_partial_ordering.iloc[index_list]
pd_result = scalars_pandas_df_index.iloc[index_list]

Expand Down