Skip to content

gh-89189: More compact range iterator #27986

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 12 commits into from
Nov 30, 2022
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion Include/internal/pycore_range.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ extern "C" {

typedef struct {
PyObject_HEAD
long index;
long start;
long step;
long len;
Expand Down
38 changes: 33 additions & 5 deletions Lib/test/test_range.py
Original file line number Diff line number Diff line change
Expand Up @@ -407,11 +407,7 @@ def test_iterator_pickling_overflowing_index(self):
for proto in range(pickle.HIGHEST_PROTOCOL + 1):
with self.subTest(proto=proto):
it = iter(range(2**32 + 2))
_, _, idx = it.__reduce__()
self.assertEqual(idx, 0)
it.__setstate__(2**32 + 1) # undocumented way to set r->index
_, _, idx = it.__reduce__()
self.assertEqual(idx, 2**32 + 1)
it.__setstate__(2**32 + 1) # undocumented way to advance an iterator
d = pickle.dumps(it, proto)
it = pickle.loads(d)
self.assertEqual(next(it), 2**32 + 1)
Expand Down Expand Up @@ -442,6 +438,38 @@ def test_large_exhausted_iterator_pickling(self):
self.assertEqual(list(i), [])
self.assertEqual(list(i2), [])

def test_iterator_unpickle_compat(self):
testcases = [
b'c__builtin__\niter\n(c__builtin__\nxrange\n(I10\nI20\nI2\ntRtRI2\nb.',
b'c__builtin__\niter\n(c__builtin__\nxrange\n(K\nK\x14K\x02tRtRK\x02b.',
b'\x80\x02c__builtin__\niter\nc__builtin__\nxrange\nK\nK\x14K\x02\x87R\x85RK\x02b.',
b'\x80\x03cbuiltins\niter\ncbuiltins\nrange\nK\nK\x14K\x02\x87R\x85RK\x02b.',
b'\x80\x04\x951\x00\x00\x00\x00\x00\x00\x00\x8c\x08builtins\x8c\x04iter\x93\x8c\x08builtins\x8c\x05range\x93K\nK\x14K\x02\x87R\x85RK\x02b.',

b'c__builtin__\niter\n(c__builtin__\nxrange\n(L-36893488147419103232L\nI20\nI2\ntRtRL18446744073709551623L\nb.',
b'c__builtin__\niter\n(c__builtin__\nxrange\n(L-36893488147419103232L\nK\x14K\x02tRtRL18446744073709551623L\nb.',
b'\x80\x02c__builtin__\niter\nc__builtin__\nxrange\n\x8a\t\x00\x00\x00\x00\x00\x00\x00\x00\xfeK\x14K\x02\x87R\x85R\x8a\t\x07\x00\x00\x00\x00\x00\x00\x00\x01b.',
b'\x80\x03cbuiltins\niter\ncbuiltins\nrange\n\x8a\t\x00\x00\x00\x00\x00\x00\x00\x00\xfeK\x14K\x02\x87R\x85R\x8a\t\x07\x00\x00\x00\x00\x00\x00\x00\x01b.',
b'\x80\x04\x95C\x00\x00\x00\x00\x00\x00\x00\x8c\x08builtins\x8c\x04iter\x93\x8c\x08builtins\x8c\x05range\x93\x8a\t\x00\x00\x00\x00\x00\x00\x00\x00\xfeK\x14K\x02\x87R\x85R\x8a\t\x07\x00\x00\x00\x00\x00\x00\x00\x01b.',
]
for t in testcases:
it = pickle.loads(t)
self.assertEqual(list(it), [14, 16, 18])

def test_iterator_setstate(self):
it = iter(range(10, 20, 2))
it.__setstate__(2)
self.assertEqual(list(it), [14, 16, 18])
it = reversed(range(10, 20, 2))
it.__setstate__(3)
self.assertEqual(list(it), [12, 10])
it = iter(range(-2**65, 20, 2))
it.__setstate__(2**64 + 7)
self.assertEqual(list(it), [14, 16, 18])
it = reversed(range(10, 2**65, 2))
it.__setstate__(2**64 - 7)
self.assertEqual(list(it), [12, 10])

def test_odd_bug(self):
# This used to raise a "SystemError: NULL result without error"
# because the range validation step was eating the exception
Expand Down
3 changes: 2 additions & 1 deletion Lib/test/test_sys.py
Original file line number Diff line number Diff line change
Expand Up @@ -1484,7 +1484,8 @@ def delx(self): del self.__x
# PyCapsule
# XXX
# rangeiterator
check(iter(range(1)), size('4l'))
check(iter(range(1)), size('3l'))
check(iter(range(2**65)), size('3P'))
# reverse
check(reversed(''), size('nP'))
# range
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Optimize the :class:`range` object iterator. It is now smaller, faster
iteration of ranges containing large numbers. Smaller pickles, faster
unpickling.
75 changes: 38 additions & 37 deletions Objects/rangeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -756,18 +756,19 @@ PyTypeObject PyRange_Type = {
static PyObject *
rangeiter_next(_PyRangeIterObject *r)
{
if (r->index < r->len)
/* cast to unsigned to avoid possible signed overflow
in intermediate calculations. */
return PyLong_FromLong((long)(r->start +
(unsigned long)(r->index++) * r->step));
if (r->len > 0) {
long result = r->start;
r->start = result + r->step;
r->len--;
return PyLong_FromLong(result);
}
return NULL;
}

static PyObject *
rangeiter_len(_PyRangeIterObject *r, PyObject *Py_UNUSED(ignored))
{
return PyLong_FromLong(r->len - r->index);
return PyLong_FromLong(r->len);
}

PyDoc_STRVAR(length_hint_doc,
Expand All @@ -794,8 +795,8 @@ rangeiter_reduce(_PyRangeIterObject *r, PyObject *Py_UNUSED(ignored))
if (range == NULL)
goto err;
/* return the result */
return Py_BuildValue(
"N(N)l", _PyEval_GetBuiltin(&_Py_ID(iter)), range, r->index);
return Py_BuildValue("N(N)O", _PyEval_GetBuiltin(&_Py_ID(iter)),
range, Py_None);
Comment on lines +798 to +799
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure we care, but we might -- am I right that this writes pickles that can't be read by 3.11 or before?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, absolutely no. I would never propose such change.

Internals will be different, but the unpickled iterator wil produce the same values.

err:
Py_XDECREF(start);
Py_XDECREF(stop);
Expand All @@ -814,7 +815,8 @@ rangeiter_setstate(_PyRangeIterObject *r, PyObject *state)
index = 0;
else if (index > r->len)
index = r->len; /* exhausted iterator */
r->index = index;
r->start += index * r->step;
r->len -= index;
Py_RETURN_NONE;
}

Expand Down Expand Up @@ -904,13 +906,11 @@ fast_range_iter(long start, long stop, long step, long len)
it->start = start;
it->step = step;
it->len = len;
it->index = 0;
return (PyObject *)it;
}

typedef struct {
PyObject_HEAD
PyObject *index;
PyObject *start;
PyObject *step;
PyObject *len;
Expand All @@ -919,7 +919,8 @@ typedef struct {
static PyObject *
longrangeiter_len(longrangeiterobject *r, PyObject *no_args)
{
return PyNumber_Subtract(r->len, r->index);
Py_INCREF(r->len);
return r->len;
}

static PyObject *
Expand All @@ -946,8 +947,8 @@ longrangeiter_reduce(longrangeiterobject *r, PyObject *Py_UNUSED(ignored))
}

/* return the result */
return Py_BuildValue(
"N(N)O", _PyEval_GetBuiltin(&_Py_ID(iter)), range, r->index);
return Py_BuildValue("N(N)O", _PyEval_GetBuiltin(&_Py_ID(iter)),
range, Py_None);
}

static PyObject *
Expand All @@ -970,7 +971,18 @@ longrangeiter_setstate(longrangeiterobject *r, PyObject *state)
if (cmp > 0)
state = r->len;
}
Py_XSETREF(r->index, Py_NewRef(state));
PyObject *new_len = PyNumber_Subtract(r->len, state);
if (new_len == NULL)
return NULL;
Py_SETREF(r->len, new_len);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a little scary -- if the new length is set but computing the new start fails, and the original iterator is still accessible, it will be truncated at the end rather than at the start. Let's update both fields after all errors have been checked.

Ditto in next().

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good point. Although __setstate__ is only supposed to be called for just created object when unpickle or make a deep copy, so there are no references to that iterator yet.

But there were other issue here. state could be a borrowed reference to r->len. It is used in two places: with PyNumber_Subtract() and with PyNumber_Multiply(). The second time it is used after setting the new r->state, when it can refer to a freed memory.

next() does not have such issue.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems that it never happens in normal code. You have to call __setstate__() manually with the out-of-range value.

PyObject *product = PyNumber_Multiply(state, r->step);
if (product == NULL)
return NULL;
PyObject *new_start = PyNumber_Add(r->start, product);
Py_DECREF(product);
if (new_start == NULL)
return NULL;
Py_SETREF(r->start, new_start);
Py_RETURN_NONE;
}

Expand All @@ -987,7 +999,6 @@ static PyMethodDef longrangeiter_methods[] = {
static void
longrangeiter_dealloc(longrangeiterobject *r)
{
Py_XDECREF(r->index);
Py_XDECREF(r->start);
Py_XDECREF(r->step);
Py_XDECREF(r->len);
Expand All @@ -997,29 +1008,21 @@ longrangeiter_dealloc(longrangeiterobject *r)
static PyObject *
longrangeiter_next(longrangeiterobject *r)
{
PyObject *product, *new_index, *result;
if (PyObject_RichCompareBool(r->index, r->len, Py_LT) != 1)
if (PyObject_RichCompareBool(r->len, _PyLong_GetZero(), Py_GT) != 1)
return NULL;

new_index = PyNumber_Add(r->index, _PyLong_GetOne());
if (!new_index)
PyObject *new_start = PyNumber_Add(r->start, r->step);
if (new_start == NULL) {
return NULL;

product = PyNumber_Multiply(r->index, r->step);
if (!product) {
Py_DECREF(new_index);
return NULL;
}

result = PyNumber_Add(r->start, product);
Py_DECREF(product);
if (result) {
Py_SETREF(r->index, new_index);
}
else {
Py_DECREF(new_index);
PyObject *new_len = PyNumber_Subtract(r->len, _PyLong_GetOne());
if (new_len == NULL) {
Py_DECREF(new_start);
return NULL;
}

PyObject *result = r->start;
r->start = new_start;
Py_SETREF(r->len, new_len);
return result;
}

Expand Down Expand Up @@ -1108,7 +1111,6 @@ range_iter(PyObject *seq)
it->start = Py_NewRef(r->start);
it->step = Py_NewRef(r->step);
it->len = Py_NewRef(r->length);
it->index = Py_NewRef(_PyLong_GetZero());
return (PyObject *)it;
}

Expand Down Expand Up @@ -1186,7 +1188,7 @@ range_reverse(PyObject *seq, PyObject *Py_UNUSED(ignored))
it = PyObject_New(longrangeiterobject, &PyLongRangeIter_Type);
if (it == NULL)
return NULL;
it->index = it->start = it->step = NULL;
it->start = it->step = NULL;

/* start + (len - 1) * step */
it->len = Py_NewRef(range->length);
Expand All @@ -1210,7 +1212,6 @@ range_reverse(PyObject *seq, PyObject *Py_UNUSED(ignored))
if (!it->step)
goto create_failure;

it->index = Py_NewRef(_PyLong_GetZero());
return (PyObject *)it;

create_failure:
Expand Down
7 changes: 4 additions & 3 deletions Python/bytecodes.c
Original file line number Diff line number Diff line change
Expand Up @@ -2620,14 +2620,15 @@ dummy_func(
STAT_INC(FOR_ITER, hit);
_Py_CODEUNIT next = next_instr[INLINE_CACHE_ENTRIES_FOR_ITER];
assert(_PyOpcode_Deopt[_Py_OPCODE(next)] == STORE_FAST);
if (r->index >= r->len) {
if (r->len <= 0) {
STACK_SHRINK(1);
Py_DECREF(r);
JUMPBY(INLINE_CACHE_ENTRIES_FOR_ITER + oparg + 1);
}
else {
long value = (long)(r->start +
(unsigned long)(r->index++) * r->step);
long value = r->start;
r->start = value + r->step;
r->len--;
if (_PyLong_AssignValue(&GETLOCAL(_Py_OPARG(next)), value) < 0) {
goto error;
}
Expand Down
7 changes: 4 additions & 3 deletions Python/generated_cases.c.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.