Skip to content

Fix scan_checkpoints with sequences #754

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 8 additions & 6 deletions pytensor/scan/checkpoints.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
 import pytensor.tensor.basic as ptb
 from pytensor.scan.basic import scan
 from pytensor.tensor.basic import Join
-from pytensor.tensor.math import ceil, eq
+from pytensor.tensor.math import ceil, eq, neq
 from pytensor.tensor.subtensor import set_subtensor


Expand Down Expand Up @@ -130,16 +130,18 @@ def scan_checkpoints(
 # Since padding could be an empty tensor, Join returns a view of s.
 join = Join(view=0)
 for i, s in enumerate(sequences):
-n = s.shape[0] % save_every_N
-z = ptb.zeros((n, s.shape[1:]), dtype=s.dtype)
-sequences[i] = join(0, [s, z])
+overshoots_by = s.shape[0] % save_every_N
+overshoots = neq(overshoots_by, 0)
+n = (save_every_N - overshoots_by) * overshoots
+z = ptb.zeros((n, *s.shape[1:]), dtype=s.dtype)
+sequences[i] = join(0, s, z)

 # Establish the input variables of the outer scan
 o_sequences = [
 s.reshape(
-[s.shape[0] / save_every_N, save_every_N]
+[s.shape[0] // save_every_N, save_every_N]
 + [s.shape[i] for i in range(1, s.ndim)],
-s.ndim + 1,
+ndim=s.ndim + 1,
 )
 for s in sequences
 ]
Expand Down
9 changes: 6 additions & 3 deletions tests/scan/test_checkpoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,23 +5,26 @@
 from pytensor.gradient import grad
 from pytensor.scan.basic import scan
 from pytensor.scan.checkpoints import scan_checkpoints
-from pytensor.tensor.basic import ones_like
+from pytensor.tensor.basic import arange, ones_like
 from pytensor.tensor.type import iscalar, vector


 class TestScanCheckpoint:
 def setup_method(self):
 self.k = iscalar("k")
 self.A = vector("A")
+seq = arange(self.k, dtype="float32") + 1
 result, _ = scan(
-fn=lambda prior_result, A: prior_result * A,
+fn=lambda s, prior_result, A: prior_result * A / s,
 outputs_info=ones_like(self.A),
+sequences=[seq],
 non_sequences=self.A,
 n_steps=self.k,
 )
 result_check, _ = scan_checkpoints(
-fn=lambda prior_result, A: prior_result * A,
+fn=lambda s, prior_result, A: prior_result * A / s,
 outputs_info=ones_like(self.A),
+sequences=[seq],
 non_sequences=self.A,
 n_steps=self.k,
 save_every_N=100,
Expand Down
Loading