Commit fefbe1f

Merge branch 'main' into torch-compile
2 parents 02c90d7 + 42aaa33 commit fefbe1f

File tree: 57 files changed, +2094 -1037 lines

.ci/docker/requirements.txt (+5 -5)

@@ -13,7 +13,7 @@ tqdm==4.66.1
 numpy==1.24.4
 matplotlib
 librosa
-torch==2.3
+torch==2.4
 torchvision
 torchtext
 torchdata
@@ -28,9 +28,9 @@ tensorboard
 jinja2==3.1.3
 pytorch-lightning
 torchx
-torchrl==0.3.0
-tensordict==0.3.0
-ax-platform
+torchrl==0.5.0
+tensordict==0.5.0
+ax-platform>==0.4.0
 nbformat>==5.9.2
 datasets
 transformers
@@ -68,4 +68,4 @@ pygame==2.1.2
 pycocotools
 semilearn==0.3.2
 torchao==0.0.3
-segment_anything==1.0
+segment_anything==1.0
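The version bumps above are exact `==` pins, which can be sanity-checked against an installed environment. A minimal sketch, using only the standard library; the sample text is copied from the diff above, and the parser deliberately skips non-standard specifiers such as the `>==` lines (hypothetical helper names, not part of the repo):

```python
# Parse "name==version" pins out of a requirements.txt block and compare
# them against the installed environment. Lines with no exact "==" pin
# (including malformed ">==" specifiers) are skipped.
from importlib import metadata

sample = """\
torch==2.4
torchrl==0.5.0
tensordict==0.5.0
ax-platform>==0.4.0
matplotlib
segment_anything==1.0
"""


def parse_pins(text):
    """Return {package: version} for exact '==' pins; skip unpinned lines."""
    pins = {}
    for line in text.splitlines():
        line = line.strip()
        if not line or line.startswith("#") or "==" not in line:
            continue
        name, _, version = line.partition("==")
        # Skip compound specifiers like "ax-platform>==0.4.0".
        if name and not name.endswith((">", "<", "!", "~", "=")):
            pins[name] = version
    return pins


def check_installed(pins):
    """Report pins whose installed version differs (or which are absent)."""
    mismatches = {}
    for name, version in pins.items():
        try:
            installed = metadata.version(name)
        except metadata.PackageNotFoundError:
            installed = None
        if installed != version:
            mismatches[name] = installed
    return mismatches


pins = parse_pins(sample)
mismatches = check_installed(pins)
```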

.jenkins/build.sh (+2 -3)

@@ -21,9 +21,9 @@ sudo apt-get install -y pandoc

 #Install PyTorch Nightly for test.
 # Nightly - pip install --pre torch torchvision torchaudio -f https://download.pytorch.org/whl/nightly/cu102/torch_nightly.html
-# Install 2.2 for testing - uncomment to install nightly binaries (update the version as needed).
+# Install 2.4 to merge all 2.4 PRs - uncomment to install nightly binaries (update the version as needed).
 # pip uninstall -y torch torchvision torchaudio torchtext torchdata
-# pip3 install torch==2.3.0 torchvision torchaudio --no-cache-dir --index-url https://download.pytorch.org/whl/test/cu121
+# pip3 install torch==2.4.0 torchvision torchaudio --no-cache-dir --index-url https://download.pytorch.org/whl/test/cu124

 # Install two language tokenizers for Translation with TorchText tutorial
 python -m spacy download en_core_web_sm
@@ -36,7 +36,6 @@ awsv2 configure set default.s3.multipart_threshold 5120MB
 if [[ "${JOB_TYPE}" == "worker" ]]; then
   # Step 1: Remove runnable code from tutorials that are not supposed to be run
   python $DIR/remove_runnable_code.py beginner_source/aws_distributed_training_tutorial.py beginner_source/aws_distributed_training_tutorial.py || true
-  # python $DIR/remove_runnable_code.py advanced_source/ddp_pipeline_tutorial.py advanced_source/ddp_pipeline_tutorial.py || true
   # Temp remove for mnist download issue. (Re-enabled for 1.8.1)
   # python $DIR/remove_runnable_code.py beginner_source/fgsm_tutorial.py beginner_source/fgsm_tutorial.py || true
   # python $DIR/remove_runnable_code.py intermediate_source/spatial_transformer_tutorial.py intermediate_source/spatial_transformer_tutorial.py || true

.jenkins/validate_tutorials_built.py (-4)

@@ -29,8 +29,6 @@
     "intermediate_source/fx_conv_bn_fuser",
     "intermediate_source/_torch_export_nightly_tutorial", # does not work on release
     "advanced_source/super_resolution_with_onnxruntime",
-    "advanced_source/python_custom_ops", # https://github.com/pytorch/pytorch/issues/127443
-    "advanced_source/ddp_pipeline", # requires 4 gpus
     "advanced_source/usb_semisup_learn", # fails with CUDA OOM error, should try on a different worker
     "prototype_source/fx_graph_mode_ptq_dynamic",
     "prototype_source/vmap_recipe",
@@ -55,8 +53,6 @@
     "intermediate_source/flask_rest_api_tutorial",
     "intermediate_source/text_to_speech_with_torchaudio",
     "intermediate_source/tensorboard_profiler_tutorial", # reenable after 2.0 release.
-    "intermediate_source/inductor_debug_cpu", # reenable after 2942
-    "beginner_source/onnx/onnx_registry_tutorial", # reenable after 2941 is fixed.
     "intermediate_source/torch_export_tutorial" # reenable after 2940 is fixed.
 ]


_static/css/custom.css (+20)

@@ -71,3 +71,23 @@
 .sd-card:hover:after {
   transform: scaleX(1);
 }
+
+.card-prerequisites:hover {
+  transition: none;
+  border: none;
+}
+
+.card-prerequisites:hover:after {
+  transition: none;
+  transform: none;
+}
+
+.card-prerequisites:after {
+  display: block;
+  content: '';
+  border-bottom: none;
+  background-color: #fff;
+  transform: none;
+  transition: none;
+  transform-origin: none;
+}
_static/img/pinmem/pinmem.png (72 KB, binary file)

advanced_source/coding_ddpg.py (+18 -16)

@@ -182,7 +182,7 @@
 # Later, we will see how the target parameters should be updated in TorchRL.
 #

-from tensordict.nn import TensorDictModule
+from tensordict.nn import TensorDictModule, TensorDictSequential


 def _init(
@@ -290,12 +290,11 @@ def _loss_actor(
 ) -> torch.Tensor:
     td_copy = tensordict.select(*self.actor_in_keys)
     # Get an action from the actor network: since we made it functional, we need to pass the params
-    td_copy = self.actor_network(td_copy, params=self.actor_network_params)
+    with self.actor_network_params.to_module(self.actor_network):
+        td_copy = self.actor_network(td_copy)
     # get the value associated with that action
-    td_copy = self.value_network(
-        td_copy,
-        params=self.value_network_params.detach(),
-    )
+    with self.value_network_params.detach().to_module(self.value_network):
+        td_copy = self.value_network(td_copy)
     return -td_copy.get("state_action_value")


@@ -317,7 +316,8 @@ def _loss_value(
     td_copy = tensordict.clone()

     # V(s, a)
-    self.value_network(td_copy, params=self.value_network_params)
+    with self.value_network_params.to_module(self.value_network):
+        self.value_network(td_copy)
     pred_val = td_copy.get("state_action_value").squeeze(-1)

     # we manually reconstruct the parameters of the actor-critic, where the first
@@ -332,9 +332,8 @@ def _loss_value(
         batch_size=self.target_actor_network_params.batch_size,
         device=self.target_actor_network_params.device,
     )
-    target_value = self.value_estimator.value_estimate(
-        tensordict, target_params=target_params
-    ).squeeze(-1)
+    with target_params.to_module(self.actor_critic):
+        target_value = self.value_estimator.value_estimate(tensordict).squeeze(-1)

     # Computes the value loss: L2, L1 or smooth L1 depending on `self.loss_function`
     loss_value = distance_loss(pred_val, target_value, loss_function=self.loss_function)
@@ -717,7 +716,7 @@ def get_env_stats():
     ActorCriticWrapper,
     DdpgMlpActor,
     DdpgMlpQNet,
-    OrnsteinUhlenbeckProcessWrapper,
+    OrnsteinUhlenbeckProcessModule,
     ProbabilisticActor,
     TanhDelta,
     ValueOperator,
@@ -776,15 +775,18 @@ def make_ddpg_actor(
 # Exploration
 # ~~~~~~~~~~~
 #
-# The policy is wrapped in a :class:`~torchrl.modules.OrnsteinUhlenbeckProcessWrapper`
+# The policy is passed into a :class:`~torchrl.modules.OrnsteinUhlenbeckProcessModule`
 # exploration module, as suggested in the original paper.
 # Let's define the number of frames before OU noise reaches its minimum value
 annealing_frames = 1_000_000

-actor_model_explore = OrnsteinUhlenbeckProcessWrapper(
+actor_model_explore = TensorDictSequential(
     actor,
-    annealing_num_steps=annealing_frames,
-).to(device)
+    OrnsteinUhlenbeckProcessModule(
+        spec=actor.spec.clone(),
+        annealing_num_steps=annealing_frames,
+    ).to(device),
+)
 if device == torch.device("cpu"):
     actor_model_explore.share_memory()

@@ -1168,7 +1170,7 @@ def ceil_div(x, y):
 )

 # update the exploration strategy
-actor_model_explore.step(current_frames)
+actor_model_explore[1].step(current_frames)

 collector.shutdown()
 del collector
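The recurring change in this file replaces the old functional-call style (`module(td, params=...)`) with tensordict's `to_module` context manager, which temporarily swaps a parameter set into a module and restores the original parameters on exit. A plain-Python sketch of that swap-and-restore pattern, with made-up names (`TinyModule`, `swap_params`) rather than TorchRL/tensordict APIs:

```python
# Illustrates the parameter-swapping pattern behind TensorDict.to_module:
# inside the `with` block the module runs with the supplied parameters,
# and its original ones are restored on exit.
from contextlib import contextmanager


class TinyModule:
    def __init__(self, weight):
        self.weight = weight

    def __call__(self, x):
        return self.weight * x


@contextmanager
def swap_params(module, weight):
    old = module.weight          # stash the module's own parameter
    module.weight = weight       # swap in the "functional" parameter
    try:
        yield module
    finally:
        module.weight = old      # restore on exit, as to_module() does


module = TinyModule(weight=2.0)
with swap_params(module, weight=10.0):
    inside = module(3.0)         # uses the swapped-in weight: 10.0 * 3.0
outside = module(3.0)            # original weight restored: 2.0 * 3.0
```

This is why `_loss_actor` above can call `self.actor_network(td_copy)` with no `params=` argument: the surrounding `with ... .to_module(...)` block has already installed the right parameters.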

advanced_source/cpp_custom_ops.rst (+7 -5)

@@ -8,14 +8,16 @@ Custom C++ and CUDA Operators
 .. grid:: 2

     .. grid-item-card:: :octicon:`mortar-board;1em;` What you will learn
+        :class-card: card-prerequisites

-      * How to integrate custom operators written in C++/CUDA with PyTorch
-      * How to test custom operators using ``torch.library.opcheck``
+        * How to integrate custom operators written in C++/CUDA with PyTorch
+        * How to test custom operators using ``torch.library.opcheck``

     .. grid-item-card:: :octicon:`list-unordered;1em;` Prerequisites
+        :class-card: card-prerequisites

-      * PyTorch 2.4 or later
-      * Basic understanding of C++ and CUDA programming
+        * PyTorch 2.4 or later
+        * Basic understanding of C++ and CUDA programming

 PyTorch offers a large library of operators that work on Tensors (e.g. torch.add, torch.sum, etc).
 However, you may wish to bring a new custom operator to PyTorch. This tutorial demonstrates the
@@ -415,4 +417,4 @@ Conclusion
 In this tutorial, we went over the recommended approach to integrating Custom C++
 and CUDA operators with PyTorch. The ``TORCH_LIBRARY/torch.library`` APIs are fairly
 low-level. For more information about how to use the API, see
-`The Custom Operators Manual <https://pytorch.org/docs/main/notes/custom_operators.html>`_.
+`The Custom Operators Manual <https://pytorch.org/tutorials/advanced/custom_ops_landing_page.html#the-custom-operators-manual>`_.

advanced_source/cpp_export.rst (+1 -1)

@@ -203,7 +203,7 @@ minimal ``CMakeLists.txt`` to build it could look as simple as:

 add_executable(example-app example-app.cpp)
 target_link_libraries(example-app "${TORCH_LIBRARIES}")
-set_property(TARGET example-app PROPERTY CXX_STANDARD 14)
+set_property(TARGET example-app PROPERTY CXX_STANDARD 17)

 The last thing we need to build the example application is the LibTorch
 distribution. You can always grab the latest stable release from the `download

advanced_source/cpp_extension.rst (+5 -1)

@@ -2,6 +2,10 @@ Custom C++ and CUDA Extensions
 ==============================
 **Author**: `Peter Goldsborough <https://www.goldsborough.me/>`_

+.. warning::
+
+    This tutorial is deprecated as of PyTorch 2.4. Please see :ref:`custom-ops-landing-page`
+    for the newest up-to-date guides on extending PyTorch with Custom C++/CUDA Extensions.

 PyTorch provides a plethora of operations related to neural networks, arbitrary
 tensor algebra, data wrangling and other purposes. However, you may still find
@@ -225,7 +229,7 @@ Instead of:
 Currently open issue for nvcc bug `here
 <https://github.com/pytorch/pytorch/issues/69460>`_.
 Complete workaround code example `here
-<https://github.com/facebookresearch/pytorch3d/commit/cb170ac024a949f1f9614ffe6af1c38d972f7d48>`_.
+<https://github.com/facebookresearch/pytorch3d/commit/cb170ac024a949f1f9614ffe6af1c38d972f7d48>`_.

 Forward Pass
 ************

advanced_source/custom_ops_landing_page.rst (+7 -6)

@@ -1,7 +1,7 @@
 .. _custom-ops-landing-page:

-PyTorch Custom Operators Landing Page
-=====================================
+PyTorch Custom Operators
+===========================

 PyTorch offers a large library of operators that work on Tensors (e.g. ``torch.add``,
 ``torch.sum``, etc). However, you may wish to bring a new custom operation to PyTorch
@@ -10,26 +10,27 @@ In order to do so, you must register the custom operation with PyTorch via the P
 `torch.library docs <https://pytorch.org/docs/stable/library.html>`_ or C++ ``TORCH_LIBRARY``
 APIs.

-TL;DR
------
+

 Authoring a custom operator from Python
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

 Please see :ref:`python-custom-ops-tutorial`.

 You may wish to author a custom operator from Python (as opposed to C++) if:
+
 - you have a Python function you want PyTorch to treat as an opaque callable, especially with
-  respect to ``torch.compile`` and ``torch.export``.
+  respect to ``torch.compile`` and ``torch.export``.
 - you have some Python bindings to C++/CUDA kernels and want those to compose with PyTorch
-  subsystems (like ``torch.compile`` or ``torch.autograd``)
+  subsystems (like ``torch.compile`` or ``torch.autograd``)

 Integrating custom C++ and/or CUDA code with PyTorch
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

 Please see :ref:`cpp-custom-ops-tutorial`.

 You may wish to author a custom operator from C++ (as opposed to Python) if:
+
 - you have custom C++ and/or CUDA code.
 - you plan to use this code with ``AOTInductor`` to do Python-less inference.
