
Commit 13807b4

Distinguish between size=None and size=() in RandomVariables
1 parent bb5053b commit 13807b4
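
This commit makes `RandomVariable` keep `size=None` as a genuine "no size" (the symbolic `NoneConst`) instead of encoding it as a zero-length vector, so an explicit `size=()` can keep NumPy's scalar-draw semantics (see https://github.com/pymc-devs/pytensor/issues/568). As a rough, NumPy-only illustration of the behaviour being matched (not code from this commit):

import numpy as np

rng = np.random.default_rng(0)

# size=None: the output shape is inferred by broadcasting the parameters.
rng.normal(np.zeros(3), 1.0, size=None).shape   # -> (3,)

# size=(): an explicit scalar draw; with a length-3 loc this should raise a
# ValueError rather than silently fall back to shape (3,).
# rng.normal(np.zeros(3), 1.0, size=())

# With scalar parameters, size=() should yield a 0-d result rather than a float.
np.shape(rng.normal(0.0, 1.0, size=()))         # -> ()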


6 files changed: +100 −88


pytensor/link/numba/dispatch/random.py

+32 −14

@@ -21,6 +21,7 @@
 )
 from pytensor.tensor.basic import get_vector_length
 from pytensor.tensor.random.type import RandomStateType
+from pytensor.tensor.type_other import NoneTypeT
 
 
 class RandomStateNumbaType(types.Type):
@@ -100,9 +101,13 @@ def make_numba_random_fn(node, np_random_func):
     if not isinstance(rng_param.type, RandomStateType):
         raise TypeError("Numba does not support NumPy `Generator`s")
 
-    tuple_size = int(get_vector_length(node.op.size_param(node)))
+    size_param = node.op.size_param(node)
+    size_len = (
+        None
+        if isinstance(size_param.type, NoneTypeT)
+        else int(get_vector_length(node.op.size_param(node)))
+    )
     dist_params = node.op.dist_params(node)
-    size_dims = tuple_size - max(i.ndim for i in dist_params)
 
     # Make a broadcast-capable version of the Numba supported scalar sampling
     # function
@@ -118,7 +123,7 @@ def make_numba_random_fn(node, np_random_func):
         "np_random_func",
         "numba_vectorize",
         "to_fixed_tuple",
-        "tuple_size",
+        "size_len",
         "size_dims",
         "rng",
         "size",
@@ -154,10 +159,12 @@ def {bcast_fn_name}({bcast_fn_input_names}):
         "out_dtype": out_dtype,
     }
 
-    if tuple_size > 0:
+    if size_len is not None:
+        size_dims = size_len - max(i.ndim for i in dist_params)
+
         random_fn_body = dedent(
             f"""
-        size = to_fixed_tuple(size, tuple_size)
+        size = to_fixed_tuple(size, size_len)
 
         data = np.empty(size, dtype=out_dtype)
         for i in np.ndindex(size[:size_dims]):
@@ -169,7 +176,7 @@ def {bcast_fn_name}({bcast_fn_input_names}):
         {
             "np": np,
             "to_fixed_tuple": numba_ndarray.to_fixed_tuple,
-            "tuple_size": tuple_size,
+            "size_len": size_len,
             "size_dims": size_dims,
         }
     )
@@ -305,19 +312,24 @@ def body_fn(a):
 @numba_funcify.register(ptr.CategoricalRV)
 def numba_funcify_CategoricalRV(op, node, **kwargs):
     out_dtype = node.outputs[1].type.numpy_dtype
-    size_len = int(get_vector_length(node.inputs[1]))
+    size_param = node.op.size_param(node)
+    size_len = (
+        None
+        if isinstance(size_param.type, NoneTypeT)
+        else int(get_vector_length(size_param))
+    )
     p_ndim = node.inputs[-1].ndim
 
     @numba_basic.numba_njit
     def categorical_rv(rng, size, p):
-        if not size_len:
+        if size_len is None:
             size_tpl = p.shape[:-1]
         else:
             size_tpl = numba_ndarray.to_fixed_tuple(size, size_len)
             p = np.broadcast_to(p, size_tpl + p.shape[-1:])
 
         # Workaround https://github.com/numba/numba/issues/8975
-        if not size_len and p_ndim == 1:
+        if size_len is None and p_ndim == 1:
             unif_samples = np.asarray(np.random.uniform(0, 1))
         else:
             unif_samples = np.random.uniform(0, 1, size_tpl)
@@ -336,22 +348,27 @@ def numba_funcify_DirichletRV(op, node, **kwargs):
     out_dtype = node.outputs[1].type.numpy_dtype
     alphas_ndim = node.op.dist_params(node)[0].type.ndim
     neg_ind_shape_len = -alphas_ndim + 1
-    size_len = int(get_vector_length(node.op.size_param(node)))
+    size_param = node.op.size_param(node)
+    size_len = (
+        None
+        if isinstance(size_param.type, NoneTypeT)
+        else int(get_vector_length(size_param))
+    )
 
     if alphas_ndim > 1:
 
         @numba_basic.numba_njit
         def dirichlet_rv(rng, size, alphas):
-            if size_len > 0:
+            if size_len is None:
+                samples_shape = alphas.shape
+            else:
                 size_tpl = numba_ndarray.to_fixed_tuple(size, size_len)
                 if (
                     0 < alphas.ndim - 1 <= len(size_tpl)
                     and size_tpl[neg_ind_shape_len:] != alphas.shape[:-1]
                 ):
                     raise ValueError("Parameters shape and size do not match.")
                 samples_shape = size_tpl + alphas.shape[-1:]
-            else:
-                samples_shape = alphas.shape
 
             res = np.empty(samples_shape, dtype=out_dtype)
             alphas_bcast = np.broadcast_to(alphas, samples_shape)
@@ -365,7 +382,8 @@ def dirichlet_rv(rng, size, alphas):
 
         @numba_basic.numba_njit
        def dirichlet_rv(rng, size, alphas):
-            size = numba_ndarray.to_fixed_tuple(size, size_len)
+            if size_len is not None:
+                size = numba_ndarray.to_fixed_tuple(size, size_len)
             return (rng, np.random.dirichlet(alphas, size))
 
     return dirichlet_rv
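
For reference, a NumPy-only sketch of the case split the Numba sampler above now performs for `CategoricalRV` (`categorical_draw` is a hypothetical helper, not the generated Numba code): when no size is given, the batch shape is taken from `p`; otherwise the explicit size, possibly `()`, is used as-is.

import numpy as np

def categorical_draw(rng, p, size=None):
    if size is None:
        size_tpl = p.shape[:-1]                        # batch shape comes from p
    else:
        size_tpl = tuple(size)                         # explicit size, possibly ()
        p = np.broadcast_to(p, size_tpl + p.shape[-1:])
    unif = rng.uniform(0, 1, size_tpl)
    # inverse-CDF draw: first category whose cumulative probability exceeds unif
    return np.argmax(np.cumsum(p, axis=-1) > unif[..., None], axis=-1)

rng = np.random.default_rng(0)
p = np.array([[0.2, 0.8], [0.5, 0.5]])
categorical_draw(rng, p).shape               # (2,): inferred from p
categorical_draw(rng, p, size=(3, 2)).shape  # (3, 2): explicit size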

pytensor/tensor/random/basic.py

+8 −9

@@ -874,12 +874,12 @@ def rng_fn(cls, rng, mean, cov, size):
         # multivariate normals (or any other multivariate distributions),
         # so we need to implement that here
 
-        size = tuple(size or ())
-        if size:
+        if size is None:
+            mean, cov = broadcast_params([mean, cov], [1, 2])
+        else:
+            size = tuple(size)
             mean = np.broadcast_to(mean, size + mean.shape[-1:])
             cov = np.broadcast_to(cov, size + cov.shape[-2:])
-        else:
-            mean, cov = broadcast_params([mean, cov], [1, 2])
 
         res = np.empty(mean.shape)
         for idx in np.ndindex(mean.shape[:-1]):
@@ -1760,13 +1760,12 @@ def __call__(self, n, p, size=None, **kwargs):
     @classmethod
     def rng_fn(cls, rng, n, p, size):
         if n.ndim > 0 or p.ndim > 1:
-            size = tuple(size or ())
-
-            if size:
+            if size is None:
+                n, p = broadcast_params([n, p], [0, 1])
+            else:
+                size = tuple(size)
                 n = np.broadcast_to(n, size)
                 p = np.broadcast_to(p, size + p.shape[-1:])
-            else:
-                n, p = broadcast_params([n, p], [0, 1])
 
             res = np.empty(p.shape, dtype=cls.dtype)
             for idx in np.ndindex(p.shape[:-1]):
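
Both rewritten `rng_fn` branches follow the same rule; a minimal NumPy sketch of the multivariate-normal case (hypothetical `mvnormal_batch_shapes`, not the PyTensor implementation):

import numpy as np

def mvnormal_batch_shapes(mean, cov, size=None):
    if size is None:
        # batch shape inferred by broadcasting the parameters against each other
        batch = np.broadcast_shapes(mean.shape[:-1], cov.shape[:-2])
    else:
        # explicit size wins; parameters are broadcast up to it
        batch = tuple(size)
    mean = np.broadcast_to(mean, batch + mean.shape[-1:])
    cov = np.broadcast_to(cov, batch + cov.shape[-2:])
    return mean.shape, cov.shape

mean, cov = np.zeros((5, 3)), np.eye(3)
mvnormal_batch_shapes(mean, cov)               # ((5, 3), (5, 3, 3))
mvnormal_batch_shapes(mean, cov, size=(2, 5))  # ((2, 5, 3), (2, 5, 3, 3))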

pytensor/tensor/random/op.py

+12 −30

@@ -15,7 +15,6 @@
     as_tensor_variable,
     concatenate,
     constant,
-    get_underlying_scalar_constant_value,
     get_vector_length,
     infer_static_shape,
 )
@@ -27,7 +26,7 @@
 )
 from pytensor.tensor.shape import shape_tuple
 from pytensor.tensor.type import TensorType
-from pytensor.tensor.type_other import NoneConst
+from pytensor.tensor.type_other import NoneConst, NoneTypeT
 from pytensor.tensor.utils import _parse_gufunc_signature, safe_signature
 from pytensor.tensor.variable import TensorVariable
 
@@ -198,10 +197,10 @@ def batch_ndim(self, node):
 
     def _infer_shape(
         self,
-        size: TensorVariable,
+        size: TensorVariable | Variable,
         dist_params: Sequence[TensorVariable],
         param_shapes: Sequence[tuple[Variable, ...]] | None = None,
-    ) -> TensorVariable | tuple[ScalarVariable, ...]:
+    ) -> tuple[ScalarVariable | TensorVariable, ...]:
         """Compute the output shape given the size and distribution parameters.
 
         Parameters
@@ -227,9 +226,9 @@ def _infer_shape(
             self._supp_shape_from_params(dist_params, param_shapes=param_shapes)
         )
 
-        size_len = get_vector_length(size)
+        if not isinstance(size.type, NoneTypeT):
+            size_len = get_vector_length(size)
 
-        if size_len > 0:
             # Fail early when size is incompatible with parameters
             for i, (param, param_ndim_supp) in enumerate(
                 zip(dist_params, self.ndims_params)
@@ -283,21 +282,11 @@ def extract_batch_shape(p, ps, n):
 
         shape = batch_shape + supp_shape
 
-        if not shape:
-            shape = constant([], dtype="int64")
-
         return shape
 
     def infer_shape(self, fgraph, node, input_shapes):
         _, size, *dist_params = node.inputs
-        _, size_shape, *param_shapes = input_shapes
-
-        try:
-            size_len = get_vector_length(size)
-        except ValueError:
-            size_len = get_underlying_scalar_constant_value(size_shape[0])
-
-        size = tuple(size[n] for n in range(size_len))
+        _, _, *param_shapes = input_shapes
 
         shape = self._infer_shape(size, dist_params, param_shapes=param_shapes)
 
@@ -369,8 +358,8 @@ def make_node(self, rng, size, *dist_params):
                 "The type of rng should be an instance of either RandomGeneratorType or RandomStateType"
             )
 
-        shape = self._infer_shape(size, dist_params)
-        _, static_shape = infer_static_shape(shape)
+        inferred_shape = self._infer_shape(size, dist_params)
+        _, static_shape = infer_static_shape(inferred_shape)
 
         dtype = self.dtype
         out_var = TensorType(dtype=dtype, shape=static_shape)()
@@ -397,16 +386,7 @@ def perform(self, node, inputs, outputs):
 
         rng, size, *args = inputs
 
-        # If `size == []`, that means no size is enforced, and NumPy is trusted
-        # to draw the appropriate number of samples, NumPy uses `size=None` to
-        # represent that. Otherwise, NumPy expects a tuple.
-        if np.size(size) == 0:
-            size = None
-        else:
-            size = tuple(size)
-
-        # Draw from `rng` if `self.inplace` is `True`, and from a copy of `rng`
-        # otherwise.
+        # Draw from `rng` if `self.inplace` is `True`, and from a copy of `rng` otherwise.
         if not self.inplace:
             rng = copy(rng)
 
@@ -474,7 +454,9 @@ def vectorize_random_variable(
 
     original_dist_params = op.dist_params(node)
     old_size = op.size_param(node)
-    len_old_size = get_vector_length(old_size)
+    len_old_size = (
+        None if isinstance(old_size.type, NoneTypeT) else get_vector_length(old_size)
+    )
 
     original_expanded_dist_params = explicit_expand_dims(
         original_dist_params, op.ndims_params, len_old_size
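
The net effect on shape inference can be summarised in a small sketch (hypothetical `infer_output_shape`, not `RandomVariable._infer_shape` itself): `size=None` derives the batch shape from the parameters, while any explicit size, including the empty tuple, is used verbatim.

import numpy as np

def infer_output_shape(size, param_batch_shapes, supp_shape):
    if size is None:
        # no size: batch shape comes from broadcasting the parameter batch shapes
        batch_shape = np.broadcast_shapes(*param_batch_shapes)
    else:
        # explicit size (possibly ()): it becomes the batch shape as-is
        batch_shape = tuple(size)
    return batch_shape + tuple(supp_shape)

infer_output_shape(None, [(5,), ()], (3,))    # (5, 3): inferred from the parameters
infer_output_shape((), [()], (3,))            # (3,): a single core draw
infer_output_shape((2, 5), [(5,), ()], (3,))  # (2, 5, 3)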

pytensor/tensor/random/utils.py

+17 −19

@@ -9,8 +9,8 @@
 from pytensor.compile.sharedvalue import shared
 from pytensor.graph.basic import Constant, Variable
 from pytensor.scalar import ScalarVariable
-from pytensor.tensor import get_vector_length
-from pytensor.tensor.basic import as_tensor_variable, cast, constant
+from pytensor.tensor import NoneConst, get_vector_length
+from pytensor.tensor.basic import as_tensor_variable, cast
 from pytensor.tensor.extra_ops import broadcast_arrays, broadcast_to
 from pytensor.tensor.math import maximum
 from pytensor.tensor.shape import shape_padleft, specify_shape
@@ -124,17 +124,15 @@ def broadcast_params(params, ndims_params):
 def explicit_expand_dims(
     params: Sequence[TensorVariable],
     ndim_params: Sequence[int],
-    size_length: int = 0,
+    size_length: int | None = None,
 ) -> list[TensorVariable]:
     """Introduce explicit expand_dims in RV parameters that are implicitly broadcasted together and/or by size."""
 
     batch_dims = [
         param.type.ndim - ndim_param for param, ndim_param in zip(params, ndim_params)
     ]
 
-    if size_length:
-        # NOTE: PyTensor is currently treating zero-length size as size=None, which is not what Numpy does
-        # See: https://github.com/pymc-devs/pytensor/issues/568
+    if size_length is not None:
         max_batch_dims = size_length
     else:
         max_batch_dims = max(batch_dims, default=0)
@@ -152,37 +150,37 @@ def explicit_expand_dims(
 def compute_batch_shape(params, ndims_params: Sequence[int]) -> TensorVariable:
     params = explicit_expand_dims(params, ndims_params)
     batch_params = [
-        param[(..., *((0,) for _ in range(core_ndim)))]
+        param[(..., *(0,) * core_ndim)]
         for param, core_ndim in zip(params, ndims_params)
     ]
     return broadcast_arrays(*batch_params)[0].shape
 
 
 def normalize_size_param(
-    size: int | np.ndarray | Variable | Sequence | None,
+    shape: int | np.ndarray | Variable | Sequence | None,
 ) -> Variable:
     """Create an PyTensor value for a ``RandomVariable`` ``size`` parameter."""
-    if size is None:
-        size = constant([], dtype="int64")
-    elif isinstance(size, int):
-        size = as_tensor_variable([size], ndim=1)
-    elif not isinstance(size, np.ndarray | Variable | Sequence):
+    if shape is None or NoneConst.equals(shape):
+        return NoneConst
+    elif isinstance(shape, int):
+        shape = as_tensor_variable([shape], ndim=1)
+    elif not isinstance(shape, np.ndarray | Variable | Sequence):
         raise TypeError(
             "Parameter size must be None, an integer, or a sequence with integers."
         )
     else:
-        size = cast(as_tensor_variable(size, ndim=1, dtype="int64"), "int64")
+        shape = cast(as_tensor_variable(shape, ndim=1, dtype="int64"), "int64")
 
-    if not isinstance(size, Constant):
+    if not isinstance(shape, Constant):
         # This should help ensure that the length of non-constant `size`s
         # will be available after certain types of cloning (e.g. the kind
         # `Scan` performs)
-        size = specify_shape(size, (get_vector_length(size),))
+        shape = specify_shape(shape, (get_vector_length(shape),))
 
-    assert not any(s is None for s in size.type.shape)
-    assert size.dtype in int_dtypes
+    assert not any(s is None for s in shape.type.shape)
+    assert shape.dtype in int_dtypes
 
-    return size
+    return shape
 
 
 class RandomStream:
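
A hedged usage sketch of the new `normalize_size_param` contract (assuming the function remains importable from `pytensor.tensor.random.utils`): `None` maps to the symbolic `NoneConst`, while `()` and other sequences become int64 vectors, so a zero-length size is no longer conflated with "no size".

from pytensor.tensor.random.utils import normalize_size_param

normalize_size_param(None)    # NoneConst: no size, the parameters decide the shape
normalize_size_param(())      # length-0 int64 vector: an explicit empty size
normalize_size_param((2, 3))  # length-2 int64 vector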
