
Commit ed04105

Differentiate between None and empty shape
1 parent 909ad93 commit ed04105

File tree

7 files changed: +126 -114 lines changed

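For context, the distinction being introduced mirrors NumPy's own behaviour: `size=None` lets the draw shape follow the broadcast parameter shape, while an explicit (even empty) `size` is enforced as the output shape (see the NOTE removed from `explicit_expand_dims` below and https://github.com/pymc-devs/pytensor/issues/568). A minimal NumPy-only illustration of the difference:

```python
import numpy as np

rng = np.random.default_rng(0)
p = np.full((3, 2), 0.5)

# size=None: the draw shape is inferred from the broadcast parameter shape.
print(rng.binomial(1, p, size=None).shape)  # (3, 2)

# size=(): an explicit empty shape requests a scalar draw, which cannot hold
# the (3, 2) parameter shape, so NumPy raises instead of inferring.
try:
    rng.binomial(1, p, size=())
except ValueError as err:
    print("ValueError:", err)
```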

pytensor/link/numba/dispatch/random.py

Lines changed: 20 additions & 12 deletions
@@ -18,6 +18,7 @@
     get_name_for_object,
     unique_name_generator,
 )
+from pytensor.tensor import NoneConst
 from pytensor.tensor.basic import get_vector_length
 from pytensor.tensor.random.type import RandomStateType
 
@@ -98,8 +99,7 @@ def make_numba_random_fn(node, np_random_func):
     if not isinstance(node.inputs[0].type, RandomStateType):
         raise TypeError("Numba does not support NumPy `Generator`s")
 
-    tuple_size = int(get_vector_length(node.inputs[1]))
-    size_dims = tuple_size - max(i.ndim for i in node.inputs[3:])
+    size = node.inputs[1]
 
     # Make a broadcast-capable version of the Numba supported scalar sampling
     # function
@@ -115,8 +115,6 @@ def make_numba_random_fn(node, np_random_func):
         "np_random_func",
         "numba_vectorize",
         "to_fixed_tuple",
-        "tuple_size",
-        "size_dims",
         "rng",
         "size",
         "dtype",
@@ -152,7 +150,10 @@ def {bcast_fn_name}({bcast_fn_input_names}):
         "out_dtype": out_dtype,
     }
 
-    if tuple_size > 0:
+    if not NoneConst.equals(size):
+        tuple_size = int(get_vector_length(node.inputs[1]))
+        size_dims = tuple_size - max(i.ndim for i in node.inputs[3:])
+
         random_fn_body = dedent(
             f"""
             size = to_fixed_tuple(size, tuple_size)
@@ -302,12 +303,15 @@ def body_fn(a):
 @numba_funcify.register(ptr.CategoricalRV)
 def numba_funcify_CategoricalRV(op, node, **kwargs):
     out_dtype = node.outputs[1].type.numpy_dtype
-    size_len = int(get_vector_length(node.inputs[1]))
+    size = node.inputs[1]
+    none_size = NoneConst.equals(size)
+    if not none_size:
+        size_len = int(get_vector_length(size))
     p_ndim = node.inputs[-1].ndim
 
     @numba_basic.numba_njit
     def categorical_rv(rng, size, dtype, p):
-        if not size_len:
+        if none_size:
             size_tpl = p.shape[:-1]
         else:
             size_tpl = numba_ndarray.to_fixed_tuple(size, size_len)
@@ -333,22 +337,25 @@ def numba_funcify_DirichletRV(op, node, **kwargs):
     out_dtype = node.outputs[1].type.numpy_dtype
     alphas_ndim = node.inputs[3].type.ndim
     neg_ind_shape_len = -alphas_ndim + 1
-    size_len = int(get_vector_length(node.inputs[1]))
+    size = node.inputs[1]
+    none_size = NoneConst.equals(size)
+    if not none_size:
+        size_len = int(get_vector_length(size))
 
     if alphas_ndim > 1:
 
         @numba_basic.numba_njit
         def dirichlet_rv(rng, size, dtype, alphas):
-            if size_len > 0:
+            if none_size:
+                samples_shape = alphas.shape
+            else:
                 size_tpl = numba_ndarray.to_fixed_tuple(size, size_len)
                 if (
                     0 < alphas.ndim - 1 <= len(size_tpl)
                     and size_tpl[neg_ind_shape_len:] != alphas.shape[:-1]
                 ):
                     raise ValueError("Parameters shape and size do not match.")
                 samples_shape = size_tpl + alphas.shape[-1:]
-            else:
-                samples_shape = alphas.shape
 
             res = np.empty(samples_shape, dtype=out_dtype)
             alphas_bcast = np.broadcast_to(alphas, samples_shape)
@@ -362,7 +369,8 @@ def dirichlet_rv(rng, size, dtype, alphas):
 
         @numba_basic.numba_njit
         def dirichlet_rv(rng, size, dtype, alphas):
-            size = numba_ndarray.to_fixed_tuple(size, size_len)
+            if size is not None:
+                size = numba_ndarray.to_fixed_tuple(size, size_len)
             return (rng, np.random.dirichlet(alphas, size))
 
     return dirichlet_rv
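The Numba dispatchers above now branch on whether the symbolic `size` input is the `NoneConst` constant rather than on a zero-length size vector. A small sketch of that check, using only the helpers imported in the diff (`NoneConst`, `get_vector_length`); the printed values are what one would expect, not output copied from a run:

```python
import pytensor.tensor as pt
from pytensor.tensor import NoneConst

empty_size = pt.constant([], dtype="int64")   # the old encoding of "no size"

print(NoneConst.equals(empty_size))      # False: an empty int64 vector is not the None constant
print(NoneConst.equals(NoneConst))       # True
print(pt.get_vector_length(empty_size))  # 0: the ambiguity the old `size_len` checks relied on
```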

pytensor/tensor/random/op.py

Lines changed: 8 additions & 24 deletions
@@ -15,7 +15,6 @@
     as_tensor_variable,
     concatenate,
     constant,
-    get_underlying_scalar_constant_value,
     get_vector_length,
     infer_static_shape,
 )
@@ -133,7 +132,7 @@ def __str__(self):
 
     def _infer_shape(
         self,
-        size: TensorVariable,
+        size: Union[TensorVariable, NoneConst],
         dist_params: Sequence[TensorVariable],
         param_shapes: Optional[Sequence[tuple[Variable, ...]]] = None,
     ) -> Union[TensorVariable, tuple[ScalarVariable, ...]]:
@@ -162,9 +161,9 @@ def _infer_shape(
             self._supp_shape_from_params(dist_params, param_shapes=param_shapes)
         )
 
-        size_len = get_vector_length(size)
+        if not NoneConst.equals(size):
+            size_len = get_vector_length(size)
 
-        if size_len > 0:
             # Fail early when size is incompatible with parameters
             for i, (param, param_ndim_supp) in enumerate(
                 zip(dist_params, self.ndims_params)
@@ -174,7 +173,7 @@ def _infer_shape(
                     raise ValueError(
                         f"Size length is incompatible with batched dimensions of parameter {i} {param}:\n"
                         f"len(size) = {size_len}, len(batched dims {param}) = {param_batched_dims}. "
-                        f"Size length must be 0 or >= {param_batched_dims}"
+                        f"Size must be None or have length >= {param_batched_dims}"
                     )
 
             return tuple(size) + supp_shape
@@ -218,22 +217,12 @@ def extract_batch_shape(p, ps, n):
 
         shape = batch_shape + supp_shape
 
-        if not shape:
-            shape = constant([], dtype="int64")
-
         return shape
 
     def infer_shape(self, fgraph, node, input_shapes):
         _, size, _, *dist_params = node.inputs
         _, size_shape, _, *param_shapes = input_shapes
 
-        try:
-            size_len = get_vector_length(size)
-        except ValueError:
-            size_len = get_underlying_scalar_constant_value(size_shape[0])
-
-        size = tuple(size[n] for n in range(size_len))
-
         shape = self._infer_shape(size, dist_params, param_shapes=param_shapes)
 
         return [None, list(shape)]
@@ -313,12 +302,7 @@ def perform(self, node, inputs, outputs):
 
         out_var = node.outputs[1]
 
-        # If `size == []`, that means no size is enforced, and NumPy is trusted
-        # to draw the appropriate number of samples, NumPy uses `size=None` to
-        # represent that. Otherwise, NumPy expects a tuple.
-        if np.size(size) == 0:
-            size = None
-        else:
+        if size is not None:
             size = tuple(size)
 
         # Draw from `rng` if `self.inplace` is `True`, and from a copy of `rng`
@@ -394,21 +378,21 @@ def vectorize_random_variable(
     # Need to make parameters implicit broadcasting explicit
     original_dist_params = node.inputs[3:]
     old_size = node.inputs[1]
-    len_old_size = get_vector_length(old_size)
 
     original_expanded_dist_params = explicit_expand_dims(
-        original_dist_params, op.ndims_params, len_old_size
+        original_dist_params, op.ndims_params, old_size
     )
     # We call vectorize_graph to automatically handle any new explicit expand_dims
     dist_params = vectorize_graph(
         original_expanded_dist_params, dict(zip(original_dist_params, dist_params))
     )
 
-    if len_old_size and equal_computations([old_size], [size]):
+    if (not NoneConst.equals(size)) and equal_computations([old_size], [size]):
         # If the original RV had a size variable and a new one has not been provided,
         # we need to define a new size as the concatenation of the original size dimensions
         # and the novel ones implied by new broadcasted batched parameters dimensions.
         # We use the first broadcasted batch dimension for reference.
+        len_old_size = get_vector_length(old_size)
        bcasted_param = explicit_expand_dims(dist_params, op.ndims_params)[0]
         new_param_ndim = (bcasted_param.type.ndim - op.ndims_params[0]) - len_old_size
         if new_param_ndim >= 0:
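With `size` carried through as `NoneConst`, `perform` above hands `size=None` directly to NumPy, so the draw shape falls back to the broadcast parameter shape. A hedged usage sketch of the two cases (assumes this commit's behaviour; the shapes shown are the expected ones, not captured output):

```python
import numpy as np
from pytensor.tensor.random.basic import normal

# No size: the output shape comes from the (3, 2) `loc` parameter.
x = normal(np.zeros((3, 2)), 1.0, size=None)
print(x.eval().shape)  # (3, 2)

# Explicit size: extra leading batch dimensions are drawn on top of the parameter shape.
y = normal(np.zeros((3, 2)), 1.0, size=(4, 3, 2))
print(y.eval().shape)  # (4, 3, 2)
```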

pytensor/tensor/random/rewriting/basic.py

Lines changed: 11 additions & 14 deletions
@@ -7,7 +7,7 @@
 from pytensor.graph.rewriting.basic import copy_stack_trace, in2out, node_rewriter
 from pytensor.scalar import integer_types
 from pytensor.tensor import NoneConst
-from pytensor.tensor.basic import constant, get_vector_length
+from pytensor.tensor.basic import constant
 from pytensor.tensor.elemwise import DimShuffle
 from pytensor.tensor.extra_ops import broadcast_to
 from pytensor.tensor.random.op import RandomVariable
@@ -85,7 +85,7 @@ def local_rv_size_lift(fgraph, node):
 
     dist_params = broadcast_params(dist_params, node.op.ndims_params)
 
-    if get_vector_length(size) > 0:
+    if not NoneConst.equals(size):
         dist_params = [
             broadcast_to(
                 p,
@@ -156,20 +156,17 @@ def local_dimshuffle_rv_lift(fgraph, node):
     if is_rv_used_in_graph(base_rv, node, fgraph):
         return False
 
-    batched_dims = rv.ndim - rv_op.ndim_supp
+    batched_dims = rv.type.ndim - rv_op.ndim_supp
     batched_dims_ds_order = tuple(o for o in ds_op.new_order if o not in supp_dims)
 
-    # Make size explicit
-    missing_size_dims = batched_dims - get_vector_length(size)
-    if missing_size_dims > 0:
-        full_size = tuple(broadcast_params(dist_params, rv_op.ndims_params)[0].shape)
-        size = full_size[:missing_size_dims] + tuple(size)
-
-    # Update the size to reflect the DimShuffled dimensions
-    new_size = [
-        constant(1, dtype="int64") if o == "x" else size[o]
-        for o in batched_dims_ds_order
-    ]
+    if NoneConst.equals(size):
+        new_size = NoneConst
+    else:
+        # Update the size to reflect the DimShuffled dimensions
+        new_size = [
+            constant(1, dtype="int64") if o == "x" else size[o]
+            for o in batched_dims_ds_order
+        ]
 
     # Updates the params to reflect the Dimshuffled dimensions
     new_dist_params = []
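`local_dimshuffle_rv_lift` now keeps `size` as `NoneConst` when no size was given, instead of first materializing an explicit size from the broadcast parameter shapes. A sketch of the kind of graph the rewrite targets, a DimShuffle sitting on top of an RV built with `size=None`; the rewrite itself only fires during PyTensor's rewriting passes (e.g. inside `pytensor.function`):

```python
import numpy as np
from pytensor.tensor.random.basic import normal

rv = normal(np.zeros((3, 2)), 1.0, size=None)
transposed = rv.T               # DimShuffle on top of the RandomVariable
print(transposed.eval().shape)  # (2, 3)
```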

pytensor/tensor/random/utils.py

Lines changed: 12 additions & 13 deletions
@@ -7,10 +7,10 @@
 import numpy as np
 
 from pytensor.compile.sharedvalue import shared
-from pytensor.graph.basic import Constant, Variable
+from pytensor.graph.basic import Variable
 from pytensor.scalar import ScalarVariable
-from pytensor.tensor import get_vector_length
-from pytensor.tensor.basic import as_tensor_variable, cast, constant
+from pytensor.tensor import NoneConst, get_vector_length
+from pytensor.tensor.basic import as_tensor_variable, cast
 from pytensor.tensor.extra_ops import broadcast_to
 from pytensor.tensor.math import maximum
 from pytensor.tensor.shape import shape_padleft, specify_shape
@@ -124,20 +124,18 @@ def broadcast_params(params, ndims_params):
 def explicit_expand_dims(
     params: Sequence[TensorVariable],
     ndim_params: tuple[int],
-    size_length: int = 0,
+    size: Union[TensorVariable, NoneConst] = NoneConst,
 ) -> list[TensorVariable]:
     """Introduce explicit expand_dims in RV parameters that are implicitly broadcasted together and/or by size."""
 
     batch_dims = [
         param.type.ndim - ndim_param for param, ndim_param in zip(params, ndim_params)
     ]
 
-    if size_length:
-        # NOTE: PyTensor is currently treating zero-length size as size=None, which is not what Numpy does
-        # See: https://github.com/pymc-devs/pytensor/issues/568
-        max_batch_dims = size_length
-    else:
+    if NoneConst.equals(size):
         max_batch_dims = max(batch_dims)
+    else:
+        max_batch_dims = get_vector_length(size)
 
     new_params = []
     for new_param, batch_dim in zip(params, batch_dims):
@@ -153,9 +151,10 @@ def normalize_size_param(
     size: Optional[Union[int, np.ndarray, Variable, Sequence]],
 ) -> Variable:
     """Create an PyTensor value for a ``RandomVariable`` ``size`` parameter."""
-    if size is None:
-        size = constant([], dtype="int64")
-    elif isinstance(size, int):
+    if size is None or NoneConst.equals(size):
+        return NoneConst
+
+    if isinstance(size, int):
         size = as_tensor_variable([size], ndim=1)
     elif not isinstance(size, (np.ndarray, Variable, Sequence)):
         raise TypeError(
@@ -164,7 +163,7 @@ def normalize_size_param(
     else:
         size = cast(as_tensor_variable(size, ndim=1, dtype="int64"), "int64")
 
-    if not isinstance(size, Constant):
+    if size.type.shape == (None,):
         # This should help ensure that the length of non-constant `size`s
         # will be available after certain types of cloning (e.g. the kind
         # `Scan` performs)
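Taken together, `normalize_size_param` now returns the `NoneConst` constant for `size=None` rather than an empty int64 vector. A small sketch of the expected round trips (names taken from the diff; the printed values are an expectation, not captured output):

```python
from pytensor.tensor import NoneConst
from pytensor.tensor.random.utils import normalize_size_param

print(NoneConst.equals(normalize_size_param(None)))  # True: None is preserved symbolically
print(normalize_size_param(5).eval())                # [5]
print(normalize_size_param((2, 3)).eval())           # [2 3]
```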
