Closed
Description
Description of your problem
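While running the tests locally, a bug surfaced through a validity check that was recently added to Theano-PyMC (see #4214 (comment)). Assigning the test value of the observed variable fails because a `TensorType(bool, vector)` is given an `int64` array: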
________________________________________________________________ TestSamplePPC.test_model_shared_variable _________________________________________________________________
self = <pymc3.tests.test_sampling.TestSamplePPC object at 0x000001EF2100D748>
def test_model_shared_variable(self):
x = np.random.randn(100)
y = x > 0
x_shared = theano.shared(x)
y_shared = theano.shared(y)
with pm.Model() as model:
coeff = pm.Normal("x", mu=0, sd=1)
logistic = pm.Deterministic("p", pm.math.sigmoid(coeff * x_shared))
> obs = pm.Bernoulli("obs", p=logistic, observed=y_shared)
pymc3\tests\test_sampling.py:572:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
pymc3\distributions\distribution.py:98: in __new__
return model.Var(name, dist, data, total_size, dims=dims)
pymc3\model.py:1166: in Var
model=self,
pymc3\model.py:1800: in __init__
self.tag.test_value = theano.compile.view_op(data).tag.test_value
..\theano-pymc\theano\gof\utils.py:277: in __setattr__
obj = self.attr_filter(obj)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = TensorType(bool, vector)
data = array([1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0,
1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1,...0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1,
1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1], dtype=int64)
strict = False, allow_downcast = None
def filter(self, data, strict=False, allow_downcast=None):
"""
Convert `data` to something which can be associated to a
`TensorVariable`.
This function is not meant to be called in user code. It is for
`Linker` instances to use when running a compiled graph.
"""
# Explicit error message when one accidentally uses a Variable as
# input (typical mistake, especially with shared variables).
if isinstance(data, Variable):
raise TypeError(
"Expected an array-like object, but found a Variable: "
"maybe you are trying to call a function on a (possibly "
"shared) variable instead of a numeric array?"
)
if (type(data) is np.ndarray) and (data.dtype == self.numpy_dtype):
if data.dtype.num != self.numpy_dtype.num:
data = theano._asarray(data, dtype=self.dtype)
# -- now fall through to ndim check
elif (type(data) is np.memmap) and (data.dtype == self.numpy_dtype):
# numpy.memmap is a "safe" subclass of ndarray,
# so we can use it wherever we expect a base ndarray.
# however, casting it would defeat the purpose of not
# loading the whole data into memory
pass
elif strict:
# If any of the two conditions above was not met,
# we raise a meaningful TypeError.
if not (type(data) is np.ndarray):
raise TypeError(
"%s expected a ndarray object." % self, data, type(data)
)
if data.dtype != self.numpy_dtype:
raise TypeError(
("%s expected a ndarray object with " "dtype = %s (got %s).")
% (self, self.numpy_dtype, data.dtype)
)
raise AssertionError("This point should never be reached.")
else:
if allow_downcast:
# Convert to self.dtype, regardless of the type of data
data = theano._asarray(data, dtype=self.dtype)
# TODO: consider to pad shape with ones to make it consistent
# with self.broadcastable... like vector->row type thing
else:
if isinstance(data, np.ndarray):
# Check if self.dtype can accurately represent data
# (do not try to convert the data)
up_dtype = scal.upcast(self.dtype, data.dtype)
if up_dtype == self.dtype:
# Bug in the following line when data is a
# scalar array, see
# http://projects.scipy.org/numpy/ticket/1611
# data = data.astype(self.dtype)
data = theano._asarray(data, dtype=self.dtype)
if up_dtype != self.dtype:
err_msg = (
"%s cannot store a value of dtype %s without "
"risking loss of precision. If you do not mind "
"this loss, you can: "
"1) explicitly cast your data to %s, or "
'2) set "allow_input_downcast=True" when calling '
'"function". Value: "%s"'
% (self, data.dtype, self.dtype, repr(data))
)
> raise TypeError(err_msg)
E TypeError: TensorType(bool, vector) cannot store a value of dtype int64 without risking loss of precision. If you do not mind this loss, you can: 1) explicitly cast your data to bool, or 2) set "allow_input_downcast=True" when calling "function". Value: "array([1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0,
E 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0,
E 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0,
E 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1,
E 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1], dtype=int64)"
..\theano-pymc\theano\tensor\type.py:145: TypeError
The problem does not surface in the CI pipelines because they don't test against the latest Theano-PyMC master.
Versions and main components
- PyMC3 Version: recent master
- Theano Version: recent Theano-PyMC master