# -*- coding: utf-8 -*-

"""
(Prototype) MaskedTensor Advanced Semantics
===========================================
"""

######################################################################
#
# Before working on this tutorial, please make sure to review our
# `MaskedTensor Overview tutorial <https://pytorch.org/tutorials/prototype/maskedtensor_overview.html>`__.
#
# The purpose of this tutorial is to help users understand how some of the advanced semantics work
# and how they came to be. We will focus on two particular ones:
#
# *. Differences between MaskedTensor and `NumPy's MaskedArray <https://numpy.org/doc/stable/reference/maskedarray.html>`__
# *. Reduction semantics
#
# Preparation
# -----------
#

import torch
from torch.masked import masked_tensor
import numpy as np
import warnings

# Disable prototype warnings and such
warnings.filterwarnings(action='ignore', category=UserWarning)

######################################################################
# MaskedTensor vs NumPy's MaskedArray
# -----------------------------------
#
# NumPy's ``MaskedArray`` has a few fundamental semantic differences from MaskedTensor.
#
# *. Their factory function and basic definition invert the mask (similar to ``torch.nn.MHA``); that is, MaskedTensor
#    uses ``True`` to denote "specified" and ``False`` to denote "unspecified", or "valid"/"invalid",
#    whereas NumPy does the opposite. We believe that our mask definition is not only more intuitive,
#    but also more consistent with the existing semantics in PyTorch as a whole.
# *. Intersection semantics. In NumPy, if either of the two elements being combined is masked out, the resulting
#    element will be masked out as well -- in practice, they
#    `apply the logical_or operator <https://github.com/numpy/numpy/blob/68299575d8595d904aff6f28e12d21bf6428a4ba/numpy/ma/core.py#L1016-L1024>`__.
#

data = torch.arange(5.)
mask = torch.tensor([True, True, False, True, False])
npm0 = np.ma.masked_array(data.numpy(), (~mask).numpy())
npm1 = np.ma.masked_array(data.numpy(), mask.numpy())

print("npm0:\n", npm0)
print("npm1:\n", npm1)
print("npm0 + npm1:\n", npm0 + npm1)

######################################################################
# Meanwhile, MaskedTensor does not support addition or binary operators with masks that don't match --
# to understand why, please see the :ref:`section on reductions <reduction-semantics>` below.
#

mt0 = masked_tensor(data, mask)
mt1 = masked_tensor(data, ~mask)
print("mt0:\n", mt0)
print("mt1:\n", mt1)

try:
    mt0 + mt1
except ValueError as e:
    print("mt0 + mt1 failed. Error: ", e)

######################################################################
# However, if this behavior is desired, MaskedTensor does support these semantics by giving access to the data and masks
# and conveniently converting a MaskedTensor to a Tensor with masked values filled in using :func:`to_tensor`.
# For example:
#

t0 = mt0.to_tensor(0)
t1 = mt1.to_tensor(0)
mt2 = masked_tensor(t0 + t1, mt0.get_mask() & mt1.get_mask())

print("t0:\n", t0)
print("t1:\n", t1)
print("mt2 (t0 + t1):\n", mt2)

######################################################################
# Note that the mask is ``mt0.get_mask() & mt1.get_mask()`` since :class:`MaskedTensor`'s mask is the inverse of NumPy's.
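#
# (A quick check, added as a hypothetical illustration.) By De Morgan's laws,
# intersecting MaskedTensor's "valid" masks with ``&`` mirrors NumPy's
# ``logical_or`` over its "invalid" masks:

valid = mt0.get_mask() & mt1.get_mask()
invalid = ~mt0.get_mask() | ~mt1.get_mask()  # NumPy-style combination of invalid masks
print("masks are mirror images:", torch.equal(valid, ~invalid))

######################################################################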
#
# .. _reduction-semantics:
#
# Reduction Semantics
# -------------------
#
# Recall in `MaskedTensor's Overview tutorial <https://pytorch.org/tutorials/prototype/maskedtensor_overview.html>`__
# we discussed "Implementing missing torch.nan* ops". Those are examples of reductions -- operators that remove one
# (or more) dimensions from a Tensor and then aggregate the result. In this section, we will use reduction semantics
# to motivate our strict requirements around matching masks from above.
#
# Fundamentally, a :class:`MaskedTensor` performs the same reduction operation while ignoring the masked out
# (unspecified) values. By way of example:
#

data = torch.arange(12, dtype=torch.float).reshape(3, 4)
mask = torch.randint(2, (3, 4), dtype=torch.bool)
mt = masked_tensor(data, mask)

print("data:\n", data)
print("mask:\n", mask)
print("mt:\n", mt)

######################################################################
# Now, the different reductions (all on dim=1):
#

print("torch.sum:\n", torch.sum(mt, 1))
print("torch.mean:\n", torch.mean(mt, 1))
print("torch.prod:\n", torch.prod(mt, 1))
print("torch.amin:\n", torch.amin(mt, 1))
print("torch.amax:\n", torch.amax(mt, 1))

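######################################################################
# As a sanity check (a minimal sketch, not part of the original tutorial), we can
# reproduce the data of ``torch.sum(mt, 1)`` by hand: zero out the unspecified
# elements with ``torch.where`` and sum what remains along ``dim=1``. (Rows that
# are entirely masked out come back as 0 here, whereas the MaskedTensor result
# marks them as unspecified.)
#

manual_sum = torch.where(mask, data, torch.zeros_like(data)).sum(1)
print("manual masked sum:\n", manual_sum)
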
######################################################################
# Of note, the value under a masked out element is not guaranteed to have any specific value, especially if the
# row or column is entirely masked out (the same is true for normalizations).
# For more details on masked semantics, see this `RFC <https://github.com/pytorch/rfcs/pull/27>`__.
#
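# As a quick illustration (a hypothetical example, not from the original tutorial),
# consider a row that is entirely masked out: the reduced element is itself
# unspecified, and whatever value happens to sit underneath it should not be
# relied upon.

fully_masked = masked_tensor(
    torch.tensor([[1., 2.], [3., 4.]]),
    torch.tensor([[False, False], [True, True]]),  # first row fully unspecified
)
print("sum(dim=1) with a fully masked row:\n", torch.sum(fully_masked, 1))

######################################################################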
# Now, we can revisit the question: why do we enforce the invariant that masks must match for binary operators?
# In other words, why don't we use the same semantics as ``np.ma.masked_array``? Consider the following example:
#

data0 = torch.arange(10.).reshape(2, 5)
data1 = torch.arange(10.).reshape(2, 5) + 10
mask0 = torch.tensor([[True, True, False, False, False], [False, False, False, True, True]])
mask1 = torch.tensor([[False, False, False, True, True], [True, True, False, False, False]])
npm0 = np.ma.masked_array(data0.numpy(), mask0.numpy())
npm1 = np.ma.masked_array(data1.numpy(), mask1.numpy())

print("npm0:", npm0)
print("npm1:", npm1)

######################################################################
# Now, let's try addition:
#

print("(npm0 + npm1).sum(0):\n", (npm0 + npm1).sum(0))
print("npm0.sum(0) + npm1.sum(0):\n", npm0.sum(0) + npm1.sum(0))

######################################################################
# Summing and adding should clearly commute: ``(npm0 + npm1).sum(0)`` should equal
# ``npm0.sum(0) + npm1.sum(0)``. With NumPy's semantics they do not, which can certainly be
# confusing for the user.
#
# :class:`MaskedTensor`, on the other hand, will simply not allow this operation since ``mask0 != mask1``.
# That being said, if the user wishes, there are ways around this
# (for example, filling in the MaskedTensor's undefined elements with 0 values using :func:`to_tensor`
# as shown below), but the user must now be more explicit with their intentions.
#

mt0 = masked_tensor(data0, ~mask0)
mt1 = masked_tensor(data1, ~mask1)

print("(mt0.to_tensor(0) + mt1.to_tensor(0)).sum(0):\n", (mt0.to_tensor(0) + mt1.to_tensor(0)).sum(0))
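
######################################################################
# (A hedged follow-up, added for illustration.) Once the undefined elements have
# been filled in explicitly, the two orders of operation agree again -- exactly
# the property that NumPy's semantics gave up:
#

filled0, filled1 = mt0.to_tensor(0), mt1.to_tensor(0)
print("(filled0 + filled1).sum(0):\n", (filled0 + filled1).sum(0))
print("filled0.sum(0) + filled1.sum(0):\n", filled0.sum(0) + filled1.sum(0))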

######################################################################
# Conclusion
# ----------
#
# In this tutorial, we have learned about the different design decisions behind MaskedTensor and
# NumPy's MaskedArray, as well as reduction semantics.
# In general, MaskedTensor is designed to avoid ambiguity and confusing semantics (for example, we try to ensure
# that reductions and binary operations compose predictably), which in turn can require the user
# to be more intentional with their code at times, but we believe this to be the better move.
# If you have any thoughts on this, please `let us know <https://github.com/pytorch/pytorch/issues>`__!
#