Skip to content

Fixed bug where division led to nan or inf values #33

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,6 @@ pre-commit:

## Create the changelog
##
## Generate the changelog
##
changelog:
docker run -it --rm \
-v "$(pwd)":/usr/local/src/python-cmethods \
Expand Down
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ The documentation is available at: [https://python-cmethods.readthedocs.io/en/st

## 1. About

These programs and data structures are designed to help minimize discrepancies between modeled and observed climate data. Data from past periods are used to adjust variables from current and future time series so that their distributional properties approximate possible actual values.
These programs and data structures are developed with the aim of reducing discrepancies between modeled and observed climate data. Historical data is utilized to calibrate variables from current and future time series to achieve distributional properties that closely resemble the possible actual values.

<figure>
<img
Expand All @@ -51,7 +51,7 @@ These programs and data structures are designed to help minimize discrepancies b
<figcaption>Figure 1: Schematic representation of a bias adjustment procedure</figcaption>
</figure>

In this way, for example, modeled data, which on average represent values that are too cold, can be adjusted by applying an adjustment procedure. The following figure shows the observed, the modeled, and the adjusted values. It is directly visible that the delta adjusted time series ($T^{\*DM}_{sim,p}$) are much more similar to the observed data ($T_{obs,p}$) than the raw modeled data ($T_{sim,p}$).
For instance, modeled data typically indicate values that are colder than the actual values. To address this issue, an adjustment procedure is employed. The figure below illustrates the observed, modeled, and adjusted values, revealing that the delta adjusted time series ($T^{\*DM}_{sim,p}$) are significantly more similar to the observed data ($T_{obs,p}$) than the raw modeled data ($T_{sim,p}$).

<figure>
<img
Expand Down Expand Up @@ -136,7 +136,7 @@ qdm_result = cm.adjust_3d( # 3d = 2 spatial and 1 time dimension
Notes:

- When using the `adjust_3d` method you have to specify the method by name.
- For the multiplicative linear scaling and the delta method as well as the variance scaling method a maximum scaling factor of 10 is defined. This can be changed by the parameter `max_scaling_factor`.
- For the multiplicative techniques a maximum scaling factor of 10 is defined. This can be changed by the parameter `max_scaling_factor`.

## Examples (see repository on [GitHub](https://github.com/btschwertfeger/python-cmethods))

Expand Down
53 changes: 41 additions & 12 deletions cmethods/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -462,7 +462,7 @@ def linear_scaling(
return np.array(simp) + (np.nanmean(obs) - np.nanmean(simh)) # Eq. 1
if kind in cls.MULTIPLICATIVE:
adj_scaling_factor = cls.get_adjusted_scaling_factor(
np.nanmean(obs) / np.nanmean(simh),
cls.ensure_devidable(np.nanmean(obs), np.nanmean(simh)),
kwargs.get("max_scaling_factor", cls.MAX_SCALING_FACTOR),
)
return np.array(simp) * adj_scaling_factor # Eq. 2
Expand Down Expand Up @@ -585,7 +585,7 @@ def variance_scaling(
VS_1_simp = LS_simp - np.nanmean(LS_simp) # Eq. 4

adj_scaling_factor = cls.get_adjusted_scaling_factor(
np.std(np.array(obs)) / np.std(VS_1_simh),
cls.ensure_devidable(np.std(np.array(obs)), np.std(VS_1_simh)),
kwargs.get("max_scaling_factor", cls.MAX_SCALING_FACTOR),
)

Expand Down Expand Up @@ -700,7 +700,7 @@ def delta_method(
return np.array(obs) + (np.nanmean(simp) - np.nanmean(simh)) # Eq. 1
if kind in cls.MULTIPLICATIVE:
adj_scaling_factor = cls.get_adjusted_scaling_factor(
np.nanmean(simp) / np.nanmean(simh),
cls.ensure_devidable(np.nanmean(simp), np.nanmean(simh)),
kwargs.get("max_scaling_factor", cls.MAX_SCALING_FACTOR),
)
return np.array(obs) * adj_scaling_factor # Eq. 2
Expand Down Expand Up @@ -864,14 +864,14 @@ def quantile_mapping(

elif kind in cls.MULTIPLICATIVE:
epsilon = np.interp( # Eq. 2
(m_simh_mean * m_simp) / m_simp_mean,
cls.ensure_devidable((m_simh_mean * m_simp), m_simp_mean),
xbins,
cdf_simh,
left=kwargs.get("val_min", 0.0),
right=kwargs.get("val_max", None),
)
X = np.interp(epsilon, cdf_obs, xbins) * (
m_simp_mean / m_simh_mean
cls.ensure_devidable(m_simp_mean, m_simh_mean)
) # Eq. 2
for i, idx in enumerate(idxs):
res[idx] = X[i]
Expand Down Expand Up @@ -1006,7 +1006,7 @@ def quantile_delta_mapping(
.. math::

\Delta(i) & = \frac{ F^{-1}_{sim,p}\left[\varepsilon(i)\right] }{ F^{-1}_{sim,h}\left[\varepsilon(i)\right] } \\[1pt]
& = \frac{ X_{sim,p}(i) }{ F^{-1}_{sim,h}\left\{F^{}_{sim,p}\left[X_{sim,p}(i)\right]\right\} }
& = \frac{ X_{sim,p}(i) }{ F^{-1}_{sim,h}\left\{F_{sim,p}\left[X_{sim,p}(i)\right]\right\} }


**(2.4)** The relative change between the modeled data of the control and scenario period is than
Expand Down Expand Up @@ -1089,17 +1089,46 @@ def quantile_delta_mapping(
epsilon = np.interp(simp, xbins, cdf_simp) # Eq. 1.1
QDM1 = cls.get_inverse_of_cdf(cdf_obs, epsilon, xbins) # Eq. 1.2

with np.errstate(divide="ignore", invalid="ignore"):
delta = simp / cls.get_inverse_of_cdf(
cdf_simh, epsilon, xbins
) # Eq. 2.3
delta[np.isnan(delta)] = 0

delta = cls.ensure_devidable(
simp, cls.get_inverse_of_cdf(cdf_simh, epsilon, xbins)
) # Eq. 2.3
return QDM1 * delta # Eq. 2.4
raise NotImplementedError(
f"{kind} not available for quantile_delta_mapping. Use '+' or '*' instead."
)

@classmethod
def ensure_devidable(
    cls,
    numerator: Union[float, np.ndarray],
    denominator: Union[float, np.ndarray],
) -> Union[float, np.ndarray]:
    """
    Divide ``numerator`` by ``denominator`` while guarding against
    non-finite results:

    - Where the division yields ``inf`` (a non-zero value divided by
      zero), the result is replaced by the numerator multiplied by
      ``cls.MAX_SCALING_FACTOR`` (the sign of the numerator is kept).
    - Where it yields ``nan`` (zero divided by zero), the result is set
      to zero.

    :param numerator: Numerator of the division
    :type numerator: Union[float, np.ndarray]
    :param denominator: Denominator, which may be or contain zero
    :type denominator: Union[float, np.ndarray]
    :return: The guarded (element-wise) division
    :rtype: Union[float, np.ndarray]
    """
    with np.errstate(divide="ignore", invalid="ignore"):
        result = numerator / denominator

    # Dispatch on the type of the *result*, not the numerator: a scalar
    # numerator combined with an array denominator still produces an
    # array result and must take the array branch.
    if isinstance(result, np.ndarray):
        # broadcast_to handles scalar and array numerators alike
        # (read-only view, which is fine since we only index into it).
        scaled = np.broadcast_to(
            np.asarray(numerator) * cls.MAX_SCALING_FACTOR, result.shape
        )
        mask_inf = np.isinf(result)
        result[mask_inf] = scaled[mask_inf]

        result[np.isnan(result)] = 0
    elif np.isinf(result):
        result = numerator * cls.MAX_SCALING_FACTOR
    elif np.isnan(result):
        result = 0

    return result

@staticmethod
def get_pdf(x: Union[list, np.array], xbins: Union[list, np.array]) -> np.array:
r"""
Expand Down
10 changes: 10 additions & 0 deletions tests/test_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,8 @@ def test_quantile_delta_mapping(self) -> None:
)

assert isinstance(qdm_result, (np.ndarray, np.generic))
if np.isnan(qdm_result).any():
raise ValueError(qdm_result)
assert mean_squared_error(
qdm_result, self.data[kind]["obsp"][:, 0, 0], squared=False
) < mean_squared_error(
Expand Down Expand Up @@ -397,6 +399,14 @@ def test_get_adjusted_scaling_factor(self) -> None:
assert cm.get_adjusted_scaling_factor(-10, -11) == -10
assert cm.get_adjusted_scaling_factor(-11, -10) == -10

def test_ensure_devidable(self) -> None:
    """Zero-division guard: inf -> numerator * max scaling factor, nan -> 0."""
    numerator = np.array((1, 2, 3, 4, 5, 0))
    denominator = np.array((0, 1, 0, 2, 3, 0))
    # 1/0 and 3/0 become 10 and 30 (scaled numerator); 0/0 becomes 0.
    expected = np.array((10, 2, 30, 2, 5 / 3, 0))
    result = cm.ensure_devidable(numerator, denominator)
    assert np.array_equal(result, expected)


if __name__ == "__main__":
unittest.main()