Skip to content

Fix calculation of curve fit weights #1224

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 25 additions & 14 deletions qiskit_experiments/curve_analysis/curve_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -316,20 +316,30 @@ def _run_curve_fit(

valid_uncertainty = np.all(np.isfinite(curve_data.y_err))

model_weights = {}
if valid_uncertainty:
for model in models:
sub_yerr = curve_data.get_subset_of(model._name).y_err
if len(sub_yerr) == 0:
continue
nonzero_yerr = np.where(np.isclose(sub_yerr, 0.0), np.finfo(float).eps, sub_yerr)
raw_weights = 1 / nonzero_yerr
# Remove outlier. When all sample values are the same with sample average,
# or sampling error is zero with shot-weighted average,
# some yerr values might be very close to zero, yielding significant weights.
# With such outlier, the fit doesn't sense residual of other data points.
maximum_weight = np.percentile(raw_weights, 90)
model_weights[model._name] = np.clip(raw_weights, 0.0, maximum_weight)

# Objective function for minimize. This computes composite residuals of sub models.
def _objective(_params):
ys = []
for model in models:
sub_data = curve_data.get_subset_of(model._name)
with np.errstate(divide="ignore"):
# Ignore numpy runtime warning.
# Zero y_err point introduces infinite weight,
# but this should be managed by LMFIT.
weights = 1.0 / sub_data.y_err if valid_uncertainty else None
yi = model._residual(
params=_params,
data=sub_data.y,
weights=weights,
weights=model_weights.get(model._name, None),
x=sub_data.x,
)
ys.append(yi)
Expand All @@ -351,14 +361,15 @@ def _objective(_params):
)

try:
new = lmfit.minimize(
fcn=_objective,
params=guess_params,
method=self.options.fit_method,
scale_covar=not valid_uncertainty,
nan_policy="omit",
**fit_option.fitter_opts,
)
with np.errstate(all="ignore"):
new = lmfit.minimize(
fcn=_objective,
params=guess_params,
method=self.options.fit_method,
scale_covar=not valid_uncertainty,
nan_policy="omit",
**fit_option.fitter_opts,
)
except Exception: # pylint: disable=broad-except
continue

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
---
fixes:
- |
Fixed the calculation of the weights used for curve fitting. Previously, the weight of each
data point in the fit residual was computed as the inverse of the error bar of its y value.
This could assign a very large weight to data points whose error bar is small or zero,
causing the fit to locally overfit those points.
To avoid this edge case of small error bars, the computed weights are now clipped at the
90th percentile. This update may slightly change the outcome of the fit.
43 changes: 43 additions & 0 deletions test/curve_analysis/test_baseclass.py
Original file line number Diff line number Diff line change
Expand Up @@ -440,6 +440,49 @@ def test_end_to_end_parallel_analysis(self):
self.assertAlmostEqual(taus[0].value.nominal_value, tau1, delta=0.1)
self.assertAlmostEqual(taus[1].value.nominal_value, tau2, delta=0.1)

def test_end_to_end_zero_yerr(self):
    """Integration test covering the edge case of zero y error.

    Zero error bars inflate the fit weights used for the residual
    computation. If such a weight dominates, the fitter locally
    overfits the data points whose y error is small or exactly zero,
    so this test checks that the fitted amplitude still matches the
    true value.
    """
    analysis = CurveAnalysis(models=[ExpressionModel(expr="amp * x**2", name="test")])
    analysis.set_options(
        data_processor=DataProcessor(input_key="counts", data_actions=[Probability("1")]),
        result_parameters=["amp"],
        # Sample averaging is chosen deliberately so that some yerr = 0.
        average_method="sample",
        plot=False,
        p0={"amp": 0.2},
    )

    amp = 0.3
    x = np.linspace(0, 1, 100)
    y = amp * x**2

    # Force the first few y values to exactly zero.
    # The mock sampler draws count dictionaries from a binomial
    # distribution, so y = 0 (or 1) always produces the identical count
    # dictionary, which makes the sample-averaged y error zero.
    # With such points dominating, amp = 0 could wrongly win the fit.
    y[0] = 0
    y[1] = 0
    y[2] = 0

    expdata = ExperimentData(experiment=FakeExperiment())
    for seed in (123, 124, 125):
        sampled = self.single_sampler(x, y, seed=seed)
        expdata.add_data(sampled.data())

    result = analysis.run(expdata)
    self.assertExperimentDone(result)

    self.assertAlmostEqual(result.analysis_results("amp").value.nominal_value, amp, delta=0.1)

def test_get_init_params(self):
"""Integration test for getting initial parameter from overview entry."""

Expand Down
2 changes: 1 addition & 1 deletion test/library/calibration/test_ramsey_xy.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ def test_end_to_end(self, freq_shift: float):

This test also checks that we can pickup frequency shifts with different signs.
"""
test_tol = 0.01
test_tol = 0.03
abs_tol = max(1e3, abs(freq_shift) * test_tol)

exp_helper = RamseyXYHelper()
Expand Down