Source code for spectrochempy.processing.transformation.autosub

# ======================================================================================
# Copyright (©) 2015-2025 LCS - Laboratoire Catalyse et Spectrochimie, Caen, France.
# CeCILL-B FREE SOFTWARE LICENSE AGREEMENT
# See full LICENSE agreement in the root directory.
# ======================================================================================
"""Plugin module to perform automatic subtraction of a reference from a dataset."""

# Names exported by this module.
__all__ = ["autosub"]

# Bound to the very same list object as ``__all__``.
# NOTE(review): presumably read elsewhere in the package to expose these
# functions as dataset methods (the ``autosub`` docstring example calls
# ``A.autosub(...)``) -- confirm against the package loader.
__dataset_methods__ = __all__

import numpy as np
from scipy.optimize import minimize_scalar

from spectrochempy.utils.coordrange import trim_ranges


def autosub(
    dataset,
    ref,
    *ranges,
    dim="x",
    method="vardiff",
    return_coefs=False,
    inplace=False,
):
    r"""
    Automatic subtraction of a reference from the dataset.

    One subtraction coefficient is fitted for every 1D trace of the dataset
    along `dim`. The coefficients are adjusted either to minimise the variance
    of the first difference of the subtraction (method = 'vardiff'), which
    tends to minimise the peaks due to `ref`, or to minimise the sum of squares
    of the subtraction (method = 'ssdiff').

    Parameters
    ----------
    dataset : `NDDataset`
        Dataset from which we want to subtract the reference data.
    ref : `NDDataset`
        1D reference data, with a size matching the axis to subtract
        (see the `dim` parameter).
    *ranges : pair(s) of values
        Any number of pairs is allowed.
        Coord range(s) in which the objective is minimized.
    dim : `str` or `int` , optional, default='x'
        Tells on which dimension to perform the subtraction.
        If dim is an integer it refers to the axis index.
    method : str, optional, default='vardiff'
        'vardiff': minimize the variance of the first difference of
        ``dataset - coef * ref``.
        'ssdiff': minimize the sum of squares of ``dataset - coef * ref``.
    return_coefs : `bool` , optional, default=`False`
        Returns the table of coefficients.
    inplace : `bool` , optional, default=`False`
        True if the subtraction is done in place.
        In this case we do not need to catch the function output.

    Returns
    -------
    out : `NDDataset`
        The subtracted dataset.
    coefs : `~numpy.ndarray` .
        The flat table of subtraction coefficients, one per trace, in C order
        over the dimensions other than `dim` (only if `return_coefs` is set
        to `True` ).

    Raises
    ------
    ValueError
        If `method` is not one of 'vardiff' or 'ssdiff', or if the units of
        `dataset` and `ref` are not compatible.

    See Also
    --------
    Baseline : Manual baseline corrections.
    abc : Automatic baseline corrections.

    Examples
    --------
    >>> path_A = 'irdata/nh4y-activation.spg'
    >>> A = scp.read(path_A, protocol='omnic')
    >>> ref = A[0, :]  # let's subtract the first row
    >>> B = A.autosub(ref, [3900., 3700.], [1600., 1500.], inplace=False)
    >>> B
    NDDataset: [float64] a.u. (shape: (y:55, x:5549))
    """
    # Fail early (before any copy or fit) on an unknown method; tuple
    # membership relies on ``==`` so unhashable values are rejected too.
    if method not in ("ssdiff", "vardiff"):
        raise ValueError(f"Not implemented for method={method}")

    # output dataset
    new = dataset if inplace else dataset.copy()

    # The rest of the function works on the last dimension, so the requested
    # dimension is moved there first (and moved back at the end).
    axis, dim = new.get_axis(dim)
    if axis == new.ndim - 1:
        axis = -1

    # Compatibility check only: the return value is discarded, so the
    # reference values used below are not rescaled to the dataset units here.
    try:
        ref.to(dataset.units)
    except Exception as e:
        raise ValueError("Units of the dataset and reference are not compatible") from e

    swapped = axis != -1
    if swapped:
        # NOTE(review): `new` is rebound to whatever `swapdims` returns; if
        # that is a copy, `inplace=True` does not modify `dataset` for a
        # non-last `dim` -- confirm against `swapdims` semantics.
        new = new.swapdims(axis, -1)

    # TODO: detect the case where the ref is not exactly with same coords: interpolate?

    # Selection of the multiple ranges.
    # They must be float to be considered as coordinate values (e.g. frequencies)
    # rather than integer indices.
    ranges = tuple(np.array(ranges, dtype=float))
    coords = new.coordset[dim]
    xrange = trim_ranges(*ranges, reversed=coords.reversed)

    # Collect the fitting windows. They are taken from `new` (axes already
    # swapped), not from `dataset`, so that the slice on the last dimension
    # really addresses `dim` whatever its original position.
    data_parts = []
    ref_parts = []
    for xpair in xrange:
        sl = slice(*xpair)
        data_parts.append(new[..., sl].data)
        ref_parts.append(ref[..., sl].data)

    X_r = np.concatenate(data_parts, axis=-1)
    ref_r = np.concatenate(ref_parts, axis=-1).squeeze()

    def _objective(alpha, trace):
        # `method` has been validated above: it is 'ssdiff' or 'vardiff'.
        residual = trace - alpha * ref_r
        if method == "ssdiff":
            return np.sum(residual**2)
        return np.var(np.diff(residual))

    # One scalar minimisation per trace, i.e. per index over all the
    # dimensions but the last one (C order).
    lead_shape = X_r.shape[:-1]
    x = np.asarray(
        [
            minimize_scalar(_objective, args=(X_r[idx],), method="brent").x
            for idx in np.ndindex(*lead_shape)
        ]
    )

    # Broadcast ``coef * ref`` over the leading dimensions. For 2D data this
    # is the same outer product as before; it also fits 1D and >2D data.
    new._data -= x.reshape(lead_shape + (1,)) * ref.data.reshape(-1)

    if swapped:
        new = new.swapdims(axis, -1)

    new.history = f"Automatic subtraction of {ref.name}"

    if return_coefs:
        return new, x
    return new