Source code for spectrochempy.processing.transformation.autosub

# ======================================================================================
# Copyright (©) 2015-2025 LCS - Laboratoire Catalyse et Spectrochimie, Caen, France.
# CeCILL-B FREE SOFTWARE LICENSE AGREEMENT
# See full LICENSE agreement in the root directory.
# ======================================================================================
"""Plugin module to perform automatic subtraction of ref on a dataset."""

__all__ = ["autosub"]

__dataset_methods__ = __all__

import numpy as np
from scipy.optimize import minimize_scalar

from spectrochempy.utils.coordrange import trim_ranges



[docs]
def autosub(
    dataset,
    ref,
    *ranges,
    dim="x",
    method="vardiff",
    return_coefs=False,
    inplace=False,
):
    r"""
    Automatic subtraction of a reference to the dataset.

    The subtraction coefficient are adjusted to either
    minimise the variance of the subtraction (method = 'vardiff') which will
    minimize peaks due to ref or minimize the sum of squares of the subtraction
    (method = 'ssdiff').

    Parameters
    ----------
    dataset : `NDDataset`
        Dataset to which we want to subtract the reference data.
    ref : `NDDataset`
         1D reference data, with a size matching the axis to subtract.
         (axis parameter).  # TODO : optionally use title of axis.
    *ranges : pair(s) of values
        Any number of pairs is allowed.
        Coord range(s) in which the variance is minimized.
    dim : `str` or `int` , optional, default='x'
        Tells on which dimension to perform the subtraction.
        If dim is an integer it refers to the axis index.
    method : str, optional, default='vardiff'
        'vardiff': minimize the difference of the variance.
        'ssdiff': minimize the sum of squares difference of sum of squares.
    return_coefs : `bool` , optional, default=`False`
         Returns the table of coefficients.
    inplace : `bool` , optional, default=`False`
        True if the subtraction is done in place.
        In this case we do not need to catch the function output.

    Returns
    -------
    out : `NDDataset`
        The subtracted dataset.
    coefs : `~numpy.ndarray` .
        The table of subtraction coefficients
        (only if `return_coefs` is set to `True` ).

    See Also
    --------
    Baseline : Manual baseline corrections.
    abc : Automatic baseline corrections.

    Examples
    --------
    >>> path_A = 'irdata/nh4y-activation.spg'
    >>> A = scp.read(path_A, protocol='omnic')
    >>> ref = A[0, :]  # let's subtrack the first row
    >>> B = A.autosub(ref, [3900., 3700.], [1600., 1500.], inplace=False)
    >>> B
    NDDataset: [float64]  a.u. (shape: (y:55, x:5549))

    """
    # output dataset

    new = dataset.copy() if not inplace else dataset

    # we assume that the last dimension ('x' for transposed array) is always the dimension to which we want
    # to subtract.

    # Swap the axes to be sure to be in this situation
    axis, dim = new.get_axis(dim)

    if axis == new.ndim - 1:
        axis = -1

    try:
        ref.to(dataset.units)
    except Exception as e:
        raise ValueError("Units of the dataset and reference are not compatible") from e

    swapped = False
    if axis != -1:
        new = new.swapdims(axis, -1)
        swapped = True

    # TODO: detect the case where the ref is not exactly with same coords: interpolate?

    # selection of the multiple ranges

    # shape = list(new.shape)
    ranges = tuple(np.array(ranges, dtype=float))
    # must be float to be considered as frequency for instance

    coords = new.coordset[dim]
    xrange = trim_ranges(*ranges, reversed=coords.reversed)

    s = []
    r = []

    # TODO: this do not work obviously for axis != -1 - correct this
    for xpair in xrange:
        # determine the slices

        sl = slice(*xpair)
        s.append(dataset[..., sl].data)
        r.append(ref[..., sl].data)

    X_r = np.concatenate((*s,), axis=-1)
    ref_r = np.concatenate((*r,), axis=-1).squeeze()

    indices, _ = list(zip(*np.ndenumerate(X_r[..., 0]), strict=False))  # .squeeze())))

    # two methods
    # @jit
    def _f(alpha, p):
        if method == "ssdiff":
            return np.sum((p - alpha * ref_r) ** 2)
        if method == "vardiff":
            return np.var(np.diff(p - alpha * ref_r))
        raise ValueError(f"Not implemented for method={method}")

    # @jit(cache=True)
    def _minim():
        # table of subtraction coefficients
        x = []

        for tup in indices:
            # slices = [i for i in tup]
            # slices.append(slice(None))
            # args = (X_r[slices],)
            args = X_r[tup]
            res = minimize_scalar(_f, args=(args,), method="brent")
            x.append(res.x)

        return np.asarray(x)

    x = _minim()

    new._data -= np.dot(x.reshape(-1, 1), ref.data.reshape(1, -1))

    if swapped:
        new = new.swapdims(axis, -1)

    new.history = f"Automatic subtraction of {ref.name}"

    if return_coefs:
        return new, x
    return new