Source code for spectrochempy.processing.alignement.align

# ======================================================================================
# Copyright (©) 2015-2025 LCS - Laboratoire Catalyse et Spectrochimie, Caen, France.
# CeCILL-B FREE SOFTWARE LICENSE AGREEMENT
# See full LICENSE agreement in the root directory.
# ======================================================================================
"""Module defining functions related to NDDataset alignment."""

__all__ = ["align"]
__dataset_methods__ = __all__

# import scipy.interpolate
import numpy as np

from spectrochempy.application import error_
from spectrochempy.application import warning_
from spectrochempy.utils import exceptions
from spectrochempy.utils.constants import MASKED
from spectrochempy.utils.misc import get_n_decimals


def can_merge_or_align(coord1, coord2):
    """
    Check if two coordinates can be merged or aligned.

    Parameters
    ----------
    coord1, coord2 :  `Coord`
        Coordinates to merge or align.

    Returns
    -------
    can_merge, can_align : tuple of bools
        Two flags about merge and alignment possibility.

    """
    if coord1 == coord2:
        # same coordinates
        can_merge = True  # merge is obvious
        can_align = True  # of course as it is the same coordinate

    else:
        # no the same coordinates
        can_merge = False  # we need to do alignment to merge
        # can align only if data exists, units compatibles, and title are
        # the same
        can_align = True
        can_align &= not coord1.is_empty
        can_align &= not coord2.is_empty
        can_align &= coord1.title == coord2.title
        if can_align and (coord1.has_units or coord2.has_units):
            if coord1.has_units:
                can_align &= coord1.is_units_compatible(coord2)
            else:
                can_align &= coord2.is_units_compatible(coord1)

    return can_merge, can_align


[docs] def align(dataset, *others, **kwargs): r""" Align individual `NDDataset` along given dimensions using various methods. Parameters ---------- dataset : `NDDataset` Dataset on which we want to align other objects. *others : `NDDataset` Objects to align. dim : str. Optional, default='x' Along which axis to perform the alignment. dims : list of str, optional, default=None Align along all dims defined in dims (if dim is also defined, then dims have higher priority). method : enum ['outer', 'inner', 'first', 'last', 'interpolate'], optional, default='outer' Which method to use for the alignment. If align is defined : * 'outer' means that a union of the different coordinates is achieved (missing values are masked). * 'inner' means that the intersection of the coordinates is used. * 'first' means that the first dataset is used as reference. * 'last' means that the last dataset is used as reference. * 'interpolate' means that interpolation is performed relative to the first dataset. interpolate_method : enum ['linear','pchip']. Optional, default='linear' Method of interpolation to performs for the alignment. interpolate_sampling : 'auto', int or float. Optional, default='auto' Values: * 'auto' : sampling is determined automatically from the existing data. * int : if an integer values is specified, then the sampling interval for the interpolated data will be split in this number of points. * float : If a float value is provided, it determines the interval between the interpolated data. coord : `Coord` , optional, default=None Coordinates to use for alignment. Ignore those corresponding to the dimensions to align. copy : bool, optional, default=True If False then the returned objects will share memory with the original objects, whenever it is possible : in principle only if reindexing is not necessary. Returns ------- tuple of `NDDataset` Same objects as datasets with dimensions aligned. Raises ------ ValueError Issued when the dimensions given in `dim` or `dims` argument are not compatibles (units, titles, etc.). """ # TODO: Perform an alignment along numeric labels # TODO: add example in docs # copy objects? copy = kwargs.pop("copy", True) # make a single list with dataset and the remaining object objects = [dataset] + list(others) # should we align on given external coordinates extern_coord = kwargs.pop("coord", None) # what's the method to use (by default='outer') method = kwargs.pop("method", "outer") # trivial cases where alignment is not possible or unnecessary if not objects: warning_("No object provided for alignment!") return None if ( len(objects) == 1 and objects[0]._implements("NDDataset") and extern_coord is None ): # no necessary alignment return objects # evaluate on which axis we align axis, dims = dataset.get_axis(only_first=False, **kwargs) # check compatibility of the dims and prepare the dimension for alignment for _axis, dim in zip(axis, dims, strict=False): # get all objects to align _objects = {} _nobj = 0 for idx, object in enumerate(objects): if not object._implements("NDDataset"): error_( f"Bad object(s) found: {object}. Note that only NDDataset " f"objects are accepted " f"for alignment", ) return None _objects[_nobj] = { "obj": object.copy(), "idx": idx, } _nobj += 1 _last = _nobj - 1 # get the reference object (by default the first, except if method if # set to 'last' ref_obj_index = 0 if method == "last": ref_obj_index = _last ref_obj = _objects[ref_obj_index]["obj"] # as we will sort their coordinates at some point, we need to know # if the coordinates need to be reversed at # the end of the alignment process reversed = ref_obj.coordset[dim].reversed if reversed: ref_obj.sort(descend=False, dim=dim, inplace=True) # get the coordset corresponding to the reference object ref_obj_coordset = ref_obj.coordset # get the coordinate for the reference dimension ref_coord = ref_obj_coordset[dim] # as we will sort their coordinates at some point, we need to know # if the coordinates need to be reversed at # the end of the alignment process reversed = ref_coord.reversed # prepare a new Coord object to store the final new dimension new_coord = ref_coord.copy() ndec = get_n_decimals(new_coord.data.max(), 1.0e-5) # loop on all object for _index, object in _objects.items(): obj = object["obj"] if obj is ref_obj: # not necessary to compare with itself! continue if reversed: obj.sort(descend=False, dim=dim, inplace=True) # get the current object coordinates and check compatibility coord = obj.coordset[dim] if not coord.is_units_compatible(ref_coord): # not compatible, stop everything raise exceptions.UnitsCompatibilityError( "NDataset to align must have compatible units!", ) # do units transform if necesssary so coords can be compared if coord.units != ref_coord.units: coord.ito(ref_coord) # adjust the new_cord depending on the method of alignment new_coord_data = set(np.around(new_coord.data, ndec)) coord_data = set(np.around(coord.data, ndec)) if method in ["outer", "interpolate"]: # in this case we do a union of the coords (masking the # missing values) # For method=`interpolate` , the interpolation will be # performed in a second step new_coord._data = sorted(coord_data | new_coord_data) elif method == "inner": # take only intersection of the coordinates # and generate a warning if it result something null or new_coord._data = sorted(coord_data & new_coord_data) elif method in ["first", "last"]: # we take the reference coordinates already determined as # basis (masking the missing values) continue else: raise NotImplementedError(f"The method {method} is unknown!") # Now perform alignment of all objects on the new coordinates for index, object in _objects.items(): obj = object["obj"] # get the dim index for the given object dim_index = obj.dims.index(dim) # prepare slicing keys ; set slice(None) for the untouched # dimensions preceding the dimension of interest prepend_keys = [slice(None)] * dim_index # New objects for obj must be created with the new coordinates # change the data shape new_obj_shape = list(obj.shape) new_obj_shape[dim_index] = len(new_coord) new_obj_data = np.full(new_obj_shape, np.nan) # create new dataset for obj and ref_objects new_obj = obj.copy() if copy else obj # update the data and mask coord = obj.coordset[dim] coord_data = set(np.around(coord.data, ndec)) dim_loc = new_coord._loc2index(sorted(coord_data)) loc = tuple(prepend_keys + [dim_loc]) new_obj._data = new_obj_data # mask all the data then unmask later the relevant data in # the next step if not new_obj.is_masked: new_obj.mask = MASKED new_obj.mask[loc] = False else: mask = new_obj.mask.copy() new_obj.mask = MASKED new_obj.mask[loc] = mask # set the data for the loc new_obj._data[loc] = obj.data # update the coordinates new_coordset = obj.coordset.copy() if coord.is_labeled: label_shape = list(coord.labels.shape) label_shape[0] = new_coord.size new_coord._labels = np.zeros(tuple(label_shape)).astype( coord.labels.dtype, ) new_coord._labels[:] = "--" new_coord._labels[dim_loc] = coord.labels setattr(new_coordset, dim, new_coord) new_obj._coordset = new_coordset # reversed? if reversed: # we must reverse the given coordinates new_obj.sort(descend=reversed, dim=dim, inplace=True) # update the _objects _objects[index]["obj"] = new_obj if method == "interpolate": warning_( "Interpolation not yet implemented - for now equivalent to `outer`", ) # the new transformed object must be in the same order as the passed # objects # and the missing values must be masked (for the moment they are defined to NaN for _index, object in _objects.items(): obj = object["obj"] # obj[np.where(np.isnan(obj))] = MASKED # mask NaN values obj[ np.where(np.isnan(obj)) ] = 99999999999999.0 # replace NaN values (to simplify # comparisons) idx = int(object["idx"]) objects[idx] = obj # we also transform into linear coord if possible ? # TODO: # Now return return tuple(objects)
# if method == 'interpolate': # # # reorders dataset and reference # in ascending order # is_sorted # = False # if # dataset.coordset(axis).reversed: # datasetordered = # dataset.sort(axis, # descend=False) # refordered = ref.sort( # refaxis, descend=False) # is_sorted = True # # else: # # datasetordered = dataset.copy() # refordered = ref.copy() # # try: # # datasetordered.coordset(axis).to( # refordered.coordset(refaxis).units) # except: # # raise # ValueError( # 'units of the dataset and # reference axes on which interpolate are not # compatible') # # # oldaxisdata = datasetordered.coordset(axis).data # # refaxisdata = # refordered.coordset(refaxis).data # TODO: at the # end restore the original order # # method = # kwargs.pop( # 'method', 'linear') # fill_value = kwargs.pop('fill_value', # np.NaN) # # # if method == 'linear': # interpolator # = lambda data, ax=0: scipy.interpolate.interp1d( # # # oldaxisdata, data, axis=ax, kind=method, bounds_error=False, # fill_value=fill_value, # assume_sorted=True) # # elif # method == 'pchip': # interpolator = lambda data, # ax=0: scipy.interpolate.PchipInterpolator( # # oldaxisdata, data, axis=ax, # extrapolate=False) # # else: # raise AttributeError(f'{method} is not a # # recognised option method for `align`') # # # interpolate_data = interpolator( # datasetordered.data, # axis) # newdata = interpolate_data(refaxisdata) # # # # if datasetordered.is_masked: # interpolate_mask = # interpolator( # datasetordered.mask, axis) # newmask # = interpolate_mask(refaxisdata) # # else: # # newmask = NOMASK # # # interpolate_axis = # # interpolator(datasetordered.coordset(axis).data) # # # newaxisdata = # interpolate_axis(refaxisdata) # # newaxisdata = refaxisdata.copy() # # if # method == # 'pchip' and not np.isnan(fill_value): # index = # # np.any(np.isnan(newdata)) # newdata[index] = # fill_value # # # index = np.any(np.isnan( # newaxisdata)) # newaxisdata[index] = fill_value # # # create the new axis # newaxes = dataset.coords.copy() # # newaxes[axis]._data = newaxisdata # newaxes[axis]._labels = # np.array([''] * newaxisdata.size) # # # transform the dataset # # inplace = kwargs.pop('inplace', False) # # if inplace: # # out = dataset # else: # # out = dataset.copy() # # # out._data = newdata # out._coords = newaxes # out._mask # # = newmask # # out.name = dataset.name # out.title = # dataset.title # # out.history # = '{}: Aligned along dim {} # with respect to dataset {} using coords {} \n'.format( # # str( # dataset.modified), axis, ref.name, ref.coords[refaxis].title) # # if is_sorted and out.coordset( # axis).reversed: # # out.sort(axis, descend=True, inplace=True) # ref.sort( # refaxis, # descend=True, inplace=True) # # return out