# -*- coding: utf-8 -*-
# ======================================================================================
# Copyright (©) 2015-2023 LCS - Laboratoire Catalyse et Spectrochimie, Caen, France.
# CeCILL-B FREE SOFTWARE LICENSE AGREEMENT
# See full LICENSE agreement in the root directory.
# ======================================================================================
"""
This module implements the EFA (Evolving Factor Analysis) class.
"""
import numpy as np
import traitlets as tr
from spectrochempy.analysis._base._analysisbase import DecompositionAnalysis
from spectrochempy.application import info_
from spectrochempy.utils.decorators import (
_wrap_ndarray_output_to_nddataset,
deprecated,
signature_has_configurable_traits,
)
from spectrochempy.utils.docstrings import _docstring
__all__ = ["EFA"]
__configurables__ = ["EFA"]
@signature_has_configurable_traits
class EFA(DecompositionAnalysis):
    _docstring.delete_params("DecompositionAnalysis.see_also", "EFA")

    __doc__ = _docstring.dedent(
        r"""
    Evolving Factor Analysis (EFA).

    Evolving factor analysis (`EFA`\ ) is a method that allows model-free resolution of
    overlapping peaks into concentration profiles and normalized spectra of components.

    Originally developed for GC and GC-MS experiments (See *e.g.,*
    :cite:t:`maeder:1986` , :cite:t:`roach:1992`\ ), it is also suitable for
    the analysis of spectra such as those obtained by Operando FTIR for example.

    The model used in this class allows performing a forward and reverse analysis of
    the input `NDDataset` .

    Parameters
    ----------
    %(AnalysisConfigurable.parameters)s

    See Also
    --------
    %(DecompositionAnalysis.see_also.no_EFA)s

    Examples
    --------
    >>> # Init the model
    >>> model = scp.EFA()
    >>> # Read an experimental 2D spectra (N x M )
    >>> X = scp.read("irdata/nh4y-activation.spg")
    >>> # Fit the model
    >>> _ = model.fit(X)
    >>> # Display components spectra (2 x M)
    >>> model.n_components = 2
    >>> _ = model.components.plot(title="Component spectra")
    >>> # Get the abstract concentration profile based on the FIFO EFA analysis
    >>> c = model.transform()
    >>> # Plot the transposed concentration matrix (2 x N)
    >>> _ = c.T.plot(title="Concentration")
    >>> scp.show()
    """
    )

    # ----------------------------------------------------------------------------------
    # Configuration parameters (mostly defined in subclass
    # as they depend on the model estimator)
    # ----------------------------------------------------------------------------------
    cutoff = tr.Float(default_value=None, allow_none=True, help="Cut-off value.").tag(
        config=True
    )

    n_components = tr.Int(
        allow_none=True, default_value=None, help="Number of components to keep."
    ).tag(config=True)

    # ----------------------------------------------------------------------------------
    # Initialization
    # ----------------------------------------------------------------------------------
    def __init__(
        self,
        *,
        log_level="WARNING",
        warm_start=False,
        **kwargs,
    ):
        # Translate the deprecated keyword *before* delegating to the superclass:
        # done afterwards, the old name would be forwarded unchanged and the
        # renamed value would never reach the configuration.
        if "used_components" in kwargs:
            deprecated("used_components", replace="n_components", removed="0.7")
            kwargs["n_components"] = kwargs.pop("used_components")

        # Call the super class for initialisation of the configuration parameters
        # to do before anything else!
        super().__init__(
            log_level=log_level,
            warm_start=warm_start,
            **kwargs,
        )

    def _fit(self, X, Y=None):
        # X has already been validated and eventually
        # preprocessed. X is now a nd-array with masked elements removed.
        # and this method should return _outfit
        # Y is not used but necessary to fit the superclass

        # max number of components
        M, N = X.shape
        K = min(M, N)

        # Progress milestones (in percent) that have not been reported yet.
        # The forward pass covers 0-50 %, the backward pass 50-100 %.
        pending_milestones = set(range(10, 101, 10))

        # ------------------------------------------------------------------------------
        # forward analysis
        # ------------------------------------------------------------------------------
        # Row i holds the squared singular values of the first i + 1 rows of X.
        f = np.zeros((M, K))
        for i in range(M):
            s = np.linalg.svd(X[: i + 1], compute_uv=False)
            f[i, : s.size] = s**2

            percent_done = int(i / (2 * M) * 100)
            if percent_done in pending_milestones:
                info_(f"Evolving Factor Analysis: {percent_done}% \r")
                # remove the milestone actually reported (not merely the first one)
                pending_milestones.remove(percent_done)

        # ------------------------------------------------------------------------------
        # backward analysis
        # ------------------------------------------------------------------------------
        # Row i holds the squared singular values of rows i .. M - 1 of X.
        b = np.zeros((M, K))
        for i in range(M - 1, -1, -1):
            s = np.linalg.svd(X[i:M], compute_uv=False)
            b[i, : s.size] = s**2

            percent_done = int(100 - i / (2 * M) * 100)
            if percent_done in pending_milestones:
                info_(f"Evolving Factor Analysis: {percent_done} % \r")
                pending_milestones.remove(percent_done)

        # store the components number (real or desired)
        self._n_components = K

        # return results
        _outfit = f, b
        return _outfit

    # ----------------------------------------------------------------------------------
    # Private methods that should be most of the time overloaded in subclass
    # ----------------------------------------------------------------------------------
    def _transform(self, X=None):
        # X is ignored for EFA
        # Return concentration profile
        return self._get_conc()

    def _get_conc(self):
        # Build the (M x K) concentration profiles from the forward and backward
        # eigenvalue tables stored by `_fit`.
        f, b = self._outfit
        K = self._n_components
        if self.n_components is not None:
            K = min(K, self.n_components)
        if K < 0:
            # A negative slice bound would silently drop columns instead of failing.
            raise ValueError("negative dimensions are not allowed")

        # Component k is the element-wise minimum of the k-th forward eigenvalue and
        # the (K - 1 - k)-th backward eigenvalue, hence the column reversal of `b`.
        return np.minimum(f[:, :K], b[:, :K][:, ::-1])

    def _get_components(self):
        # compute the components from the original dataset and the EFA concentrations
        St = np.dot(self._get_conc().T, self._X_preprocessed)
        return St

    # ----------------------------------------------------------------------------------
    # Public methods/properties
    # ----------------------------------------------------------------------------------
    @_docstring.dedent
    def fit(self, X):
        """
        Fit the `EFA` model on a `X` dataset.

        Parameters
        ----------
        %(analysis_fit.parameters.X)s

        Returns
        -------
        %(analysis_fit.returns)s

        See Also
        --------
        %(analysis_fit.see_also)s
        """
        return super().fit(X, Y=None)

    def reconstruct(self):
        """
        Not implemented.

        This method has no body: calling it does nothing and returns `None`.
        """

    @property
    @_wrap_ndarray_output_to_nddataset(units=None, title="keep", typex="components")
    def f_ev(self):
        """
        Eigenvalues for the forward analysis ( `NDDataset` ).

        When `cutoff` is set, every value lower than `cutoff` is replaced by `cutoff`.
        """
        f = self._outfit[0]
        if self.cutoff is not None:
            f = np.maximum(f, self.cutoff)
        return f

    @property
    @_wrap_ndarray_output_to_nddataset(units=None, title="keep", typex="components")
    def b_ev(self):
        """
        Eigenvalues for the backward analysis ( `NDDataset` ).

        When `cutoff` is set, every value lower than `cutoff` is replaced by `cutoff`.
        """
        b = self._outfit[1]
        if self.cutoff is not None:
            b = np.maximum(b, self.cutoff)
        return b