Source code for spectrochempy.analysis.decomposition.svd

# ======================================================================================
# Copyright (©) 2015-2025 LCS - Laboratoire Catalyse et Spectrochimie, Caen, France.
# CeCILL-B FREE SOFTWARE LICENSE AGREEMENT
# See full LICENSE agreement in the root directory.
# ======================================================================================
"""Implements the Singular Value Decomposition (SVD) class."""

import numpy as np
import traitlets as tr

from spectrochempy.analysis._base._analysisbase import DecompositionAnalysis
from spectrochempy.analysis._base._analysisbase import _wrap_ndarray_output_to_nddataset
from spectrochempy.utils.docreps import _docstring

__all__ = ["SVD"]
__configurables__ = ["SVD"]


# ======================================================================================
# Utilities
# ======================================================================================
def _svd_flip(U, VT, u_based_decision=True):
    """
    Sign correction to ensure deterministic output from SVD.

    Adjusts the columns of u and the rows of v such that the loadings in the
    columns in u that are largest in absolute value are always positive.

    Parameters
    ----------
    u_based_decision : boolean, (default=True)
        If True, use the columns of u as the basis for sign flipping.
        Otherwise, use the rows of v.

    Notes
    -----
    Copied and modified from scikit-learn.utils.extmath (BSD 3 Licence)

    """
    if u_based_decision:
        # columns of U, rows of VT
        max_abs_cols = np.argmax(np.abs(U), axis=0)
        signs = np.sign(U[max_abs_cols, range(U.shape[1])])
        U *= signs
        VT *= signs[:, np.newaxis]
    else:
        # rows of V, columns of U
        max_abs_rows = np.argmax(np.abs(VT), axis=1)
        signs = np.sign(VT[range(VT.shape[0]), max_abs_rows])
        U *= signs
        VT *= signs[:, np.newaxis]

    return U, VT


# ======================================================================================
# class PCA
# ======================================================================================
[docs] class SVD(DecompositionAnalysis): _docstring.delete_params("DecompositionAnalysis.see_also", "SVD") __doc__ = _docstring.dedent( r""" Singular Value Decomposition (SVD). The SVD is commonly written as :math:`X = U \Sigma V^{T}`. This class has the attributes : U, s = diag(S) and VT=V :math:`^T`. If the dataset contains masked values, the corresponding ranges are ignored in the calculation. Parameters ---------- %(AnalysisConfigurable.parameters)s See Also -------- %(DecompositionAnalysis.see_also.no_SVD)s Examples -------- >>> dataset = scp.read('irdata/nh4y-activation.spg') >>> svd = scp.SVD() >>> svd.fit(dataset) <svd: U(55, 55), s(55), VT(55, 5549)> >>> print(svd.ev.data) [1.185e+04 634 ... 0.001089 0.000975] >>> print(svd.ev_cum.data) [ 94.54 99.6 ... 100 100] >>> print(svd.ev_ratio.data) [ 94.54 5.059 ... 8.687e-06 7.779e-06] """, ) # ---------------------------------------------------------------------------------- # Configuration parameters # ---------------------------------------------------------------------------------- full_matrices = tr.Bool( default_value=False, help="If False , U and VT have the shapes (M, k) and (k, N), respectively, " "where k = min(M, N). Otherwise the shapes will be (M, M) and (N, N), " "respectively.", ).tag(config=True) compute_uv = tr.Bool( default_value=True, help="Whether or not to compute U and VT in addition to s.", ).tag(config=True) # ---------------------------------------------------------------------------------- # Initialization # ---------------------------------------------------------------------------------- def __init__( self, *, log_level="WARNING", warm_start=False, **kwargs, ): # call the super class for initialisation of the configuration parameters # to do before anything else! super().__init__( log_level=log_level, warm_start=warm_start, **kwargs, ) # ---------------------------------------------------------------------------------- # Private methods (overloading abstract classes) # ---------------------------------------------------------------------------------- def _fit(self, X, Y=None): # this method is called by the abstract class fit. # Input X is a np.ndarray # Y is ignored in this model full_matrices = self.full_matrices compute_uv = self.compute_uv _outfit = np.linalg.svd(X, full_matrices, compute_uv) # Sign correction to ensure deterministic output from SVD. # This doesn't work will full_matrices=True. if compute_uv and not full_matrices: U, s, VT = _outfit U, VT = _svd_flip(U, VT) _outfit = U, s, VT return _outfit # ---------------------------------------------------------------------------------- # special methods # ---------------------------------------------------------------------------------- def __repr__(self): if self.compute_uv: U, s, VT = self._outfit return f"<svd: U{U.shape}, s({s.size}), VT{VT.shape}>" s = self._outfit return f"<svd: s({s.size}), U and VT not computed>" # ---------------------------------------------------------------------------------- # Public method and properties # ---------------------------------------------------------------------------------- _docstring.keep_params("analysis_fit.parameters", "X")
[docs] @_docstring.dedent def fit(self, X): """ Fit the SVD model on X. Parameters ---------- %(analysis_fit.parameters.X)s Returns ------- %(analysis_fit.returns)s See Also -------- %(analysis_fit.see_also)s """ return super().fit(X, Y=None)
@property @_wrap_ndarray_output_to_nddataset( units=None, title="Singular values", typesingle="components", ) def singular_values(self): """Return a NDDataset containing singular values.""" return self._outfit[1] sv = singular_values @property def explained_variance(self): """Return a NDDataset of the explained variance.""" size = self.sv.size ev = self.sv**2 / (size - 1) ev.name = "ev" ev.title = "explained variance" return ev ev = explained_variance @property def explained_variance_ratio(self): """Return Explained Variance per singular values.""" ratio = self.ev * 100.0 / np.sum(self.ev) ratio.name = "ev_ratio" ratio.title = "explained variance ratio" ratio.units = "percent" return ratio ev_ratio = explained_variance_ratio @property def cumulative_explained_variance(self): """Return Cumulative Explained Variance.""" ev_cum = np.cumsum(self.ev_ratio) ev_cum.name = "ev_cum" ev_cum.title = "cumulative explained variance" ev_cum.units = "percent" return ev_cum ev_cum = cumulative_explained_variance # TODO: return masked array for U,s,VT @property def U(self): """ Return the left unitary matrix. Its shape depends on `full_matrices` . """ if self.compute_uv: return self._outfit[0] return None @property def VT(self): """ Return a transpose matrix of the Loadings. Its shape depends on `full_matrices` """ if self.compute_uv: return self._outfit[2] return None @property def s(self): """Return Vector of singular values .""" return self._outfit[1]