Source code for spectrochempy.core.readers.read_matlab

# -*- coding: utf-8 -*-
# ======================================================================================
# Copyright (©) 2015-2023 LCS - Laboratoire Catalyse et Spectrochimie, Caen, France.
# CeCILL-B FREE SOFTWARE LICENSE AGREEMENT
# See full LICENSE agreement in the root directory.
# ======================================================================================
"""
Plugin module to extend NDDataset with import methods.
"""

__all__ = ["read_matlab", "read_mat"]
__dataset_methods__ = __all__

from datetime import datetime

import numpy as np
import scipy.io as sio

from spectrochempy.application import info_, warning_
from spectrochempy.core.dataset.nddataset import Coord, NDDataset
from spectrochempy.core.readers.importer import Importer, _importer_method, _openfid
from spectrochempy.utils.docstrings import _docstring

# ======================================================================================
# Public functions
# ======================================================================================
_docstring.delete_params("Importer.see_also", "read_matlab")


@_docstring.dedent
def read_matlab(*paths, **kwargs):
    """
    Read a matlab file with extension :file:`.mat` and return its content as a list.

    Arrays of numbers (*i.e.*, matlab matrices) and Eigenvector's DataSet Objects
    (``DSO``, see `DSO <https://www.eigenvector.com/software/dataset.htm>`__) are
    returned as NDDatasets. Content not recognized by SpectroChemPy is returned as a
    (name, object) tuple.

    Parameters
    ----------
    %(Importer.parameters)s

    Returns
    -------
    %(Importer.returns)s

    Other Parameters
    ----------------
    %(Importer.other_parameters)s

    See Also
    --------
    %(Importer.see_also.no_read_matlab)s

    Examples
    --------
    >>> scp.read_matlab('matlabdata/dso.mat')
    NDDataset: [float64] unitless (shape: (y:20, x:426))
    """
    kwargs["filetypes"] = ["MATLAB files (*.mat *.dso)"]
    kwargs["protocol"] = ["matlab", "mat", "dso"]
    importer = Importer()
    return importer(*paths, **kwargs)
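# Usage sketch, expanding the docstring example above (assumes the same example file
# 'matlabdata/dso.mat' and `scp` as the top-level spectrochempy namespace):
#
#     import spectrochempy as scp
#
#     content = scp.read_matlab("matlabdata/dso.mat")
#     # depending on the file content, this is a single NDDataset, a list of
#     # NDDatasets, and/or (name, object) tuples for unrecognized entries
#     print(content)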
read_mat = read_matlab


# --------------------------------------------------------------------------------------
# Private methods
# --------------------------------------------------------------------------------------
@_importer_method
def _read_mat(*args, **kwargs):
    _, filename = args
    fid, kwargs = _openfid(filename, **kwargs)

    dic = sio.loadmat(fid)

    datasets = []
    for name, data in dic.items():

        dataset = NDDataset()
        if name == "__header__":
            dataset.description = str(data, "utf-8", "ignore")
            continue
        if name.startswith("__"):
            continue

        if data.dtype in [
            np.dtype("float64"),
            np.dtype("float32"),
            np.dtype("int8"),
            np.dtype("int16"),
            np.dtype("int32"),
            np.dtype("int64"),
            np.dtype("uint8"),
            np.dtype("uint16"),
            np.dtype("uint32"),
            np.dtype("uint64"),
        ]:
            # this is an array of numbers
            dataset.data = data
            dataset.name = name
            dataset.filename = filename
            dataset.history = "Imported from .mat file"
            # TODO: reshape from fortran/Matlab order to C order
            #       for 3D or higher datasets?
            datasets.append(dataset)

        elif data.dtype.char == "U":
            # this is an array of strings
            info_(
                f"The mat file contains an array of strings named '{name}' "
                "which will not be converted to NDDataset"
            )
            continue

        elif data.dtype.names is not None and all(
            name_ in data.dtype.names
            for name_ in ["moddate", "axisscale", "imagesize"]
        ):
            # this is probably a DSO object
            dataset = _read_dso(dataset, name, data)
            datasets.append(dataset)

        else:
            warning_(f"unsupported data type: {data.dtype}")
            # TODO: implement DSO reader
            datasets.append([name, data])

    return datasets


@_importer_method
def _read_dso(dataset, name, data):
    name_mat = data["name"][0][0]
    if len(name_mat) == 0:
        name = ""
    else:
        name = name_mat[0]

    typedata_mat = data["type"][0][0]
    if len(typedata_mat) == 0:
        typedata = ""
    else:
        typedata = typedata_mat[0]

    if typedata != "data":
        return (name, data)

    else:
        author_mat = data["author"][0][0]
        if len(author_mat) == 0:
            author = "*unknown*"
        else:
            author = author_mat[0]

        date_mat = data["date"][0][0]
        if len(date_mat) == 0:
            date = datetime(1, 1, 1, 0, 0)
        else:
            date = datetime(
                int(date_mat[0][0]),
                int(date_mat[0][1]),
                int(date_mat[0][2]),
                int(date_mat[0][3]),
                int(date_mat[0][4]),
                int(date_mat[0][5]),
            )

        dat = data["data"][0][0]

        # look at coords and labels
        # only the first label and axisscale are taken into account
        # the axisscale title is used as the coordinate title
        coords = []
        for i in range(len(dat.shape)):
            coord = datac = None  # labels = title = None
            labelsarray = data["label"][0][0][i][0]

            if len(labelsarray):  # some labels might be present
                if isinstance(labelsarray[0], np.ndarray):
                    labels = data["label"][0][0][i][0][0]
                else:
                    labels = data["label"][0][0][i][0]

                if len(labels):
                    coord = Coord(labels=[str(label) for label in labels])

                if len(data["label"][0][0][i][1]):
                    if isinstance(data["label"][0][0][i][1][0], np.ndarray):
                        if len(data["label"][0][0][i][1][0]):
                            coord.name = data["label"][0][0][i][1][0][0]
                    elif isinstance(data["label"][0][0][i][1][0], str):
                        coord.name = data["label"][0][0][i][1][0]

            axisdataarray = data["axisscale"][0][0][i][0]

            if len(axisdataarray):  # some axisscale might be present
                if isinstance(axisdataarray[0], np.ndarray):
                    if len(axisdataarray[0]) == dat.shape[i]:
                        datac = axisdataarray[0]  # take the first axisscale data
                    elif axisdataarray[0].size == dat.shape[i]:
                        datac = axisdataarray[0][0]

                if datac is not None:
                    if isinstance(coord, Coord):
                        coord.data = datac
                    else:
                        coord = Coord(data=datac)

                if len(data["axisscale"][0][0][i][1]):  # some titles might be present
                    try:
                        coord.title = data["axisscale"][0][0][i][1][0]
                    except Exception:
                        try:
                            coord.title = data["axisscale"][0][0][i][1][0][0]
                        except Exception:
                            pass

            if not isinstance(coord, Coord):
                coord = Coord(data=[j for j in range(dat.shape[i])], title="index")

            coords.append(coord)

        dataset.data = dat
        dataset.set_coordset(*[coord for coord in coords])
        dataset.author = author
        dataset.name = name
        dataset.date = date

        # TODO: reshape from fortran/Matlab order to C order
        #       for 3D or higher datasets?

        for i in data["description"][0][0]:
            dataset.description += i

        for i in data["history"][0][0][0][0]:
            dataset.history = i

        dataset.history = "Imported by spectrochempy."

    return dataset