Source code for spectrochempy.core.readers.read_matlab

# ======================================================================================
# Copyright (©) 2015-2025 LCS - Laboratoire Catalyse et Spectrochimie, Caen, France.
# CeCILL-B FREE SOFTWARE LICENSE AGREEMENT
# See full LICENSE agreement in the root directory.
# ======================================================================================
"""Plugin module to extend NDDataset with the import methods method."""

__all__ = ["read_matlab", "read_mat"]
__dataset_methods__ = __all__

import contextlib
from datetime import datetime

import numpy as np
import scipy.io as sio

from spectrochempy.application import info_
from spectrochempy.application import warning_
from spectrochempy.core.dataset.nddataset import Coord
from spectrochempy.core.dataset.nddataset import NDDataset
from spectrochempy.core.readers.importer import Importer
from spectrochempy.core.readers.importer import _importer_method
from spectrochempy.core.readers.importer import _openfid
from spectrochempy.utils.docreps import _docstring

# ======================================================================================
# Public functions
# ======================================================================================
_docstring.delete_params("Importer.see_also", "read_matlab")


@_docstring.dedent
def read_matlab(*paths, **kwargs):
    """
    Read a MATLAB file with extension :file:`.mat` and return its content as a list.

    Arrays of numbers (*i.e.,* MATLAB matrices) and Eigenvector's DataSet Objects
    (``DSO``, see `DSO <https://www.eigenvector.com/software/dataset.htm>`__ ) are
    returned as NDDatasets. Content not recognized by SpectroChemPy is returned as a
    tuple (name, object).

    Parameters
    ----------
    %(Importer.parameters)s

    Returns
    -------
    %(Importer.returns)s

    Other Parameters
    ----------------
    %(Importer.other_parameters)s

    See Also
    --------
    %(Importer.see_also.no_read_matlab)s

    Examples
    --------
    >>> scp.read_matlab('matlabdata/dso.mat')
    NDDataset: [float64] unitless (shape: (y:20, x:426))
    """
    kwargs["filetypes"] = ["MATLAB files (*.mat *.dso)"]
    kwargs["protocol"] = ["matlab", "mat", "dso"]
    importer = Importer()
    return importer(*paths, **kwargs)
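
# A short usage note (illustrative, not part of the original module): ``read_matlab``
# does no parsing itself; it only declares the accepted file types and protocols and
# delegates to ``Importer``, which dispatches ``.mat``/``.dso`` files to the private
# ``_read_mat`` reader defined below. Depending on the file content, the call returns
# a single NDDataset or a list mixing NDDatasets and (name, object) tuples. A minimal
# sketch, reusing the path from the docstring example above:
#
#     import spectrochempy as scp
#
#     content = scp.read_matlab("matlabdata/dso.mat")
#     items = content if isinstance(content, list) else [content]
#     for item in items:
#         if isinstance(item, scp.NDDataset):
#             print(item.name, item.shape)
#         else:
#             name, obj = item  # content not recognized by SpectroChemPy
#             print(f"skipped '{name}' ({type(obj).__name__})")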

read_mat = read_matlab


# --------------------------------------------------------------------------------------
# Private methods
# --------------------------------------------------------------------------------------
@_importer_method
def _read_mat(*args, **kwargs):
    _, filename = args
    fid, kwargs = _openfid(filename, **kwargs)

    dic = sio.loadmat(fid)

    datasets = []
    for name, data in dic.items():
        dataset = NDDataset()
        if name == "__header__":
            dataset.description = str(data, "utf-8", "ignore")
            continue
        if name.startswith("__"):
            continue

        if data.dtype in [
            np.dtype("float64"),
            np.dtype("float32"),
            np.dtype("int8"),
            np.dtype("int16"),
            np.dtype("int32"),
            np.dtype("int64"),
            np.dtype("uint8"),
            np.dtype("uint16"),
            np.dtype("uint32"),
            np.dtype("uint64"),
        ]:
            # this is an array of numbers
            dataset.data = data
            dataset.name = name
            dataset.filename = filename
            dataset.history = "Imported from .mat file"
            # TODO: reshape from fortran/Matlab order to C order
            #  for 3D or higher datasets?
            datasets.append(dataset)

        elif data.dtype.char == "U":
            # this is an array of strings
            info_(
                f"The mat file contains an array of strings named '{name}' "
                f"which will not be converted to NDDataset",
            )
            continue

        elif all(
            name_ in data.dtype.names
            for name_ in ["moddate", "axisscale", "imagesize"]
        ):
            # this is probably a DSO object
            dataset = _read_dso(dataset, name, data)
            datasets.append(dataset)

        else:
            warning_(f"unsupported data type: {data.dtype}")
            # TODO: implement DSO reader
            datasets.append([name, data])

    return datasets


@_importer_method
def _read_dso(dataset, name, data):
    name_mat = data["name"][0][0]
    name = "" if len(name_mat) == 0 else name_mat[0]

    typedata_mat = data["type"][0][0]
    typedata = "" if len(typedata_mat) == 0 else typedata_mat[0]
    if typedata != "data":
        return (name, data)

    author_mat = data["author"][0][0]
    author = "*unknown*" if len(author_mat) == 0 else author_mat[0]

    date_mat = data["date"][0][0]
    if len(date_mat) == 0:
        date = datetime(1, 1, 1, 0, 0)
    else:
        date = datetime(
            int(date_mat[0][0]),
            int(date_mat[0][1]),
            int(date_mat[0][2]),
            int(date_mat[0][3]),
            int(date_mat[0][4]),
            int(date_mat[0][5]),
        )

    dat = data["data"][0][0]

    # look at coords and labels
    # only the first label and axisscale are taken into account
    # the axisscale title is used as the coordinate title
    coords = []
    for i in range(len(dat.shape)):
        coord = datac = None  # labels = title = None
        labelsarray = data["label"][0][0][i][0]

        if len(labelsarray):  # some labels might be present
            if isinstance(labelsarray[0], np.ndarray):
                labels = data["label"][0][0][i][0][0]
            else:
                labels = data["label"][0][0][i][0]
            if len(labels):
                coord = Coord(labels=[str(label) for label in labels])
                if len(data["label"][0][0][i][1]):
                    if isinstance(data["label"][0][0][i][1][0], np.ndarray):
                        if len(data["label"][0][0][i][1][0]):
                            coord.name = data["label"][0][0][i][1][0][0]
                    elif isinstance(data["label"][0][0][i][1][0], str):
                        coord.name = data["label"][0][0][i][1][0]

        axisdataarray = data["axisscale"][0][0][i][0]
        if len(axisdataarray):  # some axisscale might be present
            if isinstance(axisdataarray[0], np.ndarray):
                if len(axisdataarray[0]) == dat.shape[i]:
                    datac = axisdataarray[0]  # take the first axisscale data
                elif axisdataarray[0].size == dat.shape[i]:
                    datac = axisdataarray[0][0]

            if datac is not None:
                if isinstance(coord, Coord):
                    coord.data = datac
                else:
                    coord = Coord(data=datac)

            if len(data["axisscale"][0][0][i][1]):  # some titles might be present
                try:
                    coord.title = data["axisscale"][0][0][i][1][0]
                except Exception:
                    with contextlib.suppress(Exception):
                        coord.title = data["axisscale"][0][0][i][1][0][0]

        if not isinstance(coord, Coord):
            coord = Coord(data=list(range(dat.shape[i])), title="index")

        coords.append(coord)

    dataset.data = dat
    dataset.set_coordset(*list(coords))
    dataset.author = author
    dataset.name = name
    dataset.date = date

    # TODO: reshape from fortran/Matlab order to C order
    #  for 3D or higher datasets?

    for i in data["description"][0][0]:
        dataset.description += i

    for i in data["history"][0][0][0][0]:
        dataset.history = i

    dataset.history = "Imported by spectrochempy."

    return dataset