# ======================================================================================
# Copyright (©) 2015-2025 LCS - Laboratoire Catalyse et Spectrochimie, Caen, France.
# CeCILL-B FREE SOFTWARE LICENSE AGREEMENT
# See full LICENSE agreement in the root directory.
# ======================================================================================
"""Plugin module to extend NDDataset with the import methods method."""
__all__ = ["read_matlab", "read_mat"]
__dataset_methods__ = __all__
import contextlib
from datetime import datetime
import numpy as np
import scipy.io as sio
from spectrochempy.application import info_
from spectrochempy.application import warning_
from spectrochempy.core.dataset.nddataset import Coord
from spectrochempy.core.dataset.nddataset import NDDataset
from spectrochempy.core.readers.importer import Importer
from spectrochempy.core.readers.importer import _importer_method
from spectrochempy.core.readers.importer import _openfid
from spectrochempy.utils.docreps import _docstring
# ======================================================================================
# Public functions
# ======================================================================================
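# remove this reader from the shared "See Also" entries so that the docstring below
# can use the Importer.see_also.no_read_matlab key without referencing itself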
_docstring.delete_params("Importer.see_also", "read_matlab")
@_docstring.dedent
def read_matlab(*paths, **kwargs):
"""
Read a matlab file with extension :file:`.mat` and return its content as a list.
The array of numbers (*i.e.,* matlab matrices) and Eigenvector's DataSet Object
(``DSO``, see `DSO <https://www.eigenvector.com/software/dataset.htm>`__ ) are
returned as NDDatasets. The content not recognized by SpectroChemPy is returned
as a tuple (name, object).
Parameters
----------
%(Importer.parameters)s
Returns
-------
%(Importer.returns)s
Other Parameters
----------------
%(Importer.other_parameters)s
See Also
--------
%(Importer.see_also.no_read_matlab)s
Examples
--------
>>> scp.read_matlab('matlabdata/dso.mat')
NDDataset: [float64] unitless (shape: (y:20, x:426))
"""
kwargs["filetypes"] = ["MATLAB files (*.mat *.dso)"]
kwargs["protocol"] = ["matlab", "mat", "dso"]
importer = Importer()
return importer(*paths, **kwargs)
read_mat = read_matlab


# --------------------------------------------------------------------------------------
# Private methods
# --------------------------------------------------------------------------------------
@_importer_method
def _read_mat(*args, **kwargs):
_, filename = args
fid, kwargs = _openfid(filename, **kwargs)
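    # sio.loadmat returns a dict mapping MATLAB variable names to numpy arrays,
    # plus metadata entries such as "__header__", "__version__" and "__globals__"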
dic = sio.loadmat(fid)
datasets = []
for name, data in dic.items():
dataset = NDDataset()
if name == "__header__":
dataset.description = str(data, "utf-8", "ignore")
continue
if name.startswith("__"):
continue
if data.dtype in [
np.dtype("float64"),
np.dtype("float32"),
np.dtype("int8"),
np.dtype("int16"),
np.dtype("int32"),
np.dtype("int64"),
np.dtype("uint8"),
np.dtype("uint16"),
np.dtype("uint32"),
np.dtype("uint64"),
]:
# this is an array of numbers
dataset.data = data
dataset.name = name
dataset.filename = filename
dataset.history = "Imported from .mat file"
            # TODO: reshape from fortran/Matlab order to C order
# for 3D or higher datasets ?
datasets.append(dataset)
        elif data.dtype.char == "U":
            # this is an array of strings
            info_(
                f"The mat file contains an array of strings named '{name}' "
                f"which will not be converted to an NDDataset",
            )
continue
        elif data.dtype.names is not None and all(
            name_ in data.dtype.names for name_ in ["moddate", "axisscale", "imagesize"]
        ):
# this is probably a DSO object
dataset = _read_dso(dataset, name, data)
datasets.append(dataset)
else:
warning_(f"unsupported data type : {data.dtype}")
# TODO: implement DSO reader
datasets.append([name, data])
    return datasets


@_importer_method
def _read_dso(dataset, name, data):
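    # a DSO saved in a .mat file is loaded by sio.loadmat as a structured (record)
    # array: each field (name, type, author, date, data, label, axisscale, ...) is
    # a nested object array that must be unwrapped with [0][0] to reach its content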
name_mat = data["name"][0][0]
name = "" if len(name_mat) == 0 else name_mat[0]
typedata_mat = data["type"][0][0]
typedata = "" if len(typedata_mat) == 0 else typedata_mat[0]
if typedata != "data":
return (name, data)
author_mat = data["author"][0][0]
author = "*unknown*" if len(author_mat) == 0 else author_mat[0]
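    # the DSO 'date' field is a 6-element vector (year, month, day, hour, minute,
    # second); fall back to a minimal datetime when it is empty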
date_mat = data["date"][0][0]
if len(date_mat) == 0:
date = datetime(1, 1, 1, 0, 0)
else:
date = datetime(
int(date_mat[0][0]),
int(date_mat[0][1]),
int(date_mat[0][2]),
int(date_mat[0][3]),
int(date_mat[0][4]),
int(date_mat[0][5]),
)
dat = data["data"][0][0]
# look at coords and labels
# only the first label and axisscale are taken into account
# the axisscale title is used as the coordinate title
coords = []
for i in range(len(dat.shape)):
coord = datac = None # labels = title = None
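        # for dimension i, data["label"][0][0][i] holds the labels at index 0 and
        # their name at index 1; data["axisscale"][0][0][i] holds the axis values
        # at index 0 and their title at index 1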
labelsarray = data["label"][0][0][i][0]
if len(labelsarray): # some labels might be present
if isinstance(labelsarray[0], np.ndarray):
labels = data["label"][0][0][i][0][0]
else:
labels = data["label"][0][0][i][0]
if len(labels):
coord = Coord(labels=[str(label) for label in labels])
if len(data["label"][0][0][i][1]):
if isinstance(data["label"][0][0][i][1][0], np.ndarray):
if len(data["label"][0][0][i][1][0]):
coord.name = data["label"][0][0][i][1][0][0]
elif isinstance(data["label"][0][0][i][1][0], str):
coord.name = data["label"][0][0][i][1][0]
axisdataarray = data["axisscale"][0][0][i][0]
        if len(axisdataarray):  # some axisscale might be present
if isinstance(axisdataarray[0], np.ndarray):
if len(axisdataarray[0]) == dat.shape[i]:
                    datac = axisdataarray[0]  # take the first axisscale data
elif axisdataarray[0].size == dat.shape[i]:
datac = axisdataarray[0][0]
if datac is not None:
if isinstance(coord, Coord):
coord.data = datac
else:
coord = Coord(data=datac)
if len(data["axisscale"][0][0][i][1]): # some titles might be present
try:
coord.title = data["axisscale"][0][0][i][1][0]
except Exception:
with contextlib.suppress(Exception):
coord.title = data["axisscale"][0][0][i][1][0][0]
if not isinstance(coord, Coord):
coord = Coord(data=list(range(dat.shape[i])), title="index")
coords.append(coord)
dataset.data = dat
dataset.set_coordset(*list(coords))
dataset.author = author
dataset.name = name
dataset.date = date
# TODO: reshape from fortran/Matlab order to C order
# for 3D or higher datasets ?
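    # 'description' and 'history' are stored as arrays of strings; assigning to
    # NDDataset.history is expected to append a new entry each time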
for i in data["description"][0][0]:
dataset.description += i
for i in data["history"][0][0][0][0]:
dataset.history = i
dataset.history = "Imported by spectrochempy."
return dataset