# ======================================================================================
# Copyright (©) 2015-2025 LCS - Laboratoire Catalyse et Spectrochimie, Caen, France.
# CeCILL-B FREE SOFTWARE LICENSE AGREEMENT
# See full LICENSE agreement in the root directory.
# ======================================================================================
"""Module implementing the `NDDataset` class."""
__all__ = ["NDDataset"]
# import signal
import sys
import textwrap
from contextlib import suppress
from datetime import datetime
from datetime import tzinfo
from zoneinfo import ZoneInfo
from zoneinfo import ZoneInfoNotFoundError
import numpy as np
import traitlets as tr
from tzlocal import get_localzone
from spectrochempy.application import error_
from spectrochempy.application import warning_
from spectrochempy.core.dataset.arraymixins.ndio import NDIO
from spectrochempy.core.dataset.arraymixins.ndmath import NDMath # _set_ufuncs,
from spectrochempy.core.dataset.arraymixins.ndmath import _set_operators
from spectrochempy.core.dataset.arraymixins.ndplot import NDPlot
from spectrochempy.core.dataset.baseobjects.ndarray import DEFAULT_DIM_NAME
from spectrochempy.core.dataset.baseobjects.ndarray import NDArray
from spectrochempy.core.dataset.baseobjects.ndcomplex import NDComplexArray
from spectrochempy.core.dataset.coord import Coord
from spectrochempy.core.dataset.coordset import CoordSet
from spectrochempy.extern.traittypes import Array
from spectrochempy.utils.datetimeutils import utcnow
from spectrochempy.utils.exceptions import SpectroChemPyError
from spectrochempy.utils.optional import import_optional_dependency
from spectrochempy.utils.print import colored_output
from spectrochempy.utils.system import get_user_and_node
# ======================================================================================
# NDDataset class definition
# ======================================================================================
@tr.signature_has_traits
class NDDataset(NDMath, NDIO, NDPlot, NDComplexArray):
r"""
The main N-dimensional dataset class used by `SpectroChemPy`.
The `NDDataset` is the main object use by SpectroChemPy. Like numpy
`~numpy.ndarray`\ 's, `NDDataset` have the capability to be sliced, sorted and
subject to mathematical operations. But, in addition, `NDDataset` may have units,
can be masked and each dimensions can have coordinates also with units. This make
`NDDataset` aware of unit compatibility,
*e.g.,* for binary operation such as additions or subtraction or during the
application of mathematical operations.
In addition or in replacement of numerical data for coordinates,
`NDDataset` can also have labeled coordinates where labels can be different kind of
objects (`str`, `datetime`, `~numpy.ndarray` or other `NDDataset`'s, etc...).
Parameters
----------
data : :term:`array-like`
Data array contained in the object. The data can be a list, a tuple,
a `~numpy.ndarray`, a subclass of `~numpy.ndarray`, another `NDDataset` or a
`Coord` object.
        Any size or shape of data is accepted. If not given, an empty
        `NDDataset` will be created.
        At initialisation, the provided data will be cast to a
        `~numpy.ndarray` if necessary.
        If the provided object already contains a mask or units, these
        elements will be used, if possible, to set those of the created
        object. When possible, the provided data will not be copied but
        passed by reference, so make a copy of the `data` before passing
        it if that is the desired behaviour, or set the `copy` argument
        to `True`.
coordset : `CoordSet` instance, optional
It contains the coordinates for the different dimensions of the `data`.
        If a `CoordSet` is provided, it must specify the `coord` and `labels` for all
dimensions of the `data`. Multiple `coord`'s can be specified in a
`CoordSet` instance for each dimension.
coordunits : `list`, optional, default: `None`
A list of units corresponding to the dimensions in the order of the
coordset.
coordtitles : `list`, optional, default: `None`
        A list of titles corresponding to the dimensions in the order of the
coordset.
**kwargs
Optional keyword parameters (see Other Parameters).

    Other Parameters
----------------
dtype : `str` or `~numpy.dtype`, optional, default: `np.float64`
If specified, the data will be cast to this dtype, else the data
will be cast to float64 or complex128.
dims : `list` of `str`, optional
        If specified, the list must have a length equal to the number of data
        dimensions (`ndim`) and the elements must be
        taken among ``x, y, z, u, v, w or t``. If not specified, the dimension
names are automatically attributed in this order.
name : `str`, optional
A user-friendly name for this object. If not given, the automatic
`id` given at the object creation will be used as a name.
labels : :term:`array-like` of objects, optional
        Labels for the `data`. Labels can be used only for 1D-datasets.
The labels array may have an additional dimension, meaning several
series of labels for the same data.
The given array can be a list, a tuple, a `~numpy.ndarray` , a ndarray-like,
a `NDArray` or any subclass of `NDArray` .
mask : :term:`array-like` of `bool` or `NOMASK` , optional
Mask for the data. The mask array must have the same shape as the
data. The given array can be a list,
        a tuple, or a `~numpy.ndarray` . Each value in the array must be `False`
        where the data are *valid* and `True` when
        they are not (as in numpy masked arrays). If `data` is already a
:class:`~numpy.ma.MaskedArray` , or any
array object (such as a `NDArray` or subclass of it), providing a
`mask` here, will cause the mask from the
masked array to be ignored.
units : `Unit` instance or `str`, optional
Units of the data. If data is a `Quantity` then `units` is set to
the unit of the `data`; if a unit is also
        explicitly provided, an error is raised. Handling of units uses the
`pint <https://pint.readthedocs.org/>`__
package.
timezone : `datetime.tzinfo`, optional
The timezone where the data were created. If not specified, the local timezone
is assumed.
title : `str`, optional
The title of the data dimension. The `title` attribute should not be confused
with the `name` .
The `title` attribute is used for instance for labelling plots of the data.
It is optional but recommended to give a title to each ndarray data.
dlabel : `str`, optional
Alias of `title` .
meta : `dict`-like object, optional
Additional metadata for this object. Must be dict-like but no
further restriction is placed on meta.
author : `str`, optional
        Name(s) of the author(s) of this dataset. By default, the name of the
        user and computer node where this dataset is
        created.
description : `str`, optional
An optional description of the nd-dataset. A shorter alias is `desc` .
origin : `str`, optional
Origin of the data: Name of organization, address, telephone number,
name of individual contributor, etc., as appropriate.
roi : `list`
Region of interest (ROI) limits.
history : `str`, optional
A string to add to the object history.
copy : `bool`, optional
Perform a copy of the passed object. Default is False.

    See Also
--------
Coord : Explicit coordinates object.
CoordSet : Set of coordinates.

    Notes
-----
The underlying array in a `NDDataset` object can be accessed through the
`data` attribute, which will return a conventional `~numpy.ndarray`.
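
    Examples
    --------
    A minimal sketch of typical usage (the values, titles and units below are
    purely illustrative):

    >>> import spectrochempy as scp
    >>> ds = scp.NDDataset([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]], units="absorbance")
    >>> ds.set_coordset(
    ...     x=scp.Coord([100.0, 200.0, 300.0], title="wavenumber", units="cm^-1"),
    ...     y=scp.Coord([0.0, 60.0], title="time", units="s"),
    ... )
    >>> row = ds[0]  # slicing keeps coordinates and units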
"""
# Examples
# --------
# Usage by an end-user
#
# >>> x = scp.NDDataset([1, 2, 3])
# >>> print(x.data) # doctest: +NORMALIZE_WHITESPACE
# [ 1 2 3.]
# """
# coordinates
_coordset = tr.Instance(CoordSet, allow_none=True)
# model data (e.g., for fit)
_modeldata = Array(tr.Float(), allow_none=True)
# some setting for NDDataset
_copy = tr.Bool(False)
_labels_allowed = tr.Bool(False) # no labels for NDDataset
# dataset can be members of a project.
_parent = tr.Instance(
"spectrochempy.core.project.abstractproject.AbstractProject",
allow_none=True,
)
# For the GUI interface
# parameters state
# _state = Dict()
# processed data (for GUI)
# _processeddata = Array(Float(), allow_none=True)
# processed mask (for GUI)
# _processedmask = Union((Bool(), Array(Bool()), Instance(MaskedConstant)))
# baseline data (for GUI)
# _baselinedata = Array(Float(), allow_none=True)
# reference data (for GUI)
# _referencedata = Array(Float(), allow_none=True)
# ranges
# _ranges = Instance(Meta)
# history
_history = tr.List(tr.Tuple(), allow_none=True)
# Dates
_acquisition_date = tr.Instance(datetime, allow_none=True)
_created = tr.Instance(datetime)
_modified = tr.Instance(datetime)
_timezone = tr.Instance(tzinfo, allow_none=True)
# Metadata
_author = tr.Unicode()
_description = tr.Unicode()
_origin = tr.Unicode()
# ----------------------------------------------------------------------------------
# Initialisation
# ----------------------------------------------------------------------------------
def __init__(
self,
data=None,
coordset=None,
coordunits=None,
coordtitles=None,
**kwargs,
):
super().__init__(data, **kwargs)
self._created = utcnow()
self.description = kwargs.pop("description", "")
self.author = kwargs.pop("author", get_user_and_node())
history = kwargs.pop("history", None)
if history is not None:
self.history = history
self._parent = None
# eventually set the coordinates with optional units and title
if isinstance(coordset, CoordSet):
self.set_coordset(**coordset)
else:
if coordset is None:
coordset = [None] * self.ndim
if coordunits is None:
coordunits = [None] * self.ndim
if coordtitles is None:
coordtitles = [None] * self.ndim
_coordset = []
for c, u, t in zip(coordset, coordunits, coordtitles, strict=False):
if not isinstance(c, CoordSet):
coord = Coord(c)
if u is not None:
coord.units = u
if t is not None:
coord.title = t
else:
                    if u:  # pragma: no cover
                        warning_(
                            "units have been set for a CoordSet, "
                            "but they will be ignored "
                            "(units are only defined at the coordinate level)",
                        )
                    if t:  # pragma: no cover
                        warning_(
                            "titles will be ignored as they are only defined at "
                            "the coordinate level",
                        )
coord = c
_coordset.append(coord)
            if _coordset and set(_coordset) != {
                Coord(),
            }:  # if there are no coordinates, do nothing
self.set_coordset(*_coordset)
self._modified = self._created
# ----------------------------------------------------------------------------------
# Special methods
# ----------------------------------------------------------------------------------
def __dir__(self):
# Only these attributes are used for saving dataset
        # WARNING: be careful to keep the present order of the first three elements!
# Needed for save/load operations
return [
# Keep the following order
"dims",
"coordset",
"data",
# From here it is free
"name",
"title",
"mask",
"units",
"meta",
"preferences",
"author",
"description",
"history",
"created",
"modified",
# "acquisition_date",
"origin",
"roi",
"transposed",
"modeldata",
# "processeddata",
# "referencedata",
# "baselinedata",
# "state",
# "ranges",
] + NDIO().__dir__()
def __getitem__(self, items, **kwargs):
saveditems = items
# coordinate selection to test first
if isinstance(items, str):
with suppress(Exception):
return self._coordset[items]
# slicing
new, items = super().__getitem__(items, return_index=True)
if new is None:
return None
if self._coordset is not None:
names = self._coordset.names # all names of the current coordinates
new_coords = self._coordset.copy() # [None] * len(names)
if isinstance(items, np.ndarray):
# probably a fancy indexing
items = (items,)
for i, item in enumerate(items):
# get the corresponding dimension name in the dims list
name = self.dims[i]
# get the corresponding index in the coordinate's names list
idx = names.index(name)
if self._coordset[idx].is_empty:
new_coords[idx] = Coord(None, name=name)
elif not isinstance(self._coordset[idx], CoordSet):
new_coords[idx] = self._coordset[idx][item]
else:
# we must slice all internal coordinates
newc = []
for c in self._coordset[idx]._coords:
newc.append(c[item])
new_coords[idx] = CoordSet(*newc[::-1], name=name)
# we reverse to be sure
# the order will be kept for internal coordinates
new_coords[idx]._default = self._coordset[
idx
]._default # set the same default coord
new_coords[idx]._is_same_dim = self._coordset[idx]._is_same_dim
# elif isinstance(item, (np.ndarray, list)):
# new_coords[idx] = self._coordset[idx][item]
new.set_coordset(*new_coords, keepnames=True)
new.history = f"Slice extracted: ({saveditems})"
return new
def __getattr__(self, item):
# when the attribute was not found
if (
item.startswith("_")
or item
in [
"interface",
"clevels",
"coords",
]
or "_validate" in item
or "_changed" in item
):
# raise an error so that traits, ipython operation and more ...
# will be handled correctly
raise AttributeError
# syntax such as ds.x, ds.y, etc...
if item[0] in self.dims or self._coordset:
# look also properties
attribute = None
index = 0
# print(item)
if len(item) > 2 and item[1] == "_":
attribute = item[1:]
item = item[0]
index = self.dims.index(item)
if self._coordset:
try:
c = self._coordset[item]
if isinstance(c, str) and c in self.dims:
# probably a reference to another coordinate name
c = self._coordset[c]
if c.name in self.dims or c._parent_dim in self.dims:
if attribute is not None:
# get the attribute
return getattr(c, attribute)
return c
raise AttributeError
except Exception as err:
if item in self.dims:
return None
raise err
elif attribute is not None:
if attribute == "size":
# we want the size but there is no coords, get it from the data shape
return self.shape[index]
raise AttributeError(
f"Can not find `{attribute}` when no coordinate is defined",
)
return None
raise AttributeError
def __setattr__(self, key, value):
        # TODO: entering this function in debug stepping mode kills the program;
        #  need to investigate further why!
if key in DEFAULT_DIM_NAME: # syntax such as ds.x, ds.y, etc...
# Note the above test is important to avoid errors with traitlets
# even if it looks redundant with the following
if key in self.dims:
if self._coordset is None:
# we need to create a coordset first
self.set_coordset({self.dims[i]: None for i in range(self.ndim)})
idx = self._coordset.names.index(key)
_coordset = self._coordset
listcoord = False
if isinstance(value, list):
listcoord = all(isinstance(item, Coord) for item in value)
if listcoord:
_coordset[idx] = list(CoordSet(value).to_dict().values())[0]
_coordset[idx].name = key
_coordset[idx]._is_same_dim = True
elif isinstance(value, CoordSet):
if len(value) > 1:
value = CoordSet(value)
_coordset[idx] = list(value.to_dict().values())[0]
_coordset[idx].name = key
_coordset[idx]._is_same_dim = True
elif isinstance(value, Coord):
value.name = key
_coordset[idx] = value
else:
_coordset[idx] = Coord(value, name=key)
_coordset = self._valid_coordset(_coordset)
self._coordset.set(_coordset)
else:
raise AttributeError(f"Coordinate `{key}` is not used.")
else:
# print(key, value)
super().__setattr__(key, value)
def __eq__(self, other, attrs=None):
attrs = self.__dir__()
for attr in (
"filename",
"preferences",
"name",
"author",
"description",
"history",
"created",
"modified",
"origin",
"roi",
"modeldata",
"processeddata",
"baselinedata",
"referencedata",
"state",
"ranges",
):
# These attributes are not used for comparison (comparison based on data and units!)
with suppress(ValueError):
attrs.remove(attr)
return super().__eq__(other, attrs)
    def __hash__(self):
        # all instances of this class share the same base hash, so they can be compared
        return super().__hash__() + hash(self._coordset)
# ----------------------------------------------------------------------------------
# Private methods and properties
# ----------------------------------------------------------------------------------
@tr.default("_coordset")
def _coordset_default(self):
return None
@tr.default("_modeldata")
def _modeldata_default(self):
return None
# @tr.default("_processeddata")
# def _processeddata_default(self):
# return None
#
# @tr.default("_baselinedata")
# def _baselinedata_default(self):
# return None
#
# @tr.default("_referencedata")
# def _referencedata_default(self):
# return None
#
# @tr.default("_ranges")
# def _ranges_default(self):
# ranges = Meta()
# for dim in self.dims:
# ranges[dim] = dict(masks={}, baselines={}, integrals={}, others={})
# return ranges
@tr.default("_timezone")
def _timezone_default(self):
# Return the default timezone (local timezone)
return get_localzone()
# @tr.validate("_created")
# def _created_validate(self, proposal):
# date = proposal["value"]
# if date.tzinfo is not None:
# # make the date utc naive
# date = date.replace(tzinfo=None)
# return date
@tr.validate("_history")
def _history_validate(self, proposal):
history = proposal["value"]
if isinstance(history, list) or history is None:
# reset
self._history = None
return history
# @tr.validate("_modified")
# def _modified_validate(self, proposal):
# date = proposal["value"]
# if date.tzinfo is not None:
# # make the date utc naive
# date = date.replace(tzinfo=None)
# return date
@tr.observe(tr.All)
def _anytrait_changed(self, change):
# ex: change {
# 'owner': object, # The HasTraits instance
# 'new': 6, # The new value
# 'old': 5, # The old value
# 'name': "foo", # The name of the changed trait
# 'type': 'change', # The event type of the notification, usually 'change'
# }
if change["name"] in ["_created", "_modified", "trait_added"]:
return
# all the time -> update modified date
self._modified = utcnow()
return
def _cstr(self):
# Display the metadata of the object and partially the data
out = ""
out += f" name: {self.name}\n"
out += f" author: {self.author}\n"
out += f" created: {self.created}\n"
out += (
f" modified: {self.modified}\n"
if (self._modified - self._created).seconds > 30
else ""
)
wrapper1 = textwrap.TextWrapper(
initial_indent="",
subsequent_indent=" " * 15,
replace_whitespace=True,
width=self._text_width,
)
pars = self.description.strip().splitlines()
if pars:
out += " description: "
desc = ""
if pars:
desc += f"{wrapper1.fill(pars[0])}\n"
for par in pars[1:]:
desc += "{}\n".format(textwrap.indent(par, " " * 15))
# the three escaped null characters are here to facilitate
# the generation of html outputs
desc = f"\0\0\0{desc.rstrip()}\0\0\0\n"
out += desc
if self._history:
pars = self.history
out += " history: "
hist = ""
if pars:
hist += f"{wrapper1.fill(pars[0])}\n"
for par in pars[1:]:
hist += "{}\n".format(textwrap.indent(par, " " * 15))
# the three escaped null characters are here to facilitate
# the generation of html outputs
hist = f"\0\0\0{hist.rstrip()}\0\0\0\n"
out += hist
out += f"{self._str_value().rstrip()}\n"
out += f"{self._str_shape().rstrip()}\n" if self._str_shape() else ""
out += f"{self._str_dims().rstrip()}\n"
if not out.endswith("\n"):
out += "\n"
out += "\n"
if not self._html_output:
return colored_output(out.rstrip())
return out.rstrip()
def _loc2index(self, loc, dim=-1, *, units=None):
# Return the index of a location (label or coordinates) along the dim
# This can work only if `coords` exists.
if self._coordset is None:
raise SpectroChemPyError(
"No coords have been defined. Slicing or selection"
f" by location ({loc}) needs coords definition.",
)
coord = self.coord(dim)
return coord._loc2index(loc, units=units)
def _str_dims(self):
if self.is_empty:
return ""
if len(self.dims) < 1 or not hasattr(self, "_coordset"):
return ""
if not self._coordset or len(self._coordset) < 1:
return ""
self._coordset._html_output = (
self._html_output
) # transfer the html flag if necessary: false by default
txt = self._coordset._cstr()
return txt.rstrip() # remove the trailing '\n'
_repr_dims = _str_dims
    def _dims_update(self, change=None):
        # when notified that coordinate names have been updated
        _ = self.dims  # fire an update
@tr.validate("_coordset")
def _coordset_validate(self, proposal):
coords = proposal["value"]
return self._valid_coordset(coords)
def _valid_coordset(self, coords):
        # used in _coordset_validate and __setattr__
if coords is None:
return None
for k, coord in enumerate(coords):
if (
coord is not None
and not isinstance(coord, CoordSet)
and coord.data is None
):
continue
# For coord to be acceptable, we require at least a NDArray, a NDArray subclass or a CoordSet
if not isinstance(coord, Coord | CoordSet):
if isinstance(coord, NDArray):
coord = coords[k] = Coord(coord)
else:
                    raise TypeError(
                        "Coordinates must be an instance or a subclass of the "
                        f"Coord, NDArray, or CoordSet classes, but an instance of {type(coord)} has been passed",
                    )
if self.dims and coord.name in self.dims:
                # check the validity of the given coordinates in terms of size
                # (if it corresponds to one of the dims)
size = coord.size
if self._implements("NDDataset"):
idx = self._get_dims_index(coord.name)[0] # idx in self.dims
if size != self._data.shape[idx]:
raise ValueError(
f"the size of a coordinates array must be None or be equal"
f" to that of the respective `{coord.name}`"
f" data dimension but coordinate size={size} != data shape[{idx}]="
f"{self._data.shape[idx]}",
)
else:
pass # bypass this checking for any other derived type (should be done in the subclass)
coords._parent = self
return coords
@property
def _dict_dims(self):
_dict = {}
for index, dim in enumerate(self.dims):
if dim not in _dict:
_dict[dim] = {"size": self.shape[index], "coord": getattr(self, dim)}
return _dict
# ----------------------------------------------------------------------------------
# Public methods and property
# ----------------------------------------------------------------------------------
@property
def acquisition_date(self):
"""Acquisition date."""
if self._acquisition_date is not None:
# take the one which has been previously set for this dataset
acq = self._acquisition_date.astimezone(self._timezone)
return acq.isoformat(sep=" ", timespec="seconds")
return None
@acquisition_date.setter
def acquisition_date(self, value):
self._acquisition_date = value
def add_coordset(self, *coords, dims=None, **kwargs):
"""
        Add one or a set of coordinates to the dataset.
Parameters
----------
*coords : iterable
Coordinates object(s).
dims : list
Name of the coordinates.
**kwargs
Optional keyword parameters passed to the coordset.
"""
if not coords and not kwargs:
# reset coordinates
self._coordset = None
return
if self._coordset is None:
# make the whole coordset at once
self._coordset = CoordSet(*coords, dims=dims, **kwargs)
else:
# add one coordinate
self._coordset._append(*coords, **kwargs)
if self._coordset:
# set a notifier to the updated traits of the CoordSet instance
tr.HasTraits.observe(self._coordset, self._dims_update, "_updated")
# force it one time after this initialization
self._coordset._updated = True
@property
def author(self):
"""Creator of the dataset (str)."""
return self._author
@author.setter
def author(self, value):
self._author = value
@property
def history(self):
"""Describes the history of actions made on this array (List of strings)."""
history = []
for date, value in self._history:
date = date.astimezone(self._timezone).isoformat(
sep=" ",
timespec="seconds",
)
value = value[0].capitalize() + value[1:]
history.append(f"{date}> {value}")
return history
@history.setter
def history(self, value):
if value is None:
return
if isinstance(value, list):
# history will be replaced
self._history = []
if len(value) == 0:
return
value = value[0]
date = utcnow()
self._history.append((date, value))
def coord(self, dim="x"):
"""
Return the coordinates along the given dimension.
Parameters
----------
dim : int or str
A dimension index or name, default index = `x` .
If an integer is provided, it is equivalent to the `axis` parameter for numpy array.
Returns
-------
`Coord`
Coordinates along the given axis.
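
        Examples
        --------
        A minimal sketch (coordinate values are illustrative):

        >>> import spectrochempy as scp
        >>> ds = scp.NDDataset([[1.0, 2.0], [3.0, 4.0]])
        >>> ds.set_coordset(x=scp.Coord([10.0, 20.0]), y=scp.Coord([0.0, 1.0]))
        >>> xcoord = ds.coord("x")  # same coordinate as ds.x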
"""
idx = self._get_dims_index(dim)[0] # should generate an error if the
# dimension name is not recognized
if idx is None:
return None
if self._coordset is None:
return None
        # idx is not necessarily the position of the coordinates in the CoordSet;
        # transposition may have taken place, so we need to retrieve the
        # coordinates by name
name = self.dims[idx]
if name in self._coordset.names:
idx = self._coordset.names.index(name)
return self._coordset[idx]
error_(f"could not find this dimenson name: `{name}`")
return None
@property
def coordset(self):
"""
`CoordSet` instance.
Contains the coordinates of the various dimensions of the dataset.
        It's a readonly property. Use `set_coordset` to change one or more coordinates at once.
"""
if self._coordset and all(c.is_empty for c in self._coordset):
# all coordinates are empty, this is equivalent to None for the coordset
return None
return self._coordset
@coordset.setter
def coordset(self, coords):
if isinstance(coords, CoordSet):
self.set_coordset(**coords)
else:
self.set_coordset(coords)
@property
def coordnames(self):
"""
List of the `Coord` names.
Read only property.
"""
if self._coordset is not None:
return self._coordset.names
return None
@property
def coordtitles(self):
"""
List of the `Coord` titles.
        Read only property. Use `set_coordtitles` to set titles if needed.
"""
if self._coordset is not None:
return self._coordset.titles
return None
@property
def coordunits(self):
"""
List of the `Coord` units.
        Read only property. Use `set_coordunits` to set units if needed.
"""
if self._coordset is not None:
return self._coordset.units
return None
@property
def created(self):
"""Creation date object (Datetime)."""
created = self._created.astimezone(self._timezone)
return created.isoformat(sep=" ", timespec="seconds")
@property
def data(self):
"""
The `data` array.
If there is no data but labels, then the labels are returned instead of data.
"""
return super().data
@data.setter
def data(self, data):
# as we can't write super().data = data, we call _set_data
# see comment in the data.setter of NDArray
super()._set_data(data)
def delete_coordset(self):
"""Delete all coordinate settings."""
self._coordset = None
# ...........................................................................................................
@property
def description(self):
"""Provides a description of the underlying data (str)."""
return self._description
comment = description
comment.__doc__ = """Provides a comment (Alias to the description attribute)."""
# ..........................................................................
@description.setter
def description(self, value):
self._description = value
@property
def local_timezone(self):
"""Return the local timezone."""
return str(get_localzone())
@property
def modeldata(self):
"""
        `~numpy.ndarray` - model data.

        Data optionally generated by modelling of the data.
"""
return self._modeldata
@modeldata.setter
def modeldata(self, data):
self._modeldata = data
@property
def modified(self):
"""
Date of modification (readonly property).
Returns
-------
str
Date of modification in isoformat.
"""
modified = self._modified.astimezone(self._timezone)
return modified.isoformat(sep=" ", timespec="seconds")
@property
def origin(self):
"""
Origin of the data.
e.g. spectrometer or software
"""
return self._origin
@origin.setter
def origin(self, value):
self._origin = value
@property
def parent(self):
"""
`Project` instance.
The parent project of the dataset.
"""
return self._parent
@parent.setter
def parent(self, value):
if self._parent is not None:
            # A parent project already exists for this dataset, but the
            # entered value gives a different parent. This is not allowed,
            # as it can produce unpredictable results. We will first remove it
            # from the current project.
self._parent.remove_dataset(self.name)
self._parent = value
def set_coordset(self, *args, **kwargs):
"""
Set one or more coordinates at once.
Parameters
----------
*args : `Coord` or `CoordSet`
One or more coordinates.
**kwargs
Optional keyword parameters passed to the coordset.
Warnings
--------
        This method replaces all existing coordinates.
See Also
--------
        add_coordset : Add one or a set of coordinates to the dataset.
set_coordtitles : Set titles of the one or more coordinates.
set_coordunits : Set units of the one or more coordinates.
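
        Examples
        --------
        A minimal sketch (values, titles and units are illustrative):

        >>> import spectrochempy as scp
        >>> ds = scp.NDDataset([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
        >>> ds.set_coordset(
        ...     x=scp.Coord([10.0, 20.0, 30.0], title="frequency", units="Hz"),
        ...     y=scp.Coord([0.0, 1.0], title="time", units="s"),
        ... )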
"""
self._coordset = None
self.add_coordset(*args, dims=self.dims, **kwargs)
def set_coordtitles(self, *args, **kwargs):
"""Set titles of the one or more coordinates."""
self._coordset.set_titles(*args, **kwargs)
def set_coordunits(self, *args, **kwargs):
"""Set units of the one or more coordinates."""
self._coordset.set_units(*args, **kwargs)
def sort(self, **kwargs):
"""
Return the dataset sorted along a given dimension.
        By default, it is the last dimension (axis=-1), using the numeric or label values.
Parameters
----------
dim : str or int, optional, default=-1
Dimension index or name along which to sort.
pos : int , optional
            If labels are multidimensional, allows sorting on a given
            row of labels: labels[pos]. Experimental: not yet checked.
by : str among ['value', 'label'], optional, default=`value`
Indicate if the sorting is following the order of labels or
numeric coord values.
descend : `bool` , optional, default=`False`
            If True, the dataset is sorted in descending order. Default is False, except when coordinates
            are reversed.
inplace : bool, optional, default=`False`
            Flag to say whether the method returns a new object (default)
            or not (inplace=True).
Returns
-------
`NDDataset`
Sorted dataset.
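
        Examples
        --------
        A minimal sketch (coordinate values are illustrative):

        >>> import spectrochempy as scp
        >>> ds = scp.NDDataset([[1.0, 2.0], [3.0, 4.0]])
        >>> ds.set_coordset(x=scp.Coord([20.0, 10.0]), y=scp.Coord([0.0, 1.0]))
        >>> sorted_ds = ds.sort(dim="x")  # sorted by ascending x coordinate values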
"""
inplace = kwargs.get("inplace", False)
new = self.copy() if not inplace else self
# parameter for selecting the level of labels (default None or 0)
pos = kwargs.pop("pos", None)
# parameter to say if selection is done by values or by labels
by = kwargs.pop("by", "value")
# determine which axis is sorted (dims or axis can be passed in kwargs)
# it will return a tuple with axis and dim
axis, dim = self.get_axis(**kwargs)
if axis is None:
axis, dim = self.get_axis(axis=0)
        # get the corresponding coordinates (remember their order can be different
        # from the order of the dimensions in dims, so we cannot just take the
        # coord from the index)
multi = getattr(self, dim) # get the coordinate using the syntax such as self.x
coord = multi.default
# sort on the default coordinate (in case we have multicoordinates)
descend = kwargs.pop("descend", None)
if descend is None:
            # when not specified, default is False (except for reversed coordinates)
descend = coord.reversed
# import warnings
# warnings.simplefilter("error")
indexes = []
for i in range(self.ndim):
if i == axis:
if not coord.has_data:
                    # sometimes we have only labels for Coord objects;
                    # in this case, we sort labels if they exist!
if coord.is_labeled:
by = "label"
else:
                        # nothing to do for sorting;
                        # return unchanged dataset
return new
args = coord._argsort(by=by, pos=pos, descend=descend)
setattr(new, dim, multi[args])
                # sort all coordinates in case of multicoordinates
indexes.append(args)
else:
indexes.append(slice(None))
new._data = new._data[tuple(indexes)]
if new.is_masked:
new._mask = new._mask[tuple(indexes)]
return new
def squeeze(self, *dims, inplace=False):
"""
Remove single-dimensional entries from the shape of a NDDataset.
Parameters
----------
*dims : None or int or tuple of ints, optional
Selects a subset of the single-dimensional entries in the
shape. If a dimension (dim) is selected with shape entry greater than
one, an error is raised.
inplace : bool, optional, default=`False`
            Flag to say whether the method returns a new object (default)
            or not (inplace=True).
Returns
-------
`NDDataset`
The input array, but with all or a subset of the
dimensions of length 1 removed.
Raises
------
ValueError
If `dim` is not `None` , and the dimension being squeezed is not
of length 1.
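
        Examples
        --------
        A minimal sketch:

        >>> import spectrochempy as scp
        >>> ds = scp.NDDataset([[1.0, 2.0, 3.0]])  # shape (1, 3)
        >>> squeezed = ds.squeeze()  # shape (3,)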
"""
# make a copy of the original dims
old = self.dims[:]
# squeeze the data and determine which axis must be squeezed
new, axis = super().squeeze(*dims, inplace=inplace, return_axis=True)
if axis is not None and new._coordset is not None:
# if there are coordinates they have to be squeezed as well (remove
# coordinate for the squeezed axis)
for i in axis:
dim = old[i]
del new._coordset[dim]
return new
def atleast_2d(self, inplace=False):
"""
Expand the shape of an array to make it at least 2D.
Parameters
----------
inplace : bool, optional, default=`False`
            Flag to say whether the method returns a new object (default)
            or not (inplace=True).
Returns
-------
`NDDataset`
The input array, but with dimensions increased.
See Also
--------
squeeze : The inverse operation, removing singleton dimensions.
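
        Examples
        --------
        A minimal sketch:

        >>> import spectrochempy as scp
        >>> ds = scp.NDDataset([1.0, 2.0, 3.0])  # shape (3,)
        >>> ds2d = ds.atleast_2d()  # shape (1, 3)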
"""
new = self.copy() if not inplace else self
coordset = self.coordset
# mask = self.mask
if self.ndim == 0:
new._data = self._data[np.newaxis, np.newaxis]
new._mask = self._mask[np.newaxis, np.newaxis]
new.dims = ["v", "u"]
new.set_coordset(u=None, v=None)
elif self.ndim == 1:
xdim = new.dims[0]
xcoord = coordset[0] if coordset is not None else None
new._data = self._data[np.newaxis]
new._mask = self._mask[np.newaxis]
new.dims = ["u", xdim]
# new.set_coordset(x=coordset[0] if coordset is not None else None, u=None)
new.set_coordset({xdim: xcoord, "u": None})
return new
def swapdims(self, dim1, dim2, inplace=False):
"""
Interchange two dimensions of a NDDataset.
Parameters
----------
dim1 : int
First axis.
dim2 : int
Second axis.
inplace : bool, optional, default=`False`
            Flag to say whether the method returns a new object (default)
            or not (inplace=True).
Returns
-------
`NDDataset`
            Swapped dataset.
See Also
--------
transpose : Transpose a dataset.
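
        Examples
        --------
        A minimal sketch:

        >>> import spectrochempy as scp
        >>> ds = scp.NDDataset([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])  # shape (2, 3)
        >>> swapped = ds.swapdims(0, 1)  # shape (3, 2)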
"""
new = super().swapdims(dim1, dim2, inplace=inplace)
new.history = f"Data swapped between dims {dim1} and {dim2}"
return new
@property
def T(self):
"""
Transposed `NDDataset` .
The same object is returned if `ndim` is less than 2.
"""
return self.transpose()
def take(self, indices, **kwargs):
"""
Take elements from an array.
Returns
-------
`NDDataset`
A sub dataset defined by the input indices.
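
        Examples
        --------
        A minimal sketch (assuming the `dim` keyword is accepted here, as for
        the other methods of this class):

        >>> import spectrochempy as scp
        >>> ds = scp.NDDataset([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
        >>> sub = ds.take([0, 2], dim="x")  # keep the first and last columns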
"""
# handle the various syntax to pass the axis
dims = self._get_dims_from_args(**kwargs)
axis = self._get_dims_index(dims)
axis = axis[0] if axis else None
# indices = indices.tolist()
if axis is None:
# just do a fancy indexing
return self[indices]
if axis < 0:
axis = self.ndim + axis
index = tuple(
[...] + [indices] + [slice(None) for i in range(self.ndim - 1 - axis)],
)
return self[index]
@property
def timezone(self):
"""
Return the timezone information.
        A timezone's offset refers to how many hours the timezone
        is from Coordinated Universal Time (UTC).

        In spectrochempy, all datetimes are stored in UTC, so conversion
        must be done when displaying these datetimes, using tzinfo.
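
        A minimal sketch of setting it (any valid IANA timezone name works):

        >>> import spectrochempy as scp
        >>> ds = scp.NDDataset([1.0, 2.0])
        >>> ds.timezone = "Europe/Paris"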
"""
return str(self._timezone)
@timezone.setter
def timezone(self, val):
try:
self._timezone = ZoneInfo(val)
except ZoneInfoNotFoundError as e:
            raise ZoneInfoNotFoundError(
                "You can get a list of valid timezones at "
                "https://en.wikipedia.org/wiki/List_of_tz_database_time_zones ",
            ) from e
def to_array(self):
"""
Return a numpy masked array.
Other NDDataset attributes are lost.
Returns
-------
`~numpy.ndarray`
The numpy masked array from the NDDataset data.
Examples
--------
>>> dataset = scp.read('wodger.spg')
>>> a = scp.to_array(dataset)
equivalent to:
>>> a = np.ma.array(dataset)
or
>>> a = dataset.masked_data
"""
return np.ma.array(self)
def to_xarray(self):
"""
Convert a NDDataset instance to an `~xarray.DataArray` object.
Warning: the xarray library must be available.
Returns
-------
object
            An `xarray.DataArray` object.
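
        Examples
        --------
        A minimal sketch, assuming the optional ``xarray`` dependency is
        installed (titles are given so that the DataArray coordinates get
        distinct names):

        >>> import spectrochempy as scp
        >>> ds = scp.NDDataset([[1.0, 2.0], [3.0, 4.0]])
        >>> ds.set_coordset(
        ...     x=scp.Coord([10.0, 20.0], title="frequency"),
        ...     y=scp.Coord([0.0, 1.0], title="time"),
        ... )
        >>> da = ds.to_xarray()  # returns None if xarray is not available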
"""
# Information about DataArray from the DataArray docstring
#
# Attributes
# ----------
# dims: tuple
# Dimension names associated with this array.
# values: np.ndarray
# Access or modify DataArray values as a numpy array.
# coords: dict-like
# Dictionary of DataArray objects that label values along each dimension.
# name: str or None
# Name of this array.
# attrs: OrderedDict
# Dictionary for holding arbitrary metadata.
# Init docstring
#
# Parameters
# ----------
# data: array_like
# Values for this array. Must be an `numpy.ndarray` , ndarray like,
# or castable to an `~numpy.ndarray` .
# coords: sequence or dict of array_like objects, optional
# Coordinates (tick labels) to use for indexing along each dimension.
# If dict-like, should be a mapping from dimension names to the
# corresponding coordinates. If sequence-like, should be a sequence
# of tuples where the first element is the dimension name and the
# second element is the corresponding coordinate array_like object.
# dims: str or sequence of str, optional
# Name(s) of the data dimension(s). Must be either a string (only
# for 1D data) or a sequence of strings with length equal to the
# number of dimensions. If this argument is omitted, dimension names
# are taken from `coords` (if possible) and otherwise default to
# `['dim_0', ... 'dim_n']` .
# name: str or None, optional
# Name of this array.
# attrs: dict_like or None, optional
# Attributes to assign to the new instance. By default, an empty
# attribute dictionary is initialized.
# encoding: dict_like or None, optional
# Dictionary specifying how to encode this array's data into a
# serialized format like netCDF4. Currently used keys (for netCDF)
# include '_FillValue', 'scale_factor', 'add_offset', 'dtype',
        # 'units' and 'calendar' (the latter two only for datetime arrays).
# Unrecognized keys are ignored.
xr = import_optional_dependency("xarray")
if xr is None:
return None
x, y = self.x, self.y
tx = x.title
if y:
ty = y.title
da = xr.DataArray(
np.array(self.data, dtype=np.float64),
coords=[(ty, y.data), (tx, x.data)],
)
da.attrs["units"] = self.units
else:
da = xr.DataArray(
np.array(self.data, dtype=np.float64),
coords=[(tx, x.data)],
)
da.attrs["units"] = self.units
da.attrs["title"] = self.title
return da
def transpose(self, *dims, inplace=False):
"""
Permute the dimensions of a NDDataset.
Parameters
----------
*dims : sequence of dimension indexes or names, optional
By default, reverse the dimensions, otherwise permute the dimensions
according to the values given.
inplace : bool, optional, default=`False`
            Flag to say whether the method returns a new object (default)
            or not (inplace=True).
Returns
-------
NDDataset
Transposed NDDataset.
See Also
--------
swapdims : Interchange two dimensions of a NDDataset.
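
        Examples
        --------
        A minimal sketch:

        >>> import spectrochempy as scp
        >>> ds = scp.NDDataset([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])  # shape (2, 3)
        >>> tds = ds.transpose()  # shape (3, 2); same as ds.T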
"""
new = super().transpose(*dims, inplace=inplace)
new.history = (
f"Data transposed between dims: {dims}" if dims else "Data transposed"
)
return new
# # ----------------------------------------------------------------------------------
# # DASH GUI options (Work in Progress - not used for now)
# # ----------------------------------------------------------------------------------
# #
# # TODO: refactor the spectrochempy preference system to have a common basis
#
#
# @property
# def ranges(self):
# return self._ranges
#
# @ranges.setter
# def ranges(self, value):
# self._ranges = value
#
# @property
# def state(self):
# """
# State of the controller window for this dataset.
# """
# return self._state
#
# @state.setter
# def state(self, val):
# self._state = val
#
# @property
# def processeddata(self):
# """
    #     Data after processing (optionally used).
# """
# return self._processeddata
#
# @processeddata.setter
# def processeddata(self, val):
# self._processeddata = val
#
# @property
# def processedmask(self):
# """
# Mask for the optional processed data.
# """
# return self._processedmask
#
# @processedmask.setter
# def processedmask(self, val):
# self._processedmask = val
#
# @property
# def baselinedata(self):
# """
# Data for an optional baseline.
# """
# return self._baselinedata
#
# @baselinedata.setter
# def baselinedata(self, val):
# self._baselinedata = val
#
# @property
# def referencedata(self):
# """
    #     Data for an optional reference spectrum.
# """
# return self._referencedata
#
# @referencedata.setter
# def referencedata(self, val):
# self._referencedata = val
# ======================================================================================
# module function
# ======================================================================================
# make some NDDataset operations accessible from the spectrochempy API
thismodule = sys.modules[__name__]
api_funcs = [
"sort",
"copy",
"squeeze",
"swapdims",
"transpose",
"to_array",
"to_xarray",
"take",
"set_complex",
"set_quaternion",
"set_hypercomplex",
"component",
"to",
"to_base_units",
"to_reduced_units",
"ito",
"ito_base_units",
"ito_reduced_units",
"is_units_compatible",
"remove_masks",
]
for funcname in api_funcs:
setattr(thismodule, funcname, getattr(NDDataset, funcname))
__all__.append(funcname)
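
# After this loop, the listed methods are also usable as module-level functions.
# A sketch of the equivalence (assuming these names are re-exported by the
# top-level spectrochempy API, as `__all__` suggests):
#   >>> import spectrochempy as scp
#   >>> ds = scp.NDDataset([[1.0, 2.0], [3.0, 4.0]])
#   >>> tds = scp.transpose(ds)  # equivalent to ds.transpose()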
# import also npy functions # TODO: this will be changed with __array_functions__
from spectrochempy.processing.transformation.npy import dot
NDDataset.dot = dot
# ======================================================================================
# Set the operators
# ======================================================================================
_set_operators(NDDataset, priority=100000)