Source code for spectrochempy.core.readers.read_carroucell

# ======================================================================================
# Copyright (©) 2015-2025 LCS - Laboratoire Catalyse et Spectrochimie, Caen, France.
# CeCILL-B FREE SOFTWARE LICENSE AGREEMENT
# See full LICENSE agreement in the root directory.
# ======================================================================================
"""Provides methods for reading data in a directory after a carroucell experiment."""

# Public names exported by this module.
__all__ = ["read_carroucell"]
# Same list reused under a second module attribute.
# NOTE(review): presumably read by the package to expose these functions as
# dataset methods -- confirm against the code that consumes `__dataset_methods__`.
__dataset_methods__ = __all__

import datetime
import os
import re
import warnings

import numpy as np
import scipy.interpolate
import xlrd

from spectrochempy.application import info_
from spectrochempy.core.dataset.coord import Coord
from spectrochempy.core.readers.importer import Importer
from spectrochempy.core.readers.importer import _importer_method
from spectrochempy.core.readers.read_omnic import read_omnic
from spectrochempy.utils.datetimeutils import UTC
from spectrochempy.utils.docreps import _docstring
from spectrochempy.utils.file import get_directory_name
from spectrochempy.utils.file import get_filenames

# Derive a variant of the shared "Importer.see_also" docstring section that omits
# `read_carroucell`.
# NOTE(review): presumably this is what provides the
# ``%(Importer.see_also.no_read_carroucell)s`` key referenced in the
# `read_carroucell` docstring -- confirm against the `_docstring` helper.
_docstring.delete_params("Importer.see_also", "read_carroucell")


@_docstring.dedent
def read_carroucell(directory=None, **kwargs):
    r"""
    Open :file:`.spa` files in a directory after a :term:`carroucell` experiment.

    All the files belonging to a given sample are gathered in one `NDDataset`
    (sorted by acquisition date), and the resulting `NDDataset`\ s are returned in a
    list sorted by sample number.
    If the file containing the temperature data is present, the temperature is read
    and assigned as a label to each spectrum.

    Parameters
    ----------
    directory : `str`, optional
        If not specified, opens a dialog box.
    %(kwargs)s

    Returns
    -------
    %(Importer.returns)s

    Other Parameters
    ----------------
    spectra : :term:`array-like` of 2 `int` (``min`` , ``max`` ), optional, default: `None`
        The first and last spectrum to be loaded as determined by their number.
        If `None` all spectra are loaded.
    discardbg : `bool`, optional, default: `True`
        If `True` : do not load background (sample #9).
    delta_clocks : `int`, optional, default: 0
        Difference in seconds between the clocks used for spectra and temperature
        acquisition.
        Defined as ``t(thermocouple clock) - t(spectrometer clock)`` .

    See Also
    --------
    %(Importer.see_also.no_read_carroucell)s

    Notes
    -----
    All files are expected to be present in the same directory and their filenames
    are expected to be in the format : :file:`X_samplename_YYY.spa`
    and for the background files : :file:`X_BCKG_YYYBG.spa`
    where ``X`` is the sample holder number and ``YYY`` the spectrum number.

    Examples
    --------
    >>> scp.read_carroucell("irdata/carroucell_samp")
    no temperature file
    [NDDataset: [float64] a.u. (shape: (y:6, x:11098)), NDDataset: ...
    """
    # Force the file filter and the protocol so that the generic importer
    # dispatches the request to the carroucell reader.
    kwargs.update(
        filetypes=["Carroucell files (*.spa)"],
        protocol=["carroucell"],
    )
    return Importer()(directory, **kwargs)
# --------------------------------------------------------------------------------------
# Private methods
# --------------------------------------------------------------------------------------
def _spectrum_number(path):
    """
    Return the spectrum number encoded at the end of a :file:`.spa` file stem.

    The number is read from the leading digits of the part of the stem that follows
    the last underscore, so that both ``..._YYY`` (spectra) and ``..._YYYBG``
    (backgrounds) are understood.

    Parameters
    ----------
    path : path-like object exposing ``stem`` and ``name``
        The file whose name is parsed.

    Returns
    -------
    int
        The spectrum number.

    Raises
    ------
    ValueError
        If the stem has no underscore or no digits after the last underscore.
    """
    _, sep, token = path.stem.rpartition("_")
    match = re.match(r"\d+", token) if sep else None
    if match is None:
        raise ValueError(f"cannot find a spectrum number in file name '{path.name}'")
    return int(match.group())


def _add_temperature_labels(datasets, workbook_path, delta_clocks):
    """
    Read the thermocouple workbook and add the temperature as a label of each spectrum.

    The sheet is scanned from row index 9 onwards; the time is read from column 0
    (format ``%d/%m/%y %H:%M:%S``, taken as UTC) and the temperature from column 4.
    Rows that cannot be parsed are skipped. The temperature of each spectrum is
    obtained by linear interpolation (extrapolation outside the recorded range) and
    appended as an extra column to the ``y`` labels of the datasets, which are
    modified in place.

    Parameters
    ----------
    datasets : list
        The datasets to label. The first label of each spectrum is used as its
        acquisition time.
    workbook_path : str
        Path of the :file:`.xls` file.
    delta_clocks : datetime.timedelta
        ``t(thermocouple clock) - t(spectrometer clock)``.
    """
    book = xlrd.open_workbook(workbook_path)
    sheet = book.sheet_by_index(0)

    # Experiment start and end time (thermocouple clock). The datasets are ordered by
    # file name, not by time, so the bounds are searched over all of them.
    ti = min(ds.y.labels[0][0] for ds in datasets) + delta_clocks
    tf = max(ds.y.labels[-1][0] for ds in datasets) + delta_clocks

    # thermocouple time and T information during the experiment
    times = []
    temperatures = []
    for row in range(9, sheet.nrows):
        try:
            time = datetime.datetime.strptime(
                sheet.cell(row, 0).value,
                "%d/%m/%y %H:%M:%S",
            ).replace(tzinfo=UTC)
            if ti <= time <= tf:
                temperature = sheet.cell(row, 4).value
                times.append(time)
                temperatures.append(temperature)
        except (ValueError, TypeError):
            # best effort: rows that do not hold a usable time stamp are ignored
            continue

    if len(times) < 2:
        # an interpolation needs at least two points
        warnings.warn(
            "not enough temperature data in the time range of the experiment. "
            "The temperature will not be read",
            stacklevel=3,
        )
        return

    # interpolate T = f(timestamp), except for the points outside the recorded range
    # that are extrapolated
    interpolator = scipy.interpolate.interp1d(
        [time.timestamp() for time in times],
        temperatures,
        fill_value="extrapolate",
        assume_sorted=True,
    )

    for ds in datasets:
        # timestamp of spectra for the thermocouple clock
        tstamp_ds = [(label[0] + delta_clocks).timestamp() for label in ds.y.labels]
        T_ds = interpolator(tstamp_ds)
        # one temperature per spectrum, whatever the number of spectra
        newlabels = np.hstack((ds.y.labels, T_ds.reshape((-1, 1))))
        ds.y = Coord(title=ds.y.title, data=ds.y.data, labels=newlabels)


@_importer_method
def _read_carroucell(*args, **kwargs):
    """
    Read the :file:`.spa` files of a carroucell experiment found in a directory.

    Parameters
    ----------
    *args
        Two positional values; the first one is ignored, the second one is the
        directory to read.
    **kwargs
        ``spectra`` (pair of first and last spectrum numbers to keep, default
        `None` : keep all), ``discardbg`` (default `True` : files with ``BCKG`` in
        their name are not read) and ``delta_clocks`` (seconds, default 0). The
        keyword arguments are also forwarded to `get_filenames`.

    Returns
    -------
    `None` if no directory is selected or if no file matches the selection, a single
    dataset if only one sample is found, otherwise a list of datasets sorted by the
    part of their name that precedes the first ``-`` or ``_``.
    """
    _, directory = args
    directory = get_directory_name(directory)

    if not directory:  # pragma: no cover
        # probably cancel has been chosen in the open dialog
        info_("No directory was selected.")
        return None

    spectra = kwargs.get("spectra")
    discardbg = kwargs.get("discardbg", True)
    delta_clocks = datetime.timedelta(seconds=kwargs.get("delta_clocks", 0))

    # get the sorted list of spa files in the directory
    spafiles = sorted(get_filenames(directory, **kwargs)[".spa"])
    spafilespec = [f for f in spafiles if "BCKG" not in f.stem]
    spafileback = [f for f in spafiles if "BCKG" in f.stem]

    # select files
    def prefix(f):
        return f.stem.split("_")[0]

    if spectra is not None:
        first, last = spectra
        spafilespec = [f for f in spafilespec if first <= _spectrum_number(f) <= last]
        spafileback = [f for f in spafileback if first <= _spectrum_number(f) <= last]

    # discard BKG files (a new list is built so that `spafilespec` is left untouched)
    spafiles = spafilespec if discardbg else spafilespec + spafileback

    if not spafiles:
        warnings.warn(
            "no .spa file matches the selection. Nothing will be read",
            stacklevel=2,
        )
        return None

    # merge consecutive files sharing the same prefix
    groups = []
    for f in spafiles:
        key = prefix(f)
        if groups and groups[-1][0] == key:
            groups[-1][1].append(f)
        else:
            groups.append((key, [f]))

    datasets = [
        read_omnic(filelist, sortbydate=True, directory=directory, name=key)
        for key, filelist in groups
    ]

    # Now manage temperature
    Tfiles = sorted(f for f in os.listdir(directory) if f[-4:].lower() == ".xls")
    if not Tfiles:
        info_("no temperature file")
    elif len(Tfiles) > 1:
        warnings.warn(
            "several .xls/.csv files. The temperature will not be read",
            stacklevel=2,
        )
    else:
        _add_temperature_labels(
            datasets,
            os.path.join(directory, Tfiles[0]),
            delta_clocks,
        )

    if len(datasets) == 1:
        return datasets[0]  # a single dataset is returned

    # several datasets returned, sorted by sample #
    return sorted(datasets, key=lambda ds: re.split("-|_", ds.name)[0])