# Source code for spectrochempy.core.readers.read_jcamp

# ======================================================================================
# Copyright (©) 2015-2025 LCS - Laboratoire Catalyse et Spectrochimie, Caen, France.
# CeCILL-B FREE SOFTWARE LICENSE AGREEMENT
# See full LICENSE agreement in the root directory.
# ======================================================================================
"""Module to extend NDDataset with import methods."""

__all__ = ["read_jcamp"]
__dataset_methods__ = __all__

import io
import re
from datetime import datetime

import numpy as np

from spectrochempy.core.dataset.coord import Coord
from spectrochempy.core.readers.importer import Importer
from spectrochempy.core.readers.importer import _importer_method
from spectrochempy.utils.datetimeutils import UTC
from spectrochempy.utils.decorators import deprecated
from spectrochempy.utils.docreps import _docstring

# ======================================================================================
# Public functions
# ======================================================================================
# Remove "read_jcamp" from the shared Importer "see also" docstring template, so that
# read_jcamp's own docstring (which interpolates %(Importer.see_also.no_read_jcamp)s)
# does not list itself.
_docstring.delete_params("Importer.see_also", "read_jcamp")


@_docstring.dedent
def read_jcamp(*paths, **kwargs):
    r"""
    Open Infrared ``JCAMP-DX`` files with extension :file:`.jdx` or :file:`.dx`.

    Limited to AFFN encoding (see :cite:t:`mcdonald:1988`)

    Parameters
    ----------
    %(Importer.parameters)s

    Returns
    -------
    %(Importer.returns)s

    Other Parameters
    ----------------
    %(Importer.other_parameters)s

    See Also
    --------
    %(Importer.see_also.no_read_jcamp)s
    """
    # Force the file-type filter and protocol so the generic Importer dispatches
    # to the JCAMP-DX reader (_read_jdx) regardless of what the caller passed.
    kwargs["filetypes"] = ["JCAMP-DX files (*.jdx *.dx)"]
    kwargs["protocol"] = ["jcamp"]
    importer = Importer()
    return importer(*paths, **kwargs)
@deprecated(replace="read_jcamp")  # FIX: was "read__jcamp" (double underscore typo)
def read_jdx(*args, **kwargs):
    """Deprecated alias of `read_jcamp`."""
    return read_jcamp(*args, **kwargs)


@deprecated(replace="read_jcamp")
def read_dx(*args, **kwargs):  # pragma: no cover
    """Deprecated alias of `read_jcamp`."""
    return read_jcamp(*args, **kwargs)


# ======================================================================================
# private functions
# ======================================================================================
@_importer_method
def _read_jdx(*args, **kwargs):
    """
    Read a JCAMP-DX (AFFN encoded) file into the given dataset.

    Parameters
    ----------
    *args
        ``(dataset, filename)`` supplied by the Importer machinery.
    **kwargs
        ``content`` (bytes, optional): in-memory file content used instead of
        opening ``filename``; ``sortbydate`` (bool, default True): sort spectra
        by acquisition date when several spectra are present.

    Returns
    -------
    dataset
        The populated dataset.

    Raises
    ------
    ValueError
        If mandatory labeled-data records (LDRs) are missing, the DATA TYPE is
        unsupported, or the spectra have inconsistent x axes or units.
    """
    dataset, filename = args
    content = kwargs.get("content")
    sortbydate = kwargs.pop("sortbydate", True)

    if content is not None:
        fid = io.StringIO(content.decode("utf-8"))
    else:
        fid = open(filename)  # noqa: SIM115

    # Read header of outer Block
    # NOTE(review): on a truncated file _readl returns ("EOF", ""), which never
    # matches the loop conditions below, so the "EOF" branches are unreachable
    # and the loop spins forever — behavior kept as-is, to confirm upstream.
    keyword = ""
    while keyword != "##TITLE":
        keyword, text = _readl(fid)
    if keyword != "EOF":
        jdx_title = text
    else:  # pragma: no cover
        raise ValueError("No ##TITLE LR in outer block header")

    while (keyword != "##DATA TYPE") and (keyword != "##DATATYPE"):
        keyword, text = _readl(fid)
    if keyword != "EOF":
        jdx_data_type = text
    else:  # pragma: no cover
        raise ValueError("No ##DATA TYPE LR in outer block header")

    if jdx_data_type == "LINK":
        # Compound (multi-spectrum) file: ##BLOCKS gives the spectrum count.
        while keyword != "##BLOCKS":
            keyword, text = _readl(fid)
        nspec = int(text)
    elif jdx_data_type.replace(" ", "") == "INFRAREDSPECTRUM":
        nspec = 1
    else:
        raise ValueError("DATA TYPE must be LINK or INFRARED SPECTRUM")

    # Create variables
    xaxis = np.array([])
    data = np.array([])
    alltitles, alltimestamps, alldates, xunits, yunits, allorigins = (
        [],
        [],
        [],
        [],
        [],
        [],
    )
    nx, firstx, lastx = (
        np.zeros(nspec, "int"),
        np.zeros(nspec, "float"),
        np.zeros(nspec, "float"),
    )

    # Read the spectra
    for i in range(nspec):
        # Reset variables
        keyword = ""

        # (year, month,...) must be reset at each spectrum because labels "time"
        # and "longdate" are not required in JDX file
        [year, month, day, hour, minute, second] = "", "", "", "", "", ""

        # Read JDX file for spectrum n° i
        while keyword != "##END":
            keyword, text = _readl(fid)
            if keyword in ["##OWNER", "##JCAMP-DX"]:
                continue
            if keyword == "##ORIGIN":
                allorigins.append(text)
            elif keyword == "##TITLE":
                # Add the title of the spectrum in the list alltitles
                alltitles.append(text)
            elif keyword == "##LONGDATE":
                [year, month, day] = text.split("/")
            elif keyword == "##TIME":
                [hour, minute, second] = re.split(r"[:.]", text)
            elif keyword == "##XUNITS":
                xunits.append(text)
            elif keyword == "##YUNITS":
                yunits.append(text)
            elif keyword == "##FIRSTX":
                firstx[i] = float(text)
            elif keyword == "##LASTX":
                lastx[i] = float(text)
            elif keyword == "##XFACTOR":
                xfactor = float(text)
            elif keyword == "##YFACTOR":
                yfactor = float(text)
            elif keyword == "##NPOINTS":
                nx[i] = float(text)
            elif keyword == "##XYDATA":
                # Read the intensities
                allintensities = []
                while keyword != "##END":
                    keyword, text = _readl(fid)
                    # for each line, get all the values except the first one
                    # (first value = wavenumber)
                    intensities = list(filter(None, text.split(" ")[1:]))
                    if len(intensities) > 0:
                        allintensities += intensities
                # convert allintensities into an array
                spectra = np.array([allintensities])
                # "?" deals with missing or out of range intensity values
                spectra[spectra == "?"] = "nan"
                spectra = spectra.astype(np.float32)
                spectra *= yfactor
                # add spectra in "data" matrix
                data = spectra if not data.size else np.concatenate((data, spectra), 0)

        # Check "firstx", "lastx" and "nx"
        if firstx[i] != 0 and lastx[i] != 0 and nx[i] != 0:
            if not xaxis.size:
                # Creation of xaxis if it doesn't exist yet
                xaxis = np.linspace(firstx[0], lastx[0], nx[0])
                xaxis = np.around((xaxis * xfactor), 3)
            else:
                # Check the consistency of xaxis
                if nx[i] - nx[i - 1] != 0:
                    raise ValueError(
                        "Inconsistent data set: number of wavenumber per spectrum should be identical",
                    )
                if firstx[i] - firstx[i - 1] != 0:
                    raise ValueError(
                        "Inconsistent data set: the x axis should start at same value",
                    )
                if lastx[i] - lastx[i - 1] != 0:
                    raise ValueError(
                        "Inconsistent data set: the x axis should end at same value",
                    )
        else:
            # FIX: was ValueError(msg, i + 1) — two positional args — and the
            # message misspelled ##FIRSTX.
            raise ValueError(
                f"##FIRSTX, ##LASTX or ##NPOINTS are unusable in the spectrum n°{i + 1}",
            )

        # Creation of the acquisition date
        if (
            year != ""
            and month != ""
            and day != ""
            and hour != ""
            and minute != ""
            and second != ""
        ):
            date = datetime(
                int(year),
                int(month),
                int(day),
                int(hour),
                int(minute),
                int(second),
                tzinfo=UTC,
            )
            # Store as timestamp in the Coord object; use
            # datetime.fromtimestamp(d, timezone.utc) to convert back.
            timestamp = date.timestamp()
        else:
            # Todo: cases where incomplete date and/or time info
            timestamp = date = None
        alltimestamps.append(timestamp)
        alldates.append(date)

        # Check the consistency of xunits and yunits
        # FIX: both messages were missing the closing parenthesis.
        if i > 0:
            if yunits[i] != yunits[i - 1]:
                raise ValueError(
                    f"##YUNITS should be the same for all spectra (check spectrum n°{i + 1})",
                )
            if xunits[i] != xunits[i - 1]:
                raise ValueError(
                    f"##XUNITS should be the same for all spectra (check spectrum n°{i + 1})",
                )

    # Determine xaxis name ****************************************************
    if xunits[0].strip() == "1/CM":
        axisname = "wavenumbers"
        axisunit = "cm^-1"
    elif xunits[0].strip() == "MICROMETERS":
        axisname = "wavelength"
        axisunit = "um"
    elif xunits[0].strip() == "NANOMETERS":
        axisname = "wavelength"
        axisunit = "nm"
    elif xunits[0].strip() == "SECONDS":
        axisname = "time"
        axisunit = "s"
    elif xunits[0].strip() == "ARBITRARY UNITS":
        axisname = "arbitrary unit"
        axisunit = None
    else:
        axisname = ""
        axisunit = ""
    fid.close()

    dataset.data = data
    dataset.name = jdx_title
    dataset.filename = filename
    if yunits[0].strip() == "ABSORBANCE":
        dataset.units = "absorbance"
        dataset.title = "absorbance"
    elif yunits[0].strip() == "TRANSMITTANCE":
        # TODO: This units not in pint. Add this
        dataset.title = "transmittance"

    # now add coordinates
    _x = Coord(xaxis, title=axisname, units=axisunit)
    if jdx_data_type == "LINK":
        _y = Coord(
            alltimestamps,
            title="acquisition timestamp (GMT)",
            units="s",
            labels=(alldates, alltitles),
        )
        dataset.set_coordset(y=_y, x=_x)
    else:
        _y = Coord()
        dataset.set_coordset(y=_y, x=_x)

    # Set origin, description and history
    if nspec > 1:
        origins = set(allorigins)
        if len(origins) == 1:
            dataset.origin = allorigins[0]
        elif len(origins) > 1:
            # FIX: was `[(o + "; ") for o in set(allorigins)][0][:-2]`, which
            # kept only ONE arbitrary origin; join all distinct origins instead.
            dataset.origin = "; ".join(origins)

    dataset.description = f"Dataset from jdx file: '{jdx_title}'"
    dataset.history = "Imported from jdx file"
    if sortbydate and nspec > 1:
        dataset.sort(dim="x", inplace=True)
        dataset.history = "Sorted by date"
    # Todo: make sure that the lowest index correspond to the largest wavenumber
    #  for compatibility with dataset created by read_omnic:

    # reset modification date to creation date
    dataset._modified = dataset._created

    return dataset


@_importer_method
def _read_dx(*args, **kwargs):  # pragma: no cover
    """Alias of `_read_jdx` for the ``.dx`` extension."""
    return _read_jdx(*args, **kwargs)


def _readl(fid):
    """
    Read one line and split it into a (keyword, text) pair.

    Returns ``("EOF", "")`` at end of file, ``("##END", "")`` for the END
    record, ``(keyword, value)`` for a ``##KEY=value`` labeled-data record,
    and ``("", stripped_line)`` for a plain data line.
    """
    line = fid.readline()
    if not line:
        return "EOF", ""
    line = line.strip(" \n")  # remove newline character
    if line[0:2] == "##":  # if line starts with "##"
        if line[0:5] == "##END":  # END KEYWORD, no text
            keyword = "##END"
            text = ""
        else:  # keyword + text
            # FIX: maxsplit=1 — a plain split("=") raised "too many values to
            # unpack" whenever the value itself contained an "=".
            keyword, text = line.split("=", 1)
    else:
        keyword = ""
        text = line.strip()
    return keyword, text