Source code for spectrochempy.core.readers.importer

# -*- coding: utf-8 -*-
# ======================================================================================
# Copyright (©) 2015-2023 LCS - Laboratoire Catalyse et Spectrochimie, Caen, France.
# CeCILL-B FREE SOFTWARE LICENSE AGREEMENT
# See full LICENSE agreement in the root directory.
# ======================================================================================
"""
This module defines a generic class to import directories, files and contents.
"""
__all__ = ["read", "read_dir"]  # , "read_remote"]
__dataset_methods__ = __all__

import io
import re
from warnings import warn
from zipfile import ZipFile

import requests
import yaml
from traitlets import Dict, HasTraits, List, Type, Unicode

from spectrochempy.application import info_, warning_
from spectrochempy.utils.docstrings import _docstring
from spectrochempy.utils.exceptions import DimensionsCompatibilityError, ProtocolError
from spectrochempy.utils.file import (
    check_filename_to_open,
    get_directory_name,
    get_filenames,
    pathclean,
)

FILETYPES = [
    ("scp", "SpectroChemPy files (*.scp)"),
    ("omnic", "Nicolet OMNIC files and series (*.spa *.spg *.srs)"),
    ("soc", "Surface Optics Corp. (*.ddr *.hdr *.sdr)"),
    ("labspec", "LABSPEC exported files (*.txt)"),
    ("opus", "Bruker OPUS files (*.[0-9]*)"),
    (
        "topspin",
        "Bruker TOPSPIN fid or series or processed data files "
        "(fid ser 1[r|i] 2[r|i]* 3[r|i]*)",
    ),
    ("matlab", "MATLAB files (*.mat)"),
    ("dso", "Data Set Object files (*.dso)"),
    ("jcamp", "JCAMP-DX files (*.jdx *.dx)"),
    ("csv", "CSV files (*.csv)"),
    ("excel", "Microsoft Excel files (*.xls)"),
    ("zip", "Compressed folder of data files (*.zip)"),
    ("quadera", "Quadera ascii files (*.asc)"),
    ("carroucell", "Carroucell files (*spa)"),
    ("galactic", "GRAMS/Thermo Galactic files (*.spc)"),
    ("wire", "Renishaw WiRE files (*.wdf)"),
    #  ('all', 'All files (*.*)')
]
ALIAS = [
    ("spg", "omnic"),
    ("spa", "omnic"),
    ("ddr", "soc"),
    ("hdr", "soc"),
    ("sdr", "soc"),
    ("spc", "galactic"),
    ("srs", "omnic"),
    ("mat", "matlab"),
    ("txt", "labspec"),
    ("jdx", "jcamp"),
    ("dx", "jcamp"),
    ("xls", "excel"),
    ("asc", "quadera"),
    ("wdf", "wire"),
]

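
# Illustrative note (not executed): a filename extension is tied to a reading protocol
# either directly (when it matches a FILETYPES key) or through ALIAS, e.g.:
#
#   dict(ALIAS)["spg"]        # -> "omnic"
#   dict(FILETYPES)["omnic"]  # -> "Nicolet OMNIC files and series (*.spa *.spg *.srs)"
#
# The Importer below uses these mappings to validate extensions and to honour the
# `protocol` keyword before dispatching to the matching registered reader.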

# --------------------------------------------------------------------------------------
class Importer(HasTraits):
    # Private Importer class

    objtype = Type()
    datasets = List()
    files = Dict()
    default_key = Unicode()
    protocol = Unicode()

    protocols = Dict()
    filetypes = Dict()

    def __init__(self):
        super().__init__()

        self.filetypes = dict(FILETYPES)
        temp = list(zip(*FILETYPES))
        temp.reverse()
        self.protocols = dict(zip(*temp))
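        # NOTE: `filetypes` maps a protocol key to its file-type description, while
        # `protocols` is the reverse mapping (description -> key) obtained by
        # swapping the two columns of FILETYPES before rebuilding the dict.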

        #  add alias

        self.alias = dict(ALIAS)

    def __call__(self, *args, **kwargs):
        self.datasets = []
        self.default_key = kwargs.pop("default_key", ".scp")

        if "merge" not in kwargs.keys():
            # if merge is not specified but the args are provided as a single list,
            # then we are supposed to merge the datasets. If merge is specified,
            # it has priority.
            # This is not useful for 1D datasets, as they are merged automatically
            # when they are compatible.
            if args and len(args) == 1 and isinstance(args[0], (list, tuple)):
                kwargs["merge"] = True

        args, kwargs = self._setup_objtype(*args, **kwargs)
        res = check_filename_to_open(*args, **kwargs)
        if res:
            # Normal return
            self.files = res
        else:
            # Cancel in dialog!
            return None

        for key in self.files.keys():
            # particular case of carroucell files
            if key == "" and kwargs.get("protocol") == ["carroucell"]:
                key = ".carroucell"
                self.files = {".carroucell": self.files[""]}

            # particular case of topspin files
            elif key == "" and kwargs.get("protocol") == ["topspin"]:
                key = ".topspin"
                self.files = {".topspin": self.files[""]}

            if key == "frombytes":
                # here we need to read contents
                for filename, content in self.files[key].items():
                    files_ = check_filename_to_open(filename)
                    kwargs["content"] = content
                    key_ = list(files_.keys())[0]
                    self._switch_protocol(key_, files_, **kwargs)
                if len(self.datasets) > 1:
                    self.datasets = self._do_merge(self.datasets, **kwargs)

            elif key and key[1:] not in list(zip(*FILETYPES))[0] + list(zip(*ALIAS))[0]:
                raise TypeError(f"Filetype `{key}` is unknown in spectrochempy")
            else:
                # here files are read locally from the disk, or remotely, using filenames
                self._switch_protocol(key, self.files, **kwargs)

        # now we reset the preferences for these newly loaded datasets
        if len(self.datasets) > 0:
            if all(nd is None for nd in self.datasets):
                return None

            try:
                prefs = self.datasets[0].preferences
                prefs.reset()
            except (FileNotFoundError, AttributeError):
                pass
        else:
            return None

        if len(self.datasets) == 1:
            nd = self.datasets[0]  # a single dataset is returned
            name = kwargs.pop("name", None)
            if name:
                nd.name = name
            return nd

        else:
            nds = self.datasets
            names = kwargs.pop("names", None)
            if names and len(names) == len(nds):
                for nd, name in zip(nds, names):
                    nd.name = name
            elif names and len(names) != len(nds):
                warn(
                    "the `names` list and the list of datasets have different lengths - names not applied"
                )
            return sorted(
                nds, key=str
            )  # return a sorted list (sorted according to their string representation)

    def _setup_objtype(self, *args, **kwargs):
        # check if the first argument is an instance of NDDataset or Project

        args = list(args)
        if (
            args
            and hasattr(args[0], "_implements")
            and args[0]._implements() in ["NDDataset"]
        ):
            # the first arg is an instance of NDDataset
            object = args.pop(0)
            self.objtype = type(object)

        else:
            # by default returned objtype is NDDataset (import here to avoid circular import)
            from spectrochempy.core.dataset.nddataset import NDDataset

            self.objtype = kwargs.pop("objtype", NDDataset)

        return args, kwargs
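
    # NOTE: accepting an optional leading NDDataset argument above is what lets the
    # readers be used both as API functions (e.g. scp.read(...)) and as dataset
    # methods, where the bound instance is passed first and sets `objtype`.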

    def _switch_protocol(self, key, files, **kwargs):
        protocol = kwargs.get("protocol", None)
        if protocol is not None and protocol != "ALL":
            if not isinstance(protocol, list):
                protocol = [protocol]
            if key and key[1:] not in protocol and self.alias[key[1:]] not in protocol:
                return

        datasets = []
        files[key] = sorted(files[key])  # sort the files according to their names
        for filename in files[key]:
            read_ = getattr(self, f"_read_{key[1:]}")

            dataset = None
            try:
                # read locally or using url if filename is an url
                dataset = read_(self.objtype(), filename, **kwargs)

            except (FileNotFoundError, OSError) as exc:
                # the file was not found locally.
                # if it is an url, or if only local files are allowed, we raise an error
                local_only = kwargs.get("local_only", False)
                if _is_url(filename) or local_only:
                    raise FileNotFoundError from exc

                # else, we try on github
                try:
                    # Try to get the file from github
                    kwargs["read_method"] = read_
                    info_(
                        "File/directory not found locally: Attempt to download it from "
                        "the GitHub repository `spectrochempy_data`..."
                    )
                    dataset = _read_remote(self.objtype(), filename, **kwargs)

                except FileNotFoundError as exc:
                    raise FileNotFoundError from exc

                except Exception as e:
                    warning_(str(e))

            except Exception as e:
                warning_(str(e))

            if dataset is not None:
                if not isinstance(dataset, list):
                    datasets.append(dataset)
                else:
                    datasets.extend(dataset)

        if len(datasets) > 1:
            datasets = self._do_merge(datasets, **kwargs)
            if kwargs.get("merge", False):
                datasets[0].name = pathclean(filename).stem
                datasets[0].filename = pathclean(filename)

        self.datasets.extend(datasets)

    def _do_merge(self, datasets, **kwargs):
        # merge several datasets into a single one when requested, or automatically
        # when they form a homogeneous set of 1D spectra
        merged = kwargs.get("merge", False)
        shapes = list({nd.shape if hasattr(nd, "shape") else None for nd in datasets})
        if len(shapes) == 1 and None not in shapes:
            # homogeneous set of files
            # we can merge them if they are 1D spectra
            if len(shapes[0]) == 1 or shapes[0][0] == 1:
                merged = kwargs.get("merge", True)  # priority to the keyword setting
        else:
            # not homogeneous
            merged = kwargs.get("merge", False)

        if merged:
            # Try to stack the dataset into a single one
            try:
                if datasets[0].ndim == 1:
                    dataset = self.objtype.stack(datasets)
                    dataset.history = "Stacked from several files"
                else:
                    dataset = self.objtype.concatenate(datasets, axis=0)
                    dataset.history = "Merged from several files"

                if dataset.coordset is not None and kwargs.pop("sortbydate", True):
                    dataset.sort(dim=0, inplace=True)
                    #  dataset.history = "Sorted"  (this not always by date:
                    #  actually for now it is by value which can be a date or not)
                datasets = [dataset]

            except DimensionsCompatibilityError as e:
                warn(str(e))  # return only the list

        return datasets

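# Note on the merging rule implemented by `_do_merge` above (illustrative figures):
# a homogeneous set of 1D spectra, e.g. three files each yielding a (1, 2567) dataset,
# is stacked into a single (3, 2567) dataset unless `merge=False` is passed explicitly,
# whereas files yielding different shapes are returned as a plain list of datasets.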

def _importer_method(func):
    # Decorator to define a given read function as belonging to Importer
    setattr(Importer, func.__name__, staticmethod(func))
    return func

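
# A minimal sketch (hypothetical name, for illustration only) of how a format-specific
# reader registers itself on Importer through the decorator above. `_switch_protocol`
# looks readers up as `_read_<extension>`, so the suffix must match an extension
# declared in FILETYPES or ALIAS. Actual readers follow the same
# (dataset, filename, **kwargs) signature:
#
#   @_importer_method
#   def _read_myext(*args, **kwargs):
#       dataset, filename = args
#       fid, kwargs = _openfid(filename, **kwargs)  # file-like object (local or remote)
#       ...                                         # parse `fid` and fill `dataset`
#       fid.close()
#       return dataset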

# --------------------------------------------------------------------------------------
# Public Generic Read function
# --------------------------------------------------------------------------------------

_docstring.get_sections(
    """
See Also
--------
read : Generic reader inferring protocol from the filename extension.
read_zip : Read Zip archives (containing spectrochempy readable files)
read_dir : Read an entire directory.
read_opus : Read OPUS spectra.
read_labspec : Read Raman LABSPEC spectra (:file:`.txt`\ ).
read_omnic : Read Omnic spectra (:file:`.spa`\ , :file:`.spg`\ , :file:`.srs`\ ).
read_soc : Read Surface Optics Corp. files (:file:`.ddr`\ , :file:`.hdr` or :file:`.sdr`\ ).
read_galactic : Read Galactic files (:file:`.spc`\ ).
read_quadera : Read a Pfeiffer Vacuum QUADERA mass spectrometer software file.
read_topspin : Read TopSpin Bruker NMR spectra.
read_csv : Read CSV files (:file:`.csv`\ ).
read_jcamp : Read Infrared JCAMP-DX files (:file:`.jdx`\ , :file:`.dx`\ ).
read_matlab : Read Matlab files (:file:`.mat`\ , :file:`.dso`\ ).
read_carroucell : Read files in a directory after a carroucell experiment.
read_wire : Read Renishaw WiRE files (:file:`.wdf`\ ).
""",
    sections=["See Also"],
    base="Importer",
)

_docstring.delete_params("Importer.see_also", "read")


@_docstring.dedent
def read(*paths, **kwargs):
    """
    Generic read method.

    This method is generally able to load experimental files based on extensions.

    Parameters
    ----------
    *paths : `str`, `~pathlib.Path` objects or valid urls, optional
        The data source(s) can be specified by the name or a list of names
        of the file(s) to be loaded:

        * *e.g.,* ( filename1, filename2, ..., \*\*kwargs )*

        If the list of filenames is enclosed into brackets:

        * *e.g.,* ( **[** *filename1, filename2, ...* **]**, \*\*kwargs *)*

        the returned datasets are merged to form a single dataset,
        except if ``merge`` is set to `False`.

        If a source is not provided (*i.e.,* no ``paths``\ , nor ``content``\ ),
        a dialog box will be opened to select files.
    %(kwargs)s

    Returns
    -------
    object : `NDDataset` or list of `NDDataset`
        The returned dataset(s).

    Other Parameters
    ----------------
    protocol : `str`\ , optional
        ``Protocol`` used for reading. It can be one of {``'scp'``\ , ``'omnic'``\ ,
        ``'opus'``\ , ``'topspin'``\ , ``'matlab'``\ , ``'jcamp'``\ , ``'csv'``\ ,
        ``'excel'``\ }. If not provided, the correct protocol is inferred (whenever
        it is possible) from the filename extension.
    directory : `~pathlib.Path` object or valid url, optional
        From where to read the files.
    merge : `bool`\ , optional, default: `False`
        If `True` and several filenames or a ``directory`` have been provided as
        arguments, then a single `NDDataset` with merged data (stacked along the
        first dimension) is returned.
    sortbydate : `bool`\ , optional, default: `True`
        Sort multiple files by acquisition date.
    description : `str`\ , optional
        A custom description.
    origin : one of {``'omnic'``\ , ``'tga'``\ }, optional
        Used when reading with the CSV protocol. In order to properly interpret a
        CSV file it can be necessary to set the origin of the spectra.
        Up to now only ``'omnic'`` and ``'tga'`` have been implemented.
    csv_delimiter : `str`\ , optional, default: `~spectrochempy.preferences.csv_delimiter`
        Set the column delimiter in CSV file.
    content : `bytes` object, optional
        Instead of passing a filename for further reading, a bytes content can be
        directly provided as bytes objects. The most convenient way is to use a
        dictionary. This feature is particularly useful for a GUI Dash application
        to handle drag and drop of files into a Browser.
    iterdir : `bool`\ , optional, default: `True`
        If `True` and no filename was provided, all files present in the provided
        ``directory`` are returned (and merged if ``merge`` is `True`\ ).
        It is assumed that all the files correspond to the current reading protocol.

        .. versionchanged:: 0.6.2
            ``iterdir`` replaces the deprecated ``listdir`` argument.
    recursive : `bool`\ , optional, default: `False`
        Read also in subfolders.
    replace_existing : `bool`\ , optional, default: `False`
        Used only when urls are specified. By default, existing files are not
        replaced and thus not downloaded again.
    download_only : `bool`\ , optional, default: `False`
        Used only when urls are specified. If `True`, only downloading and saving of
        the files is performed, with no attempt to read their content.
    read_only : `bool`\ , optional, default: `True`
        Used only when urls are specified. If `True`, saving of the files is
        performed in the current directory, or in the directory specified by the
        ``directory`` parameter.

    See Also
    --------
    %(Importer.see_also.no_read)s

    Examples
    --------
    Reading a single OPUS file (providing a windows type filename relative to
    the default `~spectrochempy.preferences.datadir`\ ):

    >>> scp.read('irdata\\\\OPUS\\\\test.0000')
    NDDataset: [float64] a.u. (shape: (y:1, x:2567))

    Reading a single OPUS file (providing a unix/python type filename relative to
    the default ``datadir``\ ):

    >>> scp.read('irdata/OPUS/test.0000')
    NDDataset: [float64] a.u. (shape: (y:1, x:2567))

    Single file specified with a pathlib.Path object:

    >>> from pathlib import Path
    >>> folder = Path('irdata/OPUS')
    >>> p = folder / 'test.0000'
    >>> scp.read(p)
    NDDataset: [float64] a.u. (shape: (y:1, x:2567))

    Multiple files not merged (return a list of datasets). Note that a directory
    is specified:

    >>> le = scp.read('test.0000', 'test.0001', 'test.0002', directory='irdata/OPUS')
    >>> len(le)
    3
    >>> le[0]
    NDDataset: [float64] a.u. (shape: (y:1, x:2567))

    Multiple files merged as the `merge` keyword is set to true:

    >>> scp.read('test.0000', 'test.0001', 'test.0002', directory='irdata/OPUS', merge=True)
    NDDataset: [float64] a.u. (shape: (y:3, x:2567))

    Multiple files to merge: they are passed as a list instead of using the
    keyword `merge`:

    >>> scp.read(['test.0000', 'test.0001', 'test.0002'], directory='irdata/OPUS')
    NDDataset: [float64] a.u. (shape: (y:3, x:2567))

    Multiple files not merged: they are passed as a list but `merge` is set to false:

    >>> le = scp.read(['test.0000', 'test.0001', 'test.0002'], directory='irdata/OPUS', merge=False)
    >>> len(le)
    3

    Read without a filename. This has the effect of opening a dialog for file(s)
    selection:

    >>> nd = scp.read()

    Read in a directory (assuming that only OPUS files are present in this directory):

    >>> le = scp.read(directory='irdata/OPUS')
    >>> len(le)
    2

    Again we can use merge to combine the spectra that have compatible dimensions:

    >>> scp.read(directory='irdata/OPUS', merge=True)
    [NDDataset: [float64] a.u. (shape: (y:1, x:5549)), NDDataset: [float64] a.u. (shape: (y:4, x:2567))]
    """
    importer = Importer()

    protocol = kwargs.get("protocol", None)
    available_protocols = list(importer.protocols.values())
    available_protocols.extend(
        list(importer.alias.keys())
    )  # to handle variants of protocols

    if protocol is None:
        kwargs["filetypes"] = list(importer.filetypes.values())
        kwargs["protocol"] = "ALL"
        default_filter = kwargs.get("default_filter", None)
        if default_filter is not None:
            kwargs["default_filter"] = importer.filetypes[default_filter]
    else:
        try:
            kwargs["filetypes"] = [importer.filetypes[protocol]]
        except KeyError:
            raise ProtocolError(protocol, list(importer.protocols.values()))
        except TypeError as e:
            print(e)

    # deprecated kwargs
    if "listdir" in kwargs and "iterdir" not in kwargs:
        kwargs["iterdir"] = kwargs.pop("listdir")
        warning_(
            "argument `listdir` is deprecated, use `iterdir` instead",
            category=DeprecationWarning,
        )

    return importer(*paths, **kwargs)
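
# Illustrative usage note (not executed): when the extension alone is ambiguous, the
# protocol can be forced explicitly, e.g. for a LABSPEC export that carries a generic
# ".txt" extension (the path below is only a placeholder):
#
#   nd = scp.read("mydir/my_labspec_export.txt", protocol="labspec")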

# For some reason _docstring.get_sections modifies the signature of the function
# when used as a decorator, so we use it as a function
_docstring.get_sections(
    read.__doc__,
    sections=["Parameters", "Other Parameters", "Returns"],
    base="Importer",
)

_docstring.delete_params("Importer.see_also", "read_dir")

@_docstring.dedent
def read_dir(directory=None, **kwargs):
    """
    Read an entire directory.

    Open a list of readable files in a directory and store data/metadata in a
    dataset or a list of datasets according to the following rules:

    * 2D spectroscopic data (e.g. valid :file:`.spg` files or matlab arrays, etc...)
      from distinct files are stored in distinct `NDDataset`\ s.
    * 1D spectroscopic data (e.g., :file:`.spa` files) in a given directory are merged
      into a single `NDDataset`\ , provided their unique dimension is compatible.
      If not, an error is generated.
    * non-readable files are ignored.

    Parameters
    ----------
    directory : `str` or `~pathlib.Path`
        Folder where the files to read are located.

    Returns
    -------
    %(Importer.returns)s
        Depending on the python version, the order of the datasets in the list
        may change.

    See Also
    --------
    %(Importer.see_also.no_read_dir)s

    Examples
    --------

    >>> scp.preferences.csv_delimiter = ','
    >>> A = scp.read_dir('irdata')
    >>> len(A)
    4

    >>> B = scp.read_dir()
    """
    kwargs["iterdir"] = True
    importer = Importer()
    return importer(directory, **kwargs)

# _docstring.delete_params("Importer.see_also", "read_remote")
# @_docstring.dedent
# def read_remote(file_or_dir, **kwargs):
#     """
#     Download and read files or an entire directory from any url.
#
#     The first usage in spectrochempy is the loading of test files in the
#     `spectrochempy_data repository <https://github.com/spectrochempy/spectrochempy_data>`__\ .
#     This is done only if the data are not yet
#     downloaded and present in the `~spectrochempy.preferences.datadir` directory.
#
#     It can also be used to download and read a file or directory from any url.
#
#     Parameters
#     ----------
#     path : `str`, `~pathlib.Path` object or an url.
#         When a file or folder is specified, it must be written as if it were present
#         locally exactly as for the `read` function. The corresponding file or directory
#         is downloaded from the github ``spectrochempy_data`` repository.
#         Otherwise it should be a full and valid url.
#     %(kwargs)s
#
#     Returns
#     -------
#     %(Importer.returns)s
#
#     Other Parameters
#     ----------------
#     %(Importer.other_parameters)s
#
#     See Also
#     --------
#     %(Importer.see_also.no_read_remote)s
#
#     Examples
#     --------
#
#     >>> A = scp.read_remote('irdata/subdir')
#     """
#     kwargs["remote"] = True
#     importer = Importer()
#     return importer(file_or_dir, **kwargs)


# ======================================================================================
# Private read functions
# ======================================================================================
@_importer_method
def _read_dir(*args, **kwargs):
    _, directory = args
    directory = get_directory_name(directory)
    files = get_filenames(directory, **kwargs)
    datasets = []
    valid_extensions = list(zip(*FILETYPES))[0] + list(zip(*ALIAS))[0]
    for key in [key for key in files.keys() if key[1:] in valid_extensions]:
        if key:
            importer = Importer()
            nd = importer(files[key], **kwargs)
            if nd is not None:
                if not isinstance(nd, list):
                    nd = [nd]
                datasets.extend(nd)
    return datasets


@_importer_method
def _read_scp(*args, **kwargs):
    dataset, filename = args
    return dataset.load(filename, **kwargs)


@_importer_method
def _read_(*args, **kwargs):
    dataset, filename = args

    if kwargs.pop("remote", False):
        return Importer._read_remote(*args, **kwargs)
    elif not filename or filename.is_dir():
        return Importer._read_dir(*args, **kwargs)
    else:
        raise FileNotFoundError

    # protocol = kwargs.get("protocol", None)
    # if protocol and ".scp" in protocol:
    #     return dataset.load(filename, **kwargs)
    #
    # elif filename and filename.name in ("fid", "ser", "1r", "2rr", "3rrr"):
    #     # probably a Topspin NMR file
    #     return dataset.read_topspin(filename, **kwargs)
    # elif filename:
    #     # try scp format
    #     try:
    #         return dataset.load(filename, **kwargs)
    #     except Exception:
    #         # lets try some common formats
    #         for key in ["omnic", "opus", "topspin", "labspec", "matlab", "jdx"]:
    #             try:
    #                 _read = getattr(dataset, f"read_{key}")
    #                 f = f"{filename}.{key}"
    #                 return _read(f, **kwargs)
    #             except Exception:
    #                 pass
    #         raise NotImplementedError


# ======================================================================================
# Private functions
# ======================================================================================
def _is_url(filename):
    return (
        isinstance(filename, str)
        and re.match(r"http[s]?:[\/]{2}", filename) is not None
    )


def _openfid(filename, mode="rb", **kwargs):
    # Return a file ID
    # Check if content has been passed?
    content = kwargs.get("content", False)

    # default encoding
    encoding = "utf-8"

    if _is_url(filename):
        # by default, we set the read_only flag to True when reading a remote url
        kwargs["read_only"] = kwargs.get("read_only", True)

        # use requests to read the remote content
        r = requests.get(filename, allow_redirects=True)
        r.raise_for_status()
        content = r.content
        encoding = r.encoding
    else:
        # Transform filename to a Path object if it is not yet the case
        filename = pathclean(filename)

    # Create the file ID
    if content:
        # if a content has been passed, then it has priority
        fid = (
            io.BytesIO(content)
            if mode == "rb"
            else io.StringIO(content.decode(encoding))
        )
    else:
        fid = open(filename, mode=mode)

    return fid, kwargs


def _write_downloaded_file(content, dst):
    if not dst.parent.exists():
        # create the eventually missing subdirectory
        dst.parent.mkdir(parents=True, exist_ok=True)
    dst.write_bytes(content)
    info_(f"{dst.name} has been downloaded and written in {dst.parent}")


def _get_url_content_and_save(url, dst, replace, read_only=False):
    if not replace and dst.exists():
        return

    try:
        r = requests.get(url, allow_redirects=True)
        r.raise_for_status()

        # write downloaded file
        if not read_only:
            _write_downloaded_file(r.content, dst)

        # in all cases return the content
        return r.content

    except OSError:
        raise FileNotFoundError(f"Not found locally or at url:{url}")


def _download_full_testdata_directory():
    from spectrochempy.core import preferences as prefs

    datadir = prefs.datadir

    url = "https://github.com/spectrochempy/spectrochempy_data/archive/refs/heads/master.zip"

    resp = requests.get(url)
    zipfile = ZipFile(io.BytesIO(resp.content))
    files = [zipfile.open(file_name) for file_name in zipfile.namelist()]

    for file in files:
        name = file.name
        if name.endswith("/") or "testdata/" not in name:  # dir
            continue
        uncompressed = zipfile.read(name)
        p = list(pathclean(name).parts)[2:]
        dst = datadir.joinpath("/".join(p))
        _write_downloaded_file(uncompressed, dst)


def _download_from_github(path, dst, replace=False):
    # download from github (always save the downloaded files)
    relative_path = str(pathclean(path).as_posix())
    path = (
        f"https://github.com/spectrochempy/spectrochempy_data/raw/master/"
        f"testdata/{relative_path}"
    )

    # first determine if it is a directory
    r = requests.get(path + "/__index__", allow_redirects=True)
    index = None
    if r.status_code == 200:
        index = yaml.load(r.content, Loader=yaml.CLoader)

    if index is None:
        return _get_url_content_and_save(path, dst, replace)
    else:
        # download folder
        for filename in index["files"]:
            _get_url_content_and_save(f"{path}/{filename}", dst / filename, replace)

        for folder in index["folders"]:
            _download_from_github(f"{relative_path}/{folder}", dst / folder)


def _is_relative_to(path, base):
    # try to emulate the pathlib is_relative_to method which does not work on python
    # 3.7 (needed for Colab!)
    # TODO: replace as Colab is updated to 3.9
    pparts = path.parts
    bparts = base.parts
    if bparts[-1] in pparts:
        idx = pparts.index(bparts[-1])
        pparts_base = pparts[: idx + 1]
        return pparts_base == bparts
    return False


def _relative_to(path, base):
    pparts = path.parts
    bparts = base.parts
    if bparts[-1] in pparts:
        idx = pparts.index(bparts[-1])
        return pathclean("/".join(pparts[idx + 1 :]))
    raise ValueError(
        f"'{path}' is not in the subpath of '{base}' OR one path is "
        f"relative and the other absolute."
    )


@_importer_method
def _read_remote(*args, **kwargs):
    from spectrochempy.core import preferences as prefs

    datadir = prefs.datadir

    dataset, path = args
    kwargs["merge"] = kwargs.get("merge", False)  # by default, no attempt to merge
    read_method = kwargs.pop("read_method", read)
    download_only = kwargs.pop("download_only", False)
    replace = kwargs.pop(
        "replace_existing", False
    )  # by default, existing downloaded files are not replaced

    # we try to download the github testdata
    path = pathclean(path)

    # we need to download additional files for topspin
    topspin = True if "topspin" in read_method.__name__ else False

    # we have to treat a special case: topspin, where the parent directory needs
    # to be downloaded with the required file
    if topspin:
        savedpath = path
        m = re.match(r"(.*)(\/pdata\/\d+\/\d+[r|i]{1,2}|ser|fid)", str(path))
        if m is not None:
            path = pathclean(m[1])

    if _is_relative_to(path, datadir):
        # try to make it relative for remote downloading on github
        relative_path = _relative_to(path, datadir)
    else:
        # assume it is already relative
        relative_path = path

    # Try to download it
    dst = datadir / relative_path
    if dst.name == "testdata":
        # we are going to download the whole testdata directory
        # -> use a faster method
        _download_full_testdata_directory()
        return
    else:
        content = _download_from_github(relative_path, dst, replace)

    if not download_only:
        if content is None:
            if topspin:
                return read_method(
                    dataset, dst / _relative_to(savedpath, dst), **kwargs
                )
            else:
                return read_method(dataset, dst, **kwargs)
        else:
            return read_method(dataset, dst, content=content, **kwargs)
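
# Illustrative sketch (not executed) of the `content` mechanism described in the `read`
# docstring: raw bytes can be passed through a dictionary whose key acts as a virtual
# filename (its extension selects the protocol). The path used here is only a placeholder:
#
#   with open("irdata/OPUS/test.0000", "rb") as f:
#       content = f.read()
#   nd = scp.read({"test.0000": content})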