diff --git a/.travis.yml b/.travis.yml index b67cd56c95..1b37064202 100644 --- a/.travis.yml +++ b/.travis.yml @@ -15,6 +15,7 @@ cache: env: global: - DEPENDS="six numpy scipy matplotlib h5py pillow" + - OPTIONAL_DEPENDS="" - PYDICOM=1 - INSTALL_TYPE="setup" - EXTRA_WHEELS="https://5cf40426d9f06eb7461d-6fe47d9331aba7cd62fc36c7196769e4.ssl.cf2.rackcdn.com" @@ -84,6 +85,13 @@ matrix: - python: 3.5 env: - DOC_DOC_TEST=1 + # Run tests with indexed_gzip present + - python: 2.7 + env: + - OPTIONAL_DEPENDS="indexed_gzip" + - python: 3.5 + env: + - OPTIONAL_DEPENDS="indexed_gzip" before_install: - source tools/travis_tools.sh - python -m pip install --upgrade pip @@ -93,7 +101,7 @@ before_install: - python --version # just to check - pip install -U pip wheel # needed at one point - retry pip install nose flake8 mock # always - - pip install $EXTRA_PIP_FLAGS $DEPENDS + - pip install $EXTRA_PIP_FLAGS $DEPENDS $OPTIONAL_DEPENDS # pydicom <= 0.9.8 doesn't install on python 3 - if [ "${TRAVIS_PYTHON_VERSION:0:1}" == "2" ]; then if [ "$PYDICOM" == "1" ]; then diff --git a/nibabel/analyze.py b/nibabel/analyze.py index 2f8cf03988..ba1529e3b9 100644 --- a/nibabel/analyze.py +++ b/nibabel/analyze.py @@ -934,8 +934,8 @@ def set_data_dtype(self, dtype): @classmethod @kw_only_meth(1) - def from_file_map(klass, file_map, mmap=True): - ''' class method to create image from mapping in `file_map `` + def from_file_map(klass, file_map, mmap=True, keep_file_open=None): + '''class method to create image from mapping in `file_map `` Parameters ---------- @@ -950,6 +950,19 @@ def from_file_map(klass, file_map, mmap=True): `mmap` value of True gives the same behavior as ``mmap='c'``. If image data file cannot be memory-mapped, ignore `mmap` value and read array from file. 
+ keep_file_open : { None, 'auto', True, False }, optional, keyword only + `keep_file_open` controls whether a new file handle is created + every time the image is accessed, or a single file handle is + created and used for the lifetime of this ``ArrayProxy``. If + ``True``, a single file handle is created and used. If ``False``, + a new file handle is created every time the image is accessed. If + ``'auto'``, and the optional ``indexed_gzip`` dependency is + present, a single file handle is created and persisted. If + ``indexed_gzip`` is not available, behaviour is the same as if + ``keep_file_open is False``. If ``file_map`` refers to an open + file handle, this setting has no effect. The default value + (``None``) will result in the value of + ``nibabel.arrayproxy.KEEP_FILE_OPEN_DEFAULT`` being used. Returns ------- @@ -964,7 +977,8 @@ def from_file_map(klass, file_map, mmap=True): imgf = img_fh.fileobj if imgf is None: imgf = img_fh.filename - data = klass.ImageArrayProxy(imgf, hdr_copy, mmap=mmap) + data = klass.ImageArrayProxy(imgf, hdr_copy, mmap=mmap, + keep_file_open=keep_file_open) # Initialize without affine to allow header to pass through unmodified img = klass(data, None, header, file_map=file_map) # set affine from header though @@ -976,8 +990,8 @@ def from_file_map(klass, file_map, mmap=True): @classmethod @kw_only_meth(1) - def from_filename(klass, filename, mmap=True): - ''' class method to create image from filename `filename` + def from_filename(klass, filename, mmap=True, keep_file_open=None): + '''class method to create image from filename `filename` Parameters ---------- @@ -990,6 +1004,18 @@ def from_filename(klass, filename, mmap=True): `mmap` value of True gives the same behavior as ``mmap='c'``. If image data file cannot be memory-mapped, ignore `mmap` value and read array from file. 
+ keep_file_open : { None, 'auto', True, False }, optional, keyword only + `keep_file_open` controls whether a new file handle is created + every time the image is accessed, or a single file handle is + created and used for the lifetime of this ``ArrayProxy``. If + ``True``, a single file handle is created and used. If ``False``, + a new file handle is created every time the image is accessed. If + ``'auto'``, and the optional ``indexed_gzip`` dependency is + present, a single file handle is created and persisted. If + ``indexed_gzip`` is not available, behaviour is the same as if + ``keep_file_open is False``. The default value (``None``) will + result in the value of + ``nibabel.arrayproxy.KEEP_FILE_OPEN_DEFAULT`` being used. Returns ------- @@ -998,7 +1024,8 @@ def from_filename(klass, filename, mmap=True): if mmap not in (True, False, 'c', 'r'): raise ValueError("mmap should be one of {True, False, 'c', 'r'}") file_map = klass.filespec_to_file_map(filename) - return klass.from_file_map(file_map, mmap=mmap) + return klass.from_file_map(file_map, mmap=mmap, + keep_file_open=keep_file_open) load = from_filename diff --git a/nibabel/arrayproxy.py b/nibabel/arrayproxy.py index 10a6cab8e6..18d7f67e3b 100644 --- a/nibabel/arrayproxy.py +++ b/nibabel/arrayproxy.py @@ -25,13 +25,34 @@ See :mod:`nibabel.tests.test_proxy_api` for proxy API conformance checks. """ +from contextlib import contextmanager +from threading import RLock + import numpy as np from .deprecated import deprecate_with_version from .volumeutils import array_from_file, apply_read_scaling from .fileslice import fileslice from .keywordonly import kw_only_meth -from .openers import ImageOpener +from .openers import ImageOpener, HAVE_INDEXED_GZIP + + +"""This flag controls whether a new file handle is created every time an image +is accessed through an ``ArrayProxy``, or a single file handle is created and +used for the lifetime of the ``ArrayProxy``. 
It should be set to one of +``True``, ``False``, or ``'auto'``. + +If ``True``, a single file handle is created and used. If ``False``, a new +file handle is created every time the image is accessed. If ``'auto'``, and +the optional ``indexed_gzip`` dependency is present, a single file handle is +created and persisted. If ``indexed_gzip`` is not available, behaviour is the +same as if ``keep_file_open is False``. + +If this is set to any other value, attempts to create an ``ArrayProxy`` without +specifying the ``keep_file_open`` flag will result in a ``ValueError`` being +raised. +""" +KEEP_FILE_OPEN_DEFAULT = False class ArrayProxy(object): @@ -69,8 +90,8 @@ class ArrayProxy(object): _header = None @kw_only_meth(2) - def __init__(self, file_like, spec, mmap=True): - """ Initialize array proxy instance + def __init__(self, file_like, spec, mmap=True, keep_file_open=None): + """Initialize array proxy instance Parameters ---------- @@ -99,8 +120,18 @@ def __init__(self, file_like, spec, mmap=True): True gives the same behavior as ``mmap='c'``. If `file_like` cannot be memory-mapped, ignore `mmap` value and read array from file. - scaling : {'fp', 'dv'}, optional, keyword only - Type of scaling to use - see header ``get_data_scaling`` method. + keep_file_open : { None, 'auto', True, False }, optional, keyword only + `keep_file_open` controls whether a new file handle is created + every time the image is accessed, or a single file handle is + created and used for the lifetime of this ``ArrayProxy``. If + ``True``, a single file handle is created and used. If ``False``, + a new file handle is created every time the image is accessed. If + ``'auto'``, and the optional ``indexed_gzip`` dependency is + present, a single file handle is created and persisted. If + ``indexed_gzip`` is not available, behaviour is the same as if + ``keep_file_open is False``. If ``file_like`` is an open file + handle, this setting has no effect. 
The default value (``None``) + will result in the value of ``KEEP_FILE_OPEN_DEFAULT`` being used. """ if mmap not in (True, False, 'c', 'r'): raise ValueError("mmap should be one of {True, False, 'c', 'r'}") @@ -125,6 +156,70 @@ def __init__(self, file_like, spec, mmap=True): # Permit any specifier that can be interpreted as a numpy dtype self._dtype = np.dtype(self._dtype) self._mmap = mmap + self._keep_file_open = self._should_keep_file_open(file_like, + keep_file_open) + self._lock = RLock() + + def __del__(self): + """If this ``ArrayProxy`` was created with ``keep_file_open=True``, + the open file object is closed if necessary. + """ + if hasattr(self, '_opener') and not self._opener.closed: + self._opener.close_if_mine() + self._opener = None + + def __getstate__(self): + """Returns the state of this ``ArrayProxy`` during pickling. """ + state = self.__dict__.copy() + state.pop('_lock', None) + return state + + def __setstate__(self, state): + """Sets the state of this ``ArrayProxy`` during unpickling. """ + self.__dict__.update(state) + self._lock = RLock() + + def _should_keep_file_open(self, file_like, keep_file_open): + """Called by ``__init__``, and used to determine the final value of + ``keep_file_open``. + + The return value is derived from these rules: + + - If ``file_like`` is a file(-like) object, ``False`` is returned. + Otherwise, ``file_like`` is assumed to be a file name. + - if ``file_like`` ends with ``'gz'``, and the ``indexed_gzip`` + library is available, ``True`` is returned. + - Otherwise, ``False`` is returned. + + Parameters + ---------- + + file_like : object + File-like object or filename, as passed to ``__init__``. + keep_file_open : { 'auto', True, False } + Flag as passed to ``__init__``. + + Returns + ------- + + The value of ``keep_file_open`` that will be used by this + ``ArrayProxy``. 
+ """ + if keep_file_open is None: + keep_file_open = KEEP_FILE_OPEN_DEFAULT + # if keep_file_open is True/False, we do what the user wants us to do + if isinstance(keep_file_open, bool): + return keep_file_open + if keep_file_open != 'auto': + raise ValueError('keep_file_open should be one of {None, ' + '\'auto\', True, False}') + + # file_like is a handle - keep_file_open is irrelevant + if hasattr(file_like, 'read') and hasattr(file_like, 'seek'): + return False + # Otherwise, if file_like is gzipped, and we have_indexed_gzip, we set + # keep_file_open to True, else we set it to False + return HAVE_INDEXED_GZIP and file_like.endswith('gz') @property @deprecate_with_version('ArrayProxy.header deprecated', '2.2', '3.0') @@ -155,12 +250,33 @@ def inter(self): def is_proxy(self): return True + @contextmanager + def _get_fileobj(self): + """Create and return a new ``ImageOpener``, or return an existing one. + + The specific behaviour depends on the value of the ``keep_file_open`` + flag that was passed to ``__init__``. + + Yields + ------ + ImageOpener + A newly created ``ImageOpener`` instance, or an existing one, + which provides access to the file. 
+ """ + if self._keep_file_open: + if not hasattr(self, '_opener'): + self._opener = ImageOpener(self.file_like) + yield self._opener + else: + with ImageOpener(self.file_like) as opener: + yield opener + def get_unscaled(self): - ''' Read of data from file + """ Read of data from file This is an optional part of the proxy API - ''' - with ImageOpener(self.file_like) as fileobj: + """ + with self._get_fileobj() as fileobj, self._lock: raw_data = array_from_file(self._shape, self._dtype, fileobj, @@ -175,18 +291,19 @@ def __array__(self): return apply_read_scaling(raw_data, self._slope, self._inter) def __getitem__(self, slicer): - with ImageOpener(self.file_like) as fileobj: + with self._get_fileobj() as fileobj: raw_data = fileslice(fileobj, slicer, self._shape, self._dtype, self._offset, - order=self.order) + order=self.order, + lock=self._lock) # Upcast as necessary for big slopes, intercepts return apply_read_scaling(raw_data, self._slope, self._inter) def reshape(self, shape): - ''' Return an ArrayProxy with a new shape, without modifying data ''' + """ Return an ArrayProxy with a new shape, without modifying data """ size = np.prod(self._shape) # Calculate new shape if not fully specified diff --git a/nibabel/fileslice.py b/nibabel/fileslice.py index ee72b83c99..e55f48c127 100644 --- a/nibabel/fileslice.py +++ b/nibabel/fileslice.py @@ -17,6 +17,22 @@ SKIP_THRESH = 2 ** 8 +class _NullLock(object): + """Can be used as no-function dummy object in place of ``threading.lock``. + + The ``_NullLock`` is an object which can be used in place of a + ``threading.Lock`` object, but doesn't actually do anything. + + It is used by the ``read_segments`` function in the event that a + ``Lock`` is not provided by the caller. 
+ """ + def __enter__(self): + pass + + def __exit__(self, exc_type, exc_val, exc_tb): + return False + + def is_fancy(sliceobj): """ Returns True if sliceobj is attempting fancy indexing @@ -622,7 +638,7 @@ def slicers2segments(read_slicers, in_shape, offset, itemsize): return all_segments -def read_segments(fileobj, segments, n_bytes): +def read_segments(fileobj, segments, n_bytes, lock=None): """ Read `n_bytes` byte data implied by `segments` from `fileobj` Parameters @@ -634,6 +650,14 @@ def read_segments(fileobj, segments, n_bytes): absolute file offset in bytes and number of bytes to read n_bytes : int total number of bytes that will be read + lock : {None, threading.Lock, lock-like} optional + If provided, used to ensure that paired calls to ``seek`` and ``read`` + cannot be interrupted by another thread accessing the same ``fileobj``. + Each thread which accesses the same file via ``read_segments`` must + share a lock in order to ensure that the file access is thread-safe. + A lock does not need to be provided for single-threaded access. The + default value (``None``) results in a lock-like object (a + ``_NullLock``) which does not do anything. 
Returns ------- @@ -641,22 +665,28 @@ def read_segments(fileobj, segments, n_bytes): object implementing buffer protocol, such as byte string or ndarray or mmap or ctypes ``c_char_array`` """ + # Make a lock-like thing to make the code below a bit nicer + if lock is None: + lock = _NullLock() + if len(segments) == 0: if n_bytes != 0: raise ValueError("No segments, but non-zero n_bytes") return b'' if len(segments) == 1: offset, length = segments[0] - fileobj.seek(offset) - bytes = fileobj.read(length) + with lock: + fileobj.seek(offset) + bytes = fileobj.read(length) if len(bytes) != n_bytes: raise ValueError("Whoops, not enough data in file") return bytes # More than one segment bytes = mmap(-1, n_bytes) for offset, length in segments: - fileobj.seek(offset) - bytes.write(fileobj.read(length)) + with lock: + fileobj.seek(offset) + bytes.write(fileobj.read(length)) if bytes.tell() != n_bytes: raise ValueError("Oh dear, n_bytes does not look right") return bytes @@ -700,7 +730,7 @@ def _simple_fileslice(fileobj, sliceobj, shape, dtype, offset=0, order='C', def fileslice(fileobj, sliceobj, shape, dtype, offset=0, order='C', - heuristic=threshold_heuristic): + heuristic=threshold_heuristic, lock=None): """ Slice array in `fileobj` using `sliceobj` slicer and array definitions `fileobj` contains the contiguous binary data for an array ``A`` of shape, @@ -737,6 +767,14 @@ def fileslice(fileobj, sliceobj, shape, dtype, offset=0, order='C', returning one of 'full', 'contiguous', None. See :func:`optimize_slicer` and see :func:`threshold_heuristic` for an example. + lock : {None, threading.Lock, lock-like} optional + If provided, used to ensure that paired calls to ``seek`` and ``read`` + cannot be interrupted by another thread accessing the same ``fileobj``. + Each thread which accesses the same file via ``read_segments`` must + share a lock in order to ensure that the file access is thread-safe. + A lock does not need to be provided for single-threaded access. 
The + default value (``None``) results in a lock-like object (a + ``_NullLock``) which does not do anything. Returns ------- @@ -750,8 +788,8 @@ def fileslice(fileobj, sliceobj, shape, dtype, offset=0, order='C', segments, sliced_shape, post_slicers = calc_slicedefs( sliceobj, shape, itemsize, offset, order) n_bytes = reduce(operator.mul, sliced_shape, 1) * itemsize - bytes = read_segments(fileobj, segments, n_bytes) - sliced = np.ndarray(sliced_shape, dtype, buffer=bytes, order=order) + arr_data = read_segments(fileobj, segments, n_bytes, lock) + sliced = np.ndarray(sliced_shape, dtype, buffer=arr_data, order=order) return sliced[post_slicers] diff --git a/nibabel/freesurfer/mghformat.py b/nibabel/freesurfer/mghformat.py index efe51c7d5a..1bc9071538 100644 --- a/nibabel/freesurfer/mghformat.py +++ b/nibabel/freesurfer/mghformat.py @@ -476,7 +476,7 @@ def filespec_to_file_map(klass, filespec): @classmethod @kw_only_meth(1) - def from_file_map(klass, file_map, mmap=True): + def from_file_map(klass, file_map, mmap=True, keep_file_open=None): '''Load image from `file_map` Parameters @@ -491,6 +491,19 @@ def from_file_map(klass, file_map, mmap=True): `mmap` value of True gives the same behavior as ``mmap='c'``. If image data file cannot be memory-mapped, ignore `mmap` value and read array from file. + keep_file_open : { None, 'auto', True, False }, optional, keyword only + `keep_file_open` controls whether a new file handle is created + every time the image is accessed, or a single file handle is + created and used for the lifetime of this ``ArrayProxy``. If + ``True``, a single file handle is created and used. If ``False``, + a new file handle is created every time the image is accessed. If + ``'auto'``, and the optional ``indexed_gzip`` dependency is + present, a single file handle is created and persisted. If + ``indexed_gzip`` is not available, behaviour is the same as if + ``keep_file_open is False``. 
If ``file_map`` refers to an open + file handle, this setting has no effect. The default value + (``None``) will result in the value of + ``nibabel.arrayproxy.KEEP_FILE_OPEN_DEFAULT`` being used. ''' if mmap not in (True, False, 'c', 'r'): raise ValueError("mmap should be one of {True, False, 'c', 'r'}") @@ -500,7 +513,8 @@ def from_file_map(klass, file_map, mmap=True): affine = header.get_affine() hdr_copy = header.copy() # Pass original image fileobj / filename to array proxy - data = klass.ImageArrayProxy(img_fh.file_like, hdr_copy, mmap=mmap) + data = klass.ImageArrayProxy(img_fh.file_like, hdr_copy, mmap=mmap, + keep_file_open=keep_file_open) img = klass(data, affine, header, file_map=file_map) img._load_cache = {'header': hdr_copy, 'affine': affine.copy(), @@ -509,8 +523,8 @@ def from_file_map(klass, file_map, mmap=True): @classmethod @kw_only_meth(1) - def from_filename(klass, filename, mmap=True): - ''' class method to create image from filename `filename` + def from_filename(klass, filename, mmap=True, keep_file_open=None): + '''class method to create image from filename `filename` Parameters ---------- @@ -523,6 +537,18 @@ def from_filename(klass, filename, mmap=True): `mmap` value of True gives the same behavior as ``mmap='c'``. If image data file cannot be memory-mapped, ignore `mmap` value and read array from file. + keep_file_open : { None, 'auto', True, False }, optional, keyword only + `keep_file_open` controls whether a new file handle is created + every time the image is accessed, or a single file handle is + created and used for the lifetime of this ``ArrayProxy``. If + ``True``, a single file handle is created and used. If ``False``, + a new file handle is created every time the image is accessed. If + ``'auto'``, and the optional ``indexed_gzip`` dependency is + present, a single file handle is created and persisted. If + ``indexed_gzip`` is not available, behaviour is the same as if + ``keep_file_open is False``. 
The default value (``None``) will + result in the value of + ``nibabel.arrayproxy.KEEP_FILE_OPEN_DEFAULT`` being used. Returns ------- @@ -531,7 +557,8 @@ def from_filename(klass, filename, mmap=True): if mmap not in (True, False, 'c', 'r'): raise ValueError("mmap should be one of {True, False, 'c', 'r'}") file_map = klass.filespec_to_file_map(filename) - return klass.from_file_map(file_map, mmap=mmap) + return klass.from_file_map(file_map, mmap=mmap, + keep_file_open=keep_file_open) load = from_filename diff --git a/nibabel/openers.py b/nibabel/openers.py index abed97afcf..04382f66bc 100644 --- a/nibabel/openers.py +++ b/nibabel/openers.py @@ -14,6 +14,13 @@ import sys from os.path import splitext +# is indexed_gzip present? +try: + from indexed_gzip import SafeIndexedGzipFile + HAVE_INDEXED_GZIP = True +except ImportError: + HAVE_INDEXED_GZIP = False + # The largest memory chunk that gzip can use for reads GZIP_MAX_READ_CHUNK = 100 * 1024 * 1024 # 100Mb @@ -60,13 +67,21 @@ def readinto(self, buf): return n_read -def _gzip_open(fileish, *args, **kwargs): - gzip_file = BufferedGzipFile(fileish, *args, **kwargs) +def _gzip_open(filename, mode='rb', compresslevel=9): + + # use indexed_gzip if possible for faster read access + if mode == 'rb' and HAVE_INDEXED_GZIP: + gzip_file = SafeIndexedGzipFile(filename) + + # Fall-back to built-in GzipFile (wrapped with the BufferedGzipFile class + # defined above) + else: + gzip_file = BufferedGzipFile(filename, mode, compresslevel) - # Speedup for #209; attribute not present in in Python 3.5 - # open gzip files with faster reads on large files using larger - # See https://github.com/nipy/nibabel/pull/210 for discussion - if hasattr(gzip_file, 'max_chunk_read'): + # Speedup for #209, for versions of python < 3.5. Open gzip files with + # faster reads on large files using a larger read buffer. 
See + # https://github.com/nipy/nibabel/pull/210 for discussion + if hasattr(gzip_file, 'max_read_chunk'): gzip_file.max_read_chunk = GZIP_MAX_READ_CHUNK return gzip_file diff --git a/nibabel/spm99analyze.py b/nibabel/spm99analyze.py index 70b80c5818..40ae4f44b8 100644 --- a/nibabel/spm99analyze.py +++ b/nibabel/spm99analyze.py @@ -245,8 +245,8 @@ class Spm99AnalyzeImage(analyze.AnalyzeImage): @classmethod @kw_only_meth(1) - def from_file_map(klass, file_map, mmap=True): - ''' class method to create image from mapping in `file_map `` + def from_file_map(klass, file_map, mmap=True, keep_file_open=None): + '''class method to create image from mapping in `file_map `` Parameters ---------- @@ -261,13 +261,27 @@ def from_file_map(klass, file_map, mmap=True): `mmap` value of True gives the same behavior as ``mmap='c'``. If image data file cannot be memory-mapped, ignore `mmap` value and read array from file. + keep_file_open : { None, 'auto', True, False }, optional, keyword only + `keep_file_open` controls whether a new file handle is created + every time the image is accessed, or a single file handle is + created and used for the lifetime of this ``ArrayProxy``. If + ``True``, a single file handle is created and used. If ``False``, + a new file handle is created every time the image is accessed. If + ``'auto'``, and the optional ``indexed_gzip`` dependency is + present, a single file handle is created and persisted. If + ``indexed_gzip`` is not available, behaviour is the same as if + ``keep_file_open is False``. If ``file_map`` refers to an open + file handle, this setting has no effect. The default value + (``None``) will result in the value of + ``nibabel.arrayproxy.KEEP_FILE_OPEN_DEFAULT`` being used. 
Returns ------- img : Spm99AnalyzeImage instance + ''' - ret = super(Spm99AnalyzeImage, klass).from_file_map(file_map, - mmap=mmap) + ret = super(Spm99AnalyzeImage, klass).from_file_map( + file_map, mmap=mmap, keep_file_open=keep_file_open) try: matf = file_map['mat'].get_prepare_fileobj() except IOError: diff --git a/nibabel/tests/test_arrayproxy.py b/nibabel/tests/test_arrayproxy.py index dfc16fb48e..61492f6038 100644 --- a/nibabel/tests/test_arrayproxy.py +++ b/nibabel/tests/test_arrayproxy.py @@ -11,15 +11,22 @@ from __future__ import division, print_function, absolute_import import warnings +import gzip +import contextlib +import pickle from io import BytesIO from ..tmpdirs import InTemporaryDirectory import numpy as np -from ..arrayproxy import ArrayProxy, is_proxy, reshape_dataobj +from ..arrayproxy import (ArrayProxy, KEEP_FILE_OPEN_DEFAULT, is_proxy, + reshape_dataobj) +from ..openers import ImageOpener from ..nifti1 import Nifti1Header +import mock + from numpy.testing import assert_array_equal, assert_array_almost_equal from nose.tools import (assert_true, assert_false, assert_equal, assert_not_equal, assert_raises) @@ -327,3 +334,182 @@ def check_mmap(hdr, offset, proxy_class, # Check invalid values raise error assert_raises(ValueError, proxy_class, fname, hdr, mmap='rw') assert_raises(ValueError, proxy_class, fname, hdr, mmap='r+') + + +# An image opener class which counts how many instances of itself have been +# created +class CountingImageOpener(ImageOpener): + + num_openers = 0 + + def __init__(self, *args, **kwargs): + + super(CountingImageOpener, self).__init__(*args, **kwargs) + CountingImageOpener.num_openers += 1 + + +def test_keep_file_open_true_false_invalid(): + # Test the behaviour of the keep_file_open __init__ flag, when it is set to + # True or False. 
+ CountingImageOpener.num_openers = 0 + fname = 'testdata' + dtype = np.float32 + data = np.arange(1000, dtype=dtype).reshape((10, 10, 10)) + voxels = np.random.randint(0, 10, (10, 3)) + with InTemporaryDirectory(): + with open(fname, 'wb') as fobj: + fobj.write(data.tostring(order='F')) + # Test that ArrayProxy(keep_file_open=True) only creates one file + # handle, and that ArrayProxy(keep_file_open=False) creates a file + # handle on every data access. + with mock.patch('nibabel.arrayproxy.ImageOpener', CountingImageOpener): + proxy_no_kfp = ArrayProxy(fname, ((10, 10, 10), dtype), + keep_file_open=False) + assert not proxy_no_kfp._keep_file_open + for i in range(voxels.shape[0]): + x , y, z = [int(c) for c in voxels[i, :]] + assert proxy_no_kfp[x, y, z] == x * 100 + y * 10 + z + assert CountingImageOpener.num_openers == i + 1 + CountingImageOpener.num_openers = 0 + proxy_kfp = ArrayProxy(fname, ((10, 10, 10), dtype), + keep_file_open=True) + assert proxy_kfp._keep_file_open + for i in range(voxels.shape[0]): + x , y, z = [int(c) for c in voxels[i, :]] + assert proxy_kfp[x, y, z] == x * 100 + y * 10 + z + assert CountingImageOpener.num_openers == 1 + # Test that the keep_file_open flag has no effect if an open file + # handle is passed in + with open(fname, 'rb') as fobj: + proxy_no_kfp = ArrayProxy(fobj, ((10, 10, 10), dtype), + keep_file_open=False) + assert not proxy_no_kfp._keep_file_open + for i in range(voxels.shape[0]): + assert proxy_no_kfp[x, y, z] == x * 100 + y * 10 + z + assert not fobj.closed + del proxy_no_kfp + proxy_no_kfp = None + assert not fobj.closed + proxy_kfp = ArrayProxy(fobj, ((10, 10, 10), dtype), + keep_file_open=True) + assert proxy_kfp._keep_file_open + for i in range(voxels.shape[0]): + assert proxy_kfp[x, y, z] == x * 100 + y * 10 + z + assert not fobj.closed + del proxy_kfp + proxy_kfp = None + assert not fobj.closed + # Test invalid values of keep_file_open + with assert_raises(ValueError): + ArrayProxy(fname, ((10, 10, 10), 
dtype), keep_file_open=0) + with assert_raises(ValueError): + ArrayProxy(fname, ((10, 10, 10), dtype), keep_file_open=1) + with assert_raises(ValueError): + ArrayProxy(fname, ((10, 10, 10), dtype), keep_file_open=55) + with assert_raises(ValueError): + ArrayProxy(fname, ((10, 10, 10), dtype), keep_file_open='autob') + with assert_raises(ValueError): + ArrayProxy(fname, ((10, 10, 10), dtype), keep_file_open='cauto') + + +@contextlib.contextmanager +def patch_indexed_gzip(state): + # Make it look like we do (state==True) or do not (state==False) have + # the indexed gzip module. + if state: + values = (True, True, gzip.GzipFile) + else: + values = (False, False, None) + with mock.patch('nibabel.openers.HAVE_INDEXED_GZIP', values[0]), \ + mock.patch('nibabel.arrayproxy.HAVE_INDEXED_GZIP', values[1]), \ + mock.patch('nibabel.openers.SafeIndexedGzipFile', values[2], + create=True): + yield + + +@contextlib.contextmanager +def patch_keep_file_open_default(value): + # Patch arrayproxy.KEEP_FILE_OPEN_DEFAULT with the given value + with mock.patch('nibabel.arrayproxy.KEEP_FILE_OPEN_DEFAULT', value): + yield + + +def test_keep_file_open_auto(): + # Test the behaviour of the keep_file_open __init__ flag, when it is set to + # 'auto' + dtype = np.float32 + data = np.arange(1000, dtype=dtype).reshape((10, 10, 10)) + with InTemporaryDirectory(): + fname = 'testdata.gz' + with gzip.open(fname, 'wb') as fobj: + fobj.write(data.tostring(order='F')) + # If have_indexed_gzip, then keep_file_open should be True + with patch_indexed_gzip(True): + proxy = ArrayProxy(fname, ((10, 10, 10), dtype), + keep_file_open='auto') + assert proxy._keep_file_open + # If no have_indexed_gzip, then keep_file_open should be False + with patch_indexed_gzip(False): + proxy = ArrayProxy(fname, ((10, 10, 10), dtype), + keep_file_open='auto') + assert not proxy._keep_file_open + + +def test_keep_file_open_default(): + # Test the behaviour of the keep_file_open __init__ flag, when the + # 
arrayproxy.KEEP_FILE_OPEN_DEFAULT value is changed
+    dtype = np.float32
+    data = np.arange(1000, dtype=dtype).reshape((10, 10, 10))
+    with InTemporaryDirectory():
+        fname = 'testdata.gz'
+        with gzip.open(fname, 'wb') as fobj:
+            fobj.write(data.tostring(order='F'))
+        # The default value of KEEP_FILE_OPEN_DEFAULT should cause
+        # keep_file_open to be False, regardless of whether or not indexed_gzip
+        # is present
+        assert KEEP_FILE_OPEN_DEFAULT is False
+        with patch_indexed_gzip(False):
+            proxy = ArrayProxy(fname, ((10, 10, 10), dtype))
+            assert not proxy._keep_file_open
+        with patch_indexed_gzip(True):
+            proxy = ArrayProxy(fname, ((10, 10, 10), dtype))
+            assert not proxy._keep_file_open
+        # KEEP_FILE_OPEN_DEFAULT=True should cause keep_file_open to be True,
+        # regardless of whether or not indexed_gzip is present
+        with patch_keep_file_open_default(True), patch_indexed_gzip(True):
+            proxy = ArrayProxy(fname, ((10, 10, 10), dtype))
+            assert proxy._keep_file_open
+        with patch_keep_file_open_default(True), patch_indexed_gzip(False):
+            proxy = ArrayProxy(fname, ((10, 10, 10), dtype))
+            assert proxy._keep_file_open
+        # KEEP_FILE_OPEN_DEFAULT=auto should cause keep_file_open to be True
+        # or False, depending on whether indexed_gzip is present,
+        with patch_keep_file_open_default('auto'), patch_indexed_gzip(True):
+            proxy = ArrayProxy(fname, ((10, 10, 10), dtype))
+            assert proxy._keep_file_open
+        with patch_keep_file_open_default('auto'), patch_indexed_gzip(False):
+            proxy = ArrayProxy(fname, ((10, 10, 10), dtype))
+            assert not proxy._keep_file_open
+        # KEEP_FILE_OPEN_DEFAULT=any other value should cause an error to be
+        # raised
+        with patch_keep_file_open_default('badvalue'):
+            assert_raises(ValueError, ArrayProxy, fname, ((10, 10, 10),
+                                                          dtype))
+        with patch_keep_file_open_default(None):
+            assert_raises(ValueError, ArrayProxy, fname, ((10, 10, 10),
+                                                          dtype))
+
+
+def test_pickle_lock():
+    # Test that ArrayProxy can be pickled, and that thread lock is created
+
+    def 
islock(l): + # isinstance doesn't work on threading.Lock? + return hasattr(l, 'acquire') and hasattr(l, 'release') + + proxy = ArrayProxy('dummyfile', ((10, 10, 10), np.float32)) + assert islock(proxy._lock) + pickled = pickle.dumps(proxy) + unpickled = pickle.loads(pickled) + assert islock(unpickled._lock) + assert proxy._lock is not unpickled._lock diff --git a/nibabel/tests/test_fileslice.py b/nibabel/tests/test_fileslice.py index a515d70226..e9cfe8e0a4 100644 --- a/nibabel/tests/test_fileslice.py +++ b/nibabel/tests/test_fileslice.py @@ -8,6 +8,8 @@ from itertools import product from functools import partial from distutils.version import LooseVersion +from threading import Thread, Lock +import time import numpy as np @@ -689,6 +691,62 @@ def test_read_segments(): assert_raises(Exception, read_segments, fobj, [(0, 100), (100, 200)], 199) +def test_read_segments_lock(): + # Test read_segment locking with multiple threads + fobj = BytesIO() + arr = np.array(np.random.randint(0, 256, 1000), dtype=np.uint8) + fobj.write(arr.tostring()) + + # Encourage the interpreter to switch threads between a seek/read pair + def yielding_read(*args, **kwargs): + time.sleep(0.001) + return fobj._real_read(*args, **kwargs) + + fobj._real_read = fobj.read + fobj.read = yielding_read + + # Generate some random array segments to read from the file + def random_segments(nsegs): + segs = [] + nbytes = 0 + + for i in range(nsegs): + seglo = np.random.randint(0, 998) + seghi = np.random.randint(seglo + 1, 1000) + seglen = seghi - seglo + nbytes += seglen + segs.append([seglo, seglen]) + + return segs, nbytes + + # Get the data that should be returned for the given segments + def get_expected(segs): + segs = [arr[off:off + length] for off, length in segs] + return np.concatenate(segs) + + # Read from the file, check the result. We do this task simultaneously in + # many threads. 
Each thread that passes adds 1 to numpassed[0]
+    numpassed = [0]
+    lock = Lock()
+
+    def runtest():
+        seg, nbytes = random_segments(1)
+        expected = get_expected(seg)
+        _check_bytes(read_segments(fobj, seg, nbytes, lock), expected)
+
+        seg, nbytes = random_segments(10)
+        expected = get_expected(seg)
+        _check_bytes(read_segments(fobj, seg, nbytes, lock), expected)
+
+        with lock:
+            numpassed[0] += 1
+
+    threads = [Thread(target=runtest) for i in range(100)]
+    [t.start() for t in threads]
+    [t.join() for t in threads]
+    assert numpassed[0] == len(threads)
+
+
 def _check_slicer(sliceobj, arr, fobj, offset, order,
                   heuristic=threshold_heuristic):
     new_slice = fileslice(fobj, sliceobj, arr.shape, arr.dtype, offset, order,
diff --git a/nibabel/tests/test_openers.py b/nibabel/tests/test_openers.py
index 550d0d414d..92c74f42e9 100644
--- a/nibabel/tests/test_openers.py
+++ b/nibabel/tests/test_openers.py
@@ -17,6 +17,7 @@
 from ..tmpdirs import InTemporaryDirectory
 from ..volumeutils import BinOpener
+import mock
 from nose.tools import (assert_true, assert_false, assert_equal,
                         assert_not_equal, assert_raises)
 from ..testing import error_warnings
@@ -95,6 +96,40 @@ def test_BinOpener():
                   BinOpener, 'test.txt', 'r')
 
 
+def test_Opener_gzip_type():
+    # Test that BufferedGzipFile or IndexedGzipFile are used as appropriate
+
+    data = 'this is some test data'
+    fname = 'test.gz'
+    mockmod = mock.MagicMock()
+
+    class MockIGZFile(object):
+        def __init__(self, *args, **kwargs):
+            pass
+
+    with InTemporaryDirectory():
+
+        # make some test data
+        with GzipFile(fname, mode='wb') as f:
+            f.write(data.encode())
+
+        # test with indexed_gzip not present
+        with mock.patch('nibabel.openers.HAVE_INDEXED_GZIP', False), \
+             mock.patch('nibabel.arrayproxy.HAVE_INDEXED_GZIP', False), \
+             mock.patch('nibabel.openers.SafeIndexedGzipFile', None,
+                        create=True):
+            assert isinstance(Opener(fname, mode='rb').fobj, GzipFile)
+            assert isinstance(Opener(fname, mode='wb').fobj, GzipFile)
+
+        # test with 
indexed_gzip present
+        with mock.patch('nibabel.openers.HAVE_INDEXED_GZIP', True), \
+             mock.patch('nibabel.arrayproxy.HAVE_INDEXED_GZIP', True), \
+             mock.patch('nibabel.openers.SafeIndexedGzipFile', MockIGZFile,
+                        create=True):
+            assert isinstance(Opener(fname, mode='rb').fobj, MockIGZFile)
+            assert isinstance(Opener(fname, mode='wb').fobj, GzipFile)
+
+
 class TestImageOpener:
 
     def setUp(self):
@@ -202,7 +237,11 @@ class StrictOpener(Opener):
             if lext != ext:  # extension should not be recognized -> file
                 assert_true(isinstance(fobj.fobj, file_class))
             elif lext == 'gz':
-                assert_true(isinstance(fobj.fobj, GzipFile))
+                try:
+                    from indexed_gzip import IndexedGzipFile
+                except ImportError:
+                    IndexedGzipFile = GzipFile
+                assert_true(isinstance(fobj.fobj, (GzipFile, IndexedGzipFile)))
             else:
                 assert_true(isinstance(fobj.fobj, BZ2File))