[ VIGRA Homepage | Function Index | Class Index | Namespaces | File List | Main Page ]

multi_array_chunked_hdf5.hxx VIGRA

1 /************************************************************************/
2 /* */
3 /* Copyright 2012-2014 by Ullrich Koethe and Thorben Kroeger */
4 /* */
5 /* This file is part of the VIGRA computer vision library. */
6 /* The VIGRA Website is */
7 /* http://hci.iwr.uni-heidelberg.de/vigra/ */
8 /* Please direct questions, bug reports, and contributions to */
9 /* ullrich.koethe@iwr.uni-heidelberg.de or */
10 /* vigra@informatik.uni-hamburg.de */
11 /* */
12 /* Permission is hereby granted, free of charge, to any person */
13 /* obtaining a copy of this software and associated documentation */
14 /* files (the "Software"), to deal in the Software without */
15 /* restriction, including without limitation the rights to use, */
16 /* copy, modify, merge, publish, distribute, sublicense, and/or */
17 /* sell copies of the Software, and to permit persons to whom the */
18 /* Software is furnished to do so, subject to the following */
19 /* conditions: */
20 /* */
21 /* The above copyright notice and this permission notice shall be */
22 /* included in all copies or substantial portions of the */
23 /* Software. */
24 /* */
25 /* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND */
26 /* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES */
27 /* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND */
28 /* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT */
29 /* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, */
30 /* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING */
31 /* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR */
32 /* OTHER DEALINGS IN THE SOFTWARE. */
33 /* */
34 /************************************************************************/
35 
36 #ifndef VIGRA_MULTI_ARRAY_CHUNKED_HDF5_HXX
37 #define VIGRA_MULTI_ARRAY_CHUNKED_HDF5_HXX
38 
39 #include <queue>
40 
41 #include "multi_array_chunked.hxx"
42 #include "hdf5impex.hxx"
43 
44 // Bounds checking Macro used if VIGRA_CHECK_BOUNDS is defined.
45 #ifdef VIGRA_CHECK_BOUNDS
46 #define VIGRA_ASSERT_INSIDE(diff) \
47  vigra_precondition(this->isInside(diff), "Index out of bounds")
48 #else
49 #define VIGRA_ASSERT_INSIDE(diff)
50 #endif
51 
52 namespace vigra {
53 
54 /** \addtogroup ChunkedArrayClasses
55 */
56 //@{
57 
58 /** \weakgroup ParallelProcessing
59  \sa ChunkedArrayHDF5
60 */
61 
62 /** Implement ChunkedArray as a chunked dataset in an HDF5 file.
63 
64  <b>\#include</b> <vigra/multi_array_chunked_hdf5.hxx> <br/>
65  Namespace: vigra
66 
67  This uses the native chunking and compression functionality provided by the
68  HDF5 library. Note: This file must only be included when the HDF5 headers
69  and libraries are installed on the system.
70 */
71 template <unsigned int N, class T, class Alloc = std::allocator<T> >
73 : public ChunkedArray<N, T>
74 {
75  /* REMARKS
76  Alternatives are:
77  * Back chunks by HDF5 chunks, possibly using on-the-fly compression. This
78  is in particular useful for existing HDF5 files.
79  * Back chunks by HDF5 datasets. This can be combined with compression
80  (both explicit and on-the-fly) or with memory mapping (using the
81  function H5Dget_offset() to get the offset from the beginning of the file).
82  */
83 
84  public:
85 
86  class Chunk
87  : public ChunkBase<N, T>
88  {
89  public:
90  typedef typename MultiArrayShape<N>::type shape_type;
91  typedef T value_type;
92  typedef value_type * pointer;
93  typedef value_type & reference;
94 
95  Chunk(shape_type const & shape, shape_type const & start,
96  ChunkedArrayHDF5 * array, Alloc const & alloc)
97  : ChunkBase<N, T>(detail::defaultStride(shape))
98  , shape_(shape)
99  , start_(start)
100  , array_(array)
101  , alloc_(alloc)
102  {}
103 
104  ~Chunk()
105  {
106  write();
107  }
108 
109  std::size_t size() const
110  {
111  return prod(shape_);
112  }
113 
114  void write(bool deallocate = true)
115  {
116  if(this->pointer_ != 0)
117  {
118  if(!array_->file_.isReadOnly())
119  {
120  herr_t status = array_->file_.writeBlock(array_->dataset_, start_,
121  MultiArrayView<N, T>(shape_, this->strides_, this->pointer_));
122  vigra_postcondition(status >= 0,
123  "ChunkedArrayHDF5: write to dataset failed.");
124  }
125  if(deallocate)
126  {
127  alloc_.deallocate(this->pointer_, this->size());
128  this->pointer_ = 0;
129  }
130  }
131  }
132 
133  pointer read()
134  {
135  if(this->pointer_ == 0)
136  {
137  this->pointer_ = alloc_.allocate(this->size());
138  herr_t status = array_->file_.readBlock(array_->dataset_, start_, shape_,
139  MultiArrayView<N, T>(shape_, this->strides_, this->pointer_));
140  vigra_postcondition(status >= 0,
141  "ChunkedArrayHDF5: read from dataset failed.");
142  }
143  return this->pointer_;
144  }
145 
146  shape_type shape_, start_;
147  ChunkedArrayHDF5 * array_;
148  Alloc alloc_;
149 
150  private:
151  Chunk & operator=(Chunk const &);
152  };
153 
154  typedef ChunkedArray<N, T> base_type;
156  typedef typename ChunkStorage::difference_type shape_type;
157  typedef T value_type;
158  typedef value_type * pointer;
159  typedef value_type & reference;
160 
161  /** \brief Construct with given 'shape', 'chunk_shape' and 'options',
162  using 'alloc' to manage the in-memory version of the data.
163 
164  The data are placed in 'file' at the internal path 'dataset'. Argument
165  'mode' must be one of the following:
166  <ul>
167  <li>HDF5File::New: Create new dataset, possibly deleting any existing content.
168  It is an error to request this mode when the entire
169  'file' is read-only.
170  <li>HDF5File::Replace: Same as New.
171  <li>HDF5File::ReadWrite: Open the dataset for reading and writing. Create
172  the dataset if it doesn't exist. It is an error
173  to request this mode when 'file' is read-only.
174  <li>HDF5File::ReadOnly: Open the dataset for reading. It is an error to
175  request this mode when the dataset doesn't exist.
176  <li>HDF5File::Default: Resolves to ReadOnly when the dataset exists, and
177  to New otherwise.
178  </ul>
179  The supported compression algorithms are:
180  <ul>
181  <li>ZLIB_FAST: Fast compression using 'zlib' (slower than LZ4, but higher compression).
182  <li>ZLIB_BEST: Best compression using 'zlib', slow.
183  <li>ZLIB_NONE: Use 'zlib' format without compression.
184  <li>DEFAULT_COMPRESSION: Same as ZLIB_FAST.
185  </ul>
186  */
187  ChunkedArrayHDF5(HDF5File const & file, std::string const & dataset,
188  HDF5File::OpenMode mode,
189  shape_type const & shape,
190  shape_type const & chunk_shape=shape_type(),
191  ChunkedArrayOptions const & options = ChunkedArrayOptions(),
192  Alloc const & alloc = Alloc())
193  : ChunkedArray<N, T>(shape, chunk_shape, options),
194  file_(file),
195  dataset_name_(dataset),
196  dataset_(),
197  compression_(options.compression_method),
198  alloc_(alloc)
199  {
200  init(mode);
201  }
202 
203  /** \brief Construct for an already existing dataset with given 'options',
204  using 'alloc' to manage the in-memory version of the data.
205 
206  The data must be located in 'file' at the internal path 'dataset'. The
207  array's shape and chunk_shape are read from the file. It is an error
208  to use this constructor when 'dataset' doesn't exist.
209 
210  Argument 'mode' must be one of the following:
211  <ul>
212  <li>HDF5File::ReadWrite: Open the dataset for reading and writing. It is an error
213  to request this mode when 'file' is read-only.
214  <li>HDF5File::ReadOnly: Open the dataset for reading (default).
215  <li>HDF5File::Default: Same as ReadOnly.
216  </ul>
217  The supported compression algorithms are:
218  <ul>
219  <li>ZLIB_FAST: Fast compression using 'zlib' (slower than LZ4, but higher compression).
220  <li>ZLIB_BEST: Best compression using 'zlib', slow.
221  <li>ZLIB_NONE: Use 'zlib' format without compression.
222  <li>DEFAULT_COMPRESSION: Same as ZLIB_FAST.
223  </ul>
224  */
225  ChunkedArrayHDF5(HDF5File const & file, std::string const & dataset,
226  HDF5File::OpenMode mode = HDF5File::ReadOnly,
227  ChunkedArrayOptions const & options = ChunkedArrayOptions(),
228  Alloc const & alloc = Alloc())
229  : ChunkedArray<N, T>(shape_type(),
230  ceilPower2<N>(shape_type(file.getChunkShape(dataset).begin())),
231  options),
232  file_(file),
233  dataset_name_(dataset),
234  dataset_(),
235  compression_(options.compression_method),
236  alloc_(alloc)
237  {
238  init(mode);
239  }
240 
241 
242  // copy constructor
244  : ChunkedArray<N, T>(src),
245  file_(src.file_),
246  dataset_name_(src.dataset_name_),
247  compression_(src.compression_),
248  alloc_(src.alloc_)
249  {
250  if( file_.isReadOnly() )
251  init(HDF5File::ReadOnly);
252  else
253  init(HDF5File::ReadWrite);
254  }
255 
256  void init(HDF5File::OpenMode mode)
257  {
258  bool exists = file_.existsDataset(dataset_name_);
259 
260  if(mode == HDF5File::Replace)
261  {
262  mode = HDF5File::New;
263  }
264  else if(mode == HDF5File::Default)
265  {
266  if(exists)
267  mode = HDF5File::ReadOnly;
268  else
269  mode = HDF5File::New;
270  }
271 
272  if(mode == HDF5File::ReadOnly)
273  file_.setReadOnly();
274  else
275  vigra_precondition(!file_.isReadOnly(),
276  "ChunkedArrayHDF5(): 'mode' is incompatible with read-only file.");
277 
278  vigra_precondition(exists || !file_.isReadOnly(),
279  "ChunkedArrayHDF5(): dataset does not exist, but file is read-only.");
280 
281  if(!exists || mode == HDF5File::New)
282  {
283  // FIXME: set rdcc_nbytes to 0 (disable cache, because we don't
284  // need two caches
285  // H5Pset_chunk_cache (dapl, rdcc_nslots, rdcc_nbytes, rdcc_w0);
286  // Chunk cache size (rdcc_nbytes) should be large
287  // enough to hold all the chunks in a selection
288  // * If this is not possible, it may be best to disable chunk
289  // caching altogether (set rdcc_nbytes to 0)
290  // * rdcc_slots should be a prime number that is at
291  // least 10 to 100 times the number of chunks that can fit
292  // into rdcc_nbytes
293  // * rdcc_w0 should be set to 1 if chunks that have been
294  // fully read/written will never be read/written again
295  //
296  // the above may be WRONG in general - it may only apply if the
297  // chunk size in the file matches the chunk size in the CachedArray.
298  // Otherwise, make sure that the file cache can hold at least as many
299  // chunks as are needed for a single array chunk.
300  if(compression_ == DEFAULT_COMPRESSION)
301  compression_ = ZLIB_FAST;
302  vigra_precondition(compression_ != LZ4,
303  "ChunkedArrayHDF5(): HDF5 does not support LZ4 compression.");
304 
305  vigra_precondition(this->size() > 0,
306  "ChunkedArrayHDF5(): invalid shape.");
307  typename detail::HDF5TypeTraits<T>::value_type init(this->fill_scalar_);
308  dataset_ = file_.createDataset<N, T>(dataset_name_,
309  this->shape_,
310  init,
311  this->chunk_shape_,
312  compression_);
313  }
314  else
315  {
316  dataset_ = file_.getDatasetHandleShared(dataset_name_);
317 
318  // check shape
319  ArrayVector<hsize_t> fileShape(file_.getDatasetShape(dataset_name_));
320  typedef detail::HDF5TypeTraits<T> TypeTraits;
321  if(TypeTraits::numberOfBands() > 1)
322  {
323  vigra_precondition(fileShape.size() == N+1,
324  "ChunkedArrayHDF5(file, dataset): dataset has wrong dimension.");
325  vigra_precondition(fileShape[0] == static_cast<unsigned>(TypeTraits::numberOfBands()),
326  "ChunkedArrayHDF5(file, dataset): dataset has wrong number of bands.");
327  shape_type shape(fileShape.begin()+1);
328  if(this->size() > 0)
329  {
330  vigra_precondition(shape == this->shape_,
331  "ChunkedArrayHDF5(file, dataset, shape): shape mismatch between dataset and shape argument.");
332  }
333  else
334  {
335  this->shape_ = shape;
336  }
337  }
338  else
339  {
340  vigra_precondition(fileShape.size() == N,
341  "ChunkedArrayHDF5(file, dataset): dataset has wrong dimension.");
342  shape_type shape(fileShape.begin());
343  if(this->size() > 0)
344  {
345  vigra_precondition(shape == this->shape_,
346  "ChunkedArrayHDF5(file, dataset, shape): shape mismatch between dataset and shape argument.");
347  }
348  else
349  {
350  this->shape_ = shape;
351  ChunkStorage(detail::computeChunkArrayShape(shape, this->bits_, this->mask_)).swap(this->handle_array_);
352  }
353  }
354  typename ChunkStorage::iterator i = this->handle_array_.begin(),
355  end = this->handle_array_.end();
356  for(; i != end; ++i)
357  {
358  i->chunk_state_.store(base_type::chunk_asleep);
359  }
360  }
361  }
362 
364  {
365  closeImpl(true);
366  }
367 
368  void close()
369  {
370  closeImpl(false);
371  }
372 
373  void closeImpl(bool force_destroy)
374  {
375  flushToDiskImpl(true, force_destroy);
376  file_.close();
377  }
378 
379  void flushToDisk()
380  {
381  flushToDiskImpl(false, false);
382  }
383 
384  void flushToDiskImpl(bool destroy, bool force_destroy)
385  {
386  if(file_.isReadOnly())
387  return;
388 
389  threading::lock_guard<threading::mutex> guard(*this->chunk_lock_);
390  typename ChunkStorage::iterator i = this->handle_array_.begin(),
391  end = this->handle_array_.end();
392  if(destroy && !force_destroy)
393  {
394  for(; i != end; ++i)
395  {
396  vigra_precondition(i->chunk_state_.load() <= 0,
397  "ChunkedArrayHDF5::close(): cannot close file because there are active chunks.");
398  }
399  i = this->handle_array_.begin();
400  }
401  for(; i != end; ++i)
402  {
403  Chunk * chunk = static_cast<Chunk*>(i->pointer_);
404  if(!chunk)
405  continue;
406  if(destroy)
407  {
408  delete chunk;
409  i->pointer_ = 0;
410  }
411  else
412  {
413  chunk->write(false);
414  }
415  }
416  file_.flushToDisk();
417  }
418 
419  virtual bool isReadOnly() const
420  {
421  return file_.isReadOnly();
422  }
423 
424  virtual pointer loadChunk(ChunkBase<N, T> ** p, shape_type const & index)
425  {
426  vigra_precondition(file_.isOpen(),
427  "ChunkedArrayHDF5::loadChunk(): file was already closed.");
428  if(*p == 0)
429  {
430  *p = new Chunk(this->chunkShape(index), index*this->chunk_shape_, this, alloc_);
431  this->overhead_bytes_ += sizeof(Chunk);
432  }
433  return static_cast<Chunk *>(*p)->read();
434  }
435 
436  virtual bool unloadChunk(ChunkBase<N, T> * chunk, bool /* destroy */)
437  {
438  if(!file_.isOpen())
439  return true;
440  static_cast<Chunk *>(chunk)->write();
441  return false;
442  }
443 
444  virtual std::string backend() const
445  {
446  return "ChunkedArrayHDF5<'" + file_.filename() + "/" + dataset_name_ + "'>";
447  }
448 
449  virtual std::size_t dataBytes(ChunkBase<N,T> * c) const
450  {
451  return c->pointer_ == 0
452  ? 0
453  : static_cast<Chunk*>(c)->size()*sizeof(T);
454  }
455 
456  virtual std::size_t overheadBytesPerChunk() const
457  {
458  return sizeof(Chunk) + sizeof(SharedChunkHandle<N, T>);
459  }
460 
461  std::string fileName() const
462  {
463  return file_.filename();
464  }
465 
466  std::string datasetName() const
467  {
468  return dataset_name_;
469  }
470 
471  HDF5File file_;
472  std::string dataset_name_;
473  HDF5HandleShared dataset_;
474  CompressionMethod compression_;
475  Alloc alloc_;
476 };
477 
478 //@}
479 
480 } // namespace vigra
481 
482 #undef VIGRA_ASSERT_INSIDE
483 
484 #endif /* VIGRA_MULTI_ARRAY_CHUNKED_HDF5_HXX */
iterator end()
Definition: multi_array.hxx:1937
std::size_t dataBytes() const
Bytes of main memory occupied by the array's data.
Definition: multi_array_chunked.hxx:1674
Option object for ChunkedArray construction.
Definition: multi_array_chunked.hxx:1267
ArrayVector< hsize_t > getDatasetShape(std::string datasetName) const
Get the shape of each dimension of a certain dataset.
Definition: hdf5impex.hxx:1394
MultiArrayIndex size() const
Return the number of elements in this array.
ChunkedArrayHDF5(HDF5File const &file, std::string const &dataset, HDF5File::OpenMode mode=HDF5File::ReadOnly, ChunkedArrayOptions const &options=ChunkedArrayOptions(), Alloc const &alloc=Alloc())
Construct for an already existing dataset with given 'options', using 'alloc' to manage the in-memory...
Definition: multi_array_chunked_hdf5.hxx:225
view_type::iterator iterator
Definition: multi_array.hxx:2548
iterator begin()
Create a scan-order iterator for the entire chunked array.
Definition: multi_array_chunked.hxx:2381
iterator begin()
Definition: multi_array.hxx:1921
Main MultiArray class containing the memory management.
Definition: multi_array.hxx:2474
shape_type const & shape() const
Return the shape in this array.
HDF5HandleShared getDatasetHandleShared(std::string const &datasetName) const
Obtain a shared HDF5 handle of a dataset.
Definition: hdf5impex.hxx:1527
Interface and base class for chunked arrays.
Definition: multi_array_chunked.hxx:463
HDF5HandleShared createDataset(std::string datasetName, TinyVector< MultiArrayIndex, N > const &shape, typename detail::HDF5TypeTraits< T >::value_type init=typename detail::HDF5TypeTraits< T >::value_type(), TinyVector< MultiArrayIndex, N > const &chunkSize=(TinyVector< MultiArrayIndex, N >()), int compressionParameter=0)
Create a new dataset. This function can be used to create a dataset filled with a default value init...
Definition: hdf5impex.hxx:2761
view_type::difference_type difference_type
Definition: multi_array.hxx:2522
shape_type const & chunkShape() const
Return the global chunk shape.
std::string filename() const
Get the name of the associated file.
Definition: hdf5impex.hxx:1347
NumericTraits< V >::Promote prod(TinyVectorBase< V, SIZE, D1, D2 > const &l)
product of the vector's elements
Definition: tinyvector.hxx:2097
Definition: multi_array_chunked_hdf5.hxx:72
ChunkedArrayHDF5(HDF5File const &file, std::string const &dataset, HDF5File::OpenMode mode, shape_type const &shape, shape_type const &chunk_shape=shape_type(), ChunkedArrayOptions const &options=ChunkedArrayOptions(), Alloc const &alloc=Alloc())
Construct with given 'shape', 'chunk_shape' and 'options', using 'alloc' to manage the in-memory vers...
Definition: multi_array_chunked_hdf5.hxx:187
iterator end()
Create the end iterator for scan-order iteration over the entire chunked array.
Definition: multi_array_chunked.hxx:2389
OpenMode
Set how a file is opened.
Definition: hdf5impex.hxx:1031
void flushToDisk()
Immediately write all data to disk.
Definition: hdf5impex.hxx:2234
Class for fixed size vectors. This class contains an array of size SIZE of the specified VALUETYPE...
Definition: accessor.hxx:940
void close()
Close the current file.
Definition: hdf5impex.hxx:1199
Base class for, and view to, vigra::MultiArray.
Definition: multi_array.hxx:704
virtual std::size_t overheadBytesPerChunk() const
Bytes of main memory needed to manage a single chunk.
Definition: multi_array_chunked_hdf5.hxx:456
bool existsDataset(std::string datasetName) const
Check if given datasetName exists.
Definition: hdf5impex.hxx:1354
UInt32 ceilPower2(UInt32 x)
Round up to the nearest power of 2.
Definition: mathutil.hxx:294
Access to HDF5 files.
Definition: hdf5impex.hxx:974

© Ullrich Köthe (ullrich.koethe@iwr.uni-heidelberg.de)
Heidelberg Collaboratory for Image Processing, University of Heidelberg, Germany

html generated using doxygen and Python
vigra 1.11.1 (Fri May 19 2017)