36 #ifndef VIGRA_SAMPLING_HXX
37 #define VIGRA_SAMPLING_HXX
39 #include "array_vector.hxx"
// Option fields of SamplerOptions, read by the Sampler constructors.
// Fraction of the total item count to draw; Sampler evaluates it when sample_size is 0.
67 double sample_proportion;
// Explicit number of samples; 0 makes Sampler derive the size from sample_proportion.
68 unsigned int sample_size;
// true: indices are drawn with replacement.
69 bool sample_with_replacement;
// true: stratified sampling is requested.
70 bool stratified_sampling;
// Constructor initializers: proportion 1.0, with replacement, not stratified.
// NOTE(review): listing line 74 is not visible here; presumably it initializes sample_size - confirm.
73 : sample_proportion(1.0),
75 sample_with_replacement(
true),
76 stratified_sampling(
false)
// Body of withReplacement(bool in): stores the flag as given.
85 sample_with_replacement = in;
// Body of withoutReplacement(bool in): stores the negated flag.
95 sample_with_replacement = !in;
// Body of sampleProportion(double proportion): rejects negative values through
// vigra_precondition, then stores the proportion.
125 vigra_precondition(proportion >= 0.0,
126 "SamplerOptions::sampleProportion(): argument must not be negative.");
127 sample_proportion = proportion;
// Body of stratified(bool in): stores the flag as given.
143 stratified_sampling = in;
// Sampler class template; the random number generator type defaults to MersenneTwister.
231 template<
class Random = MersenneTwister >
// Per-stratum containers keyed by stratum label: the item indices belonging to the
// stratum, and the number of samples to draw from it.
249 typedef std::map<IndexType, IndexArrayType> StrataIndicesType;
250 typedef std::map<IndexType, int> StrataSizesType;
// Sentinel stored in current_oob_count_ while the out-of-bag list has not been computed.
254 static const int oobInvalid = -1;
// Number of items to sample from, and number of samples drawn per call.
256 int total_count_, sample_size_;
// mutable: the const accessor oobIndices() fills in the count on demand.
257 mutable int current_oob_count_;
258 StrataIndicesType strata_indices_;
259 StrataSizesType strata_sample_size_;
// Generator owned by the sampler; random_ refers to it unless the constructor
// receives an external generator.
263 Random default_random_;
264 Random
const & random_;
// Distributes sample_size_ over the strata and records each stratum's share in
// strata_sample_size_.
// NOTE(review): the definition of strata_sample_size (listing lines 268-271) is not
// visible here; presumably sample_size_ / strataCount() rounded up - confirm.
267 void initStrataCount()
272 int strata_total_count = strata_sample_size *
strataCount();
274 for(StrataIndicesType::iterator i = strata_indices_.begin();
275 i != strata_indices_.end(); ++i)
// While the running total still exceeds sample_size_, this stratum gets one sample
// less and the total is reduced accordingly.
277 if(strata_total_count > sample_size_)
279 strata_sample_size_[i->first] = strata_sample_size - 1;
280 --strata_total_count;
// Full per-stratum share; presumably the else branch (listing lines 281-283 are not shown).
284 strata_sample_size_[i->first] = strata_sample_size;
// Tail of the Sampler(UInt32 totalCount, SamplerOptions const & opt, Random const * rnd)
// signature: the constructor that receives no strata labels.
298 Random
const * rnd = 0)
// NOTE(review): totalCount is UInt32 while total_count_ is int; counts above INT_MAX
// would not survive this conversion.
299 : total_count_(totalCount),
// With opt.sample_size == 0 the sample size is ceil(total_count_ * opt.sample_proportion).
// NOTE(review): the other branch (listing line 302) is not visible; presumably opt.sample_size.
300 sample_size_(opt.sample_size == 0
301 ? static_cast<int>((std::
ceil(total_count_ * opt.sample_proportion)))
// The out-of-bag list starts out as "not computed".
303 current_oob_count_(oobInvalid),
304 current_sample_(sample_size_),
305 current_oob_sample_(total_count_),
306 is_used_(total_count_),
// Use the caller's generator when one is given, otherwise the internally seeded one.
307 default_random_(RandomSeed),
308 random_(rnd ? *rnd : default_random_),
// Without replacement, the sample cannot be larger than the population.
311 vigra_precondition(opt.sample_with_replacement || sample_size_ <= total_count_,
312 "Sampler(): Cannot draw without replacement when data size is smaller than sample count.");
// This overload has no strata labels, so a stratified request is rejected.
314 vigra_precondition(!opt.stratified_sampling,
315 "Sampler(): Stratified sampling requested, but no strata given.");
// All indices 0 .. total_count_-1 are placed in the single stratum with key 0.
318 strata_indices_[0].resize(total_count_);
319 for(
int i=0; i<total_count_; ++i)
320 strata_indices_[0][i] = i;
// Sampler(Iterator strataBegin, Iterator strataEnd, SamplerOptions const & opt,
// Random const * rnd): constructor that receives one stratum label per item.
336 template <
class Iterator>
338 Random
const * rnd = 0)
// The number of items is the length of the label range.
339 : total_count_(strataEnd - strataBegin),
// With opt.sample_size == 0 the sample size is ceil(total_count_ * opt.sample_proportion).
// NOTE(review): the other branch (listing line 342) is not visible; presumably opt.sample_size.
340 sample_size_(opt.sample_size == 0
341 ? static_cast<int>((std::
ceil(total_count_ * opt.sample_proportion)))
343 current_oob_count_(oobInvalid),
344 current_sample_(sample_size_),
345 current_oob_sample_(total_count_),
346 is_used_(total_count_),
// Use the caller's generator when one is given, otherwise the internally seeded one.
347 default_random_(RandomSeed),
348 random_(rnd ? *rnd : default_random_),
// Without replacement, the sample cannot be larger than the population.
351 vigra_precondition(opt.sample_with_replacement || sample_size_ <= total_count_,
352 "Sampler(): Cannot draw without replacement when data size is smaller than sample count.");
// Stratified: item index i is appended to the index list of its label.
355 if(opt.stratified_sampling)
357 for(
int i = 0; strataBegin != strataEnd; ++i, ++strataBegin)
359 strata_indices_[*strataBegin].push_back(i);
// Presumably the non-stratified branch (listing lines 360-363 are not shown):
// all indices go into the single stratum with key 0.
364 strata_indices_[0].resize(total_count_);
365 for(
int i=0; i<total_count_; ++i)
366 strata_indices_[0][i] = i;
// The sample count must not be smaller than the number of strata.
369 vigra_precondition(sample_size_ >= static_cast<int>(strata_indices_.size()),
370 "Sampler(): Requested sample count must be at least as large as the number of strata.");
// Body of operator[](int k): the k-th entry of the current sample.
381 return current_sample_[k];
// Body of strataCount(): number of entries in the strata map.
// NOTE(review): size() is converted to the int return type here.
415 return strata_indices_.size();
// Body of stratifiedSampling(): the stratified flag of the stored options.
423 return options_.stratified_sampling;
// Body of withReplacement(): the replacement flag of the stored options.
430 return options_.sample_with_replacement;
// Body of sampledIndices(): the current sample array.
437 return current_sample_;
// Body of oobIndices(): rebuilds the out-of-bag index list when the cached count is
// marked invalid, then returns the valid prefix of the buffer.
445 if(current_oob_count_ == oobInvalid)
447 current_oob_count_ = 0;
448 for(
int i = 0; i<total_count_; ++i)
// NOTE(review): the condition guarding the next two lines (listing lines 449-451) is
// not visible; presumably it tests that index i is not marked in is_used_ - confirm.
452 current_oob_sample_[current_oob_count_] = i;
453 ++current_oob_count_;
// Only the first current_oob_count_ entries of the buffer are returned.
457 return current_oob_sample_.
subarray(0, current_oob_count_);
// Const accessor returning a reference to an IsUsedArrayType.
// NOTE(review): the body is not visible in this listing; presumably it returns is_used_.
459 IsUsedArrayType
const & is_used()
const
// Sampler<Random>::sample(): draws a new sample into current_sample_ and flags every
// drawn index in is_used_.
466 template<
class Random>
// Invalidate the cached out-of-bag list and clear the usage flags.
469 current_oob_count_ = oobInvalid;
470 is_used_.init(
false);
472 if(options_.sample_with_replacement)
// With replacement: every stratum contributes strata_sample_size_[label] members, each
// picked at a random position within the stratum.
// NOTE(review): j is declared on a listing line not shown; it is the write position
// in current_sample_ and keeps advancing across strata.
476 StrataIndicesType::iterator iter;
477 for(iter = strata_indices_.begin(); iter != strata_indices_.end(); ++iter)
480 int stratum_size = iter->second.size();
481 for(
int i = 0; i < static_cast<int>(strata_sample_size_[iter->first]); ++i, ++j)
483 current_sample_[j] = iter->second[random_.uniformInt(stratum_size)];
484 is_used_[current_sample_[j]] =
true;
// Presumably the without-replacement branch (the else is on listing lines not shown):
// position i is swapped with a random position at or after i, then taken as the sample.
// This assumes uniformInt(n) yields a value in [0, n) - confirm.
492 StrataIndicesType::iterator iter;
493 for(iter = strata_indices_.begin(); iter != strata_indices_.end(); ++iter)
496 int stratum_size = iter->second.size();
497 for(
int i = 0; i < static_cast<int>(strata_sample_size_[iter->first]); ++i, ++j)
// NOTE(review): if a stratum's quota exceeds its size, stratum_size - i reaches 0 and
// i runs past the stratum; confirm that callers or preconditions rule this out.
499 std::swap(iter->second[i], iter->second[i+ random_.uniformInt(stratum_size - i)]);
500 current_sample_[j] = iter->second[i];
501 is_used_[current_sample_[j]] =
true;
// PoissonSampler class template; the random number generator type defaults to RandomTT800.
507 template<
class Random =RandomTT800 >
512 typedef Int32 IndexType;
// Indices collected by the most recent sampling pass.
514 IndexArrayType used_indices_;
// Constructor taking the rate lambda and the index range bounds.
519 PoissonSampler(
double lambda,IndexType minIndex,IndexType maxIndex)
// Sampling pass: start from an empty result and visit every index in [minIndex, maxIndex).
527 used_indices_.clear();
529 for(i=minIndex;i<maxIndex;++i)
// NOTE(review): L looks loop-invariant but appears to sit inside the per-index loop;
// the listing lines between are not shown - confirm before hoisting.
534 double L=
exp(-lambda);
// p accumulates a product of uniform random numbers; presumably it is compared against
// L to determine how often index i is inserted (comparison not visible) - confirm.
538 p*=randfloat.uniform53();
545 used_indices_.push_back(i);
// operator[](int in): the in-th collected index.
551 IndexType
const & operator[](
int in)
const
553 return used_indices_[in];
// numOfSamples(): number of collected indices.
556 int numOfSamples()
const
558 return used_indices_.size();
ArrayVectorView< IndexType > IndexArrayViewType
Definition: sampling.hxx:246
IndexType operator[](int k) const
Definition: sampling.hxx:379
int strataCount() const
Definition: sampling.hxx:413
Sampler(UInt32 totalCount, SamplerOptions const &opt=SamplerOptions(), Random const *rnd=0)
Definition: sampling.hxx:297
Create random samples from a sequence of indices.
Definition: sampling.hxx:232
SamplerOptions & sampleProportion(double proportion)
Determine the number of samples to draw as a proportion of the total number. That is, ceil(totalCount * proportion) samples are drawn when no explicit sample size is set.
Definition: sampling.hxx:123
void sample()
Definition: sampling.hxx:467
linalg::TemporaryMatrix< T > exp(MultiArrayView< 2, T, C > const &v)
bool withReplacement() const
Definition: sampling.hxx:428
Sampler(Iterator strataBegin, Iterator strataEnd, SamplerOptions const &opt=SamplerOptions(), Random const *rnd=0)
Definition: sampling.hxx:337
SamplerOptions & sampleSize(unsigned int size)
Draw the given number of samples. If stratifiedSampling is true, the size is equally distributed across the strata.
Definition: sampling.hxx:106
int sampleSize() const
Definition: sampling.hxx:397
int size() const
Definition: sampling.hxx:404
bool stratifiedSampling() const
Definition: sampling.hxx:421
detail::SelectIntegerType< 32, detail::SignedIntTypes >::type Int32
32-bit signed int
Definition: sized_int.hxx:175
IndexArrayViewType sampledIndices() const
Definition: sampling.hxx:435
int totalCount() const
Definition: sampling.hxx:390
this_type subarray(size_type begin, size_type end) const
Definition: array_vector.hxx:200
Int32 IndexType
Definition: sampling.hxx:239
SamplerOptions & stratified(bool in=true)
Draw equally many samples from each "stratum". A stratum is a group of like entities, e.g. pixels belonging to the same object class. This is useful to create balanced samples when the class probabilities are very unbalanced (e.g. when there are many background and few foreground pixels). Stratified sampling thus avoids that a trained classifier is biased towards the majority class.
Definition: sampling.hxx:141
SamplerOptions & withReplacement(bool in=true)
Sample from training population with replacement.
Definition: sampling.hxx:83
detail::SelectIntegerType< 32, detail::UnsignedIntTypes >::type UInt32
32-bit unsigned int
Definition: sized_int.hxx:183
int ceil(FixedPoint< IntBits, FracBits > v)
rounding up.
Definition: fixedpoint.hxx:675
Options object for the Sampler class.
Definition: sampling.hxx:63
IndexArrayViewType oobIndices() const
Definition: sampling.hxx:443
SamplerOptions & withoutReplacement(bool in=true)
Sample from training population without replacement.
Definition: sampling.hxx:93