[ VIGRA Homepage | Function Index | Class Index | Namespaces | File List | Main Page ]

details RandomForest< LabelType, PreprocessorTag > Class Template Reference VIGRA

Random forest version 2 (see also vigra::rf3::RandomForest for version 3) More...

#include <vigra/random_forest.hxx>

Public Member Functions

Constructors

Note: No copy constructor specified as no pointers are manipulated in this class

 RandomForest (Options_t const &options=Options_t(), ProblemSpec_t const &ext_param=ProblemSpec_t())
 default constructor More...
 
template<class TopologyIterator , class ParameterIterator >
 RandomForest (int treeCount, TopologyIterator topology_begin, ParameterIterator parameter_begin, ProblemSpec_t const &problem_spec, Options_t const &options=Options_t())
 Create RF from external source. More...
 
Data Access

data access interface - usage of member variables is deprecated

ProblemSpec_t const & ext_param () const
 return external parameters for viewing More...
 
void set_ext_param (ProblemSpec_t const &in)
 set external parameters More...
 
Options_t & set_options ()
 access random forest options More...
 
Options_t const & options () const
 access const random forest options More...
 
DecisionTree_t const & tree (int index) const
 access const trees
 
DecisionTree_t & tree (int index)
 access trees
 
int feature_count () const
 return number of features used while training.
 
int column_count () const
 return number of features used while training. More...
 
int class_count () const
 return number of classes used while training.
 
int tree_count () const
 return number of trees
 
Learning

Following functions differ in the degree of customization allowed

template<class U , class C1 , class U2 , class C2 , class Split_t , class Stop_t , class Visitor_t , class Random_t >
void learn (MultiArrayView< 2, U, C1 > const &features, MultiArrayView< 2, U2, C2 > const &response, Visitor_t visitor, Split_t split, Stop_t stop, Random_t const &random)
 learn on data with custom config and random number generator More...
 
template<class U , class C1 , class U2 , class C2 , class Split_t , class Stop_t , class Visitor_t >
void learn (MultiArrayView< 2, U, C1 > const &features, MultiArrayView< 2, U2, C2 > const &response, Visitor_t visitor, Split_t split, Stop_t stop)
 
template<class U , class C1 , class U2 , class C2 , class Visitor_t >
void learn (MultiArrayView< 2, U, C1 > const &features, MultiArrayView< 2, U2, C2 > const &labels, Visitor_t visitor)
 
template<class U , class C1 , class U2 , class C2 , class Visitor_t , class Split_t >
void learn (MultiArrayView< 2, U, C1 > const &features, MultiArrayView< 2, U2, C2 > const &labels, Visitor_t visitor, Split_t split)
 
template<class U , class C1 , class U2 , class C2 >
void learn (MultiArrayView< 2, U, C1 > const &features, MultiArrayView< 2, U2, C2 > const &labels)
 learn on data with default configuration More...
 
template<class U , class C1 , class U2 , class C2 , class Split_t , class Stop_t , class Visitor_t , class Random_t >
void onlineLearn (MultiArrayView< 2, U, C1 > const &features, MultiArrayView< 2, U2, C2 > const &response, int new_start_index, Visitor_t visitor_, Split_t split_, Stop_t stop_, Random_t &random, bool adjust_thresholds=false)
 
template<class U , class C1 , class U2 , class C2 >
void onlineLearn (MultiArrayView< 2, U, C1 > const &features, MultiArrayView< 2, U2, C2 > const &labels, int new_start_index, bool adjust_thresholds=false)
 
template<class U , class C1 , class U2 , class C2 , class Split_t , class Stop_t , class Visitor_t , class Random_t >
void reLearnTree (MultiArrayView< 2, U, C1 > const &features, MultiArrayView< 2, U2, C2 > const &response, int treeId, Visitor_t visitor_, Split_t split_, Stop_t stop_, Random_t &random)
 
template<class U , class C1 , class U2 , class C2 >
void reLearnTree (MultiArrayView< 2, U, C1 > const &features, MultiArrayView< 2, U2, C2 > const &labels, int treeId)
 
Prediction
template<class U , class C , class Stop >
LabelType predictLabel (MultiArrayView< 2, U, C >const &features, Stop &stop) const
 predict a label given a feature. More...
 
template<class U , class C >
LabelType predictLabel (MultiArrayView< 2, U, C >const &features)
 
template<class U , class C >
LabelType predictLabel (MultiArrayView< 2, U, C > const &features, ArrayVectorView< double > prior) const
 predict a label with features and class priors More...
 
template<class U , class C1 , class T , class C2 >
void predictLabels (MultiArrayView< 2, U, C1 >const &features, MultiArrayView< 2, T, C2 > &labels) const
 predict multiple labels with given features More...
 
template<class U , class C1 , class T , class C2 >
void predictLabels (MultiArrayView< 2, U, C1 >const &features, MultiArrayView< 2, T, C2 > &labels, LabelType nanLabel) const
 predict multiple labels with given features More...
 
template<class U , class C1 , class T , class C2 , class Stop >
void predictLabels (MultiArrayView< 2, U, C1 >const &features, MultiArrayView< 2, T, C2 > &labels, Stop &stop) const
 predict multiple labels with given features More...
 
template<class U , class C1 , class T , class C2 , class Stop >
void predictProbabilities (MultiArrayView< 2, U, C1 >const &features, MultiArrayView< 2, T, C2 > &prob, Stop &stop) const
 predict the class probabilities for multiple labels More...
 
template<class T1 , class T2 , class C >
void predictProbabilities (OnlinePredictionSet< T1 > &predictionSet, MultiArrayView< 2, T2, C > &prob)
 
template<class U , class C1 , class T , class C2 >
void predictProbabilities (MultiArrayView< 2, U, C1 >const &features, MultiArrayView< 2, T, C2 > &prob) const
 predict the class probabilities for multiple labels More...
 
template<class U , class C1 , class T , class C2 >
void predictRaw (MultiArrayView< 2, U, C1 >const &features, MultiArrayView< 2, T, C2 > &prob) const
 

Detailed Description

template<class LabelType = double, class PreprocessorTag = ClassificationTag>
class vigra::RandomForest< LabelType, PreprocessorTag >

Random forest version 2 (see also vigra::rf3::RandomForest for version 3)

Template Parameters
<LabelType= double> Type used for predicted labels.
<PreprocessorTag= ClassificationTag> Class used to preprocess the input while learning and predicting. Currently Available: ClassificationTag and RegressionTag. It is recommended to use Splitfunctor::Preprocessor_t while using custom splitfunctors as they may need the data to be in a different format.
See Also
Preprocessor

Simple usage for classification (regression is not yet supported): look at RandomForest::learn() as well as RandomForestOptions() for additional options.

using namespace vigra;
using namespace rf;
typedef xxx feature_t; // replace xxx with whichever type
typedef yyy label_t; // likewise
// allocate the training data
MultiArrayView<2, feature_t> f = get_training_features();
MultiArrayView<2, label_t> l = get_training_labels();
// construct visitor to calculate out-of-bag error
visitors::OOB_Error oob_v;
// perform training
rf.learn(f, l, visitors::create_visitor(oob_v));
std::cout << "the out-of-bag error is: " << oob_v.oob_breiman << "\n";
// get features for new data to be used for prediction
MultiArrayView<2, feature_t> pf = get_features();
// allocate space for the response (pf.shape(0) is the number of samples)
MultiArrayView<2, label_t> prediction(pf.shape(0), 1);
MultiArrayView<2, double> prob(pf.shape(0), rf.class_count());
// perform prediction on new data
rf.predictLabels(pf, prediction);
rf.predictProbabilities(pf, prob);

Additional information such as Variable Importance measures are accessed via Visitors defined in rf::visitors. Have a look at rf::split for other splitting methods.

Constructor & Destructor Documentation

RandomForest ( Options_t const &  options = Options_t(),
ProblemSpec_t const &  ext_param = ProblemSpec_t() 
)

default constructor

Parameters
options: general options to the Random Forest. Must be of type Options_t.
ext_param: problem-specific values that can be supplied additionally (class weights, labels etc.).
See Also
RandomForestOptions, ProblemSpec
RandomForest ( int  treeCount,
TopologyIterator  topology_begin,
ParameterIterator  parameter_begin,
ProblemSpec_t const &  problem_spec,
Options_t const &  options = Options_t() 
)

Create RF from external source.

Parameters
treeCount: Number of trees to add.
topology_begin: Iterator to a Container where the topology_ data of the trees are stored. Iterator should support at least treeCount forward iterations (i.e. topology_end - topology_begin >= treeCount).
parameter_begin: Iterator to a Container where the parameters_ data of the trees are stored. Iterator should support at least treeCount forward iterations.
problem_spec: Extrinsic parameters that specify the problem, e.g. ClassCount, featureCount etc.
options: (optional) specify options used to train the original Random forest. This parameter is not used anywhere during prediction and thus is optional.

Member Function Documentation

ProblemSpec_t const& ext_param ( ) const

return external parameters for viewing

Returns
ProblemSpec_t
void set_ext_param ( ProblemSpec_t const &  in)

set external parameters

Parameters
in: external parameters to be set

Set external parameters explicitly. If the Random Forest has not been trained, the preprocessor will either ignore filling values set this way or will throw an exception if values specified manually do not match the values calculated during the preparation step.

Options_t& set_options ( )

access random forest options

Returns
random forest options
Options_t const& options ( ) const

access const random forest options

Returns
const Options_t
int column_count ( ) const

return number of features used while training.

deprecated. Use feature_count() instead.

void learn ( MultiArrayView< 2, U, C1 > const &  features,
MultiArrayView< 2, U2, C2 > const &  response,
Visitor_t  visitor,
Split_t  split,
Stop_t  stop,
Random_t const &  random 
)

learn on data with custom config and random number generator

Parameters
features: a N x M matrix containing N samples with M features
response: a N x D matrix containing the corresponding response. Current split functors assume D to be 1 and ignore any additional columns. This is not enforced to allow future support for uncertain labels, label independent strata etc. The Preprocessor specified during construction should be able to handle the features and the labels. See also: SplitFunctor, Preprocessing
visitor: visitor which is to be applied after each split, after each tree and at the end. Use rf_default() for using the default value (no visitors). See also: rf::visitors
split: split functor to be used to calculate each split. Use rf_default() for using the default value (GiniSplit). See also: rf::split
stop: stopping predicate to be evaluated for each split. Use rf_default() for using the default value (EarlyStoppStd).
random: RandomNumberGenerator to be used. Use rf_default() to use the default value (RandomMT19937).
void learn ( MultiArrayView< 2, U, C1 > const &  features,
MultiArrayView< 2, U2, C2 > const &  labels 
)

learn on data with default configuration

Parameters
features: a N x M matrix containing N samples with M features
labels: a N x D matrix containing the corresponding N labels. Current split functors assume D to be 1 and ignore any additional columns. This is not enforced to allow future support for uncertain labels.

learning is done with:

See Also
rf::split, EarlyStoppStd
  • Randomly seeded random number generator
  • default gini split functor as described by Breiman
  • the default (standard) early stopping criterion
void reLearnTree ( MultiArrayView< 2, U, C1 > const &  features,
MultiArrayView< 2, U2, C2 > const &  response,
int  treeId,
Visitor_t  visitor_,
Split_t  split_,
Stop_t  stop_,
Random_t &  random 
)
Todo:
replace this crappy class out. It uses function pointers. and is making code slower according to me. Comment from Nathan: This is copied from Rahul, so me=Rahul
LabelType predictLabel ( MultiArrayView< 2, U, C >const &  features,
Stop &  stop 
) const

predict a label given a feature.

Parameters
features: a 1 by featureCount matrix containing the data point to be predicted (this only works in classification setting)
stop: early stopping criterion
Returns
double value representing class. You can use the predictLabels() function together with the rf.external_parameter().class_type_ attribute to get back the same type used during learning.
LabelType predictLabel ( MultiArrayView< 2, U, C > const &  features,
ArrayVectorView< double >  prior 
) const

predict a label with features and class priors

Parameters
features: same as above.
prior: iterator to prior weighting of classes
Returns
same as above.
void predictLabels ( MultiArrayView< 2, U, C1 >const &  features,
MultiArrayView< 2, T, C2 > &  labels 
) const

predict multiple labels with given features

Parameters
features: an n by featureCount matrix containing the data points to be predicted (this only works in classification setting)
labels: an n by 1 matrix passed by reference to store the output.

If the input contains a NaN value, a precondition exception is thrown.

void predictLabels ( MultiArrayView< 2, U, C1 >const &  features,
MultiArrayView< 2, T, C2 > &  labels,
LabelType  nanLabel 
) const

predict multiple labels with given features

Parameters
features: an n by featureCount matrix containing the data points to be predicted (this only works in classification setting)
labels: an n by 1 matrix passed by reference to store the output.
nanLabel: label to be returned for each row of the input that contains a NaN value.
void predictLabels ( MultiArrayView< 2, U, C1 >const &  features,
MultiArrayView< 2, T, C2 > &  labels,
Stop &  stop 
) const

predict multiple labels with given features

Parameters
features: an n by featureCount matrix containing the data points to be predicted (this only works in classification setting)
labels: an n by 1 matrix passed by reference to store the output.
stop: an early stopping criterion.
void predictProbabilities ( MultiArrayView< 2, U, C1 >const &  features,
MultiArrayView< 2, T, C2 > &  prob,
Stop &  stop 
) const

predict the class probabilities for multiple labels

Parameters
features: same as above
prob: an n x class_count_ matrix, passed by reference, in which the class probabilities are stored
stop: early stopping criterion
See Also
EarlyStopping

When a row of the feature array contains a NaN, the corresponding instance cannot belong to any of the classes. The corresponding row in the probability array will therefore contain all zeros.

void predictProbabilities ( MultiArrayView< 2, U, C1 >const &  features,
MultiArrayView< 2, T, C2 > &  prob 
) const

predict the class probabilities for multiple labels

Parameters
features: same as above
prob: an n x class_count_ matrix, passed by reference, in which the class probabilities are stored

The documentation for this class was generated from the following file:

© Ullrich Köthe (ullrich.koethe@iwr.uni-heidelberg.de)
Heidelberg Collaboratory for Image Processing, University of Heidelberg, Germany

html generated using doxygen and Python
vigra 1.11.1 (Fri May 19 2017)