
regression.hxx (VIGRA)

1 /************************************************************************/
2 /* */
3 /* Copyright 2008-2013 by Ullrich Koethe */
4 /* */
5 /* This file is part of the VIGRA computer vision library. */
6 /* The VIGRA Website is */
7 /* http://hci.iwr.uni-heidelberg.de/vigra/ */
8 /* Please direct questions, bug reports, and contributions to */
9 /* ullrich.koethe@iwr.uni-heidelberg.de or */
10 /* vigra@informatik.uni-hamburg.de */
11 /* */
12 /* Permission is hereby granted, free of charge, to any person */
13 /* obtaining a copy of this software and associated documentation */
14 /* files (the "Software"), to deal in the Software without */
15 /* restriction, including without limitation the rights to use, */
16 /* copy, modify, merge, publish, distribute, sublicense, and/or */
17 /* sell copies of the Software, and to permit persons to whom the */
18 /* Software is furnished to do so, subject to the following */
19 /* conditions: */
20 /* */
21 /* The above copyright notice and this permission notice shall be */
22 /* included in all copies or substantial portions of the */
23 /* Software. */
24 /* */
25 /* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND */
26 /* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES */
27 /* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND */
28 /* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT */
29 /* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, */
30 /* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING */
31 /* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR */
32 /* OTHER DEALINGS IN THE SOFTWARE. */
33 /* */
34 /************************************************************************/
35 
36 
37 #ifndef VIGRA_REGRESSION_HXX
38 #define VIGRA_REGRESSION_HXX
39 
40 #include "matrix.hxx"
41 #include "linear_solve.hxx"
42 #include "singular_value_decomposition.hxx"
43 #include "numerictraits.hxx"
44 #include "functorexpression.hxx"
45 #include "autodiff.hxx"
46 
47 
48 namespace vigra
49 {
50 
51 namespace linalg
52 {
53 
54 /** \addtogroup Optimization Optimization and Regression
55  */
56 //@{
57  /** Ordinary Least Squares Regression.
58 
59  Given a matrix \a A with <tt>m</tt> rows and <tt>n</tt> columns (with <tt>m >= n</tt>),
60  and a column vector \a b of length <tt>m</tt>, this function computes
61  the column vector \a x of length <tt>n</tt> that solves the optimization problem
62 
63  \f[ \tilde \textrm{\bf x} = \textrm{argmin}
64  \left|\left|\textrm{\bf A} \textrm{\bf x} - \textrm{\bf b}\right|\right|_2^2
65  \f]
66 
67  When \a b is a matrix with <tt>k</tt> columns, \a x must also have
68  <tt>k</tt> columns, which will contain the solutions for the corresponding columns of
69  \a b. Note that all matrices must already have the correct shape.
70 
71  This function is just another name for \ref linearSolve(), perhaps
72  leading to more readable code when \a A is a rectangular matrix. It returns
73  <tt>false</tt> when the rank of \a A is less than <tt>n</tt>.
74  See \ref linearSolve() for more documentation.
75 
76  <b>\#include</b> <vigra/regression.hxx><br/>
77  Namespaces: vigra and vigra::linalg
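
 <b>Usage</b> (editor's sketch, not part of the original documentation;
 shapes and values are arbitrary placeholders):

 \code
 Matrix<double> A(5, 2), b(5, 1), x(2, 1); // x must be pre-allocated with shape (n, 1)
 ... // fill A and b
 if(!leastSquares(A, b, x)) // equivalent to linearSolve(A, b, x)
 std::cerr << "A is rank deficient\n";
 \endcode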
78  */
79 template <class T, class C1, class C2, class C3>
80 inline bool
81 leastSquares(MultiArrayView<2, T, C1> const & A,
82  MultiArrayView<2, T, C2> const &b, MultiArrayView<2, T, C3> &x,
83  std::string method = "QR")
84 {
85  return linearSolve(A, b, x, method);
86 }
87 
88  /** Weighted Least Squares Regression.
89 
90  Given a matrix \a A with <tt>m</tt> rows and <tt>n</tt> columns (with <tt>m >= n</tt>),
91  a vector \a b of length <tt>m</tt>, and a weight vector \a weights of length <tt>m</tt>
92  with non-negative entries, this function computes the vector \a x of length <tt>n</tt>
93  that solves the optimization problem
94 
95  \f[ \tilde \textrm{\bf x} = \textrm{argmin}
96  \left(\textrm{\bf A} \textrm{\bf x} - \textrm{\bf b}\right)^T
97  \textrm{diag}(\textrm{\bf weights})
98  \left(\textrm{\bf A} \textrm{\bf x} - \textrm{\bf b}\right)
99  \f]
100 
101  where <tt>diag(weights)</tt> creates a diagonal matrix from \a weights.
102  The algorithm calls \ref leastSquares() on the equivalent problem
103 
104  \f[ \tilde \textrm{\bf x} = \textrm{argmin}
105  \left|\left|\textrm{diag}(\textrm{\bf weights})^{1/2}\textrm{\bf A} \textrm{\bf x} -
106  \textrm{diag}(\textrm{\bf weights})^{1/2} \textrm{\bf b}\right|\right|_2^2
107  \f]
108 
109  where the square root of \a weights is just taken element-wise.
110 
111  When \a b is a matrix with <tt>k</tt> columns, \a x must also have
112  <tt>k</tt> columns, which will contain the solutions for the corresponding columns of
113  \a b. Note that all matrices must already have the correct shape.
114 
115  The function returns
116  <tt>false</tt> when the rank of the weighted matrix \a A is less than <tt>n</tt>.
117 
118  <b>\#include</b> <vigra/regression.hxx><br/>
119  Namespaces: vigra and vigra::linalg
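
 <b>Usage</b> (editor's sketch; shapes are arbitrary placeholders):

 \code
 Matrix<double> A(5, 2), b(5, 1), weights(5, 1), x(2, 1);
 ... // fill A, b, and the non-negative weights
 weightedLeastSquares(A, b, weights, x); // reduces to leastSquares() on rescaled data
 \endcode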
120  */
121 template <class T, class C1, class C2, class C3, class C4>
122 bool
123 weightedLeastSquares(MultiArrayView<2, T, C1> const & A,
124  MultiArrayView<2, T, C2> const &b, MultiArrayView<2, T, C3> const &weights,
125  MultiArrayView<2, T, C4> &x, std::string method = "QR")
126 {
127  const unsigned int rows = rowCount(A);
128  const unsigned int cols = columnCount(A);
129  const unsigned int rhsCount = columnCount(b);
130  vigra_precondition(rows >= cols,
131  "weightedLeastSquares(): Input matrix A must be rectangular with rowCount >= columnCount.");
132  vigra_precondition(rowCount(b) == rows,
133  "weightedLeastSquares(): Shape mismatch between matrices A and b.");
134  vigra_precondition(rowCount(b) == rowCount(weights) && columnCount(weights) == 1,
135  "weightedLeastSquares(): Weight matrix has wrong shape.");
136  vigra_precondition(rowCount(x) == cols && columnCount(x) == rhsCount,
137  "weightedLeastSquares(): Result matrix x has wrong shape.");
138 
139  Matrix<T> wa(A.shape()), wb(b.shape());
140 
141  for(unsigned int k=0; k<rows; ++k)
142  {
143  vigra_precondition(weights(k,0) >= 0,
144  "weightedLeastSquares(): Weights must be positive.");
145  T w = std::sqrt(weights(k,0));
146  for(unsigned int l=0; l<cols; ++l)
147  wa(k,l) = w * A(k,l);
148  for(unsigned int l=0; l<rhsCount; ++l)
149  wb(k,l) = w * b(k,l);
150  }
151 
152  return leastSquares(wa, wb, x, method);
153 }
154 
155  /** Ridge Regression.
156 
157  Given a matrix \a A with <tt>m</tt> rows and <tt>n</tt> columns (with <tt>m >= n</tt>),
158  a vector \a b of length <tt>m</tt>, and a regularization parameter <tt>lambda >= 0.0</tt>,
159  this function computes the vector \a x of length <tt>n</tt>
160  that solves the optimization problem
161 
162  \f[ \tilde \textrm{\bf x} = \textrm{argmin}
163  \left|\left|\textrm{\bf A} \textrm{\bf x} - \textrm{\bf b}\right|\right|_2^2 +
164  \lambda \textrm{\bf x}^T\textrm{\bf x}
165  \f]
166 
167  This is implemented by means of \ref singularValueDecomposition().
168 
169  When \a b is a matrix with <tt>k</tt> columns, \a x must also have
170  <tt>k</tt> columns, which will contain the solutions for the corresponding columns of
171  \a b. Note that all matrices must already have the correct shape.
172 
173  The function returns <tt>false</tt> if the rank of \a A is less than <tt>n</tt>
174  and <tt>lambda == 0.0</tt>.
175 
176  <b>\#include</b> <vigra/regression.hxx><br/>
177  Namespaces: vigra and vigra::linalg
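
 Editor's note: in terms of the SVD
 \f$\textrm{\bf A} = \textrm{\bf U}\,\textrm{diag}(\textrm{\bf s})\,\textrm{\bf V}^T\f$,
 the solution computed below is

 \f[ \tilde \textrm{\bf x} = \textrm{\bf V}\,\textrm{diag}\left(\frac{s_i}{s_i^2 + \lambda}\right)\textrm{\bf U}^T \textrm{\bf b}
 \f]

 <b>Usage</b> (editor's sketch; shapes and the lambda value are arbitrary):

 \code
 Matrix<double> A(5, 2), b(5, 1), x(2, 1);
 ... // fill A and b
 ridgeRegression(A, b, x, 0.5); // lambda = 0.5
 \endcode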
178  */
179 template <class T, class C1, class C2, class C3>
180 bool
181 ridgeRegression(MultiArrayView<2, T, C1> const & A,
182  MultiArrayView<2, T, C2> const &b, MultiArrayView<2, T, C3> &x, double lambda)
183 {
184  const unsigned int rows = rowCount(A);
185  const unsigned int cols = columnCount(A);
186  const unsigned int rhsCount = columnCount(b);
187  vigra_precondition(rows >= cols,
188  "ridgeRegression(): Input matrix A must be rectangular with rowCount >= columnCount.");
189  vigra_precondition(rowCount(b) == rows,
190  "ridgeRegression(): Shape mismatch between matrices A and b.");
191  vigra_precondition(rowCount(x) == cols && columnCount(x) == rhsCount,
192  "ridgeRegression(): Result matrix x has wrong shape.");
193  vigra_precondition(lambda >= 0.0,
194  "ridgeRegression(): lambda >= 0.0 required.");
195 
196  unsigned int m = rows;
197  unsigned int n = cols;
198 
199  Matrix<T> u(m, n), s(n, 1), v(n, n);
200 
201  unsigned int rank = singularValueDecomposition(A, u, s, v);
202  if(rank < n && lambda == 0.0)
203  return false;
204 
205  Matrix<T> t = transpose(u)*b;
206  for(unsigned int k=0; k<cols; ++k)
207  for(unsigned int l=0; l<rhsCount; ++l)
208  t(k,l) *= s(k,0) / (sq(s(k,0)) + lambda);
209  x = v*t;
210  return true;
211 }
212 
213  /** Weighted Ridge Regression.
214 
215  Given a matrix \a A with <tt>m</tt> rows and <tt>n</tt> columns (with <tt>m >= n</tt>),
216  a vector \a b of length <tt>m</tt>, a weight vector \a weights of length <tt>m</tt>
217  with non-negative entries, and a regularization parameter <tt>lambda >= 0.0</tt>,
218  this function computes the vector \a x of length <tt>n</tt>
219  that solves the optimization problem
220 
221  \f[ \tilde \textrm{\bf x} = \textrm{argmin}
222  \left(\textrm{\bf A} \textrm{\bf x} - \textrm{\bf b}\right)^T
223  \textrm{diag}(\textrm{\bf weights})
224  \left(\textrm{\bf A} \textrm{\bf x} - \textrm{\bf b}\right) +
225  \lambda \textrm{\bf x}^T\textrm{\bf x}
226  \f]
227 
228  where <tt>diag(weights)</tt> creates a diagonal matrix from \a weights.
229  The algorithm calls \ref ridgeRegression() on the equivalent problem
230 
231  \f[ \tilde \textrm{\bf x} = \textrm{argmin}
232  \left|\left|\textrm{diag}(\textrm{\bf weights})^{1/2}\textrm{\bf A} \textrm{\bf x} -
233  \textrm{diag}(\textrm{\bf weights})^{1/2} \textrm{\bf b}\right|\right|_2^2 +
234  \lambda \textrm{\bf x}^T\textrm{\bf x}
235  \f]
236 
237  where the square root of \a weights is just taken element-wise. This solution is
238  computed by means of \ref singularValueDecomposition().
239 
240  When \a b is a matrix with <tt>k</tt> columns, \a x must also have
241  <tt>k</tt> columns, which will contain the solutions for the corresponding columns of
242  \a b. Note that all matrices must already have the correct shape.
243 
244  The function returns <tt>false</tt> if the rank of \a A is less than <tt>n</tt>
245  and <tt>lambda == 0.0</tt>.
246 
247  <b>\#include</b> <vigra/regression.hxx><br/>
248  Namespaces: vigra and vigra::linalg
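
 <b>Usage</b> (editor's sketch; shapes and the lambda value are arbitrary):

 \code
 Matrix<double> A(5, 2), b(5, 1), weights(5, 1), x(2, 1);
 ... // fill A, b, and the non-negative weights
 weightedRidgeRegression(A, b, weights, x, 0.5); // lambda = 0.5
 \endcode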
249  */
250 template <class T, class C1, class C2, class C3, class C4>
251 bool
252 weightedRidgeRegression(MultiArrayView<2, T, C1> const & A,
253  MultiArrayView<2, T, C2> const &b, MultiArrayView<2, T, C3> const &weights,
254  MultiArrayView<2, T, C4> &x, double lambda)
255 {
256  const unsigned int rows = rowCount(A);
257  const unsigned int cols = columnCount(A);
258  const unsigned int rhsCount = columnCount(b);
259  vigra_precondition(rows >= cols,
260  "weightedRidgeRegression(): Input matrix A must be rectangular with rowCount >= columnCount.");
261  vigra_precondition(rowCount(b) == rows,
262  "weightedRidgeRegression(): Shape mismatch between matrices A and b.");
263  vigra_precondition(rowCount(b) == rowCount(weights) && columnCount(weights) == 1,
264  "weightedRidgeRegression(): Weight matrix has wrong shape.");
265  vigra_precondition(rowCount(x) == cols && columnCount(x) == rhsCount,
266  "weightedRidgeRegression(): Result matrix x has wrong shape.");
267  vigra_precondition(lambda >= 0.0,
268  "weightedRidgeRegression(): lambda >= 0.0 required.");
269 
270  Matrix<T> wa(A.shape()), wb(b.shape());
271 
272  for(unsigned int k=0; k<rows; ++k)
273  {
274  vigra_precondition(weights(k,0) >= 0,
275  "weightedRidgeRegression(): Weights must be positive.");
276  T w = std::sqrt(weights(k,0));
277  for(unsigned int l=0; l<cols; ++l)
278  wa(k,l) = w * A(k,l);
279  for(unsigned int l=0; l<rhsCount; ++l)
280  wb(k,l) = w * b(k,l);
281  }
282 
283  return ridgeRegression(wa, wb, x, lambda);
284 }
285 
286  /** Ridge Regression with many lambdas.
287 
288  This executes \ref ridgeRegression() for a sequence of regularization parameters. This
289  is implemented so that the \ref singularValueDecomposition() has to be executed only once.
290  \a lambda must be an array conforming to the <tt>std::vector</tt> interface, i.e. must
291  support <tt>lambda.size()</tt> and <tt>lambda[k]</tt>. The columns of the matrix \a x
292  will contain the solutions for the corresponding lambda, so the number of columns of
293  the matrix \a x must be equal to <tt>lambda.size()</tt>, and \a b must be a column vector,
294  i.e. cannot contain several right hand sides at once.
295 
296  The function returns <tt>false</tt> when the matrix \a A is rank deficient. If this
297  happens, and one of the lambdas is zero, the corresponding column of \a x will be skipped.
298 
299  <b>\#include</b> <vigra/regression.hxx><br/>
300  Namespaces: vigra and vigra::linalg
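
 <b>Usage</b> (editor's sketch; shapes and lambda values are arbitrary):

 \code
 Matrix<double> A(5, 2), b(5, 1);
 ... // fill A and b
 ArrayVector<double> lambda;
 lambda.push_back(0.1);
 lambda.push_back(1.0);
 lambda.push_back(10.0);
 Matrix<double> x(2, 3); // one solution column per lambda
 ridgeRegressionSeries(A, b, x, lambda);
 \endcode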
301  */
302 template <class T, class C1, class C2, class C3, class Array>
303 bool
304 ridgeRegressionSeries(MultiArrayView<2, T, C1> const & A,
305  MultiArrayView<2, T, C2> const &b, MultiArrayView<2, T, C3> &x, Array const & lambda)
306 {
307  const unsigned int rows = rowCount(A);
308  const unsigned int cols = columnCount(A);
309  const unsigned int lambdaCount = lambda.size();
310  vigra_precondition(rows >= cols,
311  "ridgeRegressionSeries(): Input matrix A must be rectangular with rowCount >= columnCount.");
312  vigra_precondition(rowCount(b) == rows && columnCount(b) == 1,
313  "ridgeRegressionSeries(): Shape mismatch between matrices A and b.");
314  vigra_precondition(rowCount(x) == cols && columnCount(x) == lambdaCount,
315  "ridgeRegressionSeries(): Result matrix x has wrong shape.");
316 
317  unsigned int m = rows;
318  unsigned int n = cols;
319 
320  Matrix<T> u(m, n), s(n, 1), v(n, n);
321 
322  unsigned int rank = singularValueDecomposition(A, u, s, v);
323 
324  Matrix<T> xl = transpose(u)*b;
325  Matrix<T> xt(cols,1);
326  for(unsigned int i=0; i<lambdaCount; ++i)
327  {
328  vigra_precondition(lambda[i] >= 0.0,
329  "ridgeRegressionSeries(): lambda >= 0.0 required.");
330  if(lambda[i] == 0.0 && rank < cols) // A is rank deficient and no regularization: skip this lambda
331  continue;
332  for(unsigned int k=0; k<cols; ++k)
333  xt(k,0) = xl(k,0) * s(k,0) / (sq(s(k,0)) + lambda[i]);
334  columnVector(x, i) = v*xt;
335  }
336  return (rank == n);
337 }
338 
339 /** \brief Pass options to leastAngleRegression().
340 
341  <b>\#include</b> <vigra/regression.hxx><br/>
342  Namespaces: vigra and vigra::linalg
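
 All setters return a reference to the option object, so calls can be chained
 (editor's sketch):

 \code
 LeastAngleRegressionOptions options;
 options.nnlasso().maxSolutionCount(10).leastSquaresSolutions(false);
 \endcode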
343 */
344 class LeastAngleRegressionOptions
345 {
346  public:
347  enum Mode { LARS, LASSO, NNLASSO };
348 
349  /** Initialize all options with default values.
350  */
351  LeastAngleRegressionOptions()
352  : max_solution_count(0),
353  unconstrained_dimension_count(0),
354  mode(LASSO),
355  least_squares_solutions(true)
356  {}
357 
358  /** Maximum number of solutions to be computed.
359 
360  If \a n is 0 (the default), the number of solutions is determined by the length
361  of the solution array. Otherwise, the minimum of maxSolutionCount() and that
362  length is taken.<br>
363  Default: 0 (use length of solution array)
364  */
365  LeastAngleRegressionOptions & maxSolutionCount(unsigned int n)
366  {
367  max_solution_count = static_cast<int>(n);
368  return *this;
369  }
370 
371  /** Set the mode of the algorithm.
372 
373  Mode must be one of "lars", "lasso", "nnlasso". The function just calls
374  the member function of the corresponding name to set the mode.
375 
376  Default: "lasso"
377  */
378  LeastAngleRegressionOptions & setMode(std::string mode)
379  {
380  mode = tolower(mode);
381  if(mode == "lars")
382  this->lars();
383  else if(mode == "lasso")
384  this->lasso();
385  else if(mode == "nnlasso")
386  this->nnlasso();
387  else
388  vigra_fail("LeastAngleRegressionOptions.setMode(): Invalid mode.");
389  return *this;
390  }
391 
392 
393  /** Use the plain LARS algorithm.
394 
395  Default: inactive
396  */
397  LeastAngleRegressionOptions & lars()
398  {
399  mode = LARS;
400  return *this;
401  }
402 
403  /** Use the LASSO modification of the LARS algorithm.
404 
405  This allows features to be removed from the active set under certain conditions.<br>
406  Default: active
407  */
408  LeastAngleRegressionOptions & lasso()
409  {
410  mode = LASSO;
411  return *this;
412  }
413 
414  /** Use the non-negative LASSO modification of the LARS algorithm.
415 
416  This enforces all non-zero entries in the solution to be positive.<br>
417  Default: inactive
418  */
419  LeastAngleRegressionOptions & nnlasso()
420  {
421  mode = NNLASSO;
422  return *this;
423  }
424 
425  /** Compute least squares solutions.
426 
427  Use least angle regression to determine active sets, but
428  return least squares solutions for the features in each active set,
429  instead of constrained solutions.<br>
430  Default: <tt>true</tt>
431  */
432  LeastAngleRegressionOptions & leastSquaresSolutions(bool select = true)
433  {
434  least_squares_solutions = select;
435  return *this;
436  }
437 
438  int max_solution_count, unconstrained_dimension_count;
439  Mode mode;
440  bool least_squares_solutions;
441 };
442 
443 namespace detail {
444 
445 template <class T, class C1, class C2>
446 struct LarsData
447 {
448  typedef typename MultiArrayShape<2>::type Shape;
449 
450  int activeSetSize;
451  MultiArrayView<2, T, C1> A;
452  MultiArrayView<2, T, C2> b;
453  Matrix<T> R, qtb, lars_solution, lars_prediction, next_lsq_solution, next_lsq_prediction, searchVector;
454  ArrayVector<MultiArrayIndex> columnPermutation;
455 
456  // init data for a new run
457  LarsData(MultiArrayView<2, T, C1> const & Ai, MultiArrayView<2, T, C2> const & bi)
458  : activeSetSize(1),
459  A(Ai), b(bi), R(A), qtb(b),
460  lars_solution(A.shape(1), 1), lars_prediction(A.shape(0), 1),
461  next_lsq_solution(A.shape(1), 1), next_lsq_prediction(A.shape(0), 1), searchVector(A.shape(0), 1),
462  columnPermutation(A.shape(1))
463  {
464  for(unsigned int k=0; k<columnPermutation.size(); ++k)
465  columnPermutation[k] = k;
466  }
467 
468  // copy data for the recursive call in nnlassolsq
469  LarsData(LarsData const & d, int asetSize)
470  : activeSetSize(asetSize),
471  A(d.R.subarray(Shape(0,0), Shape(d.A.shape(0), activeSetSize))), b(d.qtb), R(A), qtb(b),
472  lars_solution(d.lars_solution.subarray(Shape(0,0), Shape(activeSetSize, 1))), lars_prediction(d.lars_prediction),
473  next_lsq_solution(d.next_lsq_solution.subarray(Shape(0,0), Shape(activeSetSize, 1))),
474  next_lsq_prediction(d.next_lsq_prediction), searchVector(d.searchVector),
475  columnPermutation(A.shape(1))
476  {
477  for(unsigned int k=0; k<columnPermutation.size(); ++k)
478  columnPermutation[k] = k;
479  }
480 };
481 
482 template <class T, class C1, class C2, class Array1, class Array2, class Array3>
483 unsigned int
484 leastAngleRegressionMainLoop(LarsData<T, C1, C2> & d,
485  Array1 & activeSets,
486  Array2 * lars_solutions, Array3 * lsq_solutions,
487  LeastAngleRegressionOptions const & options)
488 {
489  using namespace vigra::functor;
490 
491  typedef typename MultiArrayShape<2>::type Shape;
492  typedef typename Matrix<T>::view_type Subarray;
493  typedef ArrayVector<MultiArrayIndex> Permutation;
494  typedef typename Permutation::view_type ColumnSet;
495 
496  vigra_precondition(d.activeSetSize > 0,
497  "leastAngleRegressionMainLoop() must not be called with empty active set.");
498 
499  bool enforce_positive = (options.mode == LeastAngleRegressionOptions::NNLASSO);
500  bool lasso_modification = (options.mode != LeastAngleRegressionOptions::LARS);
501 
502  const MultiArrayIndex rows = rowCount(d.R);
503  const MultiArrayIndex cols = columnCount(d.R);
504  const MultiArrayIndex maxRank = std::min(rows, cols);
505 
506  MultiArrayIndex maxSolutionCount = options.max_solution_count;
507  if(maxSolutionCount == 0)
508  maxSolutionCount = lasso_modification
509  ? 10*maxRank
510  : maxRank;
511 
512  bool needToRemoveColumn = false;
513  MultiArrayIndex columnToBeAdded = 0, columnToBeRemoved = 0;
514  MultiArrayIndex currentSolutionCount = 0;
515  while(currentSolutionCount < maxSolutionCount)
516  {
517  //ColumnSet activeSet = d.columnPermutation.subarray(0, static_cast<unsigned int>(d.activeSetSize));
518  ColumnSet inactiveSet = d.columnPermutation.subarray(static_cast<unsigned int>(d.activeSetSize), static_cast<unsigned int>(cols));
519 
520  // find next dimension to be activated
521  Matrix<T> cLARS = transpose(d.A) * (d.b - d.lars_prediction), // correlation with LARS residual
522  cLSQ = transpose(d.A) * (d.b - d.next_lsq_prediction); // correlation with LSQ residual
523 
524  // In theory, all vectors in the active set should have the same correlation C, and
525  // the correlation of all others should not exceed this. In practice, we may find the
526  // maximum correlation in any variable due to tiny numerical inaccuracies. Therefore, we
527  // determine C from the entire set of variables.
528  MultiArrayIndex cmaxIndex = enforce_positive
529  ? argMax(cLARS)
530  : argMax(abs(cLARS));
531  T C = abs(cLARS(cmaxIndex, 0));
532 
533  Matrix<T> ac(cols - d.activeSetSize, 1);
534  for(MultiArrayIndex k = 0; k<cols-d.activeSetSize; ++k)
535  {
536  T rho = cLSQ(inactiveSet[k], 0),
537  cc = C - sign(rho)*cLARS(inactiveSet[k], 0);
538 
539  if(rho == 0.0) // make sure that 0/0 cannot happen in the other cases
540  ac(k,0) = 1.0; // variable k is linearly dependent on the active set
541  else if(rho > 0.0)
542  ac(k,0) = cc / (cc + rho); // variable k would enter the active set with positive sign
543  else if(enforce_positive)
544  ac(k,0) = 1.0; // variable k cannot enter the active set because it would be negative
545  else
546  ac(k,0) = cc / (cc - rho); // variable k would enter the active set with negative sign
547  }
548 
549  // in the non-negative case: make sure that a column just removed cannot re-enter right away
550  // (in standard LASSO, this is allowed, because the variable may re-enter with opposite sign)
551  if(enforce_positive && needToRemoveColumn)
552  ac(columnToBeRemoved-d.activeSetSize,0) = 1.0;
553 
554  // find candidate
555  // Note: R uses Arg1() > epsilon, but this is only possible because it allows several variables to
556  // join the active set simultaneously, so that gamma = 0 cannot occur.
557  columnToBeAdded = argMin(ac);
558 
559  // if no new column can be added, we do a full step gamma = 1.0 and then stop, unless a column is removed below
560  T gamma = (d.activeSetSize == maxRank)
561  ? 1.0
562  : ac(columnToBeAdded, 0);
563 
564  // adjust columnToBeAdded: we skipped the active set
565  if(columnToBeAdded >= 0)
566  columnToBeAdded += d.activeSetSize;
567 
568  // check whether we have to remove a column from the active set
569  needToRemoveColumn = false;
570  if(lasso_modification)
571  {
572  // find dimensions whose weight changes sign below gamma*searchDirection
573  Matrix<T> s(Shape(d.activeSetSize, 1), NumericTraits<T>::max());
574  for(MultiArrayIndex k=0; k<d.activeSetSize; ++k)
575  {
576  if(( enforce_positive && d.next_lsq_solution(k,0) < 0.0) ||
577  (!enforce_positive && sign(d.lars_solution(k,0))*sign(d.next_lsq_solution(k,0)) == -1.0))
578  s(k,0) = d.lars_solution(k,0) / (d.lars_solution(k,0) - d.next_lsq_solution(k,0));
579  }
580 
581  columnToBeRemoved = argMinIf(s, Arg1() <= Param(gamma));
582  if(columnToBeRemoved >= 0)
583  {
584  needToRemoveColumn = true; // remove takes precedence over add
585  gamma = s(columnToBeRemoved, 0);
586  }
587  }
588 
589  // compute the current solutions
590  d.lars_prediction = gamma * d.next_lsq_prediction + (1.0 - gamma) * d.lars_prediction;
591  d.lars_solution = gamma * d.next_lsq_solution + (1.0 - gamma) * d.lars_solution;
592  if(needToRemoveColumn)
593  d.lars_solution(columnToBeRemoved, 0) = 0.0; // turn possible epsilon into an exact zero
594 
595  // write the current solution
596  ++currentSolutionCount;
597  activeSets.push_back(typename Array1::value_type(d.columnPermutation.begin(), d.columnPermutation.begin()+d.activeSetSize));
598 
599  if(lsq_solutions != 0)
600  {
601  if(enforce_positive)
602  {
603  ArrayVector<Matrix<T> > nnresults;
604  ArrayVector<ArrayVector<MultiArrayIndex> > nnactiveSets;
605  LarsData<T, C1, C2> nnd(d, d.activeSetSize);
606 
607  leastAngleRegressionMainLoop(nnd, nnactiveSets, &nnresults, static_cast<Array3*>(0),
608  LeastAngleRegressionOptions().leastSquaresSolutions(false).nnlasso());
609  //Matrix<T> nnlsq_solution(d.activeSetSize, 1);
610  typename Array2::value_type nnlsq_solution(Shape(d.activeSetSize, 1));
611  for(unsigned int k=0; k<nnactiveSets.back().size(); ++k)
612  {
613  nnlsq_solution(nnactiveSets.back()[k],0) = nnresults.back()[k];
614  }
615  //lsq_solutions->push_back(nnlsq_solution);
616  lsq_solutions->push_back(typename Array3::value_type());
617  lsq_solutions->back() = nnlsq_solution;
618  }
619  else
620  {
621  //lsq_solutions->push_back(d.next_lsq_solution.subarray(Shape(0,0), Shape(d.activeSetSize, 1)));
622  lsq_solutions->push_back(typename Array3::value_type());
623  lsq_solutions->back() = d.next_lsq_solution.subarray(Shape(0,0), Shape(d.activeSetSize, 1));
624  }
625  }
626  if(lars_solutions != 0)
627  {
628  //lars_solutions->push_back(d.lars_solution.subarray(Shape(0,0), Shape(d.activeSetSize, 1)));
629  lars_solutions->push_back(typename Array2::value_type());
630  lars_solutions->back() = d.lars_solution.subarray(Shape(0,0), Shape(d.activeSetSize, 1));
631  }
632 
633  // no further solutions possible
634  if(gamma == 1.0)
635  break;
636 
637  if(needToRemoveColumn)
638  {
639  --d.activeSetSize;
640  if(columnToBeRemoved != d.activeSetSize)
641  {
642  // remove column 'columnToBeRemoved' and restore triangular form of R
643  // note: columnPermutation is automatically swapped here
644  detail::upperTriangularSwapColumns(columnToBeRemoved, d.activeSetSize, d.R, d.qtb, d.columnPermutation);
645 
646  // swap solution entries
647  std::swap(d.lars_solution(columnToBeRemoved, 0), d.lars_solution(d.activeSetSize,0));
648  std::swap(d.next_lsq_solution(columnToBeRemoved, 0), d.next_lsq_solution(d.activeSetSize,0));
649  columnToBeRemoved = d.activeSetSize; // keep track of removed column
650  }
651  d.lars_solution(d.activeSetSize,0) = 0.0;
652  d.next_lsq_solution(d.activeSetSize,0) = 0.0;
653  }
654  else
655  {
656  vigra_invariant(columnToBeAdded >= 0,
657  "leastAngleRegression(): internal error (columnToBeAdded < 0)");
658  // add column 'columnToBeAdded'
659  if(d.activeSetSize != columnToBeAdded)
660  {
661  std::swap(d.columnPermutation[d.activeSetSize], d.columnPermutation[columnToBeAdded]);
662  columnVector(d.R, d.activeSetSize).swapData(columnVector(d.R, columnToBeAdded));
663  columnToBeAdded = d.activeSetSize; // keep track of added column
664  }
665 
666  // zero the corresponding entries of the solutions
667  d.next_lsq_solution(d.activeSetSize,0) = 0.0;
668  d.lars_solution(d.activeSetSize,0) = 0.0;
669 
670  // reduce R (i.e. its newly added column) to triangular form
671  detail::qrColumnHouseholderStep(d.activeSetSize, d.R, d.qtb);
672  ++d.activeSetSize;
673  }
674 
675  // compute the LSQ solution of the new active set
676  Subarray Ractive = d.R.subarray(Shape(0,0), Shape(d.activeSetSize, d.activeSetSize));
677  Subarray qtbactive = d.qtb.subarray(Shape(0,0), Shape(d.activeSetSize, 1));
678  Subarray next_lsq_solution_view = d.next_lsq_solution.subarray(Shape(0,0), Shape(d.activeSetSize, 1));
679  linearSolveUpperTriangular(Ractive, qtbactive, next_lsq_solution_view);
680 
681  // compute the LSQ prediction of the new active set
682  d.next_lsq_prediction.init(0.0);
683  for(MultiArrayIndex k=0; k<d.activeSetSize; ++k)
684  d.next_lsq_prediction += next_lsq_solution_view(k,0)*columnVector(d.A, d.columnPermutation[k]);
685  }
686 
687  return static_cast<unsigned int>(currentSolutionCount);
688 }
689 
690 template <class T, class C1, class C2, class Array1, class Array2>
691 unsigned int
692 leastAngleRegressionImpl(MultiArrayView<2, T, C1> const & A, MultiArrayView<2, T, C2> const &b,
693  Array1 & activeSets, Array2 * lasso_solutions, Array2 * lsq_solutions,
694  LeastAngleRegressionOptions const & options)
695 {
696  using namespace vigra::functor;
697 
698  const MultiArrayIndex rows = rowCount(A);
699 
700  vigra_precondition(rowCount(b) == rows && columnCount(b) == 1,
701  "leastAngleRegression(): Shape mismatch between matrices A and b.");
702 
703  bool enforce_positive = (options.mode == LeastAngleRegressionOptions::NNLASSO);
704 
705  detail::LarsData<T, C1, C2> d(A, b);
706 
707  // find dimension with largest correlation
708  Matrix<T> c = transpose(A)*b;
709  MultiArrayIndex initialColumn = enforce_positive
710  ? argMaxIf(c, Arg1() > Param(0.0))
711  : argMax(abs(c));
712  if(initialColumn == -1)
713  return 0; // no solution found
714 
715  // prepare initial active set and search direction etc.
716  std::swap(d.columnPermutation[0], d.columnPermutation[initialColumn]);
717  columnVector(d.R, 0).swapData(columnVector(d.R, initialColumn));
718  detail::qrColumnHouseholderStep(0, d.R, d.qtb);
719  d.next_lsq_solution(0,0) = d.qtb(0,0) / d.R(0,0);
720  d.next_lsq_prediction = d.next_lsq_solution(0,0) * columnVector(A, d.columnPermutation[0]);
721  d.searchVector = d.next_lsq_solution(0,0) * columnVector(A, d.columnPermutation[0]);
722 
723  return leastAngleRegressionMainLoop(d, activeSets, lasso_solutions, lsq_solutions, options);
724 }
725 
726 } // namespace detail
727 
728 /** Least Angle Regression.
729 
730  <b>\#include</b> <vigra/regression.hxx><br/>
731  Namespaces: vigra and vigra::linalg
732 
733  <b> Declarations:</b>
734 
735  \code
736  namespace vigra {
737  namespace linalg {
738  // compute either LASSO or least squares solutions
739  template <class T, class C1, class C2, class Array1, class Array2>
740  unsigned int
741  leastAngleRegression(MultiArrayView<2, T, C1> const & A, MultiArrayView<2, T, C2> const &b,
742  Array1 & activeSets, Array2 & solutions,
743  LeastAngleRegressionOptions const & options = LeastAngleRegressionOptions());
744 
745  // compute LASSO and least squares solutions
746  template <class T, class C1, class C2, class Array1, class Array2>
747  unsigned int
748  leastAngleRegression(MultiArrayView<2, T, C1> const & A, MultiArrayView<2, T, C2> const &b,
749  Array1 & activeSets, Array2 & lasso_solutions, Array2 & lsq_solutions,
750  LeastAngleRegressionOptions const & options = LeastAngleRegressionOptions());
751  }
752  using linalg::leastAngleRegression;
753  }
754  \endcode
755 
756  This function implements Least Angle Regression (LARS) as described in
757 
758  &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
759  B. Efron, T. Hastie, I. Johnstone, and R. Tibshirani: <i>"Least Angle Regression"</i>,
760  Annals of Statistics 32(2):407-499, 2004.
761 
762  It is an efficient algorithm to solve the L1-regularized least squares (LASSO) problem
763 
764  \f[ \tilde \textrm{\bf x} = \textrm{argmin}
765  \left|\left|\textrm{\bf A} \textrm{\bf x} - \textrm{\bf b}\right|\right|_2^2
766  \textrm{ subject to } \left|\left|\textrm{\bf x}\right|\right|_1\le s
767  \f]
768 
769  and the L1-regularized non-negative least squares (NN-LASSO) problem
770 
771  \f[ \tilde \textrm{\bf x} = \textrm{argmin} \left|\left|\textrm{\bf A} \textrm{\bf x} - \textrm{\bf b}\right|\right|_2^2
772  \textrm{ subject to } \left|\left|\textrm{\bf x}\right|\right|_1\le s \textrm{ and } \textrm{\bf x}\ge \textrm{\bf 0}
773  \f]
774 
775  where \a A is a matrix with <tt>m</tt> rows and <tt>n</tt> columns (often with <tt>m < n</tt>),
776  \a b a vector of length <tt>m</tt>, and a regularization parameter s >= 0.0.
777  L1-regularization has the desirable effect that it causes the solution <b>x</b> to be sparse, i.e. only
778  the most important elements in <b>x</b> (called the <em>active set</em>) have non-zero values. The
779  key insight of the LARS algorithm is the following: When the solution vector <b>x</b> is considered
780  as a function of the regularization parameter s, then <b>x</b>(s) is a piecewise
781  linear function, i.e. a polyline in n-dimensional space. The knots of the polyline <b>x</b>(s)
782  are located precisely at those values of s where one variable enters or leaves the active set
783  and can be efficiently computed.
784 
785  Therefore, leastAngleRegression() returns the entire solution path as a sequence of knot points, starting
786  at \f$\textrm{\bf x}(s=0)\f$ (where the only feasible solution is obviously <b>x</b> = 0) and ending at
787  \f$\textrm{\bf x}(s=\infty)\f$ (where the solution becomes the ordinary least squares solution). Actually,
788  the initial null solution is not explicitly returned, i.e. the sequence starts at the first non-zero
789  solution with one variable in the active set. The function leastAngleRegression() returns the number
790  of solutions (i.e. knot points) computed.
791 
792  The sequences of active sets and corresponding variable weights are returned in \a activeSets and
793  \a solutions respectively. That is, <tt>activeSets[i]</tt> is an \ref vigra::ArrayVector "ArrayVector<int>"
794  containing the indices of the variables that are active at the i-th knot, and <tt>solutions</tt> is a
795  \ref vigra::linalg::Matrix "Matrix<T>" containing the weights of those variables, in the same order (see
796  example below). Variables not contained in <tt>activeSets[i]</tt> are zero at this solution.
797 
798  The behavior of the algorithm can be adapted by \ref vigra::linalg::LeastAngleRegressionOptions
799  "LeastAngleRegressionOptions":
800  <DL>
801  <DT><b>options.lasso()</b> (active by default)
802  <DD> Compute the LASSO solution as described above.
803  <DT><b>options.nnlasso()</b> (inactive by default)
804  <DD> Compute non-negative LASSO solutions, i.e. use the additional constraint that
805  <b>x</b> >= 0 in all solutions.
806  <DT><b>options.lars()</b> (inactive by default)
807  <DD> Compute a solution path according to the plain LARS rule, i.e. never remove
808  a variable from the active set once it entered.
809  <DT><b>options.leastSquaresSolutions(bool)</b> (default: true)
810  <DD> Use the algorithm mode selected above
811  to determine the sequence of active sets, but then compute and return an
812  ordinary (unconstrained) least squares solution for every active set.<br>
813  <b>Note:</b> The second form of leastAngleRegression() ignores this option and
814  does always compute both constrained and unconstrained solutions (returned in
815  \a lasso_solutions and \a lsq_solutions respectively).
816  <DT><b>maxSolutionCount(unsigned int n)</b> (default: n = 0, i.e. compute all solutions)
817  <DD> Compute at most <tt>n</tt> solutions.
818  </DL>
819 
820  <b>Usage:</b>
821 
822  \code
823  int m = ..., n = ...;
824  Matrix<double> A(m, n), b(m, 1);
825  ... // fill A and b
826 
827  // normalize the input
828  Matrix<double> offset(1,n), scaling(1,n);
829  prepareColumns(A, A, offset, scaling, DataPreparationGoals(ZeroMean|UnitVariance));
830  prepareColumns(b, b, DataPreparationGoals(ZeroMean));
831 
832  // arrays to hold the output
833  ArrayVector<ArrayVector<int> > activeSets;
834  ArrayVector<Matrix<double> > solutions;
835 
836  // run leastAngleRegression() in non-negative LASSO mode
837  int numSolutions = leastAngleRegression(A, b, activeSets, solutions,
838  LeastAngleRegressionOptions().nnlasso());
839 
840  // print results
841  Matrix<double> denseSolution(1, n);
842  for (MultiArrayIndex k = 0; k < numSolutions; ++k)
843  {
844  // transform the sparse solution into a dense vector
845  denseSolution.init(0.0); // ensure that inactive variables are zero
846  for (unsigned int i = 0; i < activeSets[k].size(); ++i)
847  {
848  // set the values of the active variables;
849  // activeSets[k][i] is the true index of the i-th variable in the active set
850  denseSolution(0, activeSets[k][i]) = solutions[k](i,0);
851  }
852 
853  // invert the input normalization
854  denseSolution = denseSolution * pointWise(scaling);
855 
856  // output the solution
857  std::cout << "solution " << k << ":\n" << denseSolution << std::endl;
858  }
859  \endcode
860 
861  <b>Required Interface:</b>
862 
863  <ul>
864  <li> <tt>T</tt> must be a numeric type (compatible with double)
865  <li> <tt>Array1 a1;</tt><br>
866  <tt>a1.push_back(ArrayVector<int>());</tt>
867  <li> <tt>Array2 a2;</tt><br>
868  <tt>a2.push_back(Matrix<T>());</tt>
869  </ul>
870  */
871 doxygen_overloaded_function(template <...> unsigned int leastAngleRegression)
872 
873 template <class T, class C1, class C2, class Array1, class Array2>
874 inline unsigned int
875 leastAngleRegression(MultiArrayView<2, T, C1> const & A, MultiArrayView<2, T, C2> const &b,
876  Array1 & activeSets, Array2 & solutions,
877  LeastAngleRegressionOptions const & options = LeastAngleRegressionOptions())
878 {
879  if(options.least_squares_solutions)
880  return detail::leastAngleRegressionImpl(A, b, activeSets, static_cast<Array2*>(0), &solutions, options);
881  else
882  return detail::leastAngleRegressionImpl(A, b, activeSets, &solutions, static_cast<Array2*>(0), options);
883 }
884 
885 template <class T, class C1, class C2, class Array1, class Array2>
886 inline unsigned int
887 leastAngleRegression(MultiArrayView<2, T, C1> const & A, MultiArrayView<2, T, C2> const &b,
888  Array1 & activeSets, Array2 & lasso_solutions, Array2 & lsq_solutions,
889  LeastAngleRegressionOptions const & options = LeastAngleRegressionOptions())
890 {
891  return detail::leastAngleRegressionImpl(A, b, activeSets, &lasso_solutions, &lsq_solutions, options);
892 }
893 
894  /** Non-negative Least Squares Regression.
895 
896  Given a matrix \a A with <tt>m</tt> rows and <tt>n</tt> columns (with <tt>m >= n</tt>),
897  and a column vector \a b of length <tt>m</tt>, this function computes
898  a column vector \a x of length <tt>n</tt> with <b>non-negative entries</b> that solves the optimization problem
899 
900  \f[ \tilde \textrm{\bf x} = \textrm{argmin}
901  \left|\left|\textrm{\bf A} \textrm{\bf x} - \textrm{\bf b}\right|\right|_2^2
902  \textrm{ subject to } \textrm{\bf x} \ge \textrm{\bf 0}
903  \f]
904 
905  Both \a b and \a x must be column vectors (i.e. matrices with <tt>1</tt> column).
906  Note that all matrices must already have the correct shape. The solution is computed by means
907  of \ref leastAngleRegression() with non-negativity constraint.
908 
909  <b>\#include</b> <vigra/regression.hxx><br/>
910  Namespaces: vigra and vigra::linalg
911 
912  <b> Declarations:</b>
913 
914  \code
915  namespace vigra {
916  namespace linalg {
917  template <class T, class C1, class C2, class C3>
918  void
919  nonnegativeLeastSquares(MultiArrayView<2, T, C1> const & A,
920  MultiArrayView<2, T, C2> const &b,
921  MultiArrayView<2, T, C3> &x);
922  }
923  using linalg::nonnegativeLeastSquares;
924  }
925  \endcode
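
 <b>Usage</b> (editor's sketch; shapes are arbitrary placeholders):

 \code
 Matrix<double> A(5, 2), b(5, 1), x(2, 1);
 ... // fill A and b
 nonnegativeLeastSquares(A, b, x); // all entries of x will be >= 0
 \endcode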
926  */
927 doxygen_overloaded_function(template <...> unsigned int nonnegativeLeastSquares)
928 
929 template <class T, class C1, class C2, class C3>
930 inline void
931 nonnegativeLeastSquares(MultiArrayView<2, T, C1> const & A,
932  MultiArrayView<2, T, C2> const &b, MultiArrayView<2, T, C3> &x)
933 {
934  vigra_precondition(columnCount(A) == rowCount(x) && rowCount(A) == rowCount(b),
935  "nonnegativeLeastSquares(): Matrix shape mismatch.");
936  vigra_precondition(columnCount(b) == 1 && columnCount(x) == 1,
937  "nonnegativeLeastSquares(): RHS and solution must be vectors (i.e. columnCount == 1).");
938 
939  ArrayVector<ArrayVector<MultiArrayIndex> > activeSets;
940  ArrayVector<Matrix<T> > results;
941 
942  leastAngleRegression(A, b, activeSets, results,
943  LeastAngleRegressionOptions().leastSquaresSolutions(false).nnlasso());
944  x.init(NumericTraits<T>::zero());
945  if(activeSets.size() > 0)
946  for(unsigned int k=0; k<activeSets.back().size(); ++k)
947  x(activeSets.back()[k],0) = results.back()[k];
948 }
949 
950 
951 //@}
952 
953 } // namespace linalg
954 
955 using linalg::leastSquares;
956 using linalg::weightedLeastSquares;
957 using linalg::ridgeRegression;
958 using linalg::weightedRidgeRegression;
959 using linalg::ridgeRegressionSeries;
960 using linalg::nonnegativeLeastSquares;
961 using linalg::leastAngleRegression;
962 using linalg::LeastAngleRegressionOptions;
963 
964 namespace detail {
965 
966 template <class T, class S>
967 inline T
968 getRow(MultiArrayView<1, T, S> const & a, MultiArrayIndex i)
969 {
970  return a(i);
971 }
972 
973 template <class T, class S>
974 inline MultiArrayView<1, T>
975 getRow(MultiArrayView<2, T, S> const & a, MultiArrayIndex i)
976 {
977  return a.bindInner(i);
978 }
979 
980 } // namespace detail
981 
982 /** \addtogroup Optimization
983  */
984 //@{
985 
986 /** \brief Pass options to nonlinearLeastSquares().
987 
988  <b>\#include</b> <vigra/regression.hxx>
989  Namespace: vigra
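
 All setters return a reference to the option object, so calls can be chained
 (editor's sketch; note that the header spells the damping setter "dampingParamters"):

 \code
 NonlinearLSQOptions options;
 options.maxIterations(100).tolerance(1e-10).dampingParamters(0.1, 1.4);
 \endcode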
990 */
991 class NonlinearLSQOptions
992 {
993  public:
994 
995  double epsilon, lambda, tau;
996  int max_iter;
997 
998  /** \brief Initialize options with default values.
999  */
1000  NonlinearLSQOptions()
1001  : epsilon(0.0),
1002  lambda(0.1),
1003  tau(1.4),
1004  max_iter(50)
1005  {}
1006 
1007  /** \brief Set minimum relative improvement in residual.
1008 
1009  The algorithm stops when the relative improvement in residuals
1010  between consecutive iterations is less than this value.
1011 
1012  Default: 0 (i.e. choose tolerance automatically, will be 10*epsilon of the numeric type)
1013  */
1014  NonlinearLSQOptions & tolerance(double eps)
1015  {
1016  epsilon = eps;
1017  return *this;
1018  }
1019 
1020  /** \brief Set maximum number of iterations.
1021 
1022  Default: 50
1023  */
1024  NonlinearLSQOptions & maxIterations(int iter)
1025  {
1026  max_iter = iter;
1027  return *this;
1028  }
1029 
1030  /** \brief Set damping parameters for the Levenberg-Marquardt algorithm.
1031 
1032  \a lambda determines by how much the diagonal is emphasized, and \a v is
1033  the factor by which lambda will be increased if more damping is needed
1034  for convergence
1035  (see <a href="http://en.wikipedia.org/wiki/Levenberg%E2%80%93Marquardt_algorithm">Wikipedia</a>
1036  for more explanations).
1037 
1038  Default: lambda = 0.1, v = 1.4
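
 Editor's note: with these parameters, each iteration solves the damped normal equations

 \f[ \left(\textrm{\bf J}^T\textrm{\bf J} + \lambda\,\textrm{diag}(\textrm{\bf J}^T\textrm{\bf J})\right)\,\delta\textrm{\bf p} = \textrm{\bf J}^T\textrm{\bf r}
 \f]

 where \f$\textrm{\bf J}\f$ is the Jacobian of the model predictions and
 \f$\textrm{\bf r}\f$ the current residual vector; \f$\lambda\f$ is divided by \a v
 when a step is accepted and multiplied by \a v when it is rejected.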
1039  */
1040  NonlinearLSQOptions & dampingParamters(double lambda, double v)
1041  {
1042  vigra_precondition(lambda > 0.0 && v > 0.0,
1043  "NonlinearLSQOptions::dampingParamters(): parameters must be positive.");
1044  this->lambda = lambda;
1045  tau = v;
1046  return *this;
1047  }
1048 };
1049 
1050 template <unsigned int D, class T, class S1, class S2,
1051  class U, int N,
1052  class Functor>
1053 T
1054 nonlinearLeastSquaresImpl(MultiArrayView<D, T, S1> const & features,
1055  MultiArrayView<1, T, S2> const & response,
1056  TinyVector<U, N> & p,
1057  Functor model,
1058  NonlinearLSQOptions const & options)
1059 {
1060  vigra_precondition(features.shape(0) == response.shape(0),
1061  "nonlinearLeastSquares(): shape mismatch between features and response.");
1062 
1063  double t = options.tau, l = options.lambda; // initial damping parameters
1064 
1065  double epsilonT = NumericTraits<T>::epsilon()*10.0,
1066  epsilonU = NumericTraits<U>::epsilon()*10.0,
1067  epsilon = options.epsilon <= 0.0
1068  ? std::max(epsilonT, epsilonU)
1069  : options.epsilon;
1070 
1071  linalg::Matrix<T> jj(N,N); // outer product of the Jacobian
1072  TinyVector<U, N> jr, dp;
1073 
1074  T residual = 0.0;
1075  bool didStep = true;
1076 
1077  for(int iter=0; iter<options.max_iter; ++iter)
1078  {
1079  if(didStep)
1080  {
1081  // update the residual and Jacobian
1082  residual = 0.0;
1083  jr = 0.0;
1084  jj = 0.0;
1085 
1086  for(int i=0; i<features.shape(0); ++i)
1087  {
1088  autodiff::DualVector<U, N> res = model(detail::getRow(features, i), autodiff::dualMatrix(p));
1089 
1090  T r = response(i) - res.v;
1091  jr += r * res.d;
1092  jj += outer(res.d);
1093  residual += sq(r);
1094  }
1095  }
1096 
1097  // perform a damped gradient step
1098  linalg::Matrix<T> djj(jj);
1099  djj.diagonal() *= 1.0 + l;
1100  linearSolve(djj, jr, dp);
1101 
1102  TinyVector<U, N> p_new = p + dp;
1103 
1104  // compute the new residual
1105  T residual_new = 0.0;
1106  for(int i=0; i<features.shape(0); ++i)
1107  {
1108  residual_new += sq(response(i) - model(detail::getRow(features, i), p_new));
1109  }
1110 
1111  if(residual_new < residual)
1112  {
1113  // accept the step
1114  p = p_new;
1115  if(std::abs((residual - residual_new) / residual) < epsilon)
1116  return residual_new;
1117  // try less damping in the next iteration
1118  l /= t;
1119  didStep = true;
1120  }
1121  else
1122  {
1123  // reject the step and use more damping in the next iteration
1124  l *= t;
1125  didStep = false;
1126  }
1127  }
1128 
1129  return residual;
1130 }
1131 
1132 /********************************************************/
1133 /* */
1134 /* nonlinearLeastSquares */
1135 /* */
1136 /********************************************************/
1137 
1138 /** \brief Fit a non-linear model to given data by minimizing least squares loss.
1139 
1140  <b> Declarations:</b>
1141 
1142  \code
1143  namespace vigra {
1144  // variant 1: optimize a univariate model ('x' is a 1D array of scalar data points)
1145  template <class T, class S1, class S2,
1146  class U, int N,
1147  class Functor>
1148  T
1149  nonlinearLeastSquares(MultiArrayView<1, T, S1> const & x,
1150  MultiArrayView<1, T, S2> const & y,
1151  TinyVector<U, N> & model_parameters,
1152  Functor model,
1153  NonlinearLSQOptions const & options = NonlinearLSQOptions());
1154 
1155  // variant 2: optimize a multivariate model ('x' is a 2D array of vector-valued data points)
1156  template <class T, class S1, class S2,
1157  class U, int N,
1158  class Functor>
1159  T
1160  nonlinearLeastSquares(MultiArrayView<2, T, S1> const & x,
1161  MultiArrayView<1, T, S2> const & y,
1162  TinyVector<U, N> & model_parameters,
1163  Functor model,
1164  NonlinearLSQOptions const & options = NonlinearLSQOptions());
1165  }
1166  \endcode
1167 
1168  This function implements the
1169  <a href="http://en.wikipedia.org/wiki/Levenberg%E2%80%93Marquardt_algorithm">Levenberg-Marquardt algorithm</a>
1170  to fit a non-linear model to given data. The model depends on a vector of
1171  parameters <b>p</b>, which are to be chosen such that the least-squares residual
1172  between the data and the model's predictions is minimized according to the objective function:
1173 
1174  \f[ \tilde \textrm{\bf p} = \textrm{argmin}_{\textrm{\bf p}} \sum_i \left( y_i - f(\textrm{\bf x}_i; \textrm{\bf p}) \right)^2
1175  \f]
1176 
1177  where \f$f(\textrm{\bf x}; \textrm{\bf p})\f$ is the model to be optimized
1178  (with arguments \f$\textrm{\bf x}\f$ and parameters \f$\textrm{\bf p}\f$), and
1179  \f$(\textrm{\bf x}_i; y_i)\f$ are the feature/response pairs of the given data.
1180  Since the model is non-linear (otherwise, you should use ordinary \ref leastSquares()),
1181  it must be linearized in terms of a first-order Taylor expansion, and the optimal
1182  parameters <b>p</b> have to be determined iteratively. In order for the iterations to
1183  converge to the desired solution, a good initial guess on <b>p</b> is required.
1184 
1185  The model must be specified by a functor which takes one of the following forms:
1186  \code
1187  typedef double DataType; // type of your data samples, may be any numeric type
1188  static const int N = ...; // number of model parameters
1189 
1190  // variant 1: the features x are scalars
1191  struct UnivariateModel
1192  {
1193  template <class T>
1194  T operator()(DataType x, TinyVector<T, N> const & p) const { ... }
1195  };
1196 
1197  // variant 2: the features x are vectors
1198  struct MultivariateModel
1199  {
1200  template <class T>
1201  T operator()(MultiArrayView<1, DataType> const & x, TinyVector<T, N> const & p) const { ... }
1202  };
1203  \endcode
1204  Each call to the functor's <tt>operator()</tt> computes the model's prediction for a single data
1205  point. The current model parameters are specified in a TinyVector of appropriate length.
1206  The type <tt>T</tt> must be templated: normally, it is the same as <tt>DataType</tt>, but
1207  the nonlinearLeastSquares() function will temporarily replace it with a special number type
1208  that supports <a href="http://en.wikipedia.org/wiki/Automatic_differentiation">automatic differentiation</a>
1209  (see \ref vigra::autodiff::DualVector). In this way, the derivatives needed in the model's Taylor
1210  expansion can be computed automatically.
1211 
1212  When the model is univariate (has a single scalar argument), the samples must be passed to
1213  nonlinearLeastSquares() in a pair 'x', 'y' of 1D <tt>MultiArrayView</tt>s (variant 1).
1214  When the model is multivariate (has a vector-valued argument), the 'x' input must
1215  be a 2D <tt>MultiArrayView</tt> (variant 2), each of whose rows represents a single data sample
1216  (i.e. the number of columns corresponds to the length of the model's argument vector).
1217  The number of rows in 'x' defines the number of data samples and must match the length
1218  of array 'y'.
1219 
1220  The <tt>TinyVector</tt> 'model_parameters' holds the initial guess for the model parameters and
1221  will be overwritten by the optimal values found by the algorithm. The algorithm's internal behavior
1222  can be controlled by customizing the option object \ref vigra::NonlinearLSQOptions.
1223 
1224  The function returns the residual sum of squared errors of the final solution.
1225 
1226  <b> Usage:</b>
1227 
1228  <b>\#include</b> <vigra/regression.hxx><br>
1229  Namespace: vigra
1230 
1231  Suppose that we want to fit a centered Gaussian function of the form
1232  \f[ f(x ; a, s, b) = a \exp\left(-\frac{x^2}{2 s^2}\right) + b \f]
1233  to noisy data \f$(x_i, y_i)\f$, i.e. we want to find parameters a, s, b such that
1234  the residual \f$\sum_i \left(y_i - f(x_i; a,s,b)\right)^2\f$ is minimized.
1235  The model parameters are placed in a <tt>TinyVector<T, 3></tt> <b>p</b> according to the rules<br/>
1236  <tt>p[0] <=> a</tt>, <tt>p[1] <=> s</tt> and <tt>p[2] <=> b</tt>.<br/> The following
1237  functor computes the model's prediction for a single data point <tt>x</tt>:
1238  \code
1239  struct GaussianModel
1240  {
1241  template <class T>
1242  T operator()(double x, TinyVector<T, 3> const & p) const
1243  {
1244  return p[0] * exp(-0.5 * sq(x / p[1])) + p[2];
1245  }
1246  };
1247  \endcode
1248  Now we can find optimal values for the parameters like this:
1249  \code
1250  int size = ...; // number of data points
1251  MultiArray<1, double> x(size), y(size);
1252  ... // fill the data arrays
1253 
1254  TinyVector<double, 3> p(2.0, 1.0, 0.5); // your initial guess of the parameters
1255  // (will be overwritten with the optimal values)
1256  double residual = nonlinearLeastSquares(x, y, p, GaussianModel());
1257 
1258  std::cout << "Model parameters: a=" << p[0] << ", s=" << p[1] << ", b=" << p[2] << " (residual: " << residual << ")\n";
1259  \endcode
1260 */
1261 doxygen_overloaded_function(template <...> T nonlinearLeastSquares)
1262 
1263 template <class T, class S1, class S2,
1264  class U, int N,
1265  class Functor>
1266 inline T
1267 nonlinearLeastSquares(MultiArrayView<1, T, S1> const & features,
1268  MultiArrayView<1, T, S2> const & response,
1269  TinyVector<U, N> & p,
1270  Functor model,
1271  NonlinearLSQOptions const & options = NonlinearLSQOptions())
1272 {
1273  return nonlinearLeastSquaresImpl(features, response, p, model, options);
1274 }
1275 
1276 template <class T, class S1, class S2,
1277  class U, int N,
1278  class Functor>
1279 inline T
1280 nonlinearLeastSquares(MultiArrayView<2, T, S1> const & features,
1281  MultiArrayView<1, T, S2> const & response,
1282  TinyVector<U, N> & p,
1283  Functor model,
1284  NonlinearLSQOptions const & options = NonlinearLSQOptions())
1285 {
1286  return nonlinearLeastSquaresImpl(features, response, p, model, options);
1287 }
1288 
1289 //@}
1290 
1291 } // namespace vigra
1292 
1293 #endif // VIGRA_REGRESSION_HXX