LaRank.cpp
// -*- C++ -*-
// Main functions of the LaRank algorithm for solving Multiclass SVM
// Copyright (C) 2008- Antoine Bordes
// Shogun specific adjustments (w) 2009 Soeren Sonnenburg

// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA
//
/***********************************************************************
 *
 *  LUSH Lisp Universal Shell
 *    Copyright (C) 2002 Leon Bottou, Yann Le Cun, AT&T Corp, NECI.
 *  Includes parts of TL3:
 *    Copyright (C) 1987-1999 Leon Bottou and Neuristique.
 *  Includes selected parts of SN3.2:
 *    Copyright (C) 1991-2001 AT&T Corp.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA
 *
 ***********************************************************************/

/***********************************************************************
 * $Id: kcache.c,v 1.9 2007/01/25 22:42:09 leonb Exp $
 **********************************************************************/

#include <vector>
#include <algorithm>

#include <shogun/io/SGIO.h>
#include <shogun/lib/Signal.h>
#include <shogun/lib/Time.h>
#include <shogun/mathematics/Math.h>
#include <shogun/classifier/svm/LaRank.h>
#include <shogun/kernel/Kernel.h>

using namespace shogun;

namespace shogun
{
	static larank_kcache_t* larank_kcache_create (CKernel* kernelfunc)
	{
		larank_kcache_t *self;
		self = SG_CALLOC (larank_kcache_t, 1);
		self->l = 0;
		self->maxrowlen = 0;
		self->func = kernelfunc;
		self->prevbuddy = self;
		self->nextbuddy = self;
		self->cursize = sizeof (larank_kcache_t);
		self->maxsize = 256 * 1024 * 1024;
		self->qprev = SG_MALLOC(int32_t, 1);
		self->qnext = SG_MALLOC(int32_t, 1);
		self->rnext = self->qnext + 1;
		self->rprev = self->qprev + 1;
		self->rprev[-1] = -1;
		self->rnext[-1] = -1;
		return self;
	}
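
	// Note on the pointer arithmetic above: rnext/rprev point one element past
	// the start of qnext/qprev, so index -1 is a valid slot; it serves as the
	// sentinel head of the circular LRU list that threads all cached rows.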

	static void xtruncate (larank_kcache_t * self, int32_t k, int32_t nlen)
	{
		int32_t olen = self->rsize[k];
		if (nlen < olen)
		{
			float32_t *ndata;
			float32_t *odata = self->rdata[k];
			if (nlen > 0)
			{
				ndata = SG_MALLOC(float32_t, nlen);
				memcpy (ndata, odata, nlen * sizeof (float32_t));
			}
			else
			{
				ndata = 0;
				self->rnext[self->rprev[k]] = self->rnext[k];
				self->rprev[self->rnext[k]] = self->rprev[k];
				self->rnext[k] = self->rprev[k] = k;
			}
			SG_FREE (odata);
			self->rdata[k] = ndata;
			self->rsize[k] = nlen;
			self->cursize += (int64_t) (nlen - olen) * sizeof (float32_t);
		}
	}

	static void xpurge (larank_kcache_t * self)
	{
		if (self->cursize > self->maxsize)
		{
			int32_t k = self->rprev[-1];
			while (self->cursize > self->maxsize && k != self->rnext[-1])
			{
				int32_t pk = self->rprev[k];
				xtruncate (self, k, 0);
				k = pk;
			}
		}
	}
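
	// Eviction policy: rows are freed starting from rprev[-1], the least
	// recently used end of the circular list, until the cache fits within
	// maxsize again.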

	static void larank_kcache_set_maximum_size (larank_kcache_t * self, int64_t entries)
	{
		ASSERT (self);
		ASSERT (entries > 0);
		self->maxsize = entries;
		xpurge (self);
	}

	static void larank_kcache_destroy (larank_kcache_t * self)
	{
		if (self)
		{
			int32_t i;
			larank_kcache_t *nb = self->nextbuddy;
			larank_kcache_t *pb = self->prevbuddy;
			pb->nextbuddy = nb;
			nb->prevbuddy = pb;
			/* delete */
			if (self->i2r)
				SG_FREE (self->i2r);
			if (self->r2i)
				SG_FREE (self->r2i);
			if (self->rdata)
				for (i = 0; i < self->l; i++)
					if (self->rdata[i])
						SG_FREE (self->rdata[i]);
			if (self->rdata)
				SG_FREE (self->rdata);
			if (self->rsize)
				SG_FREE (self->rsize);
			if (self->rdiag)
				SG_FREE (self->rdiag);
			if (self->qnext)
				SG_FREE (self->qnext);
			if (self->qprev)
				SG_FREE (self->qprev);
			memset (self, 0, sizeof (larank_kcache_t));
			SG_FREE (self);
		}
	}

	static void xminsize (larank_kcache_t * self, int32_t n)
	{
		int32_t ol = self->l;
		if (n > ol)
		{
			int32_t i;
			int32_t nl = CMath::max (256, ol);
			while (nl < n)
				nl = nl + nl;
			self->i2r = SG_REALLOC (int32_t, self->i2r, nl);
			self->r2i = SG_REALLOC (int32_t, self->r2i, nl);
			self->rsize = SG_REALLOC (int32_t, self->rsize, nl);
			self->qnext = SG_REALLOC (int32_t, self->qnext, (1 + nl));
			self->qprev = SG_REALLOC (int32_t, self->qprev, (1 + nl));
			self->rdiag = SG_REALLOC (float32_t, self->rdiag, nl);
			self->rdata = SG_REALLOC (float32_t*, self->rdata, nl);
			self->rnext = self->qnext + 1;
			self->rprev = self->qprev + 1;
			for (i = ol; i < nl; i++)
			{
				self->i2r[i] = i;
				self->r2i[i] = i;
				self->rsize[i] = -1;
				self->rnext[i] = i;
				self->rprev[i] = i;
				self->rdata[i] = 0;
			}
			self->l = nl;
		}
	}
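
	// Capacity grows geometrically (doubling, starting from at least 256
	// slots), so n insertions cause only O(log n) reallocations. Fresh slots
	// are marked empty with rsize = -1 and linked to themselves.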

	static int32_t* larank_kcache_r2i (larank_kcache_t * self, int32_t n)
	{
		xminsize (self, n);
		return self->r2i;
	}

	static void xextend (larank_kcache_t * self, int32_t k, int32_t nlen)
	{
		int32_t olen = self->rsize[k];
		if (nlen > olen)
		{
			float32_t *ndata = SG_MALLOC(float32_t, nlen);
			if (olen > 0)
			{
				float32_t *odata = self->rdata[k];
				memcpy (ndata, odata, olen * sizeof (float32_t));
				SG_FREE (odata);
			}
			self->rdata[k] = ndata;
			self->rsize[k] = nlen;
			self->cursize += (int64_t) (nlen - olen) * sizeof (float32_t);
			self->maxrowlen = CMath::max (self->maxrowlen, nlen);
		}
	}

	static void xswap (larank_kcache_t * self, int32_t i1, int32_t i2, int32_t r1, int32_t r2)
	{
		/* swap row data */
		if (r1 < self->maxrowlen || r2 < self->maxrowlen)
		{
			int32_t mrl = 0;
			int32_t k = self->rnext[-1];
			while (k >= 0)
			{
				int32_t nk = self->rnext[k];
				int32_t n = self->rsize[k];
				int32_t rr = self->i2r[k];
				float32_t *d = self->rdata[k];
				if (r1 < n)
				{
					if (r2 < n)
					{
						float32_t t1 = d[r1];
						float32_t t2 = d[r2];
						d[r1] = t2;
						d[r2] = t1;
					}
					else if (rr == r2)
					{
						d[r1] = self->rdiag[k];
					}
					else
					{
						int32_t arsize = self->rsize[i2];
						if (rr < arsize && rr != r1)
							d[r1] = self->rdata[i2][rr];
						else
							xtruncate (self, k, r1);
					}
				}
				else if (r2 < n)
				{
					if (rr == r1)
					{
						d[r2] = self->rdiag[k];
					}
					else
					{
						int32_t arsize = self->rsize[i1];
						if (rr < arsize && rr != r2)
							d[r2] = self->rdata[i1][rr];
						else
							xtruncate (self, k, r2);
					}
				}
				mrl = CMath::max (mrl, self->rsize[k]);
				k = nk;
			}
			self->maxrowlen = mrl;
		}
		/* swap r2i and i2r */
		self->r2i[r1] = i2;
		self->r2i[r2] = i1;
		self->i2r[i1] = r2;
		self->i2r[i2] = r1;
	}
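
	// xswap keeps every cached row consistent with the new ordering: an entry
	// at position r1 or r2 is either swapped, patched from the cached diagonal
	// or from the symmetric row (K is symmetric), or the row is truncated when
	// the value cannot be recovered without a kernel evaluation.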

	static void larank_kcache_swap_rr (larank_kcache_t * self, int32_t r1, int32_t r2)
	{
		xminsize (self, 1 + CMath::max(r1, r2));
		xswap (self, self->r2i[r1], self->r2i[r2], r1, r2);
	}

	static void larank_kcache_swap_ri (larank_kcache_t * self, int32_t r1, int32_t i2)
	{
		xminsize (self, 1 + CMath::max (r1, i2));
		xswap (self, self->r2i[r1], i2, r1, self->i2r[i2]);
	}

	static float64_t xquery (larank_kcache_t * self, int32_t i, int32_t j)
	{
		/* search buddies */
		larank_kcache_t *cache = self->nextbuddy;
		do
		{
			int32_t l = cache->l;
			if (i < l && j < l)
			{
				int32_t s = cache->rsize[i];
				int32_t p = cache->i2r[j];
				if (p < s)
					return cache->rdata[i][p];
				if (i == j && s >= 0)
					return cache->rdiag[i];
				p = cache->i2r[i];
				s = cache->rsize[j];
				if (p < s)
					return cache->rdata[j][p];
			}
			cache = cache->nextbuddy;
		}
		while (cache != self);
		/* compute */
		return self->func->kernel(i, j);
	}
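
	// Lookup order: walk the ring of buddy caches and try to serve K(i,j) from
	// a cached row (in either orientation) or from the cached diagonal; only
	// if every buddy misses is the kernel function actually evaluated.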

	static float64_t larank_kcache_query (larank_kcache_t * self, int32_t i, int32_t j)
	{
		ASSERT (self);
		ASSERT (i >= 0);
		ASSERT (j >= 0);
		return xquery (self, i, j);
	}

	static void larank_kcache_set_buddy (larank_kcache_t * self, larank_kcache_t * buddy)
	{
		larank_kcache_t *p = self;
		larank_kcache_t *selflast = self->prevbuddy;
		larank_kcache_t *buddylast = buddy->prevbuddy;
		/* check functions are identical */
		ASSERT (self->func == buddy->func);
		/* make sure we are not already buddies */
		do
		{
			if (p == buddy)
				return;
			p = p->nextbuddy;
		}
		while (p != self);
		/* link */
		selflast->nextbuddy = buddy;
		buddy->prevbuddy = selflast;
		buddylast->nextbuddy = self;
		self->prevbuddy = buddylast;
	}
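
	// Linking buddies splices two circular doubly linked lists into one ring,
	// so all outputs that share the same kernel function also share each
	// other's cached kernel values.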

	static float32_t* larank_kcache_query_row (larank_kcache_t * self, int32_t i, int32_t len)
	{
		ASSERT (i >= 0);
		if (i < self->l && len <= self->rsize[i])
		{
			self->rnext[self->rprev[i]] = self->rnext[i];
			self->rprev[self->rnext[i]] = self->rprev[i];
		}
		else
		{
			int32_t olen, p;
			float32_t *d;
			if (i >= self->l || len >= self->l)
				xminsize (self, CMath::max (1 + i, len));
			olen = self->rsize[i];
			if (olen < len)
			{
				if (olen < 0)
				{
					self->rdiag[i] = self->func->kernel(i, i);
					olen = self->rsize[i] = 0;
				}
				xextend (self, i, len);
				d = self->rdata[i];
				self->rsize[i] = olen;
				for (p = olen; p < len; p++)
					d[p] = larank_kcache_query (self, self->r2i[p], i);
				self->rsize[i] = len;
			}
			self->rnext[self->rprev[i]] = self->rnext[i];
			self->rprev[self->rnext[i]] = self->rprev[i];
			xpurge (self);
		}
		self->rprev[i] = -1;
		self->rnext[i] = self->rnext[-1];
		self->rnext[self->rprev[i]] = i;
		self->rprev[self->rnext[i]] = i;
		return self->rdata[i];
	}
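
	// After any query the row is unlinked from its current position and
	// re-inserted right behind the -1 sentinel, i.e. at the most recently
	// used end of the LRU list.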

}

// Initializing an output class (basically creating a kernel cache for it)
void LaRankOutput::initialize (CKernel* kfunc, int64_t cache)
{
	kernel = larank_kcache_create (kfunc);
	larank_kcache_set_maximum_size (kernel, cache * 1024 * 1024);
	beta = SG_MALLOC(float32_t, 1);
	g = SG_MALLOC(float32_t, 1);
	*beta=0;
	*g=0;
	l = 0;
}

// Destroying an output class (basically destroying the kernel cache)
void LaRankOutput::destroy ()
{
	larank_kcache_destroy (kernel);
	kernel=NULL;
	SG_FREE(beta);
	SG_FREE(g);
	beta=NULL;
	g=NULL;
}

// !Important! Computing the score of a given input vector for the actual output
float64_t LaRankOutput::computeScore (int32_t x_id)
{
	if (l == 0)
		return 0;
	else
	{
		float32_t *row = larank_kcache_query_row (kernel, x_id, l);
		return CMath::dot (beta, row, l);
	}
}
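
// The score is the kernel expansion s(x) = sum_r beta[r] * K(x_r, x),
// evaluated as a dot product against the cached kernel row of x over the
// current l support vectors.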

// !Important! Computing the gradient of a given input vector for the actual output
float64_t LaRankOutput::computeGradient (int32_t xi_id, int32_t yi, int32_t ythis)
{
	return (yi == ythis ? 1 : 0) - computeScore (xi_id);
}

// Updating the solution in the actual output
void LaRankOutput::update (int32_t x_id, float64_t lambda, float64_t gp)
{
	int32_t *r2i = larank_kcache_r2i (kernel, l);
	int64_t xr = l + 1;
	for (int32_t r = 0; r < l; r++)
		if (r2i[r] == x_id)
		{
			xr = r;
			break;
		}

	// updates the cache order and the beta coefficient
	if (xr < l)
	{
		beta[xr]+=lambda;
	}
	else
	{
		larank_kcache_swap_ri (kernel, l, x_id);
		CMath::resize(g, l, l+1);
		CMath::resize(beta, l, l+1);
		g[l]=gp;
		beta[l]=lambda;
		l++;
	}

	// update stored gradients
	float32_t *row = larank_kcache_query_row (kernel, x_id, l);
	for (int32_t r = 0; r < l; r++)
	{
		float64_t oldg = g[r];
		g[r]=oldg - lambda * row[r];
	}
}
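
// Adding lambda to the beta of x shifts every stored gradient by
// -lambda * K(x, x_r), which is exactly the final loop: g[r] -= lambda * row[r].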

// Linking the cache of this output to the cache of another "buddy" output
// so that if a requested value is not found in this cache, you can ask your buddy if it has it.
void LaRankOutput::set_kernel_buddy (larank_kcache_t * bud)
{
	larank_kcache_set_buddy (bud, kernel);
}

// Removing useless support vectors (for which beta=0)
int32_t LaRankOutput::cleanup ()
{
	int32_t count = 0;
	std::vector < int32_t >idx;
	for (int32_t x = 0; x < l; x++)
	{
		if ((beta[x] < FLT_EPSILON) && (beta[x] > -FLT_EPSILON))
		{
			idx.push_back (x);
			count++;
		}
	}
	int32_t new_l = l - count;
	for (int32_t xx = 0; xx < count; xx++)
	{
		int32_t i = idx[xx] - xx;
		for (int32_t r = i; r < (l - 1); r++)
		{
			larank_kcache_swap_rr (kernel, r, int64_t(r) + 1);
			beta[r]=beta[r + 1];
			g[r]=g[r + 1];
		}
	}
	CMath::resize(beta, l, new_l+1);
	CMath::resize(g, l, new_l+1);
	beta[new_l]=0;
	g[new_l]=0;
	l = new_l;
	return count;
}

// --- Below are information or "get" functions --- //
//
float64_t LaRankOutput::getW2 ()
{
	float64_t sum = 0;
	int32_t *r2i = larank_kcache_r2i (kernel, l + 1);
	for (int32_t r = 0; r < l; r++)
	{
		float32_t *row_r = larank_kcache_query_row (kernel, r2i[r], l);
		sum += beta[r] * CMath::dot (beta, row_r, l);
	}
	return sum;
}

float64_t LaRankOutput::getKii (int32_t x_id)
{
	return larank_kcache_query (kernel, x_id, x_id);
}

//
float64_t LaRankOutput::getBeta (int32_t x_id)
{
	int32_t *r2i = larank_kcache_r2i (kernel, l);
	int32_t xr = -1;
	for (int32_t r = 0; r < l; r++)
		if (r2i[r] == x_id)
		{
			xr = r;
			break;
		}
	return (xr < 0 ? 0 : beta[xr]);
}

//
float64_t LaRankOutput::getGradient (int32_t x_id)
{
	int32_t *r2i = larank_kcache_r2i (kernel, l);
	int32_t xr = -1;
	for (int32_t r = 0; r < l; r++)
		if (r2i[r] == x_id)
		{
			xr = r;
			break;
		}
	return (xr < 0 ? 0 : g[xr]);
}

bool LaRankOutput::isSupportVector (int32_t x_id) const
{
	int32_t *r2i = larank_kcache_r2i (kernel, l);
	int32_t xr = -1;
	for (int32_t r = 0; r < l; r++)
		if (r2i[r] == x_id)
		{
			xr = r;
			break;
		}
	return (xr >= 0);
}

//
int32_t LaRankOutput::getSV (float32_t* &sv) const
{
	sv=SG_MALLOC(float32_t, l);
	int32_t *r2i = larank_kcache_r2i (kernel, l);
	for (int32_t r = 0; r < l; r++)
		sv[r]=r2i[r];
	return l;
}

CLaRank::CLaRank (): CMultiClassSVM(ONE_VS_REST),
	nb_seen_examples (0), nb_removed (0),
	n_pro (0), n_rep (0), n_opt (0),
	w_pro (1), w_rep (1), w_opt (1), y0 (0), dual (0),
	batch_mode(true), step(0)
{
}

CLaRank::CLaRank (float64_t C, CKernel* k, CLabels* lab):
	CMultiClassSVM(ONE_VS_REST, C, k, lab),
	nb_seen_examples (0), nb_removed (0),
	n_pro (0), n_rep (0), n_opt (0),
	w_pro (1), w_rep (1), w_opt (1), y0 (0), dual (0),
	batch_mode(true), step(0)
{
}

CLaRank::~CLaRank ()
{
	destroy();
}

bool CLaRank::train_machine(CFeatures* data)
{
	tau = 0.0001;

	ASSERT(kernel);
	ASSERT(labels && labels->get_num_labels());

	CSignal::clear_cancel();

	if (data)
	{
		if (data->get_num_vectors() != labels->get_num_labels())
		{
			SG_ERROR("Number of vectors (%d) does not match number of labels (%d)\n",
					data->get_num_vectors(), labels->get_num_labels());
		}
		kernel->init(data, data);
	}

	ASSERT(kernel->get_num_vec_lhs() && kernel->get_num_vec_rhs());

	nb_train=labels->get_num_labels();
	cache = kernel->get_cache_size();

	int32_t n_it = 1;
	float64_t gap = DBL_MAX;

	SG_INFO("Training on %d examples\n", nb_train);
	while (gap > C1 && (!CSignal::cancel_computations())) // stopping criteria
	{
		float64_t tr_err = 0;
		int32_t ind = step;
		for (int32_t i = 0; i < nb_train; i++)
		{
			int32_t y=labels->get_label(i);
			if (add (i, y) != y) // call the add function
				tr_err++;

			if (ind && i / ind)
			{
				SG_DEBUG("Done: %d %% Train error (online): %f%%\n",
						(int32_t) (((float64_t) i) / nb_train * 100), (tr_err / ((float64_t) i + 1)) * 100);
				ind += step;
			}
		}

		SG_DEBUG("End of iteration %d\n", n_it++);
		SG_DEBUG("Train error (online): %f%%\n", (tr_err / nb_train) * 100);
		gap = computeGap ();
		SG_ABS_PROGRESS(gap, -CMath::log10(gap), -CMath::log10(DBL_MAX), -CMath::log10(C1), 6);

		if (!batch_mode) // skip stopping criteria if online mode
			gap = 0;
	}
	SG_DONE();

	int32_t num_classes = outputs.size();
	create_multiclass_svm(num_classes);
	SG_DEBUG("%d classes\n", num_classes);

	// Used for saving a model file
	int32_t i=0;
	for (outputhash_t::const_iterator it = outputs.begin (); it != outputs.end (); ++it)
	{
		const LaRankOutput* o=&(it->second);

		larank_kcache_t* k=o->getKernel();
		int32_t l=o->get_l();
		float32_t* beta=o->getBetas();
		int32_t *r2i = larank_kcache_r2i (k, l);

		ASSERT(l>0);
		SG_DEBUG("svm[%d] has %d sv, b=%f\n", i, l, 0.0);

		CSVM* svm=new CSVM(l);

		for (int32_t j=0; j<l; j++)
		{
			svm->set_alpha(j, beta[j]);
			svm->set_support_vector(j, r2i[j]);
		}

		svm->set_bias(0);
		set_svm(i, svm);
		i++;
	}
	destroy();

	return true;
}
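
// Usage sketch (illustrative only; assumes a kernel and labels have been
// built elsewhere by the calling application):
//
//   CLaRank* machine = new CLaRank(C, kernel, labels);
//   machine->train();                    // batch mode: loops until gap <= C
//   int32_t ypred = machine->predict(x_id);
//   SG_UNREF(machine);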

// LEARNING FUNCTION: add new patterns and run optimization steps selected with adaptive schedule
int32_t CLaRank::add (int32_t x_id, int32_t yi)
{
	++nb_seen_examples;
	// create a new output object if this one has never been seen before
	if (!getOutput (yi))
	{
		outputs.insert (std::make_pair (yi, LaRankOutput ()));
		LaRankOutput *cur = getOutput (yi);
		cur->initialize (kernel, cache);
		if (outputs.size () == 1)
			y0 = outputs.begin ()->first;
		// link the cache of this new output to a buddy
		if (outputs.size () > 1)
		{
			LaRankOutput *out0 = getOutput (y0);
			cur->set_kernel_buddy (out0->getKernel ());
		}
	}

	LaRankPattern tpattern (x_id, yi);
	LaRankPattern & pattern = (patterns.isPattern (x_id)) ? patterns.getPattern (x_id) : tpattern;

	// ProcessNew with the "fresh" pattern
	float64_t time1 = CTime::get_curtime();
	process_return_t pro_ret = process (pattern, processNew);
	float64_t dual_increase = pro_ret.dual_increase;
	float64_t duration = (CTime::get_curtime() - time1);
	float64_t coeff = dual_increase / (0.00001 + duration);
	dual += dual_increase;
	n_pro++;
	w_pro = 0.05 * coeff + (1 - 0.05) * w_pro;

	// ProcessOld & Optimize until ready for a new processnew
	// (Adaptive schedule here)
	for (;;)
	{
		float64_t w_sum = w_pro + w_rep + w_opt;
		float64_t prop_min = w_sum / 20;
		if (w_pro < prop_min)
			w_pro = prop_min;
		if (w_rep < prop_min)
			w_rep = prop_min;
		if (w_opt < prop_min)
			w_opt = prop_min;
		w_sum = w_pro + w_rep + w_opt;
		float64_t r = CMath::random(0.0, w_sum);
		if (r <= w_pro)
		{
			break;
		}
		else if ((r > w_pro) && (r <= w_pro + w_rep)) // ProcessOld here
		{
			float64_t ltime1 = CTime::get_curtime ();
			float64_t ldual_increase = reprocess ();
			float64_t lduration = (CTime::get_curtime () - ltime1);
			float64_t lcoeff = ldual_increase / (0.00001 + lduration);
			dual += ldual_increase;
			n_rep++;
			w_rep = 0.05 * lcoeff + (1 - 0.05) * w_rep;
		}
		else // Optimize here
		{
			float64_t ltime1 = CTime::get_curtime ();
			float64_t ldual_increase = optimize ();
			float64_t lduration = (CTime::get_curtime () - ltime1);
			float64_t lcoeff = ldual_increase / (0.00001 + lduration);
			dual += ldual_increase;
			n_opt++;
			w_opt = 0.05 * lcoeff + (1 - 0.05) * w_opt;
		}
	}
	if (nb_seen_examples % 100 == 0) // Cleanup useless Support Vectors/Patterns sometimes
		nb_removed += cleanup ();
	return pro_ret.ypred;
}
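
// The weights w_pro/w_rep/w_opt track the recent dual increase per second of
// ProcessNew, ProcessOld and Optimize (an exponential moving average with
// factor 0.05); each step is then drawn with probability proportional to its
// weight, with every weight floored at 5% of their sum.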

// PREDICTION FUNCTION: main function in la_rank_classify
int32_t CLaRank::predict (int32_t x_id)
{
	int32_t res = -1;
	float64_t score_max = -DBL_MAX;
	for (outputhash_t::iterator it = outputs.begin (); it != outputs.end ();++it)
	{
		float64_t score = it->second.computeScore (x_id);
		if (score > score_max)
		{
			score_max = score;
			res = it->first;
		}
	}
	return res;
}

void CLaRank::destroy ()
{
	for (outputhash_t::iterator it = outputs.begin (); it != outputs.end ();++it)
		it->second.destroy ();
}

// Compute Duality gap (costly but used in stopping criteria in batch mode)
float64_t CLaRank::computeGap ()
{
	float64_t sum_sl = 0;
	float64_t sum_bi = 0;
	for (uint32_t i = 0; i < patterns.maxcount (); ++i)
	{
		const LaRankPattern & p = patterns[i];
		if (!p.exists ())
			continue;
		LaRankOutput *out = getOutput (p.y);
		if (!out)
			continue;
		sum_bi += out->getBeta (p.x_id);
		float64_t gi = out->computeGradient (p.x_id, p.y, p.y);
		float64_t gmin = DBL_MAX;
		for (outputhash_t::iterator it = outputs.begin (); it != outputs.end (); ++it)
		{
			if (it->first != p.y && it->second.isSupportVector (p.x_id))
			{
				float64_t g =
					it->second.computeGradient (p.x_id, p.y, it->first);
				if (g < gmin)
					gmin = g;
			}
		}
		sum_sl += CMath::max (0.0, gi - gmin);
	}
	return CMath::max (0.0, computeW2 () + C1 * sum_sl - sum_bi);
}
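
// In symbols, the value returned above is
//   gap = max(0, ||w||^2 + C1 * sum_i max(0, g_{y_i}(x_i) - min_y g_y(x_i)) - sum_i beta_{y_i}(x_i)),
// where the inner minimum runs over competing classes for which x_i is a
// support vector; training stops once this drops below C1 (see train_machine).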

// Number of classes so far
uint32_t CLaRank::getNumOutputs () const
{
	return outputs.size ();
}

// Number of Support Vectors
int32_t CLaRank::getNSV ()
{
	int32_t res = 0;
	for (outputhash_t::const_iterator it = outputs.begin (); it != outputs.end (); ++it)
	{
		float32_t* sv=NULL;
		res += it->second.getSV (sv);
		SG_FREE(sv);
	}
	return res;
}

// Norm of the parameters vector
float64_t CLaRank::computeW2 ()
{
	float64_t res = 0;
	for (uint32_t i = 0; i < patterns.maxcount (); ++i)
	{
		const LaRankPattern & p = patterns[i];
		if (!p.exists ())
			continue;
		for (outputhash_t::iterator it = outputs.begin (); it != outputs.end (); ++it)
			if (it->second.getBeta (p.x_id))
				res += it->second.getBeta (p.x_id) * it->second.computeScore (p.x_id);
	}
	return res;
}

// Compute Dual objective value
float64_t CLaRank::computeDual ()
{
	float64_t res = 0;
	for (uint32_t i = 0; i < patterns.maxcount (); ++i)
	{
		const LaRankPattern & p = patterns[i];
		if (!p.exists ())
			continue;
		LaRankOutput *out = getOutput (p.y);
		if (!out)
			continue;
		res += out->getBeta (p.x_id);
	}
	return res - computeW2 () / 2;
}

LaRankOutput *CLaRank::getOutput (int32_t index)
{
	outputhash_t::iterator it = outputs.find (index);
	return it == outputs.end ()? NULL : &it->second;
}

// IMPORTANT Main SMO optimization step
CLaRank::process_return_t CLaRank::process (const LaRankPattern & pattern, process_type ptype)
{
	process_return_t pro_ret = process_return_t (0, 0);

	/*
	 ** compute gradient and sort
	 */
	std::vector < outputgradient_t > outputgradients;
	outputgradients.reserve (getNumOutputs ());

	std::vector < outputgradient_t > outputscores;
	outputscores.reserve (getNumOutputs ());

	for (outputhash_t::iterator it = outputs.begin (); it != outputs.end (); ++it)
	{
		if (ptype != processOptimize
				|| it->second.isSupportVector (pattern.x_id))
		{
			float64_t g =
				it->second.computeGradient (pattern.x_id, pattern.y, it->first);
			outputgradients.push_back (outputgradient_t (it->first, g));
			if (it->first == pattern.y)
				outputscores.push_back (outputgradient_t (it->first, (1 - g)));
			else
				outputscores.push_back (outputgradient_t (it->first, -g));
		}
	}

	std::sort (outputgradients.begin (), outputgradients.end ());

	/*
	 ** determine the prediction
	 */
	std::sort (outputscores.begin (), outputscores.end ());
	pro_ret.ypred = outputscores[0].output;

	/*
	 ** Find yp (1st part of the pair)
	 */
	outputgradient_t ygp;
	LaRankOutput *outp = NULL;
	uint32_t p;
	for (p = 0; p < outputgradients.size (); ++p)
	{
		outputgradient_t & current = outputgradients[p];
		LaRankOutput *output = getOutput (current.output);
		bool support = ptype == processOptimize || output->isSupportVector (pattern.x_id);
		bool goodclass = current.output == pattern.y;
		if ((!support && goodclass) ||
				(support && output->getBeta (pattern.x_id) < (goodclass ? C1 : 0)))
		{
			ygp = current;
			outp = output;
			break;
		}
	}
	if (p == outputgradients.size ())
		return pro_ret;

	/*
	 ** Find ym (2nd part of the pair)
	 */
	outputgradient_t ygm;
	LaRankOutput *outm = NULL;
	int32_t m;
	for (m = outputgradients.size () - 1; m >= 0; --m)
	{
		outputgradient_t & current = outputgradients[m];
		LaRankOutput *output = getOutput (current.output);
		bool support = ptype == processOptimize || output->isSupportVector (pattern.x_id);
		bool goodclass = current.output == pattern.y;
		if (!goodclass || (support && output->getBeta (pattern.x_id) > 0))
		{
			ygm = current;
			outm = output;
			break;
		}
	}
	if (m < 0)
		return pro_ret;

	/*
	 ** Throw or Insert pattern
	 */
	if ((ygp.gradient - ygm.gradient) < tau)
		return pro_ret;
	if (ptype == processNew)
		patterns.insert (pattern);

	/*
	 ** compute lambda and clip it
	 */
	float64_t kii = outp->getKii (pattern.x_id);
	float64_t lambda = (ygp.gradient - ygm.gradient) / (2 * kii);
	if (ptype == processOptimize || outp->isSupportVector (pattern.x_id))
	{
		float64_t beta = outp->getBeta (pattern.x_id);
		if (ygp.output == pattern.y)
			lambda = CMath::min (lambda, C1 - beta);
		else
			lambda = CMath::min (lambda, fabs (beta));
	}
	else
		lambda = CMath::min (lambda, C1);

	/*
	 ** update the solution
	 */
	outp->update (pattern.x_id, lambda, ygp.gradient);
	outm->update (pattern.x_id, -lambda, ygm.gradient);

	pro_ret.dual_increase = lambda * ((ygp.gradient - ygm.gradient) - lambda * kii);
	return pro_ret;
}
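
// Derivation of the step above: moving lambda of weight from class ym to
// class yp changes the dual objective by
//   lambda * (g_p - g_m) - lambda^2 * k_ii,
// which is maximized at lambda = (g_p - g_m) / (2 * k_ii); the clipping keeps
// beta inside the box constraints of the LaRank dual (at most C1 for the true
// class, at most 0 for the others).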

// ProcessOld
float64_t CLaRank::reprocess ()
{
	if (patterns.size ())
	{
		for (int32_t n = 0; n < 10; ++n)
		{
			process_return_t pro_ret = process (patterns.sample (), processOld);
			if (pro_ret.dual_increase)
				return pro_ret.dual_increase;
		}
	}
	return 0;
}

// Optimize
float64_t CLaRank::optimize ()
{
	float64_t dual_increase = 0;
	if (patterns.size ())
	{
		for (int32_t n = 0; n < 10; ++n)
		{
			process_return_t pro_ret =
				process (patterns.sample(), processOptimize);
			dual_increase += pro_ret.dual_increase;
		}
	}
	return dual_increase;
}

// remove patterns and return the number of patterns that were removed
uint32_t CLaRank::cleanup ()
{
	/*
	for (outputhash_t::iterator it = outputs.begin (); it != outputs.end (); ++it)
		it->second.cleanup ();

	uint32_t res = 0;
	for (uint32_t i = 0; i < patterns.size (); ++i)
	{
		LaRankPattern & p = patterns[i];
		if (p.exists () && !outputs[p.y].isSupportVector (p.x_id))
		{
			patterns.remove (i);
			++res;
		}
	}
	return res;
	*/
	return 0;
}
