SHOGUN  v3.0.1
Kernel.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 1999-2009 Soeren Sonnenburg
8  * Written (W) 1999-2008 Gunnar Raetsch
9  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
10  */
11 
12 #include <shogun/lib/config.h>
13 #include <shogun/lib/common.h>
14 #include <shogun/io/SGIO.h>
15 #include <shogun/io/File.h>
16 #include <shogun/lib/Time.h>
17 #include <shogun/lib/Signal.h>
18 
19 #include <shogun/base/Parallel.h>
20 
21 #include <shogun/kernel/Kernel.h>
22 #include <shogun/kernel/normalizer/IdentityKernelNormalizer.h>
23 #include <shogun/features/Features.h>
24 #include <shogun/base/Parameter.h>
25 
26 #include <shogun/classifier/svm/SVM.h>
27 
28 #include <string.h>
29 #include <unistd.h>
30 #include <math.h>
31 
32 #ifdef HAVE_PTHREAD
33 #include <pthread.h>
34 #endif
35 
36 using namespace shogun;
37 
38 CKernel::CKernel() : CSGObject()
39 {
40  init();
41  register_params();
42 }
43 
44 CKernel::CKernel(int32_t size) : CSGObject()
45 {
46  init();
47 
48  if (size<10)
49  size=10;
50 
51  cache_size=size;
52  register_params();
53 }
54 
55 
56 CKernel::CKernel(CFeatures* p_lhs, CFeatures* p_rhs, int32_t size) : CSGObject()
57 {
58  init();
59 
60  if (size<10)
61  size=10;
62 
63  cache_size=size;
64 
65  set_normalizer(new CIdentityKernelNormalizer());
66  init(p_lhs, p_rhs);
67  register_params();
68 }
69 
70 CKernel::~CKernel()
71 {
72  if (get_is_initialized())
73  SG_ERROR("Kernel still initialized on destruction.\n")
74 
75  remove_lhs_and_rhs();
76  SG_UNREF(normalizer);
77 
78  SG_INFO("Kernel deleted (%p).\n", this)
79 }
80 
81 
82 
83 bool CKernel::init(CFeatures* l, CFeatures* r)
84 {
85  /* make sure that features are not deleted if same ones are used */
86  SG_REF(l);
87  SG_REF(r);
88 
89  //make sure features were indeed supplied
90  REQUIRE(l, "CKernel::init(%p, %p): Left hand side features required!\n", l, r)
91  REQUIRE(r, "CKernel::init(%p, %p): Right hand side features required!\n", l, r)
92 
93  //make sure features are compatible
94  ASSERT(l->get_feature_class()==r->get_feature_class())
95  ASSERT(l->get_feature_type()==r->get_feature_type())
96 
97  //remove references to previous features
98  remove_lhs_and_rhs();
99 
100  //increase reference counts
101  SG_REF(l);
102  if (l==r)
103  lhs_equals_rhs=true;
104  else // l!=r
105  SG_REF(r);
106 
107  lhs=l;
108  rhs=r;
109 
110  ASSERT(!num_lhs || num_lhs==l->get_num_vectors())
111  ASSERT(!num_rhs || num_rhs==r->get_num_vectors())
112 
113  num_lhs=l->get_num_vectors();
114  num_rhs=r->get_num_vectors();
115 
116  /* unref "safety" refs from beginning */
117  SG_UNREF(r);
118  SG_UNREF(l);
119 
120  SG_DEBUG("leaving CKernel::init(%p, %p)\n", l, r)
121  return true;
122 }
123 
124 bool CKernel::set_normalizer(CKernelNormalizer* n)
125 {
126  SG_REF(n);
127  if (lhs && rhs)
128  n->init(this);
129 
130  SG_UNREF(normalizer);
131  normalizer=n;
132 
133  return (normalizer!=NULL);
134 }
135 
136 CKernelNormalizer* CKernel::get_normalizer()
137 {
138  SG_REF(normalizer);
139  return normalizer;
140 }
141 
142 bool CKernel::init_normalizer()
143 {
144  return normalizer->init(this);
145 }
146 
147 void CKernel::cleanup()
148 {
149  remove_lhs_and_rhs();
150 }
151 
152 
153 
154 void CKernel::load(CFile* loader)
155 {
156  SG_SET_LOCALE_C;
157  SG_RESET_LOCALE;
158 }
159 
160 void CKernel::save(CFile* writer)
161 {
162  SGMatrix<float64_t> k_matrix=get_kernel_matrix<float64_t>();
163  SG_SET_LOCALE_C;
164  writer->set_matrix(k_matrix.matrix, k_matrix.num_rows, k_matrix.num_cols);
165  SG_RESET_LOCALE;
166 }
167 
168 void CKernel::remove_lhs_and_rhs()
169 {
170  SG_DEBUG("entering CKernel::remove_lhs_and_rhs\n")
171  if (rhs!=lhs)
172  SG_UNREF(rhs);
173  rhs = NULL;
174  num_rhs=0;
175 
176  SG_UNREF(lhs);
177  lhs = NULL;
178  num_lhs=0;
179  lhs_equals_rhs=false;
180 
181 
182  SG_DEBUG("leaving CKernel::remove_lhs_and_rhs\n")
183 }
184 
185 void CKernel::remove_lhs()
186 {
187  if (rhs==lhs)
188  rhs=NULL;
189  SG_UNREF(lhs);
190  lhs = NULL;
191  num_lhs=0;
192  lhs_equals_rhs=false;
193 
194 }
195 
196 /// takes all necessary steps if the rhs is removed from kernel
197 void CKernel::remove_rhs()
198 {
199  if (rhs!=lhs)
200  SG_UNREF(rhs);
201  rhs = NULL;
202  num_rhs=0;
203  lhs_equals_rhs=false;
204 
205 
206 }
207 
208 #define ENUM_CASE(n) case n: SG_INFO(#n " ") break;
209 
210 void CKernel::list_kernel()
211 {
212  SG_INFO("%p - \"%s\" weight=%1.2f OPT:%s", this, get_name(),
213  get_combined_kernel_weight(),
214  get_optimization_type()==FASTBUTMEMHUNGRY ? "FASTBUTMEMHUNGRY" :
215  "SLOWBUTMEMEFFICIENT");
216 
217  switch (get_kernel_type())
218  {
278  }
279 
280  switch (get_feature_class())
281  {
292  ENUM_CASE(C_WD)
302  }
303 
304  switch (get_feature_type())
305  {
320  }
321  SG_INFO("\n")
322 }
323 #undef ENUM_CASE
324 
325 bool CKernel::init_optimization(
326  int32_t count, int32_t *IDX, float64_t * weights)
327 {
328  SG_ERROR("kernel does not support linadd optimization\n")
329  return false ;
330 }
331 
332 bool CKernel::delete_optimization()
333 {
334  SG_ERROR("kernel does not support linadd optimization\n")
335  return false;
336 }
337 
338 float64_t CKernel::compute_optimized(int32_t vector_idx)
339 {
340  SG_ERROR("kernel does not support linadd optimization\n")
341  return 0;
342 }
343 
344 void CKernel::compute_batch(
345  int32_t num_vec, int32_t* vec_idx, float64_t* target, int32_t num_suppvec,
346  int32_t* IDX, float64_t* weights, float64_t factor)
347 {
348  SG_ERROR("kernel does not support batch computation\n")
349 }
350 
351 void CKernel::add_to_normal(int32_t vector_idx, float64_t weight)
352 {
353  SG_ERROR("kernel does not support linadd optimization, add_to_normal not implemented\n")
354 }
355 
356 void CKernel::clear_normal()
357 {
358  SG_ERROR("kernel does not support linadd optimization, clear_normal not implemented\n")
359 }
360 
361 int32_t CKernel::get_num_subkernels()
362 {
363  return 1;
364 }
365 
366 void CKernel::compute_by_subkernel(
367  int32_t vector_idx, float64_t * subkernel_contrib)
368 {
369  SG_ERROR("kernel compute_by_subkernel not implemented\n")
370 }
371 
372 const float64_t* CKernel::get_subkernel_weights(int32_t &num_weights)
373 {
374  num_weights=1 ;
375  return &combined_kernel_weight ;
376 }
377 
378 SGVector<float64_t> CKernel::get_subkernel_weights()
379 {
380  int num_weights = 1;
381  const float64_t* weight = get_subkernel_weights(num_weights);
382  return SGVector<float64_t>(const_cast<float64_t*>(weight),1,false);
383 }
384 
385 void CKernel::set_subkernel_weights(SGVector<float64_t> weights)
386 {
387  ASSERT(weights.vector)
388  if (weights.vlen!=1)
389  SG_ERROR("number of subkernel weights should be one ...\n")
390 
391  combined_kernel_weight = weights.vector[0] ;
392 }
393 
394 CKernel* CKernel::obtain_from_generic(CSGObject* kernel)
395 {
396  if (kernel)
397  {
398  CKernel* casted=dynamic_cast<CKernel*>(kernel);
399  REQUIRE(casted, "CKernel::obtain_from_generic(): Error, provided object"
400  " of class \"%s\" is not a subclass of CKernel!\n",
401  kernel->get_name());
402  return casted;
403  }
404  else
405  return NULL;
406 }
407 
408 bool CKernel::init_optimization_svm(CSVM* svm)
409 {
410  int32_t num_suppvec=svm->get_num_support_vectors();
411  int32_t* sv_idx=SG_MALLOC(int32_t, num_suppvec);
412  float64_t* sv_weight=SG_MALLOC(float64_t, num_suppvec);
413 
414  for (int32_t i=0; i<num_suppvec; i++)
415  {
416  sv_idx[i] = svm->get_support_vector(i);
417  sv_weight[i] = svm->get_alpha(i);
418  }
419  bool ret = init_optimization(num_suppvec, sv_idx, sv_weight);
420 
421  SG_FREE(sv_idx);
422  SG_FREE(sv_weight);
423  return ret;
424 }
425 
426 void CKernel::load_serializable_post() throw (ShogunException)
427 {
428  CSGObject::load_serializable_post();
429  if (lhs_equals_rhs)
430  rhs=lhs;
431 }
432 
433 void CKernel::save_serializable_pre() throw (ShogunException)
434 {
435  CSGObject::save_serializable_pre();
436 
437  if (lhs_equals_rhs)
438  rhs=NULL;
439 }
440 
441 void CKernel::save_serializable_post() throw (ShogunException)
442 {
443  CSGObject::save_serializable_post();
444 
445  if (lhs_equals_rhs)
446  rhs=lhs;
447 }
448 
449 void CKernel::register_params() {
450  SG_ADD(&cache_size, "cache_size",
451  "Cache size in MB.", MS_NOT_AVAILABLE);
452  SG_ADD((CSGObject**) &lhs, "lhs",
453  "Feature vectors to occur on left hand side.", MS_NOT_AVAILABLE);
454  SG_ADD((CSGObject**) &rhs, "rhs",
455  "Feature vectors to occur on right hand side.", MS_NOT_AVAILABLE);
456  SG_ADD(&lhs_equals_rhs, "lhs_equals_rhs",
457  "If features on lhs are the same as on rhs.", MS_NOT_AVAILABLE);
458  SG_ADD(&num_lhs, "num_lhs", "Number of feature vectors on left hand side.",
459  MS_NOT_AVAILABLE);
460  SG_ADD(&num_rhs, "num_rhs", "Number of feature vectors on right hand side.",
461  MS_NOT_AVAILABLE);
462  SG_ADD(&combined_kernel_weight, "combined_kernel_weight",
463  "Combined kernel weight.", MS_AVAILABLE);
464  SG_ADD(&optimization_initialized, "optimization_initialized",
465  "Optimization is initialized.", MS_NOT_AVAILABLE);
466  SG_ADD((machine_int_t*) &opt_type, "opt_type",
467  "Optimization type.", MS_NOT_AVAILABLE);
468  SG_ADD(&properties, "properties", "Kernel properties.", MS_NOT_AVAILABLE);
469  SG_ADD((CSGObject**) &normalizer, "normalizer", "Normalize the kernel.",
470  MS_AVAILABLE);
471 }
472 
473 
474 void CKernel::init()
475 {
476  cache_size=10;
477  kernel_matrix=NULL;
478  lhs=NULL;
479  rhs=NULL;
480  num_lhs=0;
481  num_rhs=0;
482  lhs_equals_rhs=false;
483  combined_kernel_weight=1;
484  optimization_initialized=false;
485  opt_type=FASTBUTMEMHUNGRY;
486  properties=KP_NONE;
487  normalizer=NULL;
488 
489 
490 
491  set_normalizer(new CIdentityKernelNormalizer());
492 }
493 
494 namespace shogun
495 {
497 template <class T> struct K_THREAD_PARAM
498 {
500  CKernel* kernel;
502  int32_t start;
504  int32_t end;
506  int32_t total_start;
508  int32_t total_end;
510  int32_t m;
512  int32_t n;
514  T* result;
516  bool symmetric;
518  bool verbose;
519 };
520 }
521 
522 template <class T> void* CKernel::get_kernel_matrix_helper(void* p)
523 {
524  K_THREAD_PARAM<T>* params= (K_THREAD_PARAM<T>*) p;
525  int32_t i_start=params->start;
526  int32_t i_end=params->end;
527  CKernel* k=params->kernel;
528  T* result=params->result;
529  bool symmetric=params->symmetric;
530  int32_t n=params->n;
531  int32_t m=params->m;
532  bool verbose=params->verbose;
533  int64_t total_start=params->total_start;
534  int64_t total_end=params->total_end;
535  int64_t total=total_start;
536 
537  for (int32_t i=i_start; i<i_end; i++)
538  {
539  int32_t j_start=0;
540 
541  if (symmetric)
542  j_start=i;
543 
544  for (int32_t j=j_start; j<n; j++)
545  {
546  float64_t v=k->kernel(i,j);
547  result[i+j*m]=v;
548 
549  if (symmetric && i!=j)
550  result[j+i*m]=v;
551 
552  if (verbose)
553  {
554  total++;
555 
556  if (symmetric && i!=j)
557  total++;
558 
559  if (total%100 == 0)
560  SG_OBJ_PROGRESS(k, total, total_start, total_end)
561 
562  if (CSignal::cancel_computations())
563  break;
564  }
565  }
566 
567  }
568 
569  return NULL;
570 }
571 
572 template <class T>
573 SGMatrix<T> CKernel::get_kernel_matrix()
574 {
575  T* result = NULL;
576 
577  REQUIRE(has_features(), "no features assigned to kernel\n")
578 
579  int32_t m=get_num_vec_lhs();
580  int32_t n=get_num_vec_rhs();
581 
582  int64_t total_num = int64_t(m)*n;
583 
584  // if lhs == rhs and sizes match assume k(i,j)=k(j,i)
585  bool symmetric= (lhs && lhs==rhs && m==n);
586 
587  SG_DEBUG("returning kernel matrix of size %dx%d\n", m, n)
588 
589  result=SG_MALLOC(T, total_num);
590 
591  int32_t num_threads=parallel->get_num_threads();
592  if (num_threads < 2)
593  {
594  K_THREAD_PARAM<T> params;
595  params.kernel=this;
596  params.result=result;
597  params.start=0;
598  params.end=m;
599  params.total_start=0;
600  params.total_end=total_num;
601  params.n=n;
602  params.m=m;
603  params.symmetric=symmetric;
604  params.verbose=true;
605  get_kernel_matrix_helper<T>((void*) &params);
606  }
607  else
608  {
609  pthread_t* threads = SG_MALLOC(pthread_t, num_threads-1);
610  K_THREAD_PARAM<T>* params = SG_MALLOC(K_THREAD_PARAM<T>, num_threads);
611  int64_t step= total_num/num_threads;
612 
613  int32_t t;
614 
615  num_threads--;
616  for (t=0; t<num_threads; t++)
617  {
618  params[t].kernel = this;
619  params[t].result = result;
620  params[t].start = compute_row_start(t*step, n, symmetric);
621  params[t].end = compute_row_start((t+1)*step, n, symmetric);
622  params[t].total_start=t*step;
623  params[t].total_end=(t+1)*step;
624  params[t].n=n;
625  params[t].m=m;
626  params[t].symmetric=symmetric;
627  params[t].verbose=false;
628 
629  int code=pthread_create(&threads[t], NULL,
630  CKernel::get_kernel_matrix_helper<T>, (void*)&params[t]);
631 
632  if (code != 0)
633  {
634  SG_WARNING("Thread creation failed (thread %d of %d) "
635  "with error:'%s'\n",t, num_threads, strerror(code));
636  num_threads=t;
637  break;
638  }
639  }
640 
641  params[t].kernel = this;
642  params[t].result = result;
643  params[t].start = compute_row_start(t*step, n, symmetric);
644  params[t].end = m;
645  params[t].total_start=t*step;
646  params[t].total_end=total_num;
647  params[t].n=n;
648  params[t].m=m;
649  params[t].symmetric=symmetric;
650  params[t].verbose=true;
651  get_kernel_matrix_helper<T>(&params[t]);
652 
653  for (t=0; t<num_threads; t++)
654  {
655  if (pthread_join(threads[t], NULL) != 0)
656  SG_WARNING("pthread_join of thread %d/%d failed\n", t, num_threads)
657  }
658 
659  SG_FREE(params);
660  SG_FREE(threads);
661  }
662 
663  SG_DONE()
664 
665  return SGMatrix<T>(result,m,n,true);
666 }
667 
668 template SGMatrix<float64_t> CKernel::get_kernel_matrix<float64_t>();
669 template SGMatrix<float32_t> CKernel::get_kernel_matrix<float32_t>();
670 
671 template void* CKernel::get_kernel_matrix_helper<float64_t>(void* p);
672 template void* CKernel::get_kernel_matrix_helper<float32_t>(void* p);
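A minimal usage sketch (not part of Kernel.cpp; it assumes CGaussianKernel and CDenseFeatures<float64_t> from the same SHOGUN 3.0 release): a concrete kernel is created, bound to feature objects via CKernel::init(), and the dense kernel matrix is then retrieved with get_kernel_matrix().

#include <shogun/base/init.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/kernel/GaussianKernel.h>

using namespace shogun;

int main()
{
	init_shogun_with_defaults();

	// 3 two-dimensional feature vectors, stored column-wise
	SGMatrix<float64_t> data(2, 3);
	for (index_t i=0; i<6; i++)
		data.matrix[i]=i;

	CDenseFeatures<float64_t>* feats=new CDenseFeatures<float64_t>(data);

	// Gaussian kernel with a 10 MB cache and width 2.0;
	// init() references the features on both sides (lhs==rhs)
	CGaussianKernel* kernel=new CGaussianKernel(10, 2.0);
	kernel->init(feats, feats);

	// dense m x n matrix of kernel evaluations
	SGMatrix<float64_t> km=kernel->get_kernel_matrix();

	SG_UNREF(kernel);
	exit_shogun();
	return 0;
}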
SHOGUN Machine Learning Toolbox - Documentation