SHOGUN v1.1.0
Kernel.h
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 1999-2009 Soeren Sonnenburg
 * Written (W) 1999-2008 Gunnar Raetsch
 * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
 */

#ifndef _KERNEL_H___
#define _KERNEL_H___

#include <shogun/lib/common.h>
#include <shogun/lib/Signal.h>
#include <shogun/io/File.h>
#include <shogun/mathematics/Math.h>
#include <shogun/features/FeatureTypes.h>
#include <shogun/base/SGObject.h>
#include <shogun/features/Features.h>
#include <shogun/kernel/KernelNormalizer.h>

#include <vector>

namespace shogun
{

class CFile;
class CFeatures;
class CKernelNormalizer;

#ifdef USE_SHORTREAL_KERNELCACHE
/// kernel cache element
typedef float32_t KERNELCACHE_ELEM;
#else
/// kernel cache element
typedef float64_t KERNELCACHE_ELEM;
#endif

/// kernel cache index
typedef int64_t KERNELCACHE_IDX;

/// optimization type
enum EOptimizationType
{
    FASTBUTMEMHUNGRY,
    SLOWBUTMEMEFFICIENT
};

/// kernel type
enum EKernelType
{
    K_UNKNOWN = 0,
    K_LINEAR = 10,
    K_POLY = 20,
    K_GAUSSIAN = 30,
    K_SALZBERG = 41,
    K_POLYMATCH = 100,
    K_ALIGNMENT = 110,
    K_COMBINED = 140,
    K_AUC = 150,
    K_CUSTOM = 160,
    K_SIGMOID = 170,
    K_CHI2 = 180,
    K_DIAG = 190,
    K_CONST = 200,
    K_DISTANCE = 220,
    K_OLIGO = 250,
    K_MATCHWORD = 260,
    K_TPPK = 270,
    K_WAVELET = 310,
    K_WAVE = 320,
    K_CAUCHY = 330,
    K_TSTUDENT = 340,
    K_SPHERICAL = 380,
    K_SPLINE = 390,
    K_ANOVA = 400,
    K_POWER = 410,
    K_LOG = 420,
    K_CIRCULAR = 430,
    K_BESSEL = 460
};

/// kernel property
enum EKernelProperty
{
    KP_NONE = 0,
    KP_LINADD = 1,          // kernels that can be optimized via normal updates w + dw
    KP_KERNCOMBINATION = 2, // kernels that are in fact a linear combination of subkernels K = \sum_i b_i * K_i
    KP_BATCHEVALUATION = 4  // kernels that can generate normals on the fly in linadd and process batches faster / more memory-efficiently than single examples
};

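/* Illustrative sketch (added commentary, not part of the original header):
 * properties is a bit field, so a kernel may carry several of these flags
 * at once and callers test them individually, e.g.
 *
 *   if (kern->has_property(KP_LINADD) && kern->get_is_initialized())
 *       v=kern->compute_optimized(idx);   // fast linadd path
 *   else
 *       v=kern->kernel(sv_idx, idx);      // generic evaluation
 *
 * where kern, idx and sv_idx are hypothetical variables.
 */
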
#ifndef DOXYGEN_SHOULD_SKIP_THIS

/** parameter struct handed to the get_kernel_matrix worker threads */
template <class T> struct K_THREAD_PARAM
{
    /// kernel object to evaluate
    CKernel* kernel;
    /// first row this thread computes
    int32_t start;
    /// one past the last row this thread computes
    int32_t end;
    /// overall start, counted in matrix elements (int64_t so matrices
    /// with more than 2^31 elements do not overflow)
    int64_t total_start;
    /// overall end, counted in matrix elements
    int64_t total_end;
    /// number of rows of the full matrix
    int32_t m;
    /// number of columns of the full matrix
    int32_t n;
    /// buffer receiving the result
    T* result;
    /// whether the matrix is symmetric, i.e. k(i,j)=k(j,i)
    bool symmetric;
    /// whether this thread reports progress
    bool verbose;
};
#endif

class CSVM;

/** @brief Base class for all kernels.
 *
 * A kernel computes a similarity k(x,x') between examples of the feature
 * objects attached as left-hand side (lhs) and right-hand side (rhs).
 */
class CKernel : public CSGObject
{
    friend class CDiceKernelNormalizer;

    public:

        /** default constructor */
        CKernel();

        /** constructor
         * @param size cache size in MB
         */
        CKernel(int32_t size);

        /** constructor
         * @param l features for left-hand side
         * @param r features for right-hand side
         * @param size cache size in MB
         */
        CKernel(CFeatures* l, CFeatures* r, int32_t size);

        virtual ~CKernel();

        /** evaluate kernel between vector idx_a of lhs and vector idx_b
         * of rhs; the value is passed through the current normalizer
         */
        inline float64_t kernel(int32_t idx_a, int32_t idx_b)
        {
            if (idx_a<0 || idx_b<0 || idx_a>=num_lhs || idx_b>=num_rhs)
            {
                SG_ERROR("Index out of Range: idx_a=%d/%d idx_b=%d/%d\n",
                        idx_a, num_lhs, idx_b, num_rhs);
            }

            return normalizer->normalize(compute(idx_a, idx_b), idx_a, idx_b);
        }

        /** get the full kernel matrix (float64_t specialization) */
        SGMatrix<float64_t> get_kernel_matrix()
        {
            return get_kernel_matrix<float64_t>();
        }

        /** get column j of the kernel matrix */
        virtual SGVector<float64_t> get_kernel_col(int32_t j)
        {
            // a column has one entry per lhs vector
            SGVector<float64_t> col(num_lhs);

            for (int32_t i=0; i!=num_lhs; i++)
                col[i] = kernel(i,j);

            return col;
        }

        /** get row i of the kernel matrix */
        virtual SGVector<float64_t> get_kernel_row(int32_t i)
        {
            // a row has one entry per rhs vector
            SGVector<float64_t> row(num_rhs);

            for (int32_t j=0; j!=num_rhs; j++)
                row[j] = kernel(i,j);

            return row;
        }

        /** compute the full kernel matrix; uses multiple threads when
         * parallel->get_num_threads() > 1
         */
        template <class T>
        SGMatrix<T> get_kernel_matrix()
        {
            T* result = NULL;

            if (!has_features())
                SG_ERROR("no features assigned to kernel\n");

            int32_t m=get_num_vec_lhs();
            int32_t n=get_num_vec_rhs();

            int64_t total_num = int64_t(m)*n;

            // if lhs == rhs and sizes match assume k(i,j)=k(j,i)
            bool symmetric=(lhs && lhs==rhs && m==n);

            SG_DEBUG("returning kernel matrix of size %dx%d\n", m, n);

            result=SG_MALLOC(T, total_num);

            int32_t num_threads=parallel->get_num_threads();
            if (num_threads < 2)
            {
                K_THREAD_PARAM<T> params;
                params.kernel=this;
                params.result=result;
                params.start=0;
                params.end=m;
                params.total_start=0;
                params.total_end=total_num;
                params.n=n;
                params.m=m;
                params.symmetric=symmetric;
                params.verbose=true;
                get_kernel_matrix_helper<T>((void*) &params);
            }
            else
            {
                pthread_t* threads = SG_MALLOC(pthread_t, num_threads-1);
                K_THREAD_PARAM<T>* params = SG_MALLOC(K_THREAD_PARAM<T>, num_threads);
                int64_t step=total_num/num_threads;

                int32_t t;

                // spawn num_threads-1 workers; the main thread takes the last chunk
                num_threads--;
                for (t=0; t<num_threads; t++)
                {
                    params[t].kernel = this;
                    params[t].result = result;
                    params[t].start = compute_row_start(t*step, n, symmetric);
                    params[t].end = compute_row_start((t+1)*step, n, symmetric);
                    params[t].total_start=t*step;
                    params[t].total_end=(t+1)*step;
                    params[t].n=n;
                    params[t].m=m;
                    params[t].symmetric=symmetric;
                    params[t].verbose=false;

                    int code=pthread_create(&threads[t], NULL,
                            CKernel::get_kernel_matrix_helper<T>, (void*)&params[t]);

                    if (code != 0)
                    {
                        SG_WARNING("Thread creation failed (thread %d of %d) "
                                "with error:'%s'\n", t, num_threads, strerror(code));
                        num_threads=t;
                        break;
                    }
                }

                params[t].kernel = this;
                params[t].result = result;
                params[t].start = compute_row_start(t*step, n, symmetric);
                params[t].end = m;
                params[t].total_start=t*step;
                params[t].total_end=total_num;
                params[t].n=n;
                params[t].m=m;
                params[t].symmetric=symmetric;
                params[t].verbose=true;
                get_kernel_matrix_helper<T>(&params[t]);

                for (t=0; t<num_threads; t++)
                {
                    if (pthread_join(threads[t], NULL) != 0)
                        SG_WARNING("pthread_join of thread %d/%d failed\n", t, num_threads);
                }

                SG_FREE(params);
                SG_FREE(threads);
            }

            SG_DONE();

            return SGMatrix<T>(result,m,n,true);
        }

        /** initialize kernel, e.g. set up and verify the features
         * @param lhs features for left-hand side
         * @param rhs features for right-hand side
         * @return true on success
         */
        virtual bool init(CFeatures* lhs, CFeatures* rhs);

        /** set the current kernel normalizer */
        virtual bool set_normalizer(CKernelNormalizer* normalizer);

        /** obtain the current kernel normalizer */
        virtual CKernelNormalizer* get_normalizer();

        /** initialize the current kernel normalizer */
        virtual bool init_normalizer();

        /** clean up kernel, i.e. free all resources */
        virtual void cleanup();

        /** load kernel init data from a file */
        void load(CFile* loader);

        /** save kernel data to a file */
        void save(CFile* writer);

        /** get left-hand side features (SG_REF'd) */
        inline CFeatures* get_lhs() { SG_REF(lhs); return lhs; }

        /** get right-hand side features (SG_REF'd) */
        inline CFeatures* get_rhs() { SG_REF(rhs); return rhs; }

        /** get number of vectors of the lhs features */
        virtual inline int32_t get_num_vec_lhs()
        {
            return num_lhs;
        }

        /** get number of vectors of the rhs features */
        virtual inline int32_t get_num_vec_rhs()
        {
            return num_rhs;
        }

        /** check whether features have been assigned to lhs and rhs */
        virtual inline bool has_features()
        {
            return lhs && rhs;
        }

        /** check whether lhs and rhs are the same features object */
        inline bool get_lhs_equals_rhs()
        {
            return lhs_equals_rhs;
        }

        /** remove lhs and rhs from kernel */
        virtual void remove_lhs_and_rhs();

        /** remove lhs from kernel */
        virtual void remove_lhs();

        /** remove rhs from kernel */
        virtual void remove_rhs();

        /** abstract: return the kernel's type (see EKernelType) */
        virtual EKernelType get_kernel_type()=0;

        /** abstract: return the feature type the kernel operates on */
        virtual EFeatureType get_feature_type()=0;

        /** abstract: return the feature class the kernel operates on */
        virtual EFeatureClass get_feature_class()=0;

        /** set the size of the kernel cache
         * @param size cache size in MB
         */
        inline void set_cache_size(int32_t size)
        {
            cache_size = size;
        }

        /** get the size of the kernel cache in MB */
        inline int32_t get_cache_size() { return cache_size; }

        /** list kernel parameters */
        void list_kernel();

        /** check whether the kernel has a given property */
        inline bool has_property(EKernelProperty p) { return (properties & p) != 0; }

        /** for optimizable kernels, reset the current normal vector */
        virtual void clear_normal();

        /** for optimizable kernels, add vector_idx with the given weight
         * to the normal vector
         */
        virtual void add_to_normal(int32_t vector_idx, float64_t weight);

        /** get optimization type */
        inline EOptimizationType get_optimization_type() { return opt_type; }

        /** set optimization type */
        virtual inline void set_optimization_type(EOptimizationType t) { opt_type=t; }

        /** check whether optimization is initialized */
        inline bool get_is_initialized() { return optimization_initialized; }

        /** initialize optimization with a set of support vectors and
         * their weights
         * @param count number of support vectors
         * @param IDX support vector indices
         * @param weights support vector weights
         */
        virtual bool init_optimization(
            int32_t count, int32_t *IDX, float64_t *weights);

        /** delete optimization */
        virtual bool delete_optimization();

        /** initialize optimization from a trained SVM's support vectors
         * and alphas
         */
        bool init_optimization_svm(CSVM* svm);

        /** compute the optimized (linadd) kernel value for one example */
        virtual float64_t compute_optimized(int32_t vector_idx);

        /** compute kernel values for a batch of examples
         * @param num_vec number of examples
         * @param vec_idx example indices
         * @param target output buffer
         * @param num_suppvec number of support vectors
         * @param IDX support vector indices
         * @param alphas support vector weights
         * @param factor scaling factor
         */
        virtual void compute_batch(
            int32_t num_vec, int32_t* vec_idx, float64_t* target,
            int32_t num_suppvec, int32_t* IDX, float64_t* alphas,
            float64_t factor=1.0);

        /** get combined kernel weight */
        inline float64_t get_combined_kernel_weight() { return combined_kernel_weight; }

        /** set combined kernel weight */
        inline void set_combined_kernel_weight(float64_t nw) { combined_kernel_weight=nw; }

        /** get number of subkernels */
        virtual int32_t get_num_subkernels();

        /** compute the per-subkernel contributions for one example */
        virtual void compute_by_subkernel(
            int32_t vector_idx, float64_t* subkernel_contrib);

        /** get subkernel weights */
        virtual const float64_t* get_subkernel_weights(int32_t& num_weights);

        /** set subkernel weights */
        virtual void set_subkernel_weights(SGVector<float64_t> weights);

    protected:
        /** set a kernel property flag */
        inline void set_property(EKernelProperty p)
        {
            properties |= p;
        }

        /** unset a kernel property flag */
        inline void unset_property(EKernelProperty p)
        {
            // (properties | p) ^ p clears exactly the bits set in p
            properties &= (properties | p) ^ p;
        }

        /** mark optimization as (un)initialized */
        inline void set_is_initialized(bool p_init) { optimization_initialized=p_init; }

        /** compute the (unnormalized) kernel value between vector x of
         * lhs and vector y of rhs; implemented by each actual kernel
         */
        virtual float64_t compute(int32_t x, int32_t y)=0;

        /** compute the first row a worker thread should process, such
         * that all threads handle roughly the same number of elements
         * @param offs offset into the full matrix, in elements
         * @param n number of columns
         * @param symmetric whether the matrix is symmetric
         */
        int32_t compute_row_start(int64_t offs, int32_t n, bool symmetric)
        {
            int32_t i_start;

            if (symmetric)
                i_start=(int32_t) CMath::floor(n-CMath::sqrt(CMath::sq((float64_t) n)-offs));
            else
                i_start=(int32_t) (offs/int64_t(n));

            return i_start;
        }

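        /* Added note (sketch of the reasoning behind the symmetric
         * branch): for a symmetric n x n matrix only entries with j >= i
         * are computed, and each off-diagonal entry fills two positions
         * of the result, so rows 0..i-1 account for offs = 2*n*i - i^2
         * elements.  Solving (n - i)^2 = n^2 - offs for i gives
         * i = n - sqrt(n^2 - offs), the expression used above; each
         * thread thus receives a row range covering roughly the same
         * number of matrix elements.
         */
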
        /** worker routine that fills (part of) the kernel matrix
         * @param p K_THREAD_PARAM describing the assigned chunk
         */
        template <class T>
        static void* get_kernel_matrix_helper(void* p)
        {
            K_THREAD_PARAM<T>* params= (K_THREAD_PARAM<T>*) p;
            int32_t i_start=params->start;
            int32_t i_end=params->end;
            CKernel* k=params->kernel;
            T* result=params->result;
            bool symmetric=params->symmetric;
            int32_t n=params->n;
            int32_t m=params->m;
            bool verbose=params->verbose;
            int64_t total_start=params->total_start;
            int64_t total_end=params->total_end;
            int64_t total=total_start;

            for (int32_t i=i_start; i<i_end; i++)
            {
                int32_t j_start=0;

                if (symmetric)
                    j_start=i;

                for (int32_t j=j_start; j<n; j++)
                {
                    float64_t v=k->kernel(i,j);
                    result[i+j*m]=v; // column-major storage

                    if (symmetric && i!=j)
                        result[j+i*m]=v;

                    if (verbose)
                    {
                        total++;

                        if (symmetric && i!=j)
                            total++;

                        if (total%100 == 0)
                            k->SG_PROGRESS(total, total_start, total_end);

                        if (CSignal::cancel_computations())
                            break;
                    }
                }
            }

            return NULL;
        }

        /** post-initialization that can only run after a serialized
         * object has been fully loaded
         */
        virtual void load_serializable_post() throw (ShogunException);

        /** actions to take directly before serialization */
        virtual void save_serializable_pre() throw (ShogunException);

        /** actions to take directly after serialization */
        virtual void save_serializable_post() throw (ShogunException);

        /** register parameters with the framework */
        virtual void register_params();

    private:
        /** common initialization shared by all constructors */
        void init();

    protected:
        /// cache size in MB
        int32_t cache_size;

        /// feature vectors on the left-hand side
        CFeatures* lhs;
        /// feature vectors on the right-hand side
        CFeatures* rhs;

        /// true if lhs and rhs are the same features object
        bool lhs_equals_rhs;

        /// number of feature vectors on the left-hand side
        int32_t num_lhs;
        /// number of feature vectors on the right-hand side
        int32_t num_rhs;

        /// this kernel's weight within a combined kernel
        float64_t combined_kernel_weight;

        /// whether optimization is initialized
        bool optimization_initialized;
        /// optimization type (FASTBUTMEMHUNGRY or SLOWBUTMEMEFFICIENT)
        EOptimizationType opt_type;

        /// kernel properties (bit field of EKernelProperty flags)
        uint64_t properties;

        /// normalizer applied to each kernel value in kernel()
        CKernelNormalizer* normalizer;
};

}
#endif /* _KERNEL_H___ */
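/* Usage sketch (illustrative only, not part of Kernel.h): how a concrete
 * CKernel subclass is typically set up and evaluated.  It assumes
 * CGaussianKernel and CSimpleFeatures from the rest of the toolbox; exact
 * constructor signatures may differ between versions, and all variable
 * names are hypothetical.
 *
 *   #include <shogun/features/SimpleFeatures.h>
 *   #include <shogun/kernel/GaussianKernel.h>
 *
 *   using namespace shogun;
 *
 *   SGMatrix<float64_t> data(2, 100);            // 2 dims x 100 vectors
 *   // ... fill data ...
 *   CSimpleFeatures<float64_t>* feats=
 *       new CSimpleFeatures<float64_t>(data);
 *   CKernel* kern=new CGaussianKernel(10, 2.0);  // 10 MB cache, width 2.0
 *   kern->init(feats, feats);                    // lhs == rhs
 *
 *   float64_t k01=kern->kernel(0, 1);            // normalized k(x_0, x_1)
 *   SGMatrix<float64_t> K=kern->get_kernel_matrix();
 *
 *   SG_UNREF(kern);
 */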
