SHOGUN  v1.1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
WeightedDegreeStringKernel.h
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 1999-2009 Soeren Sonnenburg
8  * Written (W) 1999-2008 Gunnar Raetsch
9  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
10  */
11 
12 #ifndef _WEIGHTEDDEGREESTRINGKERNEL_H___
13 #define _WEIGHTEDDEGREESTRINGKERNEL_H___
14 
15 #include <shogun/lib/common.h>
16 #include <shogun/lib/Trie.h>
20 
21 namespace shogun
22 {
23 
26 {
27  E_WD=0,
29 
36 };
37 
38 
54 {
55  public:
56 
61 
62 
69 
76 
84  CStringFeatures<char>* l, CStringFeatures<char>* r, int32_t degree);
85 
87 
94  virtual bool init(CFeatures* l, CFeatures* r);
95 
97  virtual void cleanup();
98 
107  {
108  return type;
109  }
110 
116 
121  virtual const char* get_name() const {
122  return "WeightedDegreeStringKernel";
123  }
124 
132  inline virtual bool init_optimization(
133  int32_t count, int32_t *IDX, float64_t* alphas)
134  {
135  return init_optimization(count, IDX, alphas, -1);
136  }
137 
148  virtual bool init_optimization(
149  int32_t count, int32_t *IDX, float64_t* alphas, int32_t tree_num);
150 
155  virtual bool delete_optimization();
156 
162  virtual float64_t compute_optimized(int32_t idx)
163  {
164  if (get_is_initialized())
165  return compute_by_tree(idx);
166 
167  SG_ERROR( "CWeightedDegreeStringKernel optimization not initialized\n");
168  return 0;
169  }
170 
175  static void* compute_batch_helper(void* p);
176 
187  virtual void compute_batch(
188  int32_t num_vec, int32_t* vec_idx, float64_t* target,
189  int32_t num_suppvec, int32_t* IDX, float64_t* alphas,
190  float64_t factor=1.0);
191 
195  inline virtual void clear_normal()
196  {
197  if (get_is_initialized())
198  {
199 
201  SG_ERROR("not implemented");
202 
204  set_is_initialized(false);
205  }
206  }
207 
213  inline virtual void add_to_normal(int32_t idx, float64_t weight)
214  {
215 
217  SG_ERROR("not implemented");
218 
219  if (max_mismatch==0)
220  add_example_to_tree(idx, weight);
221  else
222  add_example_to_tree_mismatch(idx, weight);
223 
224  set_is_initialized(true);
225  }
226 
231  inline virtual int32_t get_num_subkernels()
232  {
234  return ((CMultitaskKernelMklNormalizer*)normalizer)->get_num_betas();
235  if (position_weights!=NULL)
236  return (int32_t) ceil(1.0*seq_length/mkl_stepsize) ;
237  if (length==0)
238  return (int32_t) ceil(1.0*get_degree()/mkl_stepsize);
239  return (int32_t) ceil(1.0*get_degree()*length/mkl_stepsize) ;
240  }
241 
247  inline void compute_by_subkernel(
248  int32_t idx, float64_t * subkernel_contrib)
249  {
250 
251  if (get_is_initialized())
252  {
253 
255  SG_ERROR("not implemented");
256 
257  compute_by_tree(idx, subkernel_contrib);
258  return ;
259  }
260 
261  SG_ERROR( "CWeightedDegreeStringKernel optimization not initialized\n");
262  }
263 
269  inline const float64_t* get_subkernel_weights(int32_t& num_weights)
270  {
271 
272  num_weights = get_num_subkernels();
273 
275  weights_buffer = SG_MALLOC(float64_t, num_weights);
276 
278  for (int32_t i=0; i<num_weights; i++)
280  else if (position_weights!=NULL)
281  for (int32_t i=0; i<num_weights; i++)
283  else
284  for (int32_t i=0; i<num_weights; i++)
285  weights_buffer[i] = weights[i*mkl_stepsize];
286 
287  return weights_buffer;
288  }
289 
295  {
296  float64_t* weights2=w.vector;
297  int32_t num_weights2=w.vlen;
298  int32_t num_weights = get_num_subkernels();
299  if (num_weights!=num_weights2)
300  SG_ERROR( "number of weights do not match\n");
301 
302 
304  for (int32_t i=0; i<num_weights; i++)
305  ((CMultitaskKernelMklNormalizer*)normalizer)->set_beta(i, weights2[i]);
306  else if (position_weights!=NULL)
307  {
308  for (int32_t i=0; i<num_weights; i++)
309  {
310  for (int32_t j=0; j<mkl_stepsize; j++)
311  {
312  if (i*mkl_stepsize+j<seq_length)
313  position_weights[i*mkl_stepsize+j] = weights2[i];
314  }
315  }
316  }
317  else if (length==0)
318  {
319  for (int32_t i=0; i<num_weights; i++)
320  {
321  for (int32_t j=0; j<mkl_stepsize; j++)
322  {
323  if (i*mkl_stepsize+j<get_degree())
324  weights[i*mkl_stepsize+j] = weights2[i];
325  }
326  }
327  }
328  else
329  {
330  for (int32_t i=0; i<num_weights; i++)
331  {
332  for (int32_t j=0; j<mkl_stepsize; j++)
333  {
334  if (i*mkl_stepsize+j<get_degree()*length)
335  weights[i*mkl_stepsize+j] = weights2[i];
336  }
337  }
338  }
339  }
340 
345  virtual bool set_normalizer(CKernelNormalizer* normalizer_) {
346 
347  if (normalizer_ && strcmp(normalizer_->get_name(),"MultitaskKernelTreeNormalizer")==0) {
350  }
351  else
352  {
355  }
356 
357 
358  return CStringKernel<char>::set_normalizer(normalizer_);
359 
360  }
361 
362  // other kernel tree operations
368  float64_t *compute_abs_weights(int32_t & len);
369 
376  void compute_by_tree(int32_t idx, float64_t *LevelContrib);
377 
383 
389  inline float64_t *get_degree_weights(int32_t& d, int32_t& len)
390  {
391  d=degree;
392  len=length;
393  return weights;
394  }
395 
401  inline float64_t *get_weights(int32_t& num_weights)
402  {
403 
405  SG_ERROR("not implemented");
406 
407  if (position_weights!=NULL)
408  {
409  num_weights = seq_length ;
410  return position_weights ;
411  }
412  if (length==0)
413  num_weights = degree ;
414  else
415  num_weights = degree*length ;
416  return weights;
417  }
418 
424  inline float64_t *get_position_weights(int32_t& len)
425  {
426  len=seq_length;
427  return position_weights;
428  }
429 
436 
441  inline void set_wd_weights(SGVector<float64_t> new_weights)
442  {
443  set_weights(SGMatrix<float64_t>(new_weights.vector,new_weights.vlen,0));
444  }
445 
450  bool set_weights(SGMatrix<float64_t> new_weights);
451 
458  bool set_position_weights(float64_t* pws, int32_t len);
459 
464  bool init_block_weights();
465 
471 
477 
483 
489 
495 
501 
506  bool init_block_weights_exp();
507 
512  bool init_block_weights_log();
513 
519  {
521  position_weights=NULL;
522  return true;
523  }
524 
530  bool set_max_mismatch(int32_t max);
531 
536  inline int32_t get_max_mismatch() const { return max_mismatch; }
537 
543  inline bool set_degree(int32_t deg) { degree=deg; return true; }
544 
549  inline int32_t get_degree() const { return degree; }
550 
556  inline bool set_use_block_computation(bool block)
557  {
558  block_computation=block;
559  return true;
560  }
561 
567 
573  inline bool set_mkl_stepsize(int32_t step)
574  {
575  if (step<1)
576  SG_ERROR("Stepsize must be a positive integer\n");
577  mkl_stepsize=step;
578  return true;
579  }
580 
585  inline int32_t get_mkl_stepsize() { return mkl_stepsize; }
586 
592  inline bool set_which_degree(int32_t which)
593  {
594  which_degree=which;
595  return true;
596  }
597 
602  inline int32_t get_which_degree() { return which_degree; }
603 
604  protected:
606  void create_empty_tries();
607 
613  void add_example_to_tree(int32_t idx, float64_t weight);
614 
622  int32_t idx, float64_t weight, int32_t tree_num);
623 
629  void add_example_to_tree_mismatch(int32_t idx, float64_t weight);
630 
638  int32_t idx, float64_t weight, int32_t tree_num);
639 
645  float64_t compute_by_tree(int32_t idx);
646 
655  float64_t compute(int32_t idx_a, int32_t idx_b);
656 
666  char* avec, int32_t alen, char* bvec, int32_t blen);
667 
677  char* avec, int32_t alen, char* bvec, int32_t blen);
678 
688  char* avec, int32_t alen, char* bvec, int32_t blen);
689 
698  float64_t compute_using_block(char* avec, int32_t alen,
699  char* bvec, int32_t blen);
700 
702  virtual void remove_lhs();
703 
704  private:
707  void init();
708 
709  protected:
715  int32_t weights_degree;
717  int32_t weights_length;
718 
719 
727  int32_t mkl_stepsize;
729  int32_t degree;
731  int32_t length;
732 
734  int32_t max_mismatch;
736  int32_t seq_length;
737 
740 
743 
749  int32_t which_degree;
750 
753 
756 
759 };
760 
761 }
762 
763 #endif /* _WEIGHTEDDEGREESTRINGKERNEL_H___ */

SHOGUN Machine Learning Toolbox - Documentation