SHOGUN  v1.1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
WeightedDegreePositionStringKernel.h
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 1999-2009 Soeren Sonnenburg
8  * Written (W) 1999-2008 Gunnar Raetsch
9  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
10  */
11 
12 #ifndef _WEIGHTEDDEGREEPOSITIONSTRINGKERNEL_H___
13 #define _WEIGHTEDDEGREEPOSITIONSTRINGKERNEL_H___
14 
15 #include <shogun/lib/common.h>
18 #include <shogun/lib/Trie.h>
19 
20 namespace shogun
21 {
22 
23 class CSVM;
24 
49 {
50  public:
53 
62  int32_t size, int32_t degree,
63  int32_t max_mismatch=0, int32_t mkl_stepsize=1);
64 
76  int32_t size, float64_t* weights, int32_t degree,
77  int32_t max_mismatch, int32_t* shift, int32_t shift_len,
78  int32_t mkl_stepsize=1);
79 
87  CStringFeatures<char>* l, CStringFeatures<char>* r, int32_t degree);
88 
90 
97  virtual bool init(CFeatures* l, CFeatures* r);
98 
100  virtual void cleanup();
101 
107 
112  virtual const char* get_name() const { return "WeightedDegreePositionStringKernel"; }
113 
121  inline virtual bool init_optimization(
122  int32_t p_count, int32_t *IDX, float64_t * alphas)
123  {
124  return init_optimization(p_count, IDX, alphas, -1);
125  }
126 
138  virtual bool init_optimization(
139  int32_t count, int32_t *IDX, float64_t * alphas, int32_t tree_num,
140  int32_t upto_tree=-1);
141 
146  virtual bool delete_optimization();
147 
/** Return compute_by_tree(idx) after asserting that `alphabet` is set.
 *
 * NOTE(review): the embedded listing numbers jump 154 -> 156 and 156 -> 158,
 * so further statements may be missing from this view — confirm against the
 * real header before relying on this description.
 *
 * @param idx passed straight through to compute_by_tree()
 * @return the value produced by compute_by_tree(idx)
 */
153  inline virtual float64_t compute_optimized(int32_t idx)
154  {
156  ASSERT(alphabet);
158  return compute_by_tree(idx);
159  }
160 
165  static void* compute_batch_helper(void* p);
166 
177  virtual void compute_batch(
178  int32_t num_vec, int32_t* vec_idx, float64_t* target,
179  int32_t num_suppvec, int32_t* IDX, float64_t* alphas,
180  float64_t factor=1.0);
181 
/** Drop the trees held in `tries` and clear the initialized flag.
 *
 * As far as this listing shows: when get_is_initialized() is true, the trees
 * are removed through tries.delete_trees(...) and set_is_initialized(false)
 * is called afterwards.
 *
 * NOTE(review): the embedded listing numbers skip 187, 189 and 195. The
 * condition guarding the first brace pair, the statement preceding SG_DEBUG
 * and the `if` that pairs with the `else if` below are therefore not visible
 * here — confirm against the real header.
 */
185  inline virtual void clear_normal()
186  {
188  {
190  SG_DEBUG( "disabling compact trie nodes with FASTBUTMEMHUNGRY\n") ;
191  }
192 
193  if (get_is_initialized())
194  {
// first branch (its condition is not visible in this listing)
196  tries.delete_trees(true);
197  else if (opt_type==FASTBUTMEMHUNGRY)
198  tries.delete_trees(false); // still buggy
199  else
// any remaining opt_type value is reported as an error
200  SG_ERROR( "unknown optimization type\n");
201 
// mark the kernel as no longer initialized
202  set_is_initialized(false);
203  }
204  }
205 
211  inline virtual void add_to_normal(int32_t idx, float64_t weight)
212  {
213  add_example_to_tree(idx, weight);
214  set_is_initialized(true);
215  }
216 
221  inline virtual int32_t get_num_subkernels()
222  {
223  if (position_weights!=NULL)
224  return (int32_t) ceil(1.0*seq_length/mkl_stepsize) ;
225  if (length==0)
226  return (int32_t) ceil(1.0*get_degree()/mkl_stepsize);
227  return (int32_t) ceil(1.0*get_degree()*length/mkl_stepsize) ;
228  }
229 
235  inline void compute_by_subkernel(
236  int32_t idx, float64_t * subkernel_contrib)
237  {
238  if (get_is_initialized())
239  {
240  compute_by_tree(idx, subkernel_contrib);
241  return ;
242  }
243 
244  SG_ERROR( "CWeightedDegreePositionStringKernel optimization not initialized\n") ;
245  }
246 
/** Return one weight per sub-kernel in a freshly allocated buffer.
 *
 * num_weights is set to get_num_subkernels(); weights_buffer is then
 * allocated with that many entries and returned.
 *
 * NOTE(review): the embedded listing numbers skip 256 and 261, so a statement
 * before the allocation and the body of the first loop are not visible in
 * this view — confirm against the real header.
 *
 * @param num_weights output: number of entries in the returned buffer
 * @return pointer to weights_buffer
 */
252  inline const float64_t* get_subkernel_weights(int32_t& num_weights)
253  {
254  num_weights = get_num_subkernels() ;
255 
257  weights_buffer = SG_MALLOC(float64_t, num_weights);
258 
259  if (position_weights!=NULL)
// loop body for this branch is missing from the listing
260  for (int32_t i=0; i<num_weights; i++)
262  else
// sample every mkl_stepsize-th entry of weights
263  for (int32_t i=0; i<num_weights; i++)
264  weights_buffer[i] = weights[i*mkl_stepsize] ;
265 
266  return weights_buffer ;
267  }
268 
275  {
// NOTE(review): the signature line for this body is missing from the listing.
// The code reads w.vector and w.vlen, so `w` is presumably a vector-like
// argument carrying a float64_t pointer and a length — confirm against the
// real header.
//
// Spreads one input value per sub-kernel over the underlying weight array:
// input entry i is written to indices i*mkl_stepsize+j for j in
// [0, mkl_stepsize), skipping indices beyond the array's logical size.
276  float64_t* weights2=w.vector;
277  int32_t num_weights2=w.vlen;
278 
// the input must hold exactly get_num_subkernels() values
279  int32_t num_weights = get_num_subkernels() ;
280  if (num_weights!=num_weights2)
281  SG_ERROR( "number of weights do not match\n") ;
282 
// case 1: position weights are present — fill position_weights, bounded by seq_length
283  if (position_weights!=NULL)
284  for (int32_t i=0; i<num_weights; i++)
285  for (int32_t j=0; j<mkl_stepsize; j++)
286  {
287  if (i*mkl_stepsize+j<seq_length)
288  position_weights[i*mkl_stepsize+j] = weights2[i] ;
289  }
// case 2: length is 0 — fill weights, bounded by get_degree()
290  else if (length==0)
291  {
292  for (int32_t i=0; i<num_weights; i++)
293  for (int32_t j=0; j<mkl_stepsize; j++)
294  if (i*mkl_stepsize+j<get_degree())
295  weights[i*mkl_stepsize+j] = weights2[i] ;
296  }
// case 3: otherwise — fill weights, bounded by get_degree()*length
297  else
298  {
299  for (int32_t i=0; i<num_weights; i++)
300  for (int32_t j=0; j<mkl_stepsize; j++)
301  if (i*mkl_stepsize+j<get_degree()*length)
302  weights[i*mkl_stepsize+j] = weights2[i] ;
303  }
304  }
305 
306  // other kernel tree operations
312  float64_t* compute_abs_weights(int32_t & len);
313 
319 
324  inline int32_t get_max_mismatch() { return max_mismatch; }
325 
330  inline int32_t get_degree() { return degree; }
331 
337  inline float64_t *get_degree_weights(int32_t& d, int32_t& len)
338  {
339  d=degree;
340  len=length;
341  return weights;
342  }
343 
349  inline float64_t *get_weights(int32_t& num_weights)
350  {
351  if (position_weights!=NULL)
352  {
353  num_weights = seq_length ;
354  return position_weights ;
355  }
356  if (length==0)
357  num_weights = degree ;
358  else
359  num_weights = degree*length ;
360  return weights;
361  }
362 
368  inline float64_t *get_position_weights(int32_t& len)
369  {
370  len=seq_length;
371  return position_weights;
372  }
373 
378  void set_shifts(SGVector<int32_t> shifts);
379 
384  bool set_weights(SGMatrix<float64_t> new_weights);
385 
390  virtual bool set_wd_weights();
391 
397  virtual void set_position_weights(SGVector<float64_t> pws);
398 
406  bool set_position_weights_lhs(float64_t* pws, int32_t len, int32_t num);
407 
415  bool set_position_weights_rhs(float64_t* pws, int32_t len, int32_t num);
416 
421  bool init_block_weights();
422 
428 
434 
440 
446 
452 
458 
463  bool init_block_weights_exp();
464 
469  bool init_block_weights_log();
470 
476  {
478  position_weights=NULL;
479  return true;
480  }
481 
487  {
490  return true;
491  }
492 
498  {
501  return true;
502  }
503 
509  virtual float64_t compute_by_tree(int32_t idx);
510 
516  virtual void compute_by_tree(int32_t idx, float64_t* LevelContrib);
517 
531  int32_t max_degree, int32_t& num_feat, int32_t& num_sym,
532  float64_t* target, int32_t num_suppvec, int32_t* IDX,
533  float64_t* weights);
534 
543  char* compute_consensus(
544  int32_t &num_feat, int32_t num_suppvec, int32_t* IDX,
545  float64_t* alphas);
546 
559  int32_t max_degree, int32_t& num_feat, int32_t& num_sym,
560  float64_t* w_result, int32_t num_suppvec, int32_t* IDX,
561  float64_t* alphas);
562 
576  int32_t max_degree, int32_t& num_feat, int32_t& num_sym,
577  float64_t* poim_result, int32_t num_suppvec, int32_t* IDX,
578  float64_t* alphas, float64_t* distrib);
579 
586  void prepare_POIM2(
587  float64_t* distrib, int32_t num_sym, int32_t num_feat);
588 
595  void compute_POIM2(int32_t max_degree, CSVM* svm);
596 
602  void get_POIM2(float64_t** poim, int32_t* result_len);
603 
605  void cleanup_POIM2();
606 
607  protected:
609  void create_empty_tries();
610 
616  virtual void add_example_to_tree(
617  int32_t idx, float64_t weight);
618 
626  int32_t idx, float64_t weight, int32_t tree_num);
627 
636  virtual float64_t compute(int32_t idx_a, int32_t idx_b);
637 
647  char* avec, int32_t alen, char* bvec, int32_t blen);
648 
658  char* avec, int32_t alen, char* bvec, int32_t blen);
659 
669  char* avec, int32_t alen, char* bvec, int32_t blen);
670 
682  char* avec, float64_t *posweights_lhs, int32_t alen,
683  char* bvec, float64_t *posweights_rhs, int32_t blen);
684 
686  virtual void remove_lhs();
687 
696  virtual void load_serializable_post() throw (ShogunException);
697 
698  private:
701  void init();
702 
703  protected:
705  float64_t* weights;
707  int32_t weights_degree;
709  int32_t weights_length;
710 
715 
726 
730  int32_t mkl_stepsize;
731 
733  int32_t degree;
735  int32_t length;
736 
738  int32_t max_mismatch;
740  int32_t seq_length;
741 
743  int32_t *shift;
745  int32_t shift_len;
747  int32_t max_shift;
748 
751 
757  int32_t which_degree;
758 
760  CTrie<DNATrie> tries;
762  CTrie<POIMTrie> poim_tries;
763 
768 
773 
775  int32_t m_poim_num_sym;
780 
783 };
784 }
785 #endif /* _WEIGHTEDDEGREEPOSITIONSTRINGKERNEL_H___ */

SHOGUN Machine Learning Toolbox - Documentation