SHOGUN  v1.1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
DynProg.h
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 1999-2009 Gunnar Raetsch
8  * Written (W) 1999-2009 Soeren Sonnenburg
9  * Written (W) 2008-2009 Jonas Behr
10  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
11  */
12 
13 #ifndef __CDYNPROG_H__
14 #define __CDYNPROG_H__
15 
17 #include <shogun/lib/common.h>
18 #include <shogun/base/SGObject.h>
19 #include <shogun/io/SGIO.h>
20 #include <shogun/lib/config.h>
23 #include <shogun/structure/Plif.h>
30 #include <shogun/lib/Array.h>
31 #include <shogun/lib/Array2.h>
32 #include <shogun/lib/Array3.h>
33 #include <shogun/lib/Time.h>
34 
35 #include <stdio.h>
36 #include <limits.h>
37 
38 namespace shogun
39 {
40  template <class T> class CSparseFeatures;
41  class CIntronList;
42  class CPlifMatrix;
43  class CSegmentLoss;
44  template <class T> class CArray;
45 
46 //#define DYNPROG_TIMING
47 
/* State index type: 16-bit unsigned when USE_BIGSTATES is defined at build
 * time, 8-bit unsigned otherwise. */
48 #ifdef USE_BIGSTATES
49 typedef uint16_t T_STATES ;
50 #else
51 typedef uint8_t T_STATES ;
52 #endif
/* Pointer to a T_STATES value or array. */
53 typedef T_STATES* P_STATES ;
54 
/* Hidden from Doxygen by the DOXYGEN_SHOULD_SKIP_THIS guard. */
55 #ifndef DOXYGEN_SHOULD_SKIP_THIS
56 
/* Plain aggregate of two int32_t sizes and three raw array pointers.
 * NOTE(review): field meanings are only suggested by their names (look-back
 * window, sequence length, per-segment bookkeeping) -- confirm against the
 * segment-loss code. Nothing in this header shows who owns the pointed-to
 * arrays. */
57 struct segment_loss_struct
58 {
60  int32_t maxlookback;
62  int32_t seqlen;
64  int32_t *segments_changed;
66  float64_t *num_segment_id;
68  int32_t *length_segment_id ;
69 };
70 #endif
71 
77 class CDynProg : public CSGObject
78 {
79 public:
/** Constructor.
 *
 * @param p_num_svms number of SVMs; defaults to 8 when omitted
 */
84  CDynProg(int32_t p_num_svms=8);
/** Virtual destructor (the class derives from CSGObject). */
85  virtual ~CDynProg();
86 
87  // model related functions
/** Set the number of states.
 *
 * @param N number of states
 * NOTE(review): presumably stored in m_N and used to size the per-state
 * arrays -- the definition is not in this header, confirm in DynProg.cpp.
 */
93  void set_num_states(int32_t N);
94 
/** @return the number of states (as int32_t; compare get_N(), which returns T_STATES) */
96  int32_t get_num_states();
97 
/** @return the number of SVMs */
99  int32_t get_num_svms();
100 
/** Initialise the content SVM value array.
 *
 * @param p_num_svms number of SVMs
 */
106  void init_content_svm_value_array(const int32_t p_num_svms);
107 
/** Provide tiling data.
 *
 * @param probe_pos   array of probe positions
 * @param intensities array of intensities
 * @param num_probes  number of probes
 * NOTE(review): presumably both arrays hold num_probes entries -- confirm.
 */
115  void init_tiling_data(int32_t* probe_pos, float64_t* intensities, const int32_t num_probes);
116 
/** Precompute tiling PLIFs.
 *
 * @param PEN              array of CPlif pointers
 * @param tiling_plif_ids  ids of the tiling PLIFs
 * @param num_tiling_plifs number of tiling PLIFs
 */
123  void precompute_tiling_plifs(CPlif** PEN, const int32_t* tiling_plif_ids, const int32_t num_tiling_plifs);
124 
/** Resize the linear feature storage.
 *
 * @param num_new_feat number of new features
 * NOTE(review): whether this is the new total or an increment is not
 * visible here -- confirm in the implementation.
 */
129  void resize_lin_feat(int32_t num_new_feat);
135 
141 
/** Set the transition matrix from a whole matrix.
 *
 * Overload of the inline set_a(line_, column, value) declared further down
 * in this class, which writes a single element.
 *
 * @param a transition matrix
 */
146  void set_a(SGMatrix<float64_t> a);
147 
/** Set the transition id matrix.
 *
 * @param a matrix of int32_t ids
 * NOTE(review): presumably has the same shape as the transition matrix -- confirm.
 */
152  void set_a_id(SGMatrix<int32_t> a);
153 
159 
/** Initialise the mod-words array.
 *
 * @param p_mod_words_array matrix of int32_t values
 */
164  void init_mod_words_array(SGMatrix<int32_t> p_mod_words_array);
165 
/** Check the SVM-related arrays.
 *
 * @return bool result of the check
 * NOTE(review): presumably true means the arrays are consistent -- confirm.
 */
171  bool check_svm_arrays();
172 
178 
/** @return the number of positions */
185  int32_t get_num_positions();
186 
197 
/** Set the position vector.
 *
 * @param pos vector of int32_t positions
 */
202  void set_pos(SGVector<int32_t> pos);
203 
/** Set ORF information.
 *
 * @param orf_info matrix of int32_t ORF information
 * NOTE(review): expected dimensions are not visible in this header -- confirm.
 */
209  void set_orf_info(SGMatrix<int32_t> orf_info);
210 
/** Set the gene string.
 *
 * @param genestr character vector holding the sequence
 */
215  void set_gene_string(SGVector<char> genestr);
216 
217 
/** Set the dictionary weights.
 *
 * @param dictionary_weights matrix of float64_t weights
 */
222  void set_dict_weights(SGMatrix<float64_t> dictionary_weights);
223 
229 
/** Set segment ids and segment mask for best-path computation.
 *
 * @param segment_ids  array of segment ids
 * @param segment_mask array of mask values
 * @param m            integer size argument
 * NOTE(review): presumably m is the length of both arrays -- confirm.
 */
236  void best_path_set_segment_ids_mask(int32_t* segment_ids, float64_t* segment_mask, int32_t m);
237 
240 
/** Set the PLIF matrices object.
 *
 * @param pm pointer to a CPlifMatrix (only forward-declared in this header)
 * NOTE(review): reference-counting / ownership of pm is not visible here -- confirm.
 */
245  void set_plif_matrices(CPlifMatrix* pm);
246 
247  // best_path result retrieval functions
253 
259 
265 
266 
/** Compute the n best paths.
 *
 * @param max_num_signals         maximal number of signals
 * @param use_orf                 flag: use ORF information
 * @param nbest                   number of best paths to compute
 * @param with_loss               flag: take the loss into account
 * @param with_multiple_sequences flag: multiple sequences
 *
 * Returns nothing; NOTE(review): results are presumably retrieved through
 * separate accessors that are not part of this listing -- confirm.
 */
275  void compute_nbest_paths(int32_t max_num_signals,
276  bool use_orf, int16_t nbest, bool with_loss, bool with_multiple_sequences);
277 
279 
// NOTE(review): these two lines are the tail of a member declaration whose
// opening line (return type, name and opening parenthesis; listing line 291)
// is missing from this extract. The comment a few lines below mentions
// best_path_trans_deriv, so this is presumably that function's parameter
// list -- confirm against the original header before relying on it.
292  int32_t* my_state_seq, int32_t *my_pos_seq,
293  int32_t my_seq_len, const float64_t *seq_array, int32_t max_num_signals);
294 
295  // additional best_path_trans_deriv functions
/** Set the state sequence.
 *
 * @param my_state_seq array of states
 * NOTE(review): expected length and whether the array is copied are not
 * visible here -- confirm.
 */
300  void set_my_state_seq(int32_t* my_state_seq);
301 
/** Set the position sequence.
 *
 * @param my_pos_seq array of positions
 * NOTE(review): expected length and whether the array is copied are not
 * visible here -- confirm.
 */
306  void set_my_pos_seq(int32_t* my_pos_seq);
307 
/** Retrieve the path scores through output parameters.
 *
 * @param my_scores out: pointer that receives the score array
 * @param seq_len   out: receives the length
 * NOTE(review): who frees *my_scores is not visible here -- confirm.
 */
315  void get_path_scores(float64_t** my_scores, int32_t* seq_len);
316 
/** Retrieve the path losses through output parameters.
 *
 * @param my_losses out: pointer that receives the loss array
 * @param seq_len   out: receives the length
 * NOTE(review): who frees *my_losses is not visible here -- confirm.
 */
324  void get_path_losses(float64_t** my_losses, int32_t* seq_len);
325 
326 
/** Access the number of states.
 *
 * m_N is declared as int32_t in this class while the return type is
 * T_STATES (8 or 16 bit), so the value is implicitly narrowed on return.
 *
 * @return m_N converted to T_STATES
 */
328  inline T_STATES get_N() const
329  {
330  return m_N ;
331  }
332 
/** Set one entry of the end state distribution q.
 *
 * Writes directly to m_end_state_distribution_q[offset]; this accessor adds
 * no range check of its own.
 *
 * @param offset index of the entry
 * @param value  value to store
 */
337  inline void set_q(T_STATES offset, float64_t value)
338  {
339  m_end_state_distribution_q[offset]=value;
340  }
341 
/** Set one entry of the initial state distribution p.
 *
 * Writes directly to m_initial_state_distribution_p[offset]; this accessor
 * adds no range check of its own.
 *
 * @param offset index of the entry
 * @param value  value to store
 */
346  inline void set_p(T_STATES offset, float64_t value)
347  {
348  m_initial_state_distribution_p[offset]=value;
349  }
350 
/** Set a single element of the transition matrix a.
 *
 * Element-wise counterpart of set_a(SGMatrix<float64_t>).
 *
 * @param line_  first index passed to element()
 * @param column second index passed to element()
 * @param value  value to store
 */
357  inline void set_a(T_STATES line_, T_STATES column, float64_t value)
358  {
359  m_transition_matrix_a.element(line_,column)=value; // look also best_path!
360  }
361 
/** Read one entry of the end state distribution q.
 *
 * @param offset index of the entry
 * @return m_end_state_distribution_q[offset]
 */
367  inline float64_t get_q(T_STATES offset) const
368  {
369  return m_end_state_distribution_q[offset];
370  }
371 
/** Read one entry of the derivative array kept for the end state distribution q.
 *
 * @param offset index of the entry
 * @return m_end_state_distribution_q_deriv[offset]
 */
377  inline float64_t get_q_deriv(T_STATES offset) const
378  {
379  return m_end_state_distribution_q_deriv[offset];
380  }
381 
/** Read one entry of the initial state distribution p.
 *
 * @param offset index of the entry
 * @return m_initial_state_distribution_p[offset]
 */
387  inline float64_t get_p(T_STATES offset) const
388  {
389  return m_initial_state_distribution_p[offset];
390  }
391 
/** Read one entry of the derivative array kept for the initial state
 * distribution p.
 *
 * The body of this accessor was empty in this listing (line 399 missing),
 * which would let a non-void function run off its end. The return statement
 * is restored following the pattern of the sibling accessors: get_p reads
 * m_initial_state_distribution_p and get_q_deriv reads
 * m_end_state_distribution_q_deriv.
 * NOTE(review): the declaration of m_initial_state_distribution_p_deriv is
 * not part of this listing -- confirm the member name against the original
 * header.
 *
 * @param offset index of the entry
 * @return m_initial_state_distribution_p_deriv[offset]
 */
397  inline float64_t get_p_deriv(T_STATES offset) const
398  {
399  return m_initial_state_distribution_p_deriv[offset];
400  }
401 
406 
/** Access the linear feature array and its dimensions.
 *
 * Fills dim1/dim2 through m_lin_feat.get_array_size() and returns whatever
 * m_lin_feat.get_array() yields.
 * NOTE(review): presumably the internal buffer rather than a copy -- confirm
 * in the array class before freeing or retaining the pointer.
 *
 * @param dim1 out: first dimension of m_lin_feat
 * @param dim2 out: second dimension of m_lin_feat
 * @return pointer returned by m_lin_feat.get_array()
 */
413  inline float64_t* get_lin_feat(int32_t & dim1, int32_t & dim2)
414  {
415  m_lin_feat.get_array_size(dim1, dim2);
416  return m_lin_feat.get_array();
417  }
/** Set the linear feature array.
 *
 * Forwards to m_lin_feat.set_array() with dimensions (p_num_svms, p_seq_len)
 * and two trailing `true` flags.
 * NOTE(review): the meaning of the two flags is defined by the array class,
 * which is not visible here; they presumably control freeing/copying of the
 * buffer -- confirm before relying on ownership of p_lin_feat.
 *
 * @param p_lin_feat array of features
 * @param p_num_svms first dimension passed to set_array
 * @param p_seq_len  second dimension passed to set_array
 */
426  inline void set_lin_feat(float64_t* p_lin_feat, int32_t p_num_svms, int32_t p_seq_len)
427  {
428  m_lin_feat.set_array(p_lin_feat, p_num_svms, p_seq_len, true, true);
429  }
/** Create the word string.
 * NOTE(review): presumably fills m_wordstr from the gene string -- confirm in
 * the implementation.
 */
434  void create_word_string();
435 
/** Precompute stop codons.
 * NOTE(review): inputs and outputs are members not visible in this listing -- confirm.
 */
438  void precompute_stop_codons();
439 
/** Read a single element of the transition matrix a.
 *
 * @param line_  first index passed to element()
 * @param column second index passed to element()
 * @return m_transition_matrix_a.element(line_, column)
 */
446  inline float64_t get_a(T_STATES line_, T_STATES column) const
447  {
448  return m_transition_matrix_a.element(line_, column); // look also best_path()!
449  }
450 
/** Read a single element of the derivative matrix kept for the transition matrix a.
 *
 * @param line_  first index passed to element()
 * @param column second index passed to element()
 * @return m_transition_matrix_a_deriv.element(line_, column)
 */
457  inline float64_t get_a_deriv(T_STATES line_, T_STATES column) const
458  {
459  return m_transition_matrix_a_deriv.element(line_, column); // look also best_path()!
460  }
462 
/** Set the intron list.
 *
 * @param intron_list pointer to a CIntronList (only forward-declared in this header)
 * @param num_plifs   number of PLIFs
 * NOTE(review): ownership of intron_list is not visible here -- confirm.
 */
467  void set_intron_list(CIntronList* intron_list, int32_t num_plifs);
468 
// NOTE(review): the signature line for this body (listing line 470) is
// missing from this extract. The body only returns m_seg_loss_obj, so this is
// presumably an accessor returning a CSegmentLoss pointer (CSegmentLoss is
// forward-declared in this header) -- confirm name and return type against
// the original header.
471  {
472  return m_seg_loss_obj;
473  }
474 
/** Configure long transitions.
 *
 * Stores use_long_transitions in m_long_transitions and threshold in
 * m_long_transition_threshold. max_len is accepted but not stored: the
 * assignment below is commented out and a debug message is emitted instead.
 *
 * @param use_long_transitions whether long transitions are used
 * @param threshold            long transition threshold
 * @param max_len              currently ignored
 */
481  void long_transition_settings(bool use_long_transitions, int32_t threshold, int32_t max_len)
482  {
483  m_long_transitions = use_long_transitions;
484  m_long_transition_threshold = threshold;
485  SG_DEBUG("ignoring max_len\n") ;
486  //m_long_transition_max = max_len;
487  }
488 
489 protected:
490 
491  /* helper functions */
492 
/** Look up content SVM values for a transition.
 *
 * @param from_state source state
 * @param to_state   target state
 * @param from_pos   start position
 * @param to_pos     end position
 * @param svm_values array for the SVM values (NOTE(review): presumably an
 *                   output buffer sized by the number of SVMs -- confirm)
 * @param frame      frame
 */
502  void lookup_content_svm_values(const int32_t from_state,
503  const int32_t to_state, const int32_t from_pos, const int32_t to_pos,
504  float64_t* svm_values, int32_t frame);
505 
/** Look up tiling PLIF values for a transition.
 *
 * Declared inline but not defined in this listing.
 * NOTE(review): an inline function needs its definition in every
 * translation unit that calls it; presumably it is only called from the
 * implementation file -- confirm.
 *
 * @param from_state source state
 * @param to_state   target state
 * @param len        length
 * @param svm_values array for the SVM values
 */
513  inline void lookup_tiling_plif_values(const int32_t from_state,
514  const int32_t to_state, const int32_t len, float64_t* svm_values);
515 
/** Find the frame for a state (declared inline, definition not in this listing).
 *
 * @param from_state state to query
 * @return frame as int32_t
 */
520  inline int32_t find_frame(const int32_t from_state);
521 
/** Query raw intensities for a position interval (declared inline,
 * definition not in this listing).
 *
 * @param from_pos    interval start
 * @param to_pos      interval end
 * @param intensities array for intensities (NOTE(review): presumably an
 *                    output buffer -- confirm)
 * @param type        type selector
 * @return int32_t result (NOTE(review): presumably a count -- confirm)
 */
530  inline int32_t raw_intensities_interval_query(
531  const int32_t from_pos, const int32_t to_pos, float64_t* intensities, int32_t type);
532 
/* Hidden from Doxygen by the DOXYGEN_SHOULD_SKIP_THIS guard. */
533 #ifndef DOXYGEN_SHOULD_SKIP_THIS
534 
/* Plain aggregate nested in CDynProg: two int32_t sizes plus raw
 * (multi-level) pointers to SVM value and word bookkeeping arrays.
 * NOTE(review): array dimensions and ownership are not visible in this
 * header; field meanings are only suggested by their names -- confirm in the
 * implementation. */
535  struct svm_values_struct
536  {
538  int32_t maxlookback;
540  int32_t seqlen;
541 
543  int32_t* start_pos;
545  float64_t ** svm_values_unnormalized;
547  float64_t * svm_values;
549  bool *** word_used;
551  int32_t **num_unique_words;
552  };
553 #endif // DOXYGEN_SHOULD_SKIP_THIS
554 
/** Extend an ORF.
 *
 * @param orf_from ORF from
 * @param orf_to   ORF to
 * @param start    start position
 * @param last_pos in/out: taken by reference (NOTE(review): presumably
 *                 updated by the call -- confirm)
 * @param to       to position
 * @return bool result (NOTE(review): presumably true if the ORF could be
 *         extended -- confirm)
 */
563  bool extend_orf(int32_t orf_from, int32_t orf_to, int32_t start, int32_t &last_pos, int32_t to);
564 
/** @return the object name, the string literal "DynProg" */
566  inline virtual const char* get_name() const { return "DynProg"; }
567 
568 private:
569 
/* Transition-list storage held as raw pointers.
 * NOTE(review): presumably a per-state forward transition list with parallel
 * count, value and id arrays, and mem_initialized records whether they have
 * been allocated -- confirm in the implementation. */
570  T_STATES trans_list_len;
571  T_STATES **trans_list_forward;
572  T_STATES *trans_list_forward_cnt;
573  float64_t **trans_list_forward_val;
574  int32_t **trans_list_forward_id;
575  bool mem_initialized;
576 
/* Timing instrumentation: three CTime objects and a set of float64_t
 * counters. These members exist only when DYNPROG_TIMING is defined; the
 * #define near the top of this header is commented out. */
577 #ifdef DYNPROG_TIMING
578  CTime MyTime;
579  CTime MyTime2;
580  CTime MyTime3;
581 
582  float64_t segment_init_time;
583  float64_t segment_pos_time;
584  float64_t segment_clean_time;
585  float64_t segment_extend_time;
586  float64_t orf_time;
587  float64_t content_time;
588  float64_t content_penalty_time;
589  float64_t content_svm_values_time ;
590  float64_t content_plifs_time ;
591  float64_t svm_init_time;
592  float64_t svm_pos_time;
593  float64_t inner_loop_time;
594  float64_t inner_loop_max_time ;
595  float64_t svm_clean_time;
596  float64_t long_transition_time ;
597 #endif
598 
599 
600 protected:
605 
/* Number of states; returned (narrowed to T_STATES) by get_N(). */
606  int32_t m_N;
607 
612 
616 
620 
622 
/* Number of degrees. */
624  int32_t m_num_degrees;
/* Number of SVMs. */
626  int32_t m_num_svms;
627 
650 
/* The m_svm_pos_start member below is disabled (commented out). */
652 // CArray<int32_t> m_svm_pos_start;
/* NOTE(review): presumably the largest id in the transition id matrix
 * passed to set_a_id -- confirm in the implementation. */
658  int32_t m_max_a_id;
659 
660  // input arguments
/* Sequence length. */
666  int32_t m_seq_len;
/* Three-level pointer to uint16_t word data.
 * NOTE(review): dimension order and ownership are not visible in this
 * header -- confirm in the implementation. */
693  uint16_t*** m_wordstr;
710 
714 
715  // output arguments
722 
729 
734 
738 
741 
747 
/* Raw pointer to probe positions.
 * NOTE(review): presumably set from init_tiling_data -- confirm. */
751  int32_t* m_probe_pos;
/* Number of raw data entries. */
757  int32_t m_num_raw_data;
758 
768  //int32_t m_long_transition_max ;
769 
/* Static default tables. These are declarations only: the array sizes are
 * fixed here, while the definitions and initial values live outside this
 * header. */
/* Default word degrees (4 entries). */
773  static int32_t word_degree_default[4];
774 
/* Default cumulative number of words (5 entries). */
778  static int32_t cum_num_words_default[5];
779 
/* Frame PLIFs (3 entries). */
782  static int32_t frame_plifs[3];
783 
/* Default number of words (4 entries). */
786  static int32_t num_words_default[4];
787 
/* Default mod words (32 entries). */
789  static int32_t mod_words_default[32];
790 
/* Default sign words (16 entries). */
792  static bool sign_words_default[16];
793 
/* Default string words (16 entries). */
795  static int32_t string_words_default[16];
796 };
797 }
798 #endif

SHOGUN Machine Learning Toolbox - Documentation