SHOGUN  v1.1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
SparseFeatures.h
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 1999-2010 Soeren Sonnenburg
8  * Written (W) 1999-2008 Gunnar Raetsch
9  * Subset support written (W) 2011 Heiko Strathmann
10  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
11  * Copyright (C) 2010 Berlin Institute of Technology
12  */
13 
14 #ifndef _SPARSEFEATURES__H__
15 #define _SPARSEFEATURES__H__
16 
17 #include <shogun/lib/common.h>
18 #include <shogun/lib/Cache.h>
19 #include <shogun/io/File.h>
20 
21 #include <shogun/features/Labels.h>
25 
26 namespace shogun
27 {
28 
/*
 * Forward declarations for the class template declared below.
 * In the visible declarations CFile, CLabels and CFeatures appear only as
 * pointer parameter or return types; CDotFeatures appears as a pointer
 * parameter and as the base class. CSimpleFeatures is not referenced by any
 * declaration visible in this listing.
 * NOTE(review): a base class must be a complete type, so the forward
 * declaration of CDotFeatures alone is not enough. The embedded numbering of
 * the include section skips lines 22-24, so the include that provides the
 * full definition was presumably dropped from this listing - confirm against
 * the real header.
 */
29 class CFile;
30 class CLabels;
31 class CFeatures;
32 class CDotFeatures;
33 template <class ST> class CSimpleFeatures;
34 
/**
 * Class template CSparseFeatures, parameterised on the element type ST and
 * publicly derived from CDotFeatures.
 *
 * NOTE(review): this text is a documentation-generator source listing. Every
 * line carries an embedded original line number, and wherever that numbering
 * jumps, comments or declaration lines were dropped. Several declarations
 * below are therefore only fragments; they are flagged individually. Restore
 * them from the real header before attempting to compile.
 */
52 template <class ST> class CSparseFeatures : public CDotFeatures
53 {
54  public:
    /**
     * Constructor taking an int32_t `size` (default 0). What `size` controls
     * is not visible here - presumably a cache size; TODO confirm.
     * Not declared `explicit`, so an int32_t converts implicitly.
     */
59  CSparseFeatures(int32_t size=0);
60 
    /**
     * NOTE(review): tail of a declaration whose first line(s) are missing
     * from this listing. The visible parameters are a feature count, a vector
     * count and a `copy` flag defaulting to false; presumably a constructor
     * overload - confirm against the real header.
     */
70  int32_t num_feat, int32_t num_vec,bool copy=false);
71 
    /* NOTE(review): embedded lines 72-86 are mostly absent; declarations may
     * have been dropped here. */
78 
85 
    /** Copy constructor. */
87  CSparseFeatures(const CSparseFeatures & orig);
88 
    /** Constructor taking a CFile pointer named `loader`. */
93  CSparseFeatures(CFile* loader);
94 
    /** Virtual destructor. */
96  virtual ~CSparseFeatures();
97 
103 
    /** Takes no arguments and returns nothing; presumably releases the
     * stored sparse features (inferred from the name only). */
108  void free_sparse_features();
109 
    /** Virtual, const; returns a CFeatures pointer. Ownership of the
     * returned object is not visible here. */
114  virtual CFeatures* duplicate() const;
115 
    /** Returns a single ST value for the pair (num, index); presumably
     * vector number and feature index - TODO confirm. */
125  ST get_feature(int32_t num, int32_t index);
126 
    /** Returns an ST pointer for vector `num`. `len` is a non-const
     * reference, so the callee can write to it. Ownership of the returned
     * buffer is not visible here. */
135  ST* get_full_feature_vector(int32_t num, int32_t& len);
136 
143 
    /** Virtual; returns an int32_t for vector `num` (by name, the number of
     * non-zero entries). */
149  virtual int32_t get_nnz_features_for_vector(int32_t num);
150 
161 
    /** Static helper taking a scalar `alpha` and two arrays of
     * SGSparseVectorEntry<ST> with their lengths; returns ST. Needs no
     * object instance. */
172  static ST sparse_dot(ST alpha, SGSparseVectorEntry<ST>* avec, int32_t alen,
173  SGSparseVectorEntry<ST>* bvec, int32_t blen);
174 
    /** Non-virtual overload of dense_dot working in ST: takes `alpha`, a
     * vector number, a dense ST array with its dimension, and `b`. It is
     * distinct from the virtual float64_t overload declared further down. */
187  ST dense_dot(ST alpha, int32_t num, ST* vec, int32_t dim, ST b);
188 
    /** Takes `alpha`, a vector number and a non-const float64_t array `vec`
     * of dimension `dim`; `abs_val` defaults to false. Presumably
     * accumulates into `vec` - TODO confirm. */
200  void add_to_dense_vec(float64_t alpha, int32_t num,
201  float64_t* vec, int32_t dim, bool abs_val=false);
202 
    /** Takes an SGSparseVector<ST> by value together with a vector number. */
210  void free_sparse_feature_vector(SGSparseVector<ST> vec, int32_t num);
211 
    /** Returns an SGSparseVector<ST> pointer; both count arguments are
     * non-const references, so the callee can write to them. */
221  SGSparseVector<ST>* get_sparse_feature_matrix(int32_t &num_feat, int32_t &num_vec);
222 
231 
    /** Static helper taking an SGSparseVector<ST> array and a vector count. */
237  static void clean_tsparse(SGSparseVector<ST>* sfm, int32_t num_vec);
238 
246 
    /** Same signature shape as get_sparse_feature_matrix: returns an
     * SGSparseVector<ST> pointer and takes two writable count references. */
258  SGSparseVector<ST>* get_transposed(int32_t &num_feat, int32_t &num_vec);
259 
268 
276 
    /** Virtual; takes an SGMatrix<ST> by value and returns bool. */
286  virtual bool set_full_feature_matrix(SGMatrix<ST> full);
287 
    /** Virtual; `force_preprocessing` defaults to false; returns bool. */
295  virtual bool apply_preprocessor(bool force_preprocessing=false);
296 
    /** Virtual; returns an int32_t size. What is being measured is not
     * visible here. */
301  virtual int32_t get_size();
302 
311 
    /** Virtual, const; returns an int32_t (by name, the vector count). */
316  virtual int32_t get_num_vectors() const;
317 
    /** Non-virtual and non-const, unlike get_num_vectors(). */
322  int32_t get_num_features();
323 
    /** Takes the new value `num` and returns an int32_t. What the return
     * value represents is not visible here. */
335  int32_t set_num_features(int32_t num);
336 
342 
    /** Virtual; returns an EFeatureType value. */
347  virtual EFeatureType get_feature_type();
348 
    /** Same parameter list as free_sparse_feature_vector above. */
356  void free_feature_vector(SGSparseVector<ST> vec, int32_t num);
357 
    /** Returns a 64-bit count, wider than the int32_t used for the other
     * counts in this class. */
362  int64_t get_num_nonzero_entries();
363 
372 
    /**
     * NOTE(review): fragment of a declaration. Its first line(s) and embedded
     * line 389 are missing from this listing; only the parameters `sq_lhs`,
     * `idx_a` and `idx_b` are visible. Restore from the real header.
     */
388  float64_t* sq_lhs, int32_t idx_a,
390  int32_t idx_b);
391 
    /** Takes a CFile pointer named `loader`; returns nothing. */
398  void load(CFile* loader);
399 
    /** Takes a CFile pointer named `writer`; returns nothing. */
406  void save(CFile* writer);
407 
    /** Takes a non-const char* file name; `do_sort_features` defaults to
     * true. Returns a CLabels pointer whose ownership is not visible here. */
417  CLabels* load_svmlight_file(char* fname, bool do_sort_features=true);
418 
    /** Takes no arguments and returns nothing. */
424  void sort_features();
425 
    /** Takes a non-const char* file name and a CLabels pointer; returns
     * bool. */
434  bool write_svmlight_file(char* fname, CLabels* label);
435 
    /** Virtual, const; returns an int32_t dimension. */
443  virtual int32_t get_dim_feature_space() const;
444 
    /** Virtual; takes a vector index on this object, another CDotFeatures
     * pointer and a vector index on that object; returns float64_t. */
454  virtual float64_t dot(int32_t vec_idx1, CDotFeatures* df, int32_t vec_idx2);
455 
    /** Virtual float64_t overload of dense_dot: takes a vector index and a
     * const dense array with its length. */
464  virtual float64_t dense_dot(int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len);
465 
    /* The iterator struct below is compiled only when
     * DOXYGEN_SHOULD_SKIP_THIS is not defined. */
466  #ifndef DOXYGEN_SHOULD_SKIP_THIS
467 
    /** Nested iterator state.
     * NOTE(review): print_info() reads a member `sv` with fields `features`,
     * `vec_index` and `num_feat_entries`, but no declaration of `sv` appears
     * in this listing; the embedded numbering skips lines 470-471 and 473.
     * Restore it from the real header. */
468  struct sparse_feature_iterator
469  {
472 
    /** Integer position; printed as `index` by print_info(). */
474  int32_t index;
475 
    /** Prints sv.features, sv.vec_index, sv.num_feat_entries and index
     * through the SG_SPRINT macro. */
477  void print_info()
478  {
479  SG_SPRINT("sv=%p, vidx=%d, num_feat_entries=%d, index=%d\n",
480  sv.features, sv.vec_index, sv.num_feat_entries, index);
481  }
482  };
483  #endif
484 
    /** Virtual; returns an untyped (void*) iterator handle for
     * `vector_index`. Presumably it points at a sparse_feature_iterator -
     * TODO confirm in the implementation. */
496  virtual void* get_feature_iterator(int32_t vector_index);
497 
    /** Virtual; takes the untyped iterator handle. `index` and `value` are
     * non-const references the callee can write to. Returns bool. */
508  virtual bool get_next_feature(int32_t& index, float64_t& value, void* iterator);
509 
    /** Virtual; takes the untyped iterator handle and returns nothing. */
515  virtual void free_feature_iterator(void* iterator);
516 
    /** Virtual; takes an SGVector<index_t> by value and returns a CFeatures
     * pointer. */
523  virtual CFeatures* copy_subset(SGVector<index_t> indices);
524 
    /** Returns the string literal "SparseFeatures". */
526  inline virtual const char* get_name() const { return "SparseFeatures"; }
527 
528  protected:
    /**
     * NOTE(review): tail of a protected declaration whose first line(s) are
     * missing from this listing. The visible parameters are a writable
     * length reference and an optional `target` buffer defaulting to NULL.
     */
540  int32_t& len, SGSparseVectorEntry<ST>* target=NULL);
541 
542  private:
    /** Private helper with no arguments and no return value; its body is
     * not part of this header listing. */
543  void init();
544 
545  protected:
546 
    /** Stored vector count. */
548  int32_t num_vectors;
549 
    /** Stored feature count. */
551  int32_t num_features;
552 
    /* NOTE(review): the embedded numbering skips lines 553-554 and 556-557,
     * so further member declarations may have been dropped here. */
555 
558 };
559 }
560 #endif /* _SPARSEFEATURES__H__ */

SHOGUN Machine Learning Toolbox - Documentation