28 init(file, is_labelled, size);
34 if (parser.is_running())
41 ASSERT(index>=0 && index<current_num_features)
42 return current_sgvector.get_feature(index);
53 int32_t n=current_num_features;
55 current_num_features=num;
62 int32_t dim = get_dim_feature_space();
65 vec = SG_REALLOC(
float32_t, vec, len, dim);
66 memset(&vec[len], 0, (dim-len) *
sizeof(
float32_t));
74 int32_t dim = get_dim_feature_space();
77 vec = SG_REALLOC(
float64_t, vec, len, dim);
78 memset(&vec[len], 0, (dim-len) *
sizeof(
float64_t));
104 ASSERT(dim>=current_num_features)
106 return current_sgvector.dense_dot(alpha, vec, dim, b);
113 if (vec2_len < current_num_features)
115 SG_ERROR(
"dimension of vec2 (=%d) does not match number of features (=%d)\n",
116 vec2_len, current_num_features);
119 int32_t current_length = current_sgvector.num_feat_entries;
125 for (int32_t i=0; i<current_length; i++)
126 result+=vec2[current_vector[i].feat_index]*current_vector[i].entry;
136 if (vec2_len < current_num_features)
138 SG_ERROR(
"dimension of vec2 (=%d) does not match number of features (=%d)\n",
139 vec2_len, current_num_features);
142 int32_t current_length = current_sgvector.num_feat_entries;
148 for (int32_t i=0; i<current_length; i++)
149 result+=vec2[current_vector[i].feat_index]*current_vector[i].entry;
159 if (vec2_len < current_num_features)
161 SG_ERROR(
"dimension of vec (=%d) does not match number of features (=%d)\n",
162 vec2_len, current_num_features);
166 int32_t num_feat=current_sgvector.num_feat_entries;
172 for (int32_t i=0; i<num_feat; i++)
173 vec2[sv[i].feat_index]+= alpha*
CMath::abs(sv[i].entry);
177 for (int32_t i=0; i<num_feat; i++)
178 vec2[sv[i].feat_index]+= alpha*sv[i].entry;
187 if (vec2_len < current_num_features)
189 SG_ERROR(
"dimension of vec (=%d) does not match number of features (=%d)\n",
190 vec2_len, current_num_features);
194 int32_t num_feat=current_sgvector.num_feat_entries;
200 for (int32_t i=0; i<num_feat; i++)
201 vec2[sv[i].feat_index]+= alpha*
CMath::abs(sv[i].entry);
205 for (int32_t i=0; i<num_feat; i++)
206 vec2[sv[i].feat_index]+= alpha*sv[i].entry;
214 return current_sgvector.num_feat_entries;
220 int32_t current_length = current_sgvector.num_feat_entries;
227 for (int32_t i=0; i<current_length; i++)
228 sq += current_vector[i].entry * current_vector[i].entry;
240 get_vector().sort_features(
true);
242 ASSERT(old_ptr == current_sgvector.features);
254 if (current_sgvector.features)
266 parser.set_read_vector_and_label
270 #define GET_FEATURE_TYPE(f_type, sg_type) \
271 template<> EFeatureType CStreamingSparseFeatures<sg_type>::get_feature_type() const \
289 #undef GET_FEATURE_TYPE
293 void CStreamingSparseFeatures<T>::init()
297 current_num_features=-1;
303 void CStreamingSparseFeatures<T>::init(CStreamingFile* file,
308 has_labels = is_labelled;
311 parser.init(file, is_labelled, size);
312 parser.set_free_vector_after_release(
false);
318 if (!parser.is_running())
319 parser.start_parser();
331 int32_t current_length = 0;
335 ret_value = (bool) parser.get_next_example(current_vector,
347 current_num_features =
CMath::max(current_num_features, current_dimension);
356 return current_sgvector;
364 return current_label;
370 parser.finalize_example();
376 return current_num_features;
389 return current_num_features;
395 return current_sgvector.num_feat_entries;
virtual void set_vector_reader()
int32_t get_num_dimensions()
int64_t get_num_nonzero_entries()
T get_feature(int32_t index)
#define SG_NOTIMPLEMENTED
virtual void add_to_dense_vec(float64_t alpha, float64_t *vec2, int32_t vec2_len, bool abs_val=false)
T sparse_dot(const SGSparseVector< T > &v)
virtual void start_parser()
SGSparseVector< T > get_vector()
virtual int32_t get_num_features()
CStreamingSparseFeatures()
static T sparse_dot(T alpha, SGSparseVectorEntry< T > *avec, int32_t alen, SGSparseVectorEntry< T > *bvec, int32_t blen)
EFeatureClass
shogun feature class
virtual void reset_stream()
float32_t compute_squared()
A Streaming File access class.
virtual float32_t dot(CStreamingDotFeatures *df)
virtual int32_t get_nnz_features_for_vector()
virtual int32_t get_dim_feature_space() const
virtual void get_sparse_vector_and_label(SGSparseVectorEntry< bool > *&vector, int32_t &len, float64_t &label)
virtual CFeatures * duplicate() const
static T max(T a, T b)
return the maximum of two integers
Streaming features that support dot products among other operations.
virtual int32_t get_num_vectors() const
virtual EFeatureClass get_feature_class() const
virtual float64_t get_label()
virtual void expand_if_required(float32_t *&vec, int32_t &len)
void set_read_functions()
virtual void end_parser()
virtual void get_sparse_vector(SGSparseVectorEntry< bool > *&vector, int32_t &len)
virtual bool get_next_example()
int32_t set_num_features(int32_t num)
The class Features is the base class of all feature objects.
#define GET_FEATURE_TYPE(f_type, sg_type)
virtual void set_vector_and_label_reader()
T dense_dot(T alpha, T *vec, int32_t dim, T b)
This class implements streaming features with sparse feature vectors. The vector is represented as an...
virtual void release_example()
virtual ~CStreamingSparseFeatures()
static T abs(T a)
return the absolute value of a number