SHOGUN  v1.1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
RealFileFeatures.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 1999-2009 Soeren Sonnenburg
8  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
9  */
10 
13 #include <shogun/io/SGIO.h>
14 
15 #include <stdio.h>
16 #include <string.h>
17 
18 using namespace shogun;
19 
// Body of the constructor that the SG_UNSTABLE call below identifies as
// CRealFileFeatures::CRealFileFeatures(). The signature line (listing line 20)
// is not present in this listing.
//
// Leaves the object with no open file, an empty file name, all header fields
// zeroed, no labels, and status set to false.
21 {
22  SG_UNSTABLE("CRealFileFeatures::CRealFileFeatures()", "\n");
23 
24  working_file=NULL;
// strdup("") allocates an empty string, so working_filename is non-null here.
25  working_filename=strdup("");
// These are the fields that the header-reading code later in this file fills via fread.
26  intlen=0;
27  doublelen=0;
28  endian=0;
29  fourcc=0;
30  preprocd=0;
31  labels=NULL;
32  status=false;
33 }
34 
// Constructs the object from a file name: opens fname for reading and keeps a
// private copy of the name in working_filename.
//
// size  - not used in any line visible here (listing line 36, which would hold
//         the initializer list, is missing from this listing).
// fname - path of the feature file to open.
//
// Listing lines 40 and 47 are also missing, so any check of the fopen result or
// any loading done there is not visible.
// NOTE(review): confirm working_file is checked for NULL and status is assigned
// on the missing lines.
35 CRealFileFeatures::CRealFileFeatures(int32_t size, char* fname)
37 {
// Binary mode: the file is consumed with fread as raw integers and doubles, and
// text mode may translate line endings on platforms that distinguish the two.
38  working_file=fopen(fname, "rb");
39  working_filename=strdup(fname);
41  intlen=0;
42  doublelen=0;
43  endian=0;
44  fourcc=0;
45  preprocd=0;
46  labels=NULL;
48 }
49 
// Constructs the object around an already-open stream.
//
// size - forwarded to the CSimpleFeatures<float64_t> base constructor.
// file - stream stored in working_file; no file name is known, so
//        working_filename is set to NULL.
//
// Listing lines 53 and 60 are missing from this listing, so any check of file
// or any loading done there is not visible.
// NOTE(review): status is not assigned in the visible lines — confirm it is set
// on one of the missing lines.
50 CRealFileFeatures::CRealFileFeatures(int32_t size, FILE* file)
51 : CSimpleFeatures<float64_t>(size), working_file(file), working_filename(NULL)
52 {
// Header fields start at zero; the header-reading code later in this file fills them via fread.
54  intlen=0;
55  doublelen=0;
56  endian=0;
57  fourcc=0;
58  preprocd=0;
59  labels=NULL;
61 }
62 
// Body of a cleanup routine — presumably the destructor; TODO confirm, since the
// signature line (listing line 63) and listing lines 65-66 are missing from this
// listing. The only visible action is releasing the labels buffer.
64 {
67  SG_FREE(labels);
68 }
69 
// Copy-construction from orig. The signature line (listing line 70) is missing
// from this listing.
//
// The base class is copy-initialised from orig. working_file and status are
// copied member-wise, so both objects refer to the same FILE*.
// NOTE(review): intlen, doublelen, endian, fourcc, preprocd and filepos are not
// assigned in any visible line — confirm against the full source.
71 : CSimpleFeatures<float64_t>(orig), working_file(orig.working_file), status(orig.status)
72 {
// The statement governed by this if (listing line 74) is missing from this listing.
73  if (orig.working_filename)
// Copy the labels only when orig has labels and there is at least one vector.
// NOTE(review): labels is not assigned in any visible line when this condition
// is false — confirm it is initialised elsewhere before it is freed.
75  if (orig.labels && get_num_vectors())
76  {
77  labels=SG_MALLOC(int32_t, get_num_vectors());
78  memcpy(labels, orig.labels, sizeof(int32_t)*get_num_vectors());
79  }
80 }
81 
// Reads one feature vector from working_file. The line carrying the return type
// and function name (listing line 82) and listing line 90 are missing from this
// listing.
//
// num    - index of the vector to read; asserted to be below num_vectors.
// len    - output: set to num_features.
// target - optional destination buffer; when NULL, a buffer of num_features
//          float64_t values is allocated with SG_MALLOC.
// Returns the buffer that was filled (target, or the newly allocated one).
//
// NOTE(review): only the upper bound of num is asserted; a negative num is not
// rejected by the visible code.
83  int32_t num, int32_t &len, float64_t* target)
84 {
85  ASSERT(num<num_vectors);
86  len=num_features;
87  float64_t* featurevector=target;
88  if (!featurevector)
89  featurevector=SG_MALLOC(float64_t, num_features);
// Vectors are stored back to back after filepos, num_features*doublelen bytes each.
// NOTE(review): the fseek result is not checked, and the offset is computed
// before being passed to fseek — confirm it cannot overflow for large files.
91  fseek(working_file, filepos+num_features*doublelen*num, SEEK_SET);
// NOTE(review): the fread call sits inside ASSERT; if ASSERT can be compiled out
// the read disappears with it — confirm ASSERT is always active. Elements are
// read as doublelen bytes each into a float64_t buffer, and doublelen comes from
// the file header.
92  ASSERT(fread(featurevector, doublelen, num_features, working_file)==(size_t) num_features);
93  return featurevector;
94 }
95 
// Loads the whole feature matrix and returns feature_matrix. The signature line
// (listing line 96) and listing lines 98, 100, 103-104 and 115 are missing from
// this listing; the allocation of the matrix and the per-vector read are
// presumably on those lines — TODO confirm.
//
// Visible steps: seek to the start of the data (filepos), log the matrix size in
// megabytes, then loop over all vectors while printing progress.
97 {
99  fseek(working_file, filepos, SEEK_SET);
101 
102  SG_INFO( "allocating feature matrix of size %.2fM\n", sizeof(double)*num_features*num_vectors/1024.0/1024.0);
105 
106  SG_INFO( "loading... be patient.\n");
107 
108  for (int32_t i=0; i<(int32_t) num_vectors; i++)
109  {
// Progress output: a percentage about every tenth of the vectors, otherwise a
// dot about every two-hundredth. The +1 keeps the modulus non-zero for small
// num_vectors.
110  if (!(i % (num_vectors/10+1)))
111  SG_PRINT( "%02d%%.", (int) (100.0*i/num_vectors));
112  else if (!(i % (num_vectors/200+1)))
113  SG_PRINT( ".");
114 
116  }
117  SG_DONE();
118 
119  return feature_matrix;
120 }
121 
// Returns the label stored for vector idx.
//
// idx - index of the vector; must satisfy 0 <= idx < num_vectors.
// Returns labels[idx] when a labels buffer is present, otherwise 0.
122 int32_t CRealFileFeatures::get_label(int32_t idx)
123 {
// idx is signed, so both bounds are checked before it is used to index labels.
124  ASSERT(idx>=0 && idx<num_vectors);
125  if (labels)
126  return labels[idx];
127  return 0;
128 }
129 
// Reads the file header and the labels from working_file. The signature line
// (listing line 130) and listing lines 132 and 147 are missing from this listing.
//
// Header as read here: one byte intlen, one byte doublelen, then endian, fourcc,
// num_vec, num_feat and preprocd, each read as a single item of intlen bytes.
// Afterwards filepos records the stream position following the header, the
// vector and feature counts are set, and labels is reallocated and filled with
// num_vec entries.
// Returns true; every read is checked through ASSERT.
//
// NOTE(review): all fread calls sit inside ASSERT; if ASSERT can be compiled out
// the reads disappear with it — confirm ASSERT is always active.
131 {
133  uint32_t num_vec=0;
134  uint32_t num_feat=0;
135 
136  ASSERT(fread(&intlen, sizeof(uint8_t), 1, working_file)==1);
137  ASSERT(fread(&doublelen, sizeof(uint8_t), 1, working_file)==1);
// NOTE(review): intlen comes from the file and is used as the byte count for the
// reads below; num_vec and num_feat are 4-byte locals, so a larger intlen would
// write past them. The types of the member targets are not visible here.
138  ASSERT(fread(&endian, (uint32_t) intlen, 1, working_file)== 1);
139  ASSERT(fread(&fourcc, (uint32_t) intlen, 1, working_file)==1);
140  ASSERT(fread(&num_vec, (uint32_t) intlen, 1, working_file)==1);
141  ASSERT(fread(&num_feat, (uint32_t) intlen, 1, working_file)==1);
142  ASSERT(fread(&preprocd, (uint32_t) intlen, 1, working_file)==1);
143  SG_INFO( "detected: intsize=%d, doublesize=%d, num_vec=%d, num_feat=%d, preprocd=%d\n", intlen, doublelen, num_vec, num_feat, preprocd);
// Remember where the header ends; the vector-reading code in this file seeks relative to filepos.
144  filepos=ftell(working_file);
145  set_num_vectors(num_vec);
146  set_num_features(num_feat);
// Listing line 147 is missing here; presumably it positions the stream at the
// labels — TODO confirm.
148  SG_FREE(labels);
// NOTE(review): the buffer is sized in units of int but filled with intlen-byte
// elements — this only fits when intlen <= sizeof(int).
149  labels=SG_MALLOC(int, num_vec);
150  ASSERT(fread(labels, intlen, num_vec, working_file) == num_vec);
151  return true;
152 }

SHOGUN Machine Learning Toolbox - Documentation