SHOGUN  v1.1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Distance.cpp
Go to the documentation of this file.
1 /*
2  * this program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2006-2009 Christian Gehl
8  * Written (W) 2006-2009 Soeren Sonnenburg
9  * Copyright (C) 2006-2009 Fraunhofer Institute FIRST and Max-Planck-Society
10  */
11 
12 #include <shogun/lib/config.h>
13 #include <shogun/lib/common.h>
14 #include <shogun/io/SGIO.h>
15 #include <shogun/io/File.h>
16 #include <shogun/lib/Time.h>
17 #include <shogun/base/Parallel.h>
18 #include <shogun/base/Parameter.h>
19 
22 
23 #include <string.h>
24 #include <unistd.h>
25 
26 #ifdef HAVE_PTHREAD
27 #include <pthread.h>
28 #endif
29 
30 using namespace shogun;
31 
32 #ifndef DOXYGEN_SHOULD_SKIP_THIS
33 struct DISTANCE_THREAD_PARAM
34 {
35  // CDistance instance used by thread to compute distance
36  CDistance* distance;
37  // distance matrix to store computed distances
38  float64_t* distance_matrix;
39  // starting index of the main loop
40  int32_t idx_start;
41  // end index of the main loop
42  int32_t idx_stop;
43  // step of the main loop
44  int32_t idx_step;
45  // number of lhs vectors
46  int32_t lhs_vectors_number;
47  // number of rhs vectors
48  int32_t rhs_vectors_number;
49  // whether matrix distance is symmetric
50  bool symmetric;
51  // chunking method
52  bool chunk_by_lhs;
53 };
54 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
55 
57 {
58  init();
59 }
60 
61 
63 {
64  init();
65  init(p_lhs, p_rhs);
66 }
67 
69 {
71  precomputed_matrix=NULL;
72 
74 }
75 
76 bool CDistance::init(CFeatures* l, CFeatures* r)
77 {
78  //make sure features were indeed supplied
79  ASSERT(l);
80  ASSERT(r);
81 
82  //make sure features are compatible
85 
86  //remove references to previous features
88 
89  //increase reference counts
90  SG_REF(l);
91  SG_REF(r);
92 
93  lhs=l;
94  rhs=r;
95 
97  precomputed_matrix=NULL ;
98 
99  return true;
100 }
101 
102 void CDistance::load(CFile* loader)
103 {
106 }
107 
108 void CDistance::save(CFile* writer)
109 {
112 }
113 
115 {
116  SG_UNREF(rhs);
117  rhs = NULL;
118 
119  SG_UNREF(lhs);
120  lhs = NULL;
121 }
122 
124 {
125  SG_UNREF(lhs);
126  lhs = NULL;
127 }
128 
131 {
132  SG_UNREF(rhs);
133  rhs = NULL;
134 }
135 
137 {
138  //make sure features were indeed supplied
139  ASSERT(r);
140 
141  //make sure features are compatible
144 
145  //remove references to previous rhs features
146  CFeatures* tmp=rhs;
147 
148  rhs=r;
149 
151  precomputed_matrix=NULL ;
152 
153  // return old features including reference count
154  return tmp;
155 }
156 
158 {
159  int32_t num_left=lhs->get_num_vectors();
160  int32_t num_right=rhs->get_num_vectors();
161  SG_INFO( "precomputing distance matrix (%ix%i)\n", num_left, num_right) ;
162 
163  ASSERT(num_left==num_right);
164  ASSERT(lhs==rhs);
165  int32_t num=num_left;
166 
168  precomputed_matrix=SG_MALLOC(float32_t, num*(num+1)/2);
169 
170  for (int32_t i=0; i<num; i++)
171  {
172  SG_PROGRESS(i*i,0,num*num);
173  for (int32_t j=0; j<=i; j++)
174  precomputed_matrix[i*(i+1)/2+j] = compute(i,j) ;
175  }
176 
177  SG_PROGRESS(num*num,0,num*num);
178  SG_DONE();
179 }
180 
182 {
183  int32_t m,n;
184  float64_t* data=get_distance_matrix_real(m,n,NULL);
185  return SGMatrix<float64_t>(data, m,n);
186 }
187 
189  int32_t &num_vec1, int32_t &num_vec2, float32_t* target)
190 {
191  float32_t* result = NULL;
192  CFeatures* f1 = lhs;
193  CFeatures* f2 = rhs;
194 
195  if (f1 && f2)
196  {
197  if (target && (num_vec1!=f1->get_num_vectors() || num_vec2!=f2->get_num_vectors()))
198  SG_ERROR("distance matrix does not fit into target\n");
199 
200  num_vec1=f1->get_num_vectors();
201  num_vec2=f2->get_num_vectors();
202  int64_t total_num=num_vec1*num_vec2;
203  int32_t num_done=0;
204 
205  SG_DEBUG("returning distance matrix of size %dx%d\n", num_vec1, num_vec2);
206 
207  if (target)
208  result=target;
209  else
210  result=SG_MALLOC(float32_t, total_num);
211 
212  if ( (f1 == f2) && (num_vec1 == num_vec2) )
213  {
214  for (int32_t i=0; i<num_vec1; i++)
215  {
216  for (int32_t j=i; j<num_vec1; j++)
217  {
218  float64_t v=distance(i,j);
219 
220  result[i+j*num_vec1]=v;
221  result[j+i*num_vec1]=v;
222 
223  if (num_done%100000)
224  SG_PROGRESS(num_done, 0, total_num-1);
225 
226  if (i!=j)
227  num_done+=2;
228  else
229  num_done+=1;
230  }
231  }
232  }
233  else
234  {
235  for (int32_t i=0; i<num_vec1; i++)
236  {
237  for (int32_t j=0; j<num_vec2; j++)
238  {
239  result[i+j*num_vec1]=distance(i,j) ;
240 
241  if (num_done%100000)
242  SG_PROGRESS(num_done, 0, total_num-1);
243 
244  num_done++;
245  }
246  }
247  }
248 
249  SG_DONE();
250  }
251  else
252  SG_ERROR("no features assigned to distance\n");
253 
254  return result;
255 }
256 
258  int32_t &lhs_vectors_number, int32_t &rhs_vectors_number, float64_t* target)
259 {
260  float64_t* distance_matrix = NULL;
261  CFeatures* lhs_features = lhs;
262  CFeatures* rhs_features = rhs;
263 
264  // check for errors
265  if (!lhs_features || !rhs_features)
266  SG_ERROR("No features assigned to the distance.\n");
267 
268  if (target &&
269  (lhs_vectors_number!=lhs_features->get_num_vectors() ||
270  rhs_vectors_number!=rhs_features->get_num_vectors()))
271  SG_ERROR("Distance matrix does not fit into the given target.\n");
272 
273  // init numbers of vectors and total number of distances
274  lhs_vectors_number = lhs_features->get_num_vectors();
275  rhs_vectors_number = rhs_features->get_num_vectors();
276  int64_t total_distances_number = lhs_vectors_number*rhs_vectors_number;
277 
278  SG_DEBUG("Calculating distance matrix of size %dx%d.\n", lhs_vectors_number, rhs_vectors_number);
279 
280  // redirect to target or allocate memory
281  if (target)
282  distance_matrix = target;
283  else
284  distance_matrix = SG_MALLOC(float64_t, total_distances_number);
285 
286  // check if we're computing symmetric distance_matrix
287  bool symmetric = (lhs_features==rhs_features) || (lhs_vectors_number==rhs_vectors_number);
288  // select chunking method according to greatest dimension
289  bool chunk_by_lhs = (lhs_vectors_number >= rhs_vectors_number);
290 
291 #ifdef HAVE_PTHREAD
292  // init parallel to work
293  int32_t num_threads = parallel->get_num_threads();
294  ASSERT(num_threads>0);
295  pthread_t* threads = SG_MALLOC(pthread_t, num_threads);
296  DISTANCE_THREAD_PARAM* parameters = SG_MALLOC(DISTANCE_THREAD_PARAM,num_threads);
297  pthread_attr_t attr;
298  pthread_attr_init(&attr);
299  pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
300  // run threads
301  for (int32_t t=0; t<num_threads; t++)
302  {
303  parameters[t].idx_start = t;
304  parameters[t].idx_stop = chunk_by_lhs ? lhs_vectors_number : rhs_vectors_number;
305  parameters[t].idx_step = num_threads;
306  parameters[t].distance_matrix = distance_matrix;
307  parameters[t].symmetric = symmetric;
308  parameters[t].lhs_vectors_number = lhs_vectors_number;
309  parameters[t].rhs_vectors_number = rhs_vectors_number;
310  parameters[t].chunk_by_lhs = chunk_by_lhs;
311  parameters[t].distance = this;
312  pthread_create(&threads[t], &attr, run_distance_thread, (void*)&parameters[t]);
313  }
314  // join, i.e. wait threads for finish
315  for (int32_t t=0; t<num_threads; t++)
316  {
317  pthread_join(threads[t], NULL);
318  }
319  // cleanup
320  pthread_attr_destroy(&attr);
321  SG_FREE(parameters);
322  SG_FREE(threads);
323 #else
324  // init one-threaded parameters
325  DISTANCE_THREAD_PARAM single_thread_param;
326  single_thread_param.idx_start = 0;
327  single_thread_param.idx_stop = chunk_by_lhs ? lhs_vectors_number : rhs_vectors_number;
328  single_thread_param.idx_step = 1;
329  single_thread_param.distance_matrix = distance_matrix;
330  single_thread_param.symmetric = symmetric;
331  single_thread_param.lhs_vectors_number = lhs_vectors_number;
332  single_thread_param.rhs_vectors_number = rhs_vectors_number;
333  single_thread_param.chunk_by_lhs = chunk_by_lhs;
334  single_thread_param.distance = this;
335  // run thread
336  run_distance_thread((void*)&single_thread_param);
337 #endif
338 
339  return distance_matrix;
340 }
341 
342 void CDistance::init()
343 {
344  precomputed_matrix = NULL;
345  precompute_matrix = false;
346  lhs = NULL;
347  rhs = NULL;
348 
349  m_parameters->add((CSGObject**) &lhs, "lhs",
350  "Feature vectors to occur on left hand side.");
351  m_parameters->add((CSGObject**) &rhs, "rhs",
352  "Feature vectors to occur on right hand side.");
353 }
354 
356 {
357  DISTANCE_THREAD_PARAM* parameters = (DISTANCE_THREAD_PARAM*)p;
358  float64_t* distance_matrix = parameters->distance_matrix;
359  CDistance* distance = parameters->distance;
360  int32_t idx_start = parameters->idx_start;
361  int32_t idx_stop = parameters->idx_stop;
362  int32_t idx_step = parameters->idx_step;
363  int32_t lhs_vectors_number = parameters->lhs_vectors_number;
364  int32_t rhs_vectors_number = parameters->rhs_vectors_number;
365  bool symmetric = parameters->symmetric;
366  bool chunk_by_lhs = parameters->chunk_by_lhs;
367 
368  if (symmetric)
369  {
370  for (int32_t i=idx_start; i<idx_stop; i+=idx_step)
371  {
372  for (int32_t j=i; j<rhs_vectors_number; j++)
373  {
374  float64_t ij_distance = distance->compute(i,j);
375  distance_matrix[i*rhs_vectors_number+j] = ij_distance;
376  distance_matrix[j*rhs_vectors_number+i] = ij_distance;
377  }
378  }
379  }
380  else
381  {
382  if (chunk_by_lhs)
383  {
384  for (int32_t i=idx_start; i<idx_stop; i+=idx_step)
385  {
386  for (int32_t j=0; j<rhs_vectors_number; j++)
387  {
388  distance_matrix[j*lhs_vectors_number+i] = distance->compute(i,j);
389  }
390  }
391  }
392  else
393  {
394  for (int32_t j=idx_start; j<idx_stop; j+=idx_step)
395  {
396  for (int32_t i=0; i<lhs_vectors_number; i++)
397  {
398  distance_matrix[j*lhs_vectors_number+i] = distance->compute(i,j);
399  }
400  }
401  }
402  }
403 
404  return NULL;
405 }

SHOGUN Machine Learning Toolbox - Documentation