SHOGUN  v1.1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Math.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 1999-2009 Soeren Sonnenburg
8  * Written (W) 1999-2008 Gunnar Raetsch
9  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
10  */
11 
12 // Math.cpp: implementation of the CMath class.
13 //
15 
16 
17 #include <shogun/lib/common.h>
20 #include <shogun/io/SGIO.h>
21 
22 #include <stdio.h>
23 #include <stdlib.h>
24 #include <math.h>
25 
26 using namespace shogun;
27 
28 #ifdef USE_LOGCACHE
29 #ifdef USE_HMMDEBUG
30 #define MAX_LOG_TABLE_SIZE 10*1024*1024
31 #define LOG_TABLE_PRECISION 1e-6
32 #else //USE_HMMDEBUG
33 #define MAX_LOG_TABLE_SIZE 123*1024*1024
34 #define LOG_TABLE_PRECISION 1e-15
35 #endif //USE_HMMDEBUG
36 int32_t CMath::LOGACCURACY = 0; // 100000 steps per integer
37 #endif // USE_LOGCACHE
38 
39 int32_t CMath::LOGRANGE = 0; // range for logtable: log(1+exp(x)) -25 <= x <= 0
40 
41 const float64_t CMath::INFTY = -log(0.0); // infinity
42 const float64_t CMath::ALMOST_INFTY = +1e+20; //a large number
44 const float64_t CMath::PI=PI;
48 
49 #ifdef USE_LOGCACHE
50 float64_t* CMath::logtable = NULL;
51 #endif
52 char* CMath::rand_state = NULL;
53 uint32_t CMath::seed = 0;
54 
56 : CSGObject()
57 {
59  init_random();
60 
61 #ifdef USE_LOGCACHE
62  LOGRANGE=CMath::determine_logrange();
63  LOGACCURACY=CMath::determine_logaccuracy(LOGRANGE);
64  CMath::logtable=SG_MALLOC(float64_t, LOGRANGE*LOGACCURACY);
65  init_log_table();
66 #else
67  int32_t i=0;
68  while ((float64_t)log(1+((float64_t)exp(-float64_t(i)))))
69  i++;
70 
71  LOGRANGE=i;
72 #endif
73 }
74 
76 {
78  CMath::rand_state=NULL;
79 #ifdef USE_LOGCACHE
80  SG_FREE(CMath::logtable);
81  CMath::logtable=NULL;
82 #endif
83 }
84 
85 namespace shogun
86 {
87 template <>
88 void CMath::display_vector(const uint8_t* vector, int32_t n, const char* name)
89 {
90  ASSERT(n>=0);
91  SG_SPRINT("%s=[", name);
92  for (int32_t i=0; i<n; i++)
93  SG_SPRINT("%d%s", vector[i], i==n-1? "" : ",");
94  SG_SPRINT("]\n");
95 }
96 
97 template <>
98 void CMath::display_vector(const int32_t* vector, int32_t n, const char* name)
99 {
100  ASSERT(n>=0);
101  SG_SPRINT("%s=[", name);
102  for (int32_t i=0; i<n; i++)
103  SG_SPRINT("%d%s", vector[i], i==n-1? "" : ",");
104  SG_SPRINT("]\n");
105 }
106 
107 template <>
108 void CMath::display_vector(const int64_t* vector, int32_t n, const char* name)
109 {
110  ASSERT(n>=0);
111  SG_SPRINT("%s=[", name);
112  for (int32_t i=0; i<n; i++)
113  SG_SPRINT("%lld%s", vector[i], i==n-1? "" : ",");
114  SG_SPRINT("]\n");
115 }
116 
117 template <>
118 void CMath::display_vector(const uint64_t* vector, int32_t n, const char* name)
119 {
120  ASSERT(n>=0);
121  SG_SPRINT("%s=[", name);
122  for (int32_t i=0; i<n; i++)
123  SG_SPRINT("%llu%s", vector[i], i==n-1? "" : ",");
124  SG_SPRINT("]\n");
125 }
126 
127 template <>
128 void CMath::display_vector(const float32_t* vector, int32_t n, const char* name)
129 {
130  ASSERT(n>=0);
131  SG_SPRINT("%s=[", name);
132  for (int32_t i=0; i<n; i++)
133  SG_SPRINT("%g%s", vector[i], i==n-1? "" : ",");
134  SG_SPRINT("]\n");
135 }
136 
137 template <>
138 void CMath::display_vector(const float64_t* vector, int32_t n, const char* name)
139 {
140  ASSERT(n>=0);
141  SG_SPRINT("%s=[", name);
142  for (int32_t i=0; i<n; i++)
143  SG_SPRINT("%.18g%s", vector[i], i==n-1? "" : ",");
144  SG_SPRINT("]\n");
145 }
146 
147 template <>
148 void CMath::display_vector(const floatmax_t* vector, int32_t n, const char* name)
149 {
150  ASSERT(n>=0);
151  SG_SPRINT("%s=[", name);
152  for (int32_t i=0; i<n; i++)
153  SG_SPRINT("%.36Lg%s", (long double) vector[i], i==n-1? "" : ",");
154  SG_SPRINT("]\n");
155 }
156 
157 template <>
159  const int32_t* matrix, int32_t rows, int32_t cols, const char* name)
160 {
161  ASSERT(rows>=0 && cols>=0);
162  SG_SPRINT("%s=[\n", name);
163  for (int32_t i=0; i<rows; i++)
164  {
165  SG_SPRINT("[");
166  for (int32_t j=0; j<cols; j++)
167  SG_SPRINT("\t%d%s", matrix[j*rows+i],
168  j==cols-1? "" : ",");
169  SG_SPRINT("]%s\n", i==rows-1? "" : ",");
170  }
171  SG_SPRINT("]\n");
172 }
173 
174 template <>
176  const float64_t* matrix, int32_t rows, int32_t cols, const char* name)
177 {
178  ASSERT(rows>=0 && cols>=0);
179  SG_SPRINT("%s=[\n", name);
180  for (int32_t i=0; i<rows; i++)
181  {
182  SG_SPRINT("[");
183  for (int32_t j=0; j<cols; j++)
184  SG_SPRINT("\t%.18g%s", (double) matrix[j*rows+i],
185  j==cols-1? "" : ",");
186  SG_SPRINT("]%s\n", i==rows-1? "" : ",");
187  }
188  SG_SPRINT("]\n");
189 }
190 
191 template <>
193  const float32_t* matrix, int32_t rows, int32_t cols, const char* name)
194 {
195  ASSERT(rows>=0 && cols>=0);
196  SG_SPRINT("%s=[\n", name);
197  for (int32_t i=0; i<rows; i++)
198  {
199  SG_SPRINT("[");
200  for (int32_t j=0; j<cols; j++)
201  SG_SPRINT("\t%.18g%s", (float) matrix[j*rows+i],
202  j==cols-1? "" : ",");
203  SG_SPRINT("]%s\n", i==rows-1? "" : ",");
204  }
205  SG_SPRINT("]\n");
206 }
207 
208 }
209 
211 {
212  SGMatrix<float64_t> table(NULL,2,3,false);
213  int32_t len=tables.num_cols/3;
214 
215  SGVector<float64_t> v(len);
216  for (int32_t i=0; i<len; i++)
217  {
218  table.matrix=&tables.matrix[2*3*i];
220  }
221  return v;
222 }
223 
225 {
226  ASSERT(table.num_rows==2);
227  ASSERT(table.num_cols==3);
228 
229  int32_t m_len=3+2;
230  float64_t* m=SG_MALLOC(float64_t, 3+2);
231  m[0]=table.matrix[0]+table.matrix[2]+table.matrix[4];
232  m[1]=table.matrix[1]+table.matrix[3]+table.matrix[5];
233  m[2]=table.matrix[0]+table.matrix[1];
234  m[3]=table.matrix[2]+table.matrix[3];
235  m[4]=table.matrix[4]+table.matrix[5];
236 
237  float64_t n = CMath::sum(m, m_len) / 2.0;
238  int32_t x_len=2*3* CMath::sq(CMath::max(m, m_len));
239  float64_t* x = SG_MALLOC(float64_t, x_len);
240  CMath::fill_vector(x, x_len, 0.0);
241 
242  float64_t log_nom=0.0;
243  for (int32_t i=0; i<3+2; i++)
244  log_nom+=CMath::lgamma(m[i]+1);
245  log_nom-=CMath::lgamma(n+1.0);
246 
247  float64_t log_denomf=0;
248  floatmax_t log_denom=0;
249 
250  for (int32_t i=0; i<3*2; i++)
251  {
252  log_denom+=CMath::lgammal((floatmax_t) table.matrix[i]+1);
253  log_denomf+=gamma(table.matrix[i]+1);
254  }
255 
256  floatmax_t prob_table_log=log_nom - log_denom;
257 
258  int32_t dim1 = CMath::min(m[0], m[2]);
259 
260  //traverse all possible tables with given m
261  int32_t counter = 0;
262  for (int32_t k=0; k<=dim1; k++)
263  {
264  for (int32_t l=CMath::max(0.0,m[0]-m[4]-k); l<=CMath::min(m[0]-k, m[3]); l++)
265  {
266  x[0 + 0*2 + counter*2*3] = k;
267  x[0 + 1*2 + counter*2*3] = l;
268  x[0 + 2*2 + counter*2*3] = m[0] - x[0 + 0*2 + counter*2*3] - x[0 + 1*2 + counter*2*3];
269  x[1 + 0*2 + counter*2*3] = m[2] - x[0 + 0*2 + counter*2*3];
270  x[1 + 1*2 + counter*2*3] = m[3] - x[0 + 1*2 + counter*2*3];
271  x[1 + 2*2 + counter*2*3] = m[4] - x[0 + 2*2 + counter*2*3];
272 
273  counter++;
274  }
275  }
276 
277 //#define DEBUG_FISHER_TABLE
278 #ifdef DEBUG_FISHER_TABLE
279  SG_SPRINT("counter=%d\n", counter);
280  SG_SPRINT("dim1=%d\n", dim1);
281  SG_SPRINT("l=%g...%g\n", CMath::max(0.0,m[0]-m[4]-0), CMath::min(m[0]-0, m[3]));
282  SG_SPRINT("n=%g\n", n);
283  SG_SPRINT("prob_table_log=%.18Lg\n", prob_table_log);
284  SG_SPRINT("log_denomf=%.18g\n", log_denomf);
285  SG_SPRINT("log_denom=%.18Lg\n", log_denom);
286  SG_SPRINT("log_nom=%.18g\n", log_nom);
287  display_vector(m, m_len, "marginals");
288  display_vector(x, 2*3*counter, "x");
289 #endif // DEBUG_FISHER_TABLE
290 
291 
292  floatmax_t* log_denom_vec=SG_MALLOC(floatmax_t, counter);
293  CMath::fill_vector(log_denom_vec, counter, (floatmax_t) 0.0);
294 
295  for (int32_t k=0; k<counter; k++)
296  {
297  for (int32_t j=0; j<3; j++)
298  {
299  for (int32_t i=0; i<2; i++)
300  log_denom_vec[k]+=CMath::lgammal(x[i + j*2 + k*2*3]+1.0);
301  }
302  }
303 
304  for (int32_t i=0; i<counter; i++)
305  log_denom_vec[i]=log_nom-log_denom_vec[i];
306 
307 #ifdef DEBUG_FISHER_TABLE
308  display_vector(log_denom_vec, counter, "log_denom_vec");
309 #endif // DEBUG_FISHER_TABLE
310 
311 
312  float64_t nonrand_p=-CMath::INFTY;
313  for (int32_t i=0; i<counter; i++)
314  {
315  if (log_denom_vec[i]<=prob_table_log)
316  nonrand_p=CMath::logarithmic_sum(nonrand_p, log_denom_vec[i]);
317  }
318 
319 #ifdef DEBUG_FISHER_TABLE
320  SG_SPRINT("nonrand_p=%.18g\n", nonrand_p);
321  SG_SPRINT("exp_nonrand_p=%.18g\n", CMath::exp(nonrand_p));
322 #endif // DEBUG_FISHER_TABLE
323 
324  nonrand_p=CMath::exp(nonrand_p);
325 
326  SG_FREE(log_denom_vec);
327  SG_FREE(x);
328  SG_FREE(m);
329 
330  return nonrand_p;
331 }
332 
333 
334 #ifdef USE_LOGCACHE
335 int32_t CMath::determine_logrange()
336 {
337  int32_t i;
338  float64_t acc=0;
339  for (i=0; i<50; i++)
340  {
341  acc=((float64_t)log(1+((float64_t)exp(-float64_t(i)))));
342  if (acc<=(float64_t)LOG_TABLE_PRECISION)
343  break;
344  }
345 
346  SG_SINFO( "determined range for x in table log(1+exp(-x)) is:%d (error:%G)\n",i,acc);
347  return i;
348 }
349 
350 int32_t CMath::determine_logaccuracy(int32_t range)
351 {
352  range=MAX_LOG_TABLE_SIZE/range/((int)sizeof(float64_t));
353  SG_SINFO( "determined accuracy for x in table log(1+exp(-x)) is:%d (error:%G)\n",range,1.0/(double) range);
354  return range;
355 }
356 
357 //init log table of form log(1+exp(x))
358 void CMath::init_log_table()
359 {
360  for (int32_t i=0; i< LOGACCURACY*LOGRANGE; i++)
361  logtable[i]=log(1+exp(float64_t(-i)/float64_t(LOGACCURACY)));
362 }
363 #endif
364 
365 void CMath::sort(int32_t *a, int32_t cols, int32_t sort_col)
366 {
367  int32_t changed=1;
368  if (a[0]==-1) return ;
369  while (changed)
370  {
371  changed=0; int32_t i=0 ;
372  while ((a[(i+1)*cols]!=-1) && (a[(i+1)*cols+1]!=-1)) // to be sure
373  {
374  if (a[i*cols+sort_col]>a[(i+1)*cols+sort_col])
375  {
376  for (int32_t j=0; j<cols; j++)
377  CMath::swap(a[i*cols+j],a[(i+1)*cols+j]) ;
378  changed=1 ;
379  } ;
380  i++ ;
381  } ;
382  } ;
383 }
384 
385 void CMath::sort(float64_t *a, int32_t* idx, int32_t N)
386 {
387  int32_t changed=1;
388  while (changed)
389  {
390  changed=0;
391  for (int32_t i=0; i<N-1; i++)
392  {
393  if (a[i]>a[i+1])
394  {
395  swap(a[i],a[i+1]) ;
396  swap(idx[i],idx[i+1]) ;
397  changed=1 ;
398  } ;
399  } ;
400  } ;
401 
402 }
403 
405  char* seq1, char* seq2, int32_t l1, int32_t l2, float64_t gapCost)
406 {
407  float64_t actCost=0 ;
408  int32_t i1, i2 ;
409  float64_t* const gapCosts1 = SG_MALLOC(float64_t, l1 );
410  float64_t* const gapCosts2 = SG_MALLOC(float64_t, l2 );
411  float64_t* costs2_0 = SG_MALLOC(float64_t, l2 + 1 );
412  float64_t* costs2_1 = SG_MALLOC(float64_t, l2 + 1 );
413 
414  // initialize borders
415  for( i1 = 0; i1 < l1; ++i1 ) {
416  gapCosts1[ i1 ] = gapCost * i1;
417  }
418  costs2_1[ 0 ] = 0;
419  for( i2 = 0; i2 < l2; ++i2 ) {
420  gapCosts2[ i2 ] = gapCost * i2;
421  costs2_1[ i2+1 ] = costs2_1[ i2 ] + gapCosts2[ i2 ];
422  }
423  // compute alignment
424  for( i1 = 0; i1 < l1; ++i1 ) {
425  swap( costs2_0, costs2_1 );
426  actCost = costs2_0[ 0 ] + gapCosts1[ i1 ];
427  costs2_1[ 0 ] = actCost;
428  for( i2 = 0; i2 < l2; ++i2 ) {
429  const float64_t actMatch = costs2_0[ i2 ] + ( seq1[i1] == seq2[i2] );
430  const float64_t actGap1 = costs2_0[ i2+1 ] + gapCosts1[ i1 ];
431  const float64_t actGap2 = actCost + gapCosts2[ i2 ];
432  const float64_t actGap = min( actGap1, actGap2 );
433  actCost = min( actMatch, actGap );
434  costs2_1[ i2+1 ] = actCost;
435  }
436  }
437 
438  delete [] gapCosts1;
439  delete [] gapCosts2;
440  delete [] costs2_0;
441  delete [] costs2_1;
442 
443  // return the final cost
444  return actCost;
445 }
446 
448 {
449  double e=0;
450 
451  for (int32_t i=0; i<len; i++)
452  for (int32_t j=0; j<len; j++)
453  e+=exp(p2[j*len+i])*(p2[j*len+i]-p1[i]-p1[j]);
454 
455  return (float64_t) e;
456 }
457 
459 {
460  double e=0;
461 
462  for (int32_t i=0; i<len; i++)
463  e+=exp(p[i])*(p[i]-q[i]);
464 
465  return (float64_t) e;
466 }
467 
469 {
470  double e=0;
471 
472  for (int32_t i=0; i<len; i++)
473  e-=exp(p[i])*p[i];
474 
475  return (float64_t) e;
476 }
477 
478 
479 //Howto construct the pseudo inverse (from "The Matrix Cookbook")
480 //
481 //Assume A does not have full rank, i.e. A is n \times m and rank(A) = r < min(n;m).
482 //
483 //The matrix A+ known as the pseudo inverse is unique and does always exist.
484 //
485 //The pseudo inverse A+ can be constructed from the singular value
486 //decomposition A = UDV^T , by A^+ = V(D+)U^T.
487 
488 #ifdef HAVE_LAPACK
490  float64_t* matrix, int32_t rows, int32_t cols, float64_t* target)
491 {
492  if (!target)
493  target=SG_MALLOC(float64_t, rows*cols);
494 
495  char jobu='A';
496  char jobvt='A';
497  int m=rows; /* for calling external lib */
498  int n=cols; /* for calling external lib */
499  int lda=m; /* for calling external lib */
500  int ldu=m; /* for calling external lib */
501  int ldvt=n; /* for calling external lib */
502  int info=-1; /* for calling external lib */
503  int32_t lsize=CMath::min((int32_t) m, (int32_t) n);
504  double* s=SG_MALLOC(double, lsize);
505  double* u=SG_MALLOC(double, m*m);
506  double* vt=SG_MALLOC(double, n*n);
507 
508  wrap_dgesvd(jobu, jobvt, m, n, matrix, lda, s, u, ldu, vt, ldvt, &info);
509  ASSERT(info==0);
510 
511  for (int32_t i=0; i<n; i++)
512  {
513  for (int32_t j=0; j<lsize; j++)
514  vt[i*n+j]=vt[i*n+j]/s[j];
515  }
516 
517  cblas_dgemm(CblasColMajor, CblasTrans, CblasTrans, m, n, m, 1.0, vt, ldvt, u, ldu, 0, target, m);
518 
519  SG_FREE(u);
520  SG_FREE(vt);
521  SG_FREE(s);
522 
523  return target;
524 }
525 #endif

SHOGUN Machine Learning Toolbox - Documentation