23 using namespace shogun;
52 void CLibLinear::init()
86 SG_ERROR(
"Specified features are not of type CDotFeatures\n");
101 if (num_feat!=num_train_labels)
103 SG_ERROR(
"L1 methods require the data to be transposed: "
104 "number of features %d does not match number of "
105 "training labels %d\n",
106 num_feat, num_train_labels);
113 if (num_vec!=num_train_labels)
115 SG_ERROR(
"number of vectors %d does not match "
116 "number of training labels %d\n",
117 num_vec, num_train_labels);
143 for (int32_t i=0; i<prob.l; i++)
148 for(
int i=0;i<prob.l;i++)
155 SG_INFO(
"%d training points %d dims\n", prob.l, prob.n);
157 function *fun_obj=NULL;
164 fun_obj=
new l2r_lr_fun(&prob, Cp, Cn);
166 SG_DEBUG(
"starting L2R_LR training via tron\n");
174 fun_obj=
new l2r_l2_svc_fun(&prob, Cp, Cn);
211 SG_ERROR(
"Error: unknown solver_type\n");
250 #define GETI(i) (y[i]+1)
253 void CLibLinear::solve_l2r_l1l2_svc(
257 int w_size = prob->n;
270 double PGmax_new, PGmin_new;
273 double diag[3] = {0.5/Cn, 0, 0.5/Cp};
288 for(i=0; i<w_size; i++)
302 QD[i] = diag[
GETI(i)];
304 QD[i] += prob->x->dot(i, prob->x,i);
318 for (i=0; i<active_size; i++)
320 int j = i+rand()%(active_size-i);
324 for (s=0;s<active_size;s++)
329 G = prob->x->dense_dot(i,
w, n);
338 C = upper_bound[
GETI(i)];
339 G += alpha[i]*diag[
GETI(i)];
354 else if (alpha[i] == C)
372 if(fabs(PG) > 1.0e-12)
374 double alpha_old = alpha[i];
376 d = (alpha[i] - alpha_old)*yi;
378 prob->x->add_to_dense_vec(d, i,
w, n);
401 PGmax_old = PGmax_new;
402 PGmin_old = PGmin_new;
410 SG_INFO(
"optimization finished, #iter = %d\n",iter);
413 SG_WARNING(
"reaching max number of iterations\nUsing -s 2 may be faster"
414 "(also see liblinear FAQ)\n\n");
421 for(i=0; i<w_size; i++)
425 v += alpha[i]*(alpha[i]*diag[
GETI(i)] - 2);
429 SG_INFO(
"Objective value = %lf\n",v/2);
450 #define GETI(i) (y[i]+1)
453 void CLibLinear::solve_l1r_l2_svc(
454 problem *prob_col,
double eps,
double Cp,
double Cn)
457 int w_size = prob_col->n;
459 int active_size = w_size;
460 int max_num_linesearch = 20;
463 double d, G_loss, G,
H;
467 double d_old, d_diff;
468 double loss_old=0, loss_new;
469 double appxcond, cond;
474 double *xj_sq =
SG_MALLOC(
double, w_size);
481 double C[3] = {Cn,0,Cp};
484 if (prob_col->use_bias)
490 if(prob_col->y[j] > 0)
496 for(j=0; j<w_size; j++)
502 if (use_bias && j==n)
504 for (ind=0; ind<l; ind++)
505 xj_sq[n] += C[
GETI(ind)];
510 while (x->get_next_feature(ind, val, iterator))
511 xj_sq[j] += C[
GETI(ind)]*val*val;
512 x->free_feature_iterator(iterator);
520 if (max_train_time > 0 && start_time.
cur_time_diff() > max_train_time)
525 for(j=0; j<active_size; j++)
527 int i = j+rand()%(active_size-j);
531 for(s=0; s<active_size; s++)
537 if (use_bias && j==n)
539 for (ind=0; ind<l; ind++)
543 double tmp = C[
GETI(ind)]*y[ind];
544 G_loss -= tmp*b[ind];
551 iterator=x->get_feature_iterator(j);
553 while (x->get_next_feature(ind, val, iterator))
557 double tmp = C[
GETI(ind)]*val*y[ind];
558 G_loss -= tmp*b[ind];
562 x->free_feature_iterator(iterator);
573 double violation = 0;
580 else if(Gp>Gmax_old/l && Gn<-Gmax_old/l)
589 violation = fabs(Gp);
591 violation = fabs(Gn);
598 else if(Gn >=
H*w[j])
603 if(fabs(d) < 1.0e-12)
606 double delta = fabs(w[j]+d)-fabs(w[j]) + G*d;
609 for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++)
612 cond = fabs(w[j]+d)-fabs(w[j]) - sigma*delta;
614 appxcond = xj_sq[j]*d*d + G_loss*d + cond;
617 if (use_bias && j==n)
619 for (ind=0; ind<l; ind++)
620 b[ind] += d_diff*y[ind];
625 iterator=x->get_feature_iterator(j);
626 while (x->get_next_feature(ind, val, iterator))
627 b[ind] += d_diff*val*y[ind];
629 x->free_feature_iterator(iterator);
634 if(num_linesearch == 0)
639 if (use_bias && j==n)
641 for (ind=0; ind<l; ind++)
644 loss_old += C[
GETI(ind)]*b[ind]*b[ind];
645 double b_new = b[ind] + d_diff*y[ind];
648 loss_new += C[
GETI(ind)]*b_new*b_new;
653 iterator=x->get_feature_iterator(j);
654 while (x->get_next_feature(ind, val, iterator))
657 loss_old += C[
GETI(ind)]*b[ind]*b[ind];
658 double b_new = b[ind] + d_diff*val*y[ind];
661 loss_new += C[
GETI(ind)]*b_new*b_new;
663 x->free_feature_iterator(iterator);
669 if (use_bias && j==n)
671 for (ind=0; ind<l; ind++)
673 double b_new = b[ind] + d_diff*y[ind];
676 loss_new += C[
GETI(ind)]*b_new*b_new;
681 iterator=x->get_feature_iterator(j);
682 while (x->get_next_feature(ind, val, iterator))
684 double b_new = b[ind] + d_diff*val*y[ind];
687 loss_new += C[
GETI(ind)]*b_new*b_new;
689 x->free_feature_iterator(iterator);
693 cond = cond + loss_new - loss_old;
707 if(num_linesearch >= max_num_linesearch)
710 for(
int i=0; i<l; i++)
713 for(
int i=0; i<n; i++)
718 iterator=x->get_feature_iterator(i);
719 while (x->get_next_feature(ind, val, iterator))
720 b[ind] -= w[i]*val*y[ind];
721 x->free_feature_iterator(iterator);
724 if (use_bias && w[n])
726 for (ind=0; ind<l; ind++)
727 b[ind] -= w[n]*y[ind];
733 Gmax_init = Gmax_new;
738 if(Gmax_new <= eps*Gmax_init)
740 if(active_size == w_size)
744 active_size = w_size;
754 SG_INFO(
"optimization finished, #iter = %d\n", iter);
755 if(iter >= max_iterations)
756 SG_WARNING(
"\nWARNING: reaching max number of iterations\n");
762 for(j=0; j<w_size; j++)
772 v += C[
GETI(j)]*b[j]*b[j];
774 SG_INFO(
"Objective value = %lf\n", v);
775 SG_INFO(
"#nonzeros/#features = %d/%d\n", nnz, w_size);
795 #define GETI(i) (y[i]+1)
798 void CLibLinear::solve_l1r_lr(
799 const problem *prob_col,
double eps,
800 double Cp,
double Cn)
803 int w_size = prob_col->n;
805 int active_size = w_size;
806 int max_num_linesearch = 20;
814 double sum1, appxcond1;
815 double sum2, appxcond2;
821 double *exp_wTx_new =
SG_MALLOC(
double, l);
822 double *xj_max =
SG_MALLOC(
double, w_size);
823 double *C_sum =
SG_MALLOC(
double, w_size);
824 double *xjneg_sum =
SG_MALLOC(
double, w_size);
825 double *xjpos_sum =
SG_MALLOC(
double, w_size);
832 double C[3] = {Cn,0,Cp};
835 if (prob_col->use_bias)
841 if(prob_col->y[j] > 0)
846 for(j=0; j<w_size; j++)
857 for (ind=0; ind<l; ind++)
861 C_sum[j] += C[
GETI(ind)];
863 xjneg_sum[j] += C[
GETI(ind)];
865 xjpos_sum[j] += C[
GETI(ind)];
875 C_sum[j] += C[
GETI(ind)];
877 xjneg_sum[j] += C[
GETI(ind)]*val;
879 xjpos_sum[j] += C[
GETI(ind)]*val;
893 for(j=0; j<active_size; j++)
895 int i = j+rand()%(active_size-j);
899 for(s=0; s<active_size; s++)
908 for (ind=0; ind<l; ind++)
910 double exp_wTxind = exp_wTx[ind];
911 double tmp1 = 1.0/(1+exp_wTxind);
912 double tmp2 = C[
GETI(ind)]*tmp1;
913 double tmp3 = tmp2*exp_wTxind;
924 double exp_wTxind = exp_wTx[ind];
925 double tmp1 = val/(1+exp_wTxind);
926 double tmp2 = C[
GETI(ind)]*tmp1;
927 double tmp3 = tmp2*exp_wTxind;
935 G = -sum2 + xjneg_sum[j];
939 double violation = 0;
946 else if(Gp>Gmax_old/l && Gn<-Gmax_old/l)
955 violation = fabs(Gp);
957 violation = fabs(Gn);
964 else if(Gn >= H*
w[j])
969 if(fabs(d) < 1.0e-12)
974 double delta = fabs(
w[j]+d)-fabs(
w[j]) + G*d;
976 for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++)
978 cond = fabs(
w[j]+d)-fabs(
w[j]) - sigma*delta;
982 double tmp = exp(d*xj_max[j]);
983 appxcond1 = log(1+sum1*(tmp-1)/xj_max[j]/C_sum[j])*C_sum[j] + cond - d*xjpos_sum[j];
984 appxcond2 = log(1+sum2*(1/tmp-1)/xj_max[j]/C_sum[j])*C_sum[j] + cond + d*xjneg_sum[j];
989 for (ind=0; ind<l; ind++)
990 exp_wTx[ind] *= exp(d);
997 exp_wTx[ind] *= exp(d*val);
1004 cond += d*xjneg_sum[j];
1010 for (ind=0; ind<l; ind++)
1012 double exp_dx = exp(d);
1013 exp_wTx_new[i] = exp_wTx[ind]*exp_dx;
1014 cond += C[
GETI(ind)]*log((1+exp_wTx_new[i])/(exp_dx+exp_wTx_new[i]));
1024 double exp_dx = exp(d*val);
1025 exp_wTx_new[i] = exp_wTx[ind]*exp_dx;
1026 cond += C[
GETI(ind)]*log((1+exp_wTx_new[i])/(exp_dx+exp_wTx_new[i]));
1037 for (ind=0; ind<l; ind++)
1039 exp_wTx[ind] = exp_wTx_new[i];
1048 exp_wTx[ind] = exp_wTx_new[i];
1065 if(num_linesearch >= max_num_linesearch)
1068 for(
int i=0; i<l; i++)
1071 for(
int i=0; i<w_size; i++)
1073 if(
w[i]==0)
continue;
1077 for (ind=0; ind<l; ind++)
1078 exp_wTx[ind] +=
w[i];
1084 exp_wTx[ind] +=
w[i]*val;
1089 for(
int i=0; i<l; i++)
1090 exp_wTx[i] = exp(exp_wTx[i]);
1095 Gmax_init = Gmax_new;
1099 if(Gmax_new <= eps*Gmax_init)
1101 if(active_size == w_size)
1105 active_size = w_size;
1111 Gmax_old = Gmax_new;
1115 SG_INFO(
"optimization finished, #iter = %d\n", iter);
1117 SG_WARNING(
"\nWARNING: reaching max number of iterations\n");
1123 for(j=0; j<w_size; j++)
1131 v += C[
GETI(j)]*log(1+1/exp_wTx[j]);
1133 v += C[
GETI(j)]*log(1+exp_wTx[j]);
1135 SG_INFO(
"Objective value = %lf\n", v);
1136 SG_INFO(
"#nonzeros/#features = %d/%d\n", nnz, w_size);
1141 delete [] exp_wTx_new;
1144 delete [] xjneg_sum;
1145 delete [] xjpos_sum;
1151 SG_ERROR(
"Please assign linear term first!\n");
1159 SG_ERROR(
"Please assign labels first!\n");
1167 #endif //HAVE_LAPACK