Skylark (Sketching Library)  0.1
/var/lib/jenkins/jobs/Skylark/workspace/ml/FunctionProx.hpp
Go to the documentation of this file.
00001 /*
00002  * FunctionProx.hpp
00003  *
00004  *  Created on: Jan 12, 2014
00005  *      Author: vikas
00006  */
00007 
00008 #ifndef FUNCTIONPROX_HPP_
00009 #define FUNCTIONPROX_HPP_
00010 
00011 #include <elemental.hpp>
00012 #include "options.hpp"
00013 #include <cstdlib>
00014 #include <cmath>
00015 
00016 #ifdef SKYLARK_HAVE_OPENMP
00017 #include <omp.h>
00018 #endif
00019 
00020 // Simple abstract class to represent a function and its prox operator
00021 // these are defined for local matrices.
00022 typedef elem::Matrix<double> LocalDenseMatrixType;
00023 typedef elem::Matrix<double>  LocalTargetMatrixType;
00024 
00025 // abstract class for representing loss functions and their prox operators
00026 class lossfunction
00027 {
00028 public:
00029         virtual double evaluate(LocalDenseMatrixType& O, LocalTargetMatrixType& T) = 0 ;
00030         virtual void proxoperator(LocalDenseMatrixType& X, double lambda, LocalTargetMatrixType& T, LocalDenseMatrixType& Y) = 0 ;
00031 
00032         virtual ~lossfunction(void){}
00033 
00034 private:
00035         std::string type;
00036 };
00037 
00038 // abstract class for representing regularizers and their prox operators
00039 class regularization
00040 {
00041 public:
00042         virtual double evaluate(LocalDenseMatrixType& W) = 0 ;
00043         virtual void proxoperator(LocalDenseMatrixType& W, double lambda, LocalDenseMatrixType& mu, LocalDenseMatrixType& P) = 0 ;
00044 
00045         virtual ~regularization(void){}
00046 };
00047 
00048 // Class to represent 0.5*||O - T||^2_{fro}
00049 class squaredloss : public lossfunction {
00050 public:
00051         virtual double evaluate(LocalDenseMatrixType& O, LocalTargetMatrixType& T);
00052         virtual void proxoperator(LocalDenseMatrixType& X, double lambda, LocalTargetMatrixType& T, LocalDenseMatrixType& Y);
00053 };
00054 
00055 // Class to represent Least Absolute Deviations ||O - T||_1
00056 class ladloss : public lossfunction {
00057 public:
00058         virtual double evaluate(LocalDenseMatrixType& O, LocalTargetMatrixType& T);
00059         virtual void proxoperator(LocalDenseMatrixType& X, double lambda, LocalTargetMatrixType& T, LocalDenseMatrixType& Y);
00060 };
00061 
00062 // Class to represent hinge loss sum(max(1-t*o,0))
00063 class hingeloss : public lossfunction {
00064 public:
00065         virtual double evaluate(LocalDenseMatrixType& O, LocalTargetMatrixType& T);
00066         virtual void proxoperator(LocalDenseMatrixType& X, double lambda, LocalTargetMatrixType& T, LocalDenseMatrixType& Y);
00067 };
00068 
00069 class logisticloss : public lossfunction {
00070 public:
00071     logisticloss() : epsilon(1e-4) {}
00072     virtual double evaluate(LocalDenseMatrixType& O, LocalTargetMatrixType& T);
00073     virtual void proxoperator(LocalDenseMatrixType& X, double lambda, LocalTargetMatrixType& T, LocalDenseMatrixType& Y);
00074 private:
00075     double logsumexp(double* x, int n);
00076     double normsquare(double* x, double* y, int n);
00077     double objective(int index, double* x, double* v, int n, double lambda);
00078     int logexp(int index, double* v, int n, double lambda, double* x, int MAXITER, double epsilon, int DISPLAY);
00079 
00080     static const int MAXITER = 100;
00081     static const int DISPLAY = 0;
00082 
00083     const double epsilon;
00084 };
00085 
00086 
00087 class l2: public regularization {
00088 public:
00089         virtual double evaluate(LocalDenseMatrixType& W);
00090         virtual void proxoperator(LocalDenseMatrixType& W, double lambda, LocalDenseMatrixType& mu, LocalDenseMatrixType& P);
00091 };
00092 
00093 class l1: public regularization {
00094 public:
00095         virtual double evaluate(LocalDenseMatrixType& W);
00096         virtual void proxoperator(LocalDenseMatrixType& W, double lambda, LocalDenseMatrixType& mu, LocalDenseMatrixType& P);
00097         double soft_threshold(double x, double lambda);
00098 };
00099 
00100 double ladloss::evaluate(LocalDenseMatrixType& O, LocalTargetMatrixType& T) {
00101                 double loss = 0.0;
00102                 int k = O.Height();
00103                 int n = O.Width();
00104 
00105                 // check for size compatability
00106 
00107                 double* Obuf = O.Buffer();
00108                 double*  Tbuf = T.Buffer();
00109                 double x;
00110                 int i, j, label;
00111 
00112                 if (k==1) {
00113 #ifdef SKYLARK_HAVE_OPENMP
00114                         #pragma omp parallel for reduction(+:loss) private(i, x)
00115 #endif
00116                         for(i=0; i<n; i++) {
00117                                 x = Obuf[i] - Tbuf[i];
00118                                 loss += std::abs(x);
00119                         }
00120                 }
00121 
00122                 if (k>1) {
00123 #ifdef SKYLARK_HAVE_OPENMP
00124                         #pragma omp parallel for reduction(+:loss) private(i,j, x, label)
00125 #endif
00126                         for(i=0; i<n; i++) {
00127                                 label = (int) Tbuf[i];
00128                                 for(j=0;j<k;j++) {
00129                                      x = O.Get(j,i) - (j==label ? 1.0:-1.0);
00130                                      loss += std::abs(x);
00131                                 }
00132                         }
00133                 }
00134 
00135                 return loss;
00136         }
00137 
00138         //solution to Y = prox[X] = argmin_Y 0.5*||X-Y||^2_{fro} + lambda ||Y-T||_1
00139 void ladloss::proxoperator(LocalDenseMatrixType& X, double lambda, LocalTargetMatrixType& T, LocalDenseMatrixType& Y) {
00140                 int k = X.Height();
00141                 int n = X.Width();
00142 
00143                                 // check for size compatability
00144 
00145                 double* Xbuf = X.Buffer();
00146                 double* Tbuf = T.Buffer();
00147 
00148                 double* Ybuf = Y.Buffer();
00149                 double ilambda = 1.0/(1.0 + lambda);
00150 
00151                 int label, i, j;
00152                 double t, x;
00153 
00154                 if (k==1) {
00155 #ifdef SKYLARK_HAVE_OPENMP
00156             #pragma omp parallel for private(i)
00157 #endif
00158                         for(int i=0; i<n; i++) {
00159                                 Ybuf[i] = Tbuf[i];
00160                                 if (Xbuf[i] > (Tbuf[i]+lambda))
00161                                         Ybuf[i] = Xbuf[i] - lambda;
00162                                 if (Xbuf[i] < (Tbuf[i] - lambda))
00163                                         Ybuf[i] = Xbuf[i] + lambda;
00164                         }
00165                 }
00166 
00167                 if(k>1) {
00168 #ifdef SKYLARK_HAVE_OPENMP
00169             #pragma omp parallel for private(i,j, label, t, x)
00170 #endif
00171                         for(int i=0; i<n; i++) {
00172                                 label = (int) Tbuf[i];
00173                                 for(j=0;j<k;j++) {
00174                                          t = (j==label ? 1.0:-1.0);
00175                                          x = X.Get(j,i);
00176                                      Y.Set(j, i,  t);
00177                                      if (x > t + lambda)
00178                                          Y.Set(j, i,  x - lambda);
00179                                      if (x < t - lambda)
00180                                          Y.Set(j, i,  x + lambda);
00181                                 }
00182                         }
00183                 }
00184         }
00185 
00186 
00187 
00188 double squaredloss::evaluate(LocalDenseMatrixType& O, LocalTargetMatrixType& T) {
00189                 double loss = 0.0;
00190                 int k = O.Height();
00191                 int n = O.Width();
00192 
00193                 // check for size compatability
00194 
00195                 double* Obuf = O.Buffer();
00196                 double*  Tbuf = T.Buffer();
00197                 double x;
00198                 int i, j, label;
00199 
00200                 if (k==1) {
00201 #ifdef SKYLARK_HAVE_OPENMP
00202                         #pragma omp parallel for reduction(+:loss) private(i, x)
00203 #endif
00204                         for(i=0; i<n; i++) {
00205                                 x = Obuf[i] - Tbuf[i];
00206                                 loss += x*x;
00207                         }
00208                 }
00209 
00210                 if (k>1) {
00211 #ifdef SKYLARK_HAVE_OPENMP
00212                         #pragma omp parallel for reduction(+:loss) private(i,j, x, label)
00213 #endif
00214                         for(i=0; i<n; i++) {
00215                                 label = (int) Tbuf[i];
00216                                 for(j=0;j<k;j++) {
00217                                      x = O.Get(j,i) - (j==label ? 1.0:-1.0);
00218                                      loss += x*x;
00219                                 }
00220                         }
00221                 }
00222 
00223                 return 0.5*loss;
00224         }
00225 
00226         //solution to Y = prox[X] = argmin_Y 0.5*||X-Y||^2_{fro} + lambda 0.5 ||Y-T||^2_{fro}
00227 void squaredloss::proxoperator(LocalDenseMatrixType& X, double lambda, LocalTargetMatrixType& T, LocalDenseMatrixType& Y) {
00228                 int k = X.Height();
00229                 int n = X.Width();
00230 
00231                                 // check for size compatability
00232 
00233                 double* Xbuf = X.Buffer();
00234                 double* Tbuf = T.Buffer();
00235 
00236                 double* Ybuf = Y.Buffer();
00237                 double ilambda = 1.0/(1.0 + lambda);
00238 
00239                 int label, i, j;
00240 
00241                 if (k==1) {
00242 #ifdef SKYLARK_HAVE_OPENMP
00243             #pragma omp parallel for private(i)
00244 #endif
00245                         for(int i=0; i<n; i++)
00246                                 Ybuf[i] = ilambda*(Xbuf[i] + lambda*Tbuf[i]);
00247                 }
00248 
00249                 if(k>1) {
00250 #ifdef SKYLARK_HAVE_OPENMP
00251             #pragma omp parallel for private(i,j, label)
00252 #endif
00253                         for(int i=0; i<n; i++) {
00254                                 label = (int) Tbuf[i];
00255                                 for(j=0;j<k;j++) {
00256                                      Y.Set(j, i,  ilambda*(X.Get(j,i) + lambda*(j==label ? 1.0:-1.0)));
00257                                 }
00258                         }
00259                 }
00260         }
00261 
00262 
00263 
00264 double hingeloss::evaluate(LocalDenseMatrixType& O, LocalTargetMatrixType& T) {
00265                 double loss = 0.0;
00266                 int k = O.Height();
00267                 int n = O.Width();
00268                 int kn = O.Height()*O.Width();
00269                 int label, i, j;
00270                 // check for size compatability
00271 
00272                 double* Obuf = O.Buffer();
00273                 double* Tbuf = T.Buffer();
00274                 double obj = 0.0;
00275                 double yx;
00276 
00277                 int noutputs = O.Height();
00278 
00279                 if(noutputs==1) {
00280 #ifdef SKYLARK_HAVE_OPENMP
00281                #pragma omp parallel for reduction(+:obj) private(i, yx)
00282 #endif
00283                        for(i=0;i<n;i++) {
00284                                         yx = Obuf[i]*Tbuf[i];
00285                                         if(yx<1.0)
00286                                                 obj += (1.0 - yx);
00287                                 }
00288 
00289                         }
00290 
00291 
00292                 if(noutputs>1) {
00293 #ifdef SKYLARK_HAVE_OPENMP
00294                #pragma omp parallel for reduction(+:obj) private(i, j, label, yx)
00295 #endif
00296                        for(i=0;i<n;i++) {
00297                                 label = (int) Tbuf[i];
00298                                 for(j=0;j<k;j++) {
00299                                      yx = O.Get(j,i)* (j==label ? 1.0:-1.0);
00300                                      if(yx<1.0)
00301                                          obj += (1.0 - yx);
00302                                 }
00303                        }
00304 
00305                 }
00306                 return obj;
00307         }
00308 
00309         //solution to Y = prox[X] = argmin_Y 0.5*||X-Y||^2_{fro} + lambda 0.5 ||Y-T||^2_{fro}
00310 void hingeloss::proxoperator(LocalDenseMatrixType& X, double lambda, LocalTargetMatrixType& T, LocalDenseMatrixType& Y) {
00311 
00312         int i, j;
00313         double* Tbuf = T.Buffer();
00314         double* Xbuf = X.Buffer();
00315         double* Ybuf = Y.Buffer();
00316         int k = X.Height();
00317         int m = X.Width();
00318     double yv, yy;
00319     int label;
00320 
00321     int noutputs = k;
00322 
00323         if(noutputs==1) { // We assume cy has +1 or -1 entries for n=1 outputs
00324 #ifdef SKYLARK_HAVE_OPENMP
00325                         #pragma omp parallel for private(i,yv)
00326 #endif
00327                                 for(i=0;i<m;i++) {
00328                                         yv = Tbuf[i]*Xbuf[i];
00329 
00330                                         if (yv > 1.0) {
00331                                                 Ybuf[i] = Xbuf[i];
00332                                         }
00333                                         else {
00334                                                 if(yv < (1.0-lambda)) {
00335                                                         Ybuf[i] = Xbuf[i] + lambda*Tbuf[i];
00336                                                 }
00337                                                 else {
00338                                                         Ybuf[i] = Tbuf[i];
00339                                                 }
00340                                         }
00341                                 }
00342                         }
00343 
00344         if (noutputs>1) {
00345 #ifdef SKYLARK_HAVE_OPENMP
00346                         #pragma omp parallel for private(i,j,yv, yy, label)
00347 #endif
00348                                 for(i=0;i<m;i++) {
00349                                         label = (int) Tbuf[i];
00350                                         for(j=0;j<k;j++) {
00351                                                 yv = X.Get(j,i);
00352                                                 yy = +1.0;
00353                                                 if(!(j==label)) {
00354                                                         yv = -yv;
00355                                                         yy = -1.0;
00356                                                 }
00357                                                 if (yv>1.0)
00358                                                                                 Y.Set(j,i,  X.Get(j,i));
00359                                                                         else {
00360                                                                                 if(yv<1.0-lambda)
00361                                                                                         Y.Set(j,i, X.Get(j,i) + lambda*yy);
00362                                                                                 else
00363                                                                                         Y.Set(j,i, yy);
00364                                                                         }
00365                                         }
00366                                 }
00367                         }
00368 
00369 
00370         }
00371 
00372 
00373 double logisticloss::evaluate(LocalDenseMatrixType& O, LocalTargetMatrixType& T) {
00374         double obj = 0.0;
00375         int m = O.Width();
00376         int n = O.Height();
00377         double t;
00378         int i;
00379         //double start = omp_get_wtime( );
00380 
00381 #ifdef SKYLARK_HAVE_OPENMP
00382         #pragma omp parallel for reduction(+:obj) private(i,t)
00383 #endif
00384         for(int i=0;i<m;i++) {
00385                 t = (int) T.Get(i, 0);
00386             obj += -O.Get(t, i) + logsumexp(O.Buffer(0, i), n);
00387         }
00388 
00389         //double end = omp_get_wtime( );
00390 
00391         // std::cout << end - start <<  " secs " << std::endl;
00392 
00393         return obj;
00394 }
00395 
00396 
00397 void logisticloss::proxoperator(LocalDenseMatrixType& X, double lambda, LocalTargetMatrixType& T, LocalDenseMatrixType& Y) {
00398     int m = X.Width();
00399     int n = X.Height();
00400     int i;
00401 
00402 #ifdef SKYLARK_HAVE_OPENMP
00403     #pragma omp parallel for private(i)
00404 #endif
00405     for(int i=0;i<m;i++) {
00406                 logexp((int) T.Get(i, 0), X.Buffer(0, i), n, 1.0/lambda, Y.Buffer(0, i), MAXITER, epsilon, DISPLAY);
00407     }
00408 
00409 }
00410 
00411 double logisticloss::logsumexp(double* x, int n) {
00412         int i;
00413         double max=x[0];
00414         double f = 0.0;
00415         for(i=0;i<n;i++) {
00416                 if (x[i]>max) {
00417                         max = x[i];
00418                 }
00419         }
00420         for(i=0;i<n;i++)
00421                 f +=  exp(x[i] - max);
00422         f = max + log(f);
00423 
00424         return f;
00425 }
00426 
00427 double logisticloss::objective(int index, double* x, double* v, int n, double lambda) {
00428         double nrmsqr = normsquare(x,v,n);
00429         double obj = -x[index] + logsumexp(x, n) + 0.5*lambda*nrmsqr;
00430         return obj;
00431         }
00432 
00433 double logisticloss::normsquare(double* x, double* y, int n){
00434         double nrm = 0.0;
00435         int i;
00436         for(i=0;i<n;i++)
00437                 nrm+= pow(x[i] - y[i], 2);
00438         return nrm;
00439 }
00440 
00441 int logisticloss::logexp(int index, double* v, int n, double lambda, double* x, int MAXITER, double epsilon, int DISPLAY) {
00442     /* solution to - log exp(x(i))/sum(exp(x(j))) + lambda/2 ||x - v||_2^2 */
00443     /* n is length of v and x */
00444     /* writes over x */
00445     double alpha = 0.1;
00446     double beta = 0.5;
00447     int iter, i;
00448     double t, logsum, p, pu, pptil, decrement;
00449     double *u = (double *) malloc(n*sizeof(double));
00450     double *z = (double *) malloc(n*sizeof(double));
00451     double *grad = (double *) malloc(n*sizeof(double));
00452     double newobj=0.0, obj=0.0;
00453     obj = objective(index, x, v, n, lambda);
00454 
00455     for(iter=0;iter<MAXITER;iter++) {
00456         logsum = logsumexp(x,n);
00457         if(DISPLAY)
00458             printf("iter=%d, obj=%f\n", iter, obj);
00459         pu = 0.0;
00460         pptil = 0.0;
00461         for(i=0;i<n;i++) {
00462             p = exp(x[i] - logsum);
00463             grad[i] = p + lambda*(x[i] - v[i]);
00464             if(i==index)
00465                 grad[i] += -1.0;
00466             u[i] = grad[i]/(p+lambda);
00467             pu += p*u[i];
00468             z[i] = p/(p+lambda);
00469             pptil += z[i]*p;
00470         }
00471         pptil = 1 - pptil;
00472         decrement = 0.0;
00473         for(i=0;i<n;i++) {
00474             u[i] -= (pu/pptil)*z[i];
00475             decrement += grad[i]*u[i];
00476         }
00477         if (decrement < 2*epsilon) {
00478                 // std::cout << "decrement =  " << decrement << std::endl;
00479             free(u);
00480             free(z);
00481             free(grad);
00482             return 0;
00483         }
00484         t = 1.0;
00485         while(1) {
00486             for(i=0;i<n;i++)
00487                 z[i] = x[i] - t*u[i];
00488             newobj = objective(index, z, v, n, lambda);
00489             if (newobj <= obj + alpha*t*decrement)
00490                 break;
00491             t = beta*t;
00492         }
00493         for(i=0;i<n;i++)
00494             x[i] = z[i];
00495             obj = newobj;
00496     }
00497     free(u);
00498     free(z);
00499     free(grad);
00500     return 1;
00501 }
00502 
00503 double l2::evaluate(LocalDenseMatrixType& W) {
00504                 double norm = elem::Norm(W);
00505                 return 0.5*norm*norm;
00506         }
00507 
00508 
00509 void l2::proxoperator(LocalDenseMatrixType& W, double lambda, LocalDenseMatrixType& mu, LocalDenseMatrixType& P) {
00510                 double *Wbuf = W.Buffer();
00511                 double *mubuf = mu.Buffer();
00512                 double *Pbuf = P.Buffer();
00513                 int mn = W.Height()*W.Width();
00514                 double ilambda = 1.0/(1.0 + lambda);
00515 
00516                 for(int i=0;i<mn; i++)
00517                         Pbuf[i] = (Wbuf[i] - mubuf[i])*ilambda;
00518         }
00519 
00520 double l1::evaluate(LocalDenseMatrixType& W) {
00521                 double norm = elem::EntrywiseOneNorm(W);
00522                 return norm;
00523         }
00524 
00525 double l1::soft_threshold(double x, double lambda) {
00526         double v = 0;
00527         if (std::abs(x) <= lambda)
00528                 v = 0.0;
00529         if (x > lambda)
00530                 v =  x - lambda;
00531         if (x < -lambda)
00532                 v = x + lambda;
00533         return v;
00534 }
00535 
00536 void l1::proxoperator(LocalDenseMatrixType& W, double lambda, LocalDenseMatrixType& mu, LocalDenseMatrixType& P) {
00537                 double *Wbuf = W.Buffer();
00538                 double *mubuf = mu.Buffer();
00539                 double *Pbuf = P.Buffer();
00540                 int mn = W.Height()*W.Width();
00541 
00542                 for(int i=0;i<mn; i++)
00543                         Pbuf[i] = soft_threshold(Wbuf[i] - mubuf[i], lambda);
00544         }
00545 
00546 
00547 #endif /* FUNCTIONPROX_HPP_ */