Skylark (Sketching Library)
0.1
/*
 * FunctionProx.hpp
 *
 *  Created on: Jan 12, 2014
 *      Author: vikas
 */

#ifndef FUNCTIONPROX_HPP_
#define FUNCTIONPROX_HPP_

#include <elemental.hpp>
#include "options.hpp"
#include <cstdlib>
#include <cstdio>
#include <cmath>
#include <string>

#ifdef SKYLARK_HAVE_OPENMP
#include <omp.h>
#endif

// Simple abstract classes to represent a function and its prox operator;
// these are defined for local matrices.
typedef elem::Matrix<double> LocalDenseMatrixType;
typedef elem::Matrix<double> LocalTargetMatrixType;

// Abstract class representing loss functions and their prox operators.
class lossfunction
{
public:
    virtual double evaluate(LocalDenseMatrixType& O,
        LocalTargetMatrixType& T) = 0;
    virtual void proxoperator(LocalDenseMatrixType& X, double lambda,
        LocalTargetMatrixType& T, LocalDenseMatrixType& Y) = 0;

    virtual ~lossfunction(void) {}

private:
    std::string type;
};

// Abstract class representing regularizers and their prox operators.
class regularization
{
public:
    virtual double evaluate(LocalDenseMatrixType& W) = 0;
    virtual void proxoperator(LocalDenseMatrixType& W, double lambda,
        LocalDenseMatrixType& mu, LocalDenseMatrixType& P) = 0;

    virtual ~regularization(void) {}
};

// Squared loss 0.5*||O - T||^2_{fro}
class squaredloss : public lossfunction {
public:
    virtual double evaluate(LocalDenseMatrixType& O, LocalTargetMatrixType& T);
    virtual void proxoperator(LocalDenseMatrixType& X, double lambda,
        LocalTargetMatrixType& T, LocalDenseMatrixType& Y);
};

// Least absolute deviations loss ||O - T||_1
class ladloss : public lossfunction {
public:
    virtual double evaluate(LocalDenseMatrixType& O, LocalTargetMatrixType& T);
    virtual void proxoperator(LocalDenseMatrixType& X, double lambda,
        LocalTargetMatrixType& T, LocalDenseMatrixType& Y);
};

// Hinge loss sum(max(1 - t*o, 0))
class hingeloss : public lossfunction {
public:
    virtual double evaluate(LocalDenseMatrixType& O, LocalTargetMatrixType& T);
    virtual void proxoperator(LocalDenseMatrixType& X, double lambda,
        LocalTargetMatrixType& T, LocalDenseMatrixType& Y);
};

// Multinomial logistic loss; its prox operator has no closed form and is
// computed iteratively by logexp() below.
class logisticloss : public lossfunction {
public:
    logisticloss() : epsilon(1e-4) {}
    virtual double evaluate(LocalDenseMatrixType& O, LocalTargetMatrixType& T);
    virtual void proxoperator(LocalDenseMatrixType& X, double lambda,
        LocalTargetMatrixType& T, LocalDenseMatrixType& Y);
private:
    double logsumexp(double* x, int n);
    double normsquare(double* x, double* y, int n);
    double objective(int index, double* x, double* v, int n, double lambda);
    int logexp(int index, double* v, int n, double lambda, double* x,
        int MAXITER, double epsilon, int DISPLAY);

    static const int MAXITER = 100;
    static const int DISPLAY = 0;

    const double epsilon;
};

class l2 : public regularization {
public:
    virtual double evaluate(LocalDenseMatrixType& W);
    virtual void proxoperator(LocalDenseMatrixType& W, double lambda,
        LocalDenseMatrixType& mu, LocalDenseMatrixType& P);
};

class l1 : public regularization {
public:
    virtual double evaluate(LocalDenseMatrixType& W);
    virtual void proxoperator(LocalDenseMatrixType& W, double lambda,
        LocalDenseMatrixType& mu, LocalDenseMatrixType& P);
    double soft_threshold(double x, double lambda);
};
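// The prox operators below follow the usual definitions. For a loss L,
//     Y = prox[X] = argmin_Y 0.5*||X - Y||^2_{fro} + lambda*L(Y, T),
// and for a regularizer r, applied at the shifted point W - mu,
//     P = argmin_P lambda*r(P) + 0.5*||P - (W - mu)||^2_{fro},
// which is what l2::proxoperator and l1::proxoperator compute. (A small
// usage sketch appears after the end of this header.)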
double ladloss::evaluate(LocalDenseMatrixType& O, LocalTargetMatrixType& T) {
    double loss = 0.0;
    int k = O.Height();
    int n = O.Width();

    // check for size compatibility

    double* Obuf = O.Buffer();
    double* Tbuf = T.Buffer();
    double x;
    int i, j, label;

    if (k == 1) {
#ifdef SKYLARK_HAVE_OPENMP
#pragma omp parallel for reduction(+:loss) private(i, x)
#endif
        for (i = 0; i < n; i++) {
            x = Obuf[i] - Tbuf[i];
            loss += std::abs(x);
        }
    }

    if (k > 1) {
#ifdef SKYLARK_HAVE_OPENMP
#pragma omp parallel for reduction(+:loss) private(i, j, x, label)
#endif
        for (i = 0; i < n; i++) {
            label = (int) Tbuf[i];
            for (j = 0; j < k; j++) {
                x = O.Get(j, i) - (j == label ? 1.0 : -1.0);
                loss += std::abs(x);
            }
        }
    }

    return loss;
}

// Solution to Y = prox[X] = argmin_Y 0.5*||X - Y||^2_{fro} + lambda*||Y - T||_1
void ladloss::proxoperator(LocalDenseMatrixType& X, double lambda,
        LocalTargetMatrixType& T, LocalDenseMatrixType& Y) {
    int k = X.Height();
    int n = X.Width();

    // check for size compatibility

    double* Xbuf = X.Buffer();
    double* Tbuf = T.Buffer();
    double* Ybuf = Y.Buffer();

    int label, j;
    double t, x;

    if (k == 1) {
#ifdef SKYLARK_HAVE_OPENMP
#pragma omp parallel for
#endif
        for (int i = 0; i < n; i++) {
            Ybuf[i] = Tbuf[i];
            if (Xbuf[i] > (Tbuf[i] + lambda))
                Ybuf[i] = Xbuf[i] - lambda;
            if (Xbuf[i] < (Tbuf[i] - lambda))
                Ybuf[i] = Xbuf[i] + lambda;
        }
    }

    if (k > 1) {
#ifdef SKYLARK_HAVE_OPENMP
#pragma omp parallel for private(j, label, t, x)
#endif
        for (int i = 0; i < n; i++) {
            label = (int) Tbuf[i];
            for (j = 0; j < k; j++) {
                t = (j == label ? 1.0 : -1.0);
                x = X.Get(j, i);
                Y.Set(j, i, t);
                if (x > t + lambda)
                    Y.Set(j, i, x - lambda);
                if (x < t - lambda)
                    Y.Set(j, i, x + lambda);
            }
        }
    }
}
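// Note on the branches above: with u = x - t entrywise, the minimizer of
// 0.5*(u - y)^2 + lambda*|y| is the soft-threshold S_lambda(u) =
// sign(u)*max(|u| - lambda, 0), so Y = T + S_lambda(X - T); the three cases
// per entry are exactly this shifted soft-thresholding.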
double squaredloss::evaluate(LocalDenseMatrixType& O, LocalTargetMatrixType& T) {
    double loss = 0.0;
    int k = O.Height();
    int n = O.Width();

    // check for size compatibility

    double* Obuf = O.Buffer();
    double* Tbuf = T.Buffer();
    double x;
    int i, j, label;

    if (k == 1) {
#ifdef SKYLARK_HAVE_OPENMP
#pragma omp parallel for reduction(+:loss) private(i, x)
#endif
        for (i = 0; i < n; i++) {
            x = Obuf[i] - Tbuf[i];
            loss += x*x;
        }
    }

    if (k > 1) {
#ifdef SKYLARK_HAVE_OPENMP
#pragma omp parallel for reduction(+:loss) private(i, j, x, label)
#endif
        for (i = 0; i < n; i++) {
            label = (int) Tbuf[i];
            for (j = 0; j < k; j++) {
                x = O.Get(j, i) - (j == label ? 1.0 : -1.0);
                loss += x*x;
            }
        }
    }

    return 0.5*loss;
}

// Solution to Y = prox[X] = argmin_Y 0.5*||X - Y||^2_{fro} + lambda*0.5*||Y - T||^2_{fro}
void squaredloss::proxoperator(LocalDenseMatrixType& X, double lambda,
        LocalTargetMatrixType& T, LocalDenseMatrixType& Y) {
    int k = X.Height();
    int n = X.Width();

    // check for size compatibility

    double* Xbuf = X.Buffer();
    double* Tbuf = T.Buffer();
    double* Ybuf = Y.Buffer();
    double ilambda = 1.0/(1.0 + lambda);

    int label, j;

    if (k == 1) {
#ifdef SKYLARK_HAVE_OPENMP
#pragma omp parallel for
#endif
        for (int i = 0; i < n; i++)
            Ybuf[i] = ilambda*(Xbuf[i] + lambda*Tbuf[i]);
    }

    if (k > 1) {
#ifdef SKYLARK_HAVE_OPENMP
#pragma omp parallel for private(j, label)
#endif
        for (int i = 0; i < n; i++) {
            label = (int) Tbuf[i];
            for (j = 0; j < k; j++)
                Y.Set(j, i, ilambda*(X.Get(j, i) + lambda*(j == label ? 1.0 : -1.0)));
        }
    }
}
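// The closed form above follows from the first-order condition: setting the
// derivative of 0.5*(y - x)^2 + 0.5*lambda*(y - t)^2 to zero gives
// y = (x + lambda*t)/(1 + lambda), applied entrywise.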
double hingeloss::evaluate(LocalDenseMatrixType& O, LocalTargetMatrixType& T) {
    int k = O.Height();
    int n = O.Width();
    int label, i, j;

    // check for size compatibility

    double* Obuf = O.Buffer();
    double* Tbuf = T.Buffer();
    double obj = 0.0;
    double yx;

    if (k == 1) {
#ifdef SKYLARK_HAVE_OPENMP
#pragma omp parallel for reduction(+:obj) private(i, yx)
#endif
        for (i = 0; i < n; i++) {
            yx = Obuf[i]*Tbuf[i];
            if (yx < 1.0)
                obj += (1.0 - yx);
        }
    }

    if (k > 1) {
#ifdef SKYLARK_HAVE_OPENMP
#pragma omp parallel for reduction(+:obj) private(i, j, label, yx)
#endif
        for (i = 0; i < n; i++) {
            label = (int) Tbuf[i];
            for (j = 0; j < k; j++) {
                yx = O.Get(j, i)*(j == label ? 1.0 : -1.0);
                if (yx < 1.0)
                    obj += (1.0 - yx);
            }
        }
    }

    return obj;
}

// Solution to Y = prox[X] = argmin_Y 0.5*||X - Y||^2_{fro} + lambda*sum(max(1 - T.*Y, 0))
void hingeloss::proxoperator(LocalDenseMatrixType& X, double lambda,
        LocalTargetMatrixType& T, LocalDenseMatrixType& Y) {
    int i, j;
    double* Tbuf = T.Buffer();
    double* Xbuf = X.Buffer();
    double* Ybuf = Y.Buffer();
    int k = X.Height();
    int m = X.Width();
    double yv, yy;
    int label;

    if (k == 1) { // we assume T has +1 or -1 entries in the single-output case
#ifdef SKYLARK_HAVE_OPENMP
#pragma omp parallel for private(i, yv)
#endif
        for (i = 0; i < m; i++) {
            yv = Tbuf[i]*Xbuf[i];

            if (yv > 1.0) {
                Ybuf[i] = Xbuf[i];
            } else {
                if (yv < (1.0 - lambda)) {
                    Ybuf[i] = Xbuf[i] + lambda*Tbuf[i];
                } else {
                    Ybuf[i] = Tbuf[i];
                }
            }
        }
    }

    if (k > 1) {
#ifdef SKYLARK_HAVE_OPENMP
#pragma omp parallel for private(i, j, yv, yy, label)
#endif
        for (i = 0; i < m; i++) {
            label = (int) Tbuf[i];
            for (j = 0; j < k; j++) {
                yv = X.Get(j, i);
                yy = +1.0;
                if (j != label) {
                    yv = -yv;
                    yy = -1.0;
                }
                if (yv > 1.0)
                    Y.Set(j, i, X.Get(j, i));
                else {
                    if (yv < 1.0 - lambda)
                        Y.Set(j, i, X.Get(j, i) + lambda*yy);
                    else
                        Y.Set(j, i, yy);
                }
            }
        }
    }
}

// Multinomial logistic loss: sum_i [ -O(t_i, i) + log(sum_j exp(O(j, i))) ],
// where t_i is the class label encoded in T.
double logisticloss::evaluate(LocalDenseMatrixType& O, LocalTargetMatrixType& T) {
    double obj = 0.0;
    int m = O.Width();
    int n = O.Height();
    int t, i;

#ifdef SKYLARK_HAVE_OPENMP
#pragma omp parallel for reduction(+:obj) private(i, t)
#endif
    for (i = 0; i < m; i++) {
        t = (int) T.Get(i, 0);
        obj += -O.Get(t, i) + logsumexp(O.Buffer(0, i), n);
    }

    return obj;
}

void logisticloss::proxoperator(LocalDenseMatrixType& X, double lambda,
        LocalTargetMatrixType& T, LocalDenseMatrixType& Y) {
    int m = X.Width();
    int n = X.Height();
    int i;

#ifdef SKYLARK_HAVE_OPENMP
#pragma omp parallel for private(i)
#endif
    for (i = 0; i < m; i++) {
        logexp((int) T.Get(i, 0), X.Buffer(0, i), n, 1.0/lambda,
            Y.Buffer(0, i), MAXITER, epsilon, DISPLAY);
    }
}

// Numerically stable log(sum(exp(x))): shift by the max before exponentiating.
double logisticloss::logsumexp(double* x, int n) {
    int i;
    double max = x[0];
    double f = 0.0;
    for (i = 0; i < n; i++) {
        if (x[i] > max)
            max = x[i];
    }
    for (i = 0; i < n; i++)
        f += exp(x[i] - max);
    f = max + log(f);

    return f;
}

double logisticloss::objective(int index, double* x, double* v, int n,
        double lambda) {
    double nrmsqr = normsquare(x, v, n);
    double obj = -x[index] + logsumexp(x, n) + 0.5*lambda*nrmsqr;
    return obj;
}

double logisticloss::normsquare(double* x, double* y, int n) {
    double nrm = 0.0;
    int i;
    for (i = 0; i < n; i++)
        nrm += pow(x[i] - y[i], 2);
    return nrm;
}
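// Note on logexp() below: it minimizes f(x) = -x[index] + logsumexp(x)
// + (lambda/2)*||x - v||^2 by a damped Newton method. The gradient is
// p + lambda*(x - v) - e_index with p = softmax(x), and the Hessian
// diag(p) + lambda*I - p*p^T is diagonal plus a rank-one term, so each
// Newton direction is formed in O(n) with a Sherman--Morrison-style
// correction; a backtracking line search (parameters alpha, beta) sizes
// the step.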
// Solution to min_x  -log( exp(x(index)) / sum_j exp(x(j)) ) + lambda/2*||x - v||_2^2.
// n is the length of v and x; the result is written over x.
int logisticloss::logexp(int index, double* v, int n, double lambda,
        double* x, int MAXITER, double epsilon, int DISPLAY) {
    double alpha = 0.1;
    double beta = 0.5;
    int iter, i;
    double t, logsum, p, pu, pptil, decrement;
    double* u = (double*) malloc(n*sizeof(double));
    double* z = (double*) malloc(n*sizeof(double));
    double* grad = (double*) malloc(n*sizeof(double));
    double newobj = 0.0, obj = 0.0;
    obj = objective(index, x, v, n, lambda);

    for (iter = 0; iter < MAXITER; iter++) {
        logsum = logsumexp(x, n);
        if (DISPLAY)
            printf("iter=%d, obj=%f\n", iter, obj);
        pu = 0.0;
        pptil = 0.0;
        for (i = 0; i < n; i++) {
            p = exp(x[i] - logsum);
            grad[i] = p + lambda*(x[i] - v[i]);
            if (i == index)
                grad[i] += -1.0;
            u[i] = grad[i]/(p + lambda);
            pu += p*u[i];
            z[i] = p/(p + lambda);
            pptil += z[i]*p;
        }
        pptil = 1 - pptil;
        decrement = 0.0;
        for (i = 0; i < n; i++) {
            u[i] -= (pu/pptil)*z[i];
            decrement += grad[i]*u[i];
        }
        if (decrement < 2*epsilon) {
            free(u);
            free(z);
            free(grad);
            return 0;
        }
        t = 1.0;
        while (1) {
            for (i = 0; i < n; i++)
                z[i] = x[i] - t*u[i];
            newobj = objective(index, z, v, n, lambda);
            if (newobj <= obj + alpha*t*decrement)
                break;
            t = beta*t;
        }
        for (i = 0; i < n; i++)
            x[i] = z[i];
        obj = newobj;
    }
    free(u);
    free(z);
    free(grad);
    return 1;
}

double l2::evaluate(LocalDenseMatrixType& W) {
    double norm = elem::Norm(W);
    return 0.5*norm*norm;
}

// Solution to P = argmin_P lambda*0.5*||P||^2_{fro} + 0.5*||P - (W - mu)||^2_{fro}
void l2::proxoperator(LocalDenseMatrixType& W, double lambda,
        LocalDenseMatrixType& mu, LocalDenseMatrixType& P) {
    double* Wbuf = W.Buffer();
    double* mubuf = mu.Buffer();
    double* Pbuf = P.Buffer();
    int mn = W.Height()*W.Width();
    double ilambda = 1.0/(1.0 + lambda);

    for (int i = 0; i < mn; i++)
        Pbuf[i] = (Wbuf[i] - mubuf[i])*ilambda;
}

double l1::evaluate(LocalDenseMatrixType& W) {
    double norm = elem::EntrywiseOneNorm(W);
    return norm;
}

// Scalar soft-thresholding: shrink x toward zero by lambda.
double l1::soft_threshold(double x, double lambda) {
    double v = 0;
    if (std::abs(x) <= lambda)
        v = 0.0;
    if (x > lambda)
        v = x - lambda;
    if (x < -lambda)
        v = x + lambda;
    return v;
}

// Solution to P = argmin_P lambda*||P||_1 + 0.5*||P - (W - mu)||^2_{fro}
void l1::proxoperator(LocalDenseMatrixType& W, double lambda,
        LocalDenseMatrixType& mu, LocalDenseMatrixType& P) {
    double* Wbuf = W.Buffer();
    double* mubuf = mu.Buffer();
    double* Pbuf = P.Buffer();
    int mn = W.Height()*W.Width();

    for (int i = 0; i < mn; i++)
        Pbuf[i] = soft_threshold(Wbuf[i] - mubuf[i], lambda);
}

#endif /* FUNCTIONPROX_HPP_ */
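For reference, a minimal driver sketch showing how these classes are meant to be called. This is illustrative only: the include path and problem sizes are made up, and it assumes Elemental's elem::Matrix API together with the usual elem::Initialize/elem::Finalize pairing.

#include "FunctionProx.hpp"
#include <iostream>

int main(int argc, char* argv[]) {
    elem::Initialize(argc, argv);
    {
        // A tiny 1 x 4 binary problem: predictions in O, +/-1 targets in T.
        LocalDenseMatrixType O(1, 4), Y(1, 4);
        LocalTargetMatrixType T(1, 4);
        for (int i = 0; i < 4; i++) {
            O.Set(0, i, 0.5*i - 1.0);  // -1.0, -0.5, 0.0, 0.5
            T.Set(0, i, 1.0);
        }

        hingeloss loss;
        std::cout << "hinge loss = " << loss.evaluate(O, T) << std::endl;

        // Prox step: margin violators move toward the target by lambda.
        loss.proxoperator(O, 0.5, T, Y);

        // l1 prox is entrywise soft-thresholding of W - mu.
        l1 reg;
        LocalDenseMatrixType W(1, 4), mu(1, 4), P(1, 4);
        for (int i = 0; i < 4; i++) {
            W.Set(0, i, 0.25*i - 0.5);
            mu.Set(0, i, 0.0);
        }
        reg.proxoperator(W, 0.3, mu, P);
    }
    elem::Finalize();
    return 0;
}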