Skylark (Sketching Library) 0.1
ml/options.hpp
#ifndef SKYLARK_HILBERT_OPTIONS_HPP
#define SKYLARK_HILBERT_OPTIONS_HPP

// Standard headers used below (std::string, std::stringstream, std::cout/std::cerr).
#include <iostream>
#include <sstream>
#include <string>

#ifndef SKYLARK_AVOID_BOOST_PO

#include <boost/program_options.hpp>
namespace po = boost::program_options;

#else

#include <boost/lexical_cast.hpp>

#endif

#define DEFAULT_LAMBDA 0.0
#define DEFAULT_RHO 1.0
#define DEFAULT_THREADS 1
#define DEFAULT_FEATURE_PARTITIONS 1
#define DEFAULT_KERPARAM 1.0
#define DEFAULT_TOL 0.001
#define DEFAULT_MAXITER 100
#define DEFAULT_SEED 12345
#define DEFAULT_RF 100
#define DEFAULT_KERNEL 0
#define DEFAULT_FILEFORMAT 0

enum LossType {SQUARED = 0, LAD = 1, HINGE = 2, LOGISTIC = 3};
std::string Losses[] = {"Squared Loss",
                        "Least Absolute Deviations",
                        "Hinge Loss (SVMs)",
                        "Logistic Loss"};

enum RegularizerType {L2 = 0, L1 = 1};
std::string Regularizers[] = {"L2", "L1"};

enum ProblemType {REGRESSION = 0, CLASSIFICATION = 1};
std::string Problems[] = {"Regression", "Classification"};

enum KernelType {LINEAR = 0, GAUSSIAN = 1, POLYNOMIAL = 2,
                 LAPLACIAN = 3, EXPSEMIGROUP = 4};
std::string Kernels[] = {"Linear", "Gaussian",
                         "Polynomial", "Laplacian", "ExpSemigroup"};

enum FileFormatType {LIBSVM_DENSE = 0, LIBSVM_SPARSE = 1, HDF5_DENSE = 2, HDF5_SPARSE = 3};
std::string FileFormats[] = {"libsvm-dense", "libsvm-sparse", "hdf5_dense", "hdf5_sparse"};
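// The string arrays above map each enum value to its display name (for
// example, Kernels[GAUSSIAN] is "Gaussian"); print() below relies on the
// array order matching the enum values.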

struct hilbert_options_t {
    /* model selection */
    LossType lossfunction;
    RegularizerType regularizer;
    KernelType kernel;

    /* kernel parameters */
    double kernelparam;
    double kernelparam2;
    double kernelparam3;

    /* regularization parameter */
    double lambda;

    /* optimization options */
    int MAXITER;
    double tolerance;
    double rho;

    /* randomization / feature-map options */
    int seed;
    int randomfeatures;
    bool regularmap;
    bool cachetransforms;

    /* parallelization options */
    int numfeaturepartitions;
    int numthreads;
    int nummpiprocesses;

    /* input file format */
    int fileformat;

    /* input/output files */
    std::string trainfile;
    std::string modelfile;
    std::string testfile;
    std::string valfile;

    /* full command line, reassembled for logging by print() */
    std::string str = "";

    /* set when --help is requested or parsing fails; caller should exit */
    bool exit_on_return;

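    // Parses argc/argv (via Boost.Program_options when available, otherwise a
    // simple "--flag value" scanner), records the full command line in str,
    // and sets exit_on_return when --help is requested or parsing fails.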
    hilbert_options_t (int argc, char** argv, int nproc) :
        nummpiprocesses(nproc), exit_on_return(false) {

#ifndef SKYLARK_AVOID_BOOST_PO

        po::options_description desc
            ("Usage: skylark_ml [options] --trainfile trainfile --modelfile modelfile\nUsage: skylark_ml --modelfile modelfile --testfile testfile ");

        desc.add_options()
            ("help,h", "produce a help message")
            ("lossfunction,l",
                po::value<int>((int*) &lossfunction)->default_value(SQUARED),
                "Loss function (0:SQUARED, 1:LAD, 2:HINGE, 3:LOGISTIC)")
            ("regularizer,r",
                po::value<int>((int*) &regularizer)->default_value(L2),
                "Regularizer (0:L2, 1:L1)")
            ("kernel,k",
                po::value<int>((int*) &kernel)->default_value(LINEAR),
                "Kernel (0:LINEAR, 1:GAUSSIAN, 2:POLYNOMIAL, "
                "3:LAPLACIAN, 4:EXPSEMIGROUP)")
            ("kernelparam,g",
                po::value<double>(&kernelparam)->default_value(DEFAULT_KERPARAM),
                "Kernel Parameter")
            ("kernelparam2,x",
                po::value<double>(&kernelparam2)->default_value(0),
                "If Applicable - Second Kernel Parameter (Polynomial Kernel: c)")
            ("kernelparam3,y",
                po::value<double>(&kernelparam3)->default_value(1),
                "If Applicable - Third Kernel Parameter (Polynomial Kernel: gamma)")
            ("lambda,c",
                po::value<double>(&lambda)->default_value(DEFAULT_LAMBDA),
                "Regularization Parameter")
            ("tolerance,e",
                po::value<double>(&tolerance)->default_value(DEFAULT_TOL),
                "Tolerance")
            ("rho",
                po::value<double>(&rho)->default_value(DEFAULT_RHO),
                "ADMM rho parameter")
            ("seed,s",
                po::value<int>(&seed)->default_value(DEFAULT_SEED),
                "Seed for Random Number Generator")
            ("randomfeatures,f",
                po::value<int>(&randomfeatures)->default_value(DEFAULT_RF),
                "Number of Random Features (default: 100)")
            ("numfeaturepartitions,n",
                po::value<int>(&numfeaturepartitions)->
                default_value(DEFAULT_FEATURE_PARTITIONS),
                "Number of Feature Partitions (default: 1)")
            ("numthreads,t",
                po::value<int>(&numthreads)->default_value(DEFAULT_THREADS),
                "Number of Threads (default: 1)")
            ("regular",
                po::value<bool>(&regularmap)->default_value(true),
                "Default is to use 'fast' feature mapping, if available. "
                "Use this flag to force regular mapping (default: false)")
            ("cachetransforms",
                po::value<bool>(&cachetransforms)->default_value(false),
                "Default is to not cache feature transforms per iteration, but generate them on the fly. "
                "Use this flag to force transform caching if you have enough memory (default: false)")
            ("fileformat",
                po::value<int>(&fileformat)->default_value(DEFAULT_FILEFORMAT),
                "Fileformat (default: 0 (libsvm-dense), 1 (libsvm-sparse), 2 (hdf5-dense), 3 (hdf5-sparse))")
            ("MAXITER,i",
                po::value<int>(&MAXITER)->default_value(DEFAULT_MAXITER),
                "Maximum Number of Iterations (default: 100)")
            ("trainfile",
                po::value<std::string>(&trainfile)->default_value(""),
                "Training data file (required in training mode)")
            ("modelfile",
                po::value<std::string>(&modelfile)->required(),
                "Model output file")
            ("valfile",
                po::value<std::string>(&valfile)->default_value(""),
                "Validation file (optional)")
            ("testfile",
                po::value<std::string>(&testfile)->default_value(""),
                "Test file (optional in training mode; required in testing mode)")
            ; /* end options */
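        // Illustrative invocation (file names are placeholders):
        //   skylark_ml -k 1 -g 10 -f 1000 --trainfile data.libsvm --modelfile model.out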

        po::positional_options_description positionalOptions;
        positionalOptions.add("trainfile", 1);
        positionalOptions.add("modelfile", 1);

        po::variables_map vm;
        try {
            po::store(po::command_line_parser(argc, argv)
                .options(desc).positional(positionalOptions).run(), vm);

            if (vm.count("help")) {
                std::cout << desc;
                exit_on_return = true;
                return;
            }
            po::notify(vm); // throws on error, so do after help in case
            // there are any problems
        }
        catch(po::error& e) {
            std::cerr << e.what() << std::endl;
            std::cerr << desc << std::endl;
            exit_on_return = true;
            return;
        }

#else
        // The following is much less robust, but should work even without
        // Boost::program_options.

        lossfunction = SQUARED;
        regularizer = L2;
        kernel = LINEAR;
        kernelparam = DEFAULT_KERPARAM;
        kernelparam2 = 0;
        kernelparam3 = 1;
        lambda = DEFAULT_LAMBDA;
        tolerance = DEFAULT_TOL;
        rho = DEFAULT_RHO;
        seed = DEFAULT_SEED;
        randomfeatures = DEFAULT_RF;
        numfeaturepartitions = DEFAULT_FEATURE_PARTITIONS;
        numthreads = DEFAULT_THREADS;
        regularmap = true;
        cachetransforms = false;
        fileformat = DEFAULT_FILEFORMAT;
        MAXITER = DEFAULT_MAXITER;
        valfile = "";
        testfile = "";

        // Flags are expected as "--flag value" pairs; stop before argc so
        // that argv[i+1] is always valid.
        for (int i = 1; i + 1 < argc; i += 2) {
            std::string flag = argv[i];
            std::string value = argv[i+1];

            if (flag == "--lossfunction" || flag == "-l")
                lossfunction =
                    static_cast<LossType>(boost::lexical_cast<int>(value));
            if (flag == "--regularizer" || flag == "-r")
                regularizer =
                    static_cast<RegularizerType>(boost::lexical_cast<int>(value));
            if (flag == "--kernel" || flag == "-k")
                kernel =
                    static_cast<KernelType>(boost::lexical_cast<int>(value));
            if (flag == "--kernelparam" || flag == "-g")
                kernelparam = boost::lexical_cast<double>(value);
            if (flag == "--kernelparam2" || flag == "-x")
                kernelparam2 = boost::lexical_cast<double>(value);
            if (flag == "--kernelparam3" || flag == "-y")
                kernelparam3 = boost::lexical_cast<double>(value);
            if (flag == "--lambda" || flag == "-c")
                lambda = boost::lexical_cast<double>(value);
            if (flag == "--tolerance" || flag == "-e")
                tolerance = boost::lexical_cast<double>(value);
            if (flag == "--rho")
                rho = boost::lexical_cast<double>(value);
            if (flag == "--seed" || flag == "-s")
                seed = boost::lexical_cast<int>(value);
            if (flag == "--randomfeatures" || flag == "-f")
                randomfeatures = boost::lexical_cast<int>(value);
            if (flag == "--numfeaturepartitions" || flag == "-n")
                numfeaturepartitions = boost::lexical_cast<int>(value);
            if (flag == "--numthreads" || flag == "-t")
                numthreads = boost::lexical_cast<int>(value);
            if (flag == "--regular")
                regularmap = value == "on";
            if (flag == "--fileformat")
                fileformat =
                    static_cast<FileFormatType>(boost::lexical_cast<int>(value));
            if (flag == "--MAXITER" || flag == "-i")
                MAXITER = boost::lexical_cast<int>(value);
            if (flag == "--trainfile")
                trainfile = value;
            if (flag == "--modelfile")
                modelfile = value;
            if (flag == "--valfile")
                valfile = value;
            if (flag == "--testfile")
                testfile = value;
        }
#endif

        // Reassemble the full command line in str for logging via print().
        for (int i = 0; i < argc; i++) {
            str.append(argv[i]);
            if (i < argc - 1)
                str.append(" ");
        }
    }

    std::string print () const {
        std::stringstream optionstring;

        optionstring << "# Generated using libSkylark/hilbert ";
        optionstring << "using the following command-line: " << std::endl;
        optionstring << "#\t" << str << std::endl;
        optionstring << "#" << std::endl;
        optionstring << "# Training File = " << trainfile << std::endl;
        optionstring << "# Model File = " << modelfile << std::endl;
        optionstring << "# Validation File = " << valfile << std::endl;
        optionstring << "# Test File = " << testfile << std::endl;
        optionstring << "# File Format = " << fileformat << std::endl;
        optionstring << "# Loss function = " << lossfunction
                     << " (" << Losses[lossfunction] << ")" << std::endl;
        optionstring << "# Regularizer = " << regularizer
                     << " (" << Regularizers[regularizer] << ")" << std::endl;
        optionstring << "# Kernel = " << kernel
                     << " (" << Kernels[kernel] << ")" << std::endl;
        optionstring << "# Kernel Parameter = " << kernelparam << std::endl;
        if (kernelparam2 != -1)
            optionstring << "# Second Kernel Parameter = "
                         << kernelparam2 << std::endl;
        if (kernelparam3 != -1)
            optionstring << "# Third Kernel Parameter = "
                         << kernelparam3 << std::endl;
        optionstring << "# Regularization Parameter = " << lambda << std::endl;
        optionstring << "# Maximum Iterations = " << MAXITER << std::endl;
        optionstring << "# Tolerance = " << tolerance << std::endl;
        optionstring << "# rho = " << rho << std::endl;
        optionstring << "# Seed = " << seed << std::endl;
        optionstring << "# Random Features = " << randomfeatures << std::endl;
        optionstring << "# Caching Transforms = " << cachetransforms << std::endl;
        optionstring << "# Slow/Fast feature mapping = " << regularmap << std::endl;
        optionstring << "# Number of feature partitions = "
                     << numfeaturepartitions << std::endl;
        optionstring << "# Threads = " << numthreads << std::endl;
        optionstring << "# Number of MPI Processes = "
                     << nummpiprocesses << std::endl;

        return optionstring.str();
    }
};

#endif /* SKYLARK_HILBERT_OPTIONS_HPP */
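A minimal usage sketch follows (illustrative only, not part of this header). It assumes options.hpp is on the include path and hard-codes the MPI process count that a real driver would obtain from its communicator.

// Hypothetical driver showing the intended calling pattern.
#include <iostream>
#include "options.hpp"

int main(int argc, char** argv) {
    int nproc = 1;  // placeholder; a real driver would query MPI
    hilbert_options_t options(argc, argv, nproc);
    if (options.exit_on_return)   // --help was printed or parsing failed
        return 1;
    std::cout << options.print(); // echo the parsed configuration
    return 0;
}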