Skylark (Sketching Library)
0.1
#ifndef SKYLARK_HILBERT_OPTIONS_HPP
#define SKYLARK_HILBERT_OPTIONS_HPP

#include <string>
#include <sstream>
#include <iostream>

#ifndef SKYLARK_AVOID_BOOST_PO

#include <boost/program_options.hpp>
namespace po = boost::program_options;

#else

#include <boost/lexical_cast.hpp>

#endif

#define DEFAULT_LAMBDA 0.0
#define DEFAULT_RHO 1.0
#define DEFAULT_THREADS 1
#define DEFAULT_FEATURE_PARTITIONS 1
#define DEFAULT_KERPARAM 1.0
#define DEFAULT_TOL 0.001
#define DEFAULT_MAXITER 100
#define DEFAULT_SEED 12345
#define DEFAULT_RF 100
#define DEFAULT_KERNEL 0
#define DEFAULT_FILEFORMAT 0

enum LossType {SQUARED = 0, LAD = 1, HINGE = 2, LOGISTIC = 3};
std::string Losses[] = {"Squared Loss",
                        "Least Absolute Deviations",
                        "Hinge Loss (SVMs)",
                        "Logistic Loss"};

enum RegularizerType {L2 = 0, L1 = 1};
std::string Regularizers[] = {"L2", "L1"};

enum ProblemType {REGRESSION = 0, CLASSIFICATION = 1};
std::string Problems[] = {"Regression", "Classification"};

enum KernelType {LINEAR = 0, GAUSSIAN = 1, POLYNOMIAL = 2,
                 LAPLACIAN = 3, EXPSEMIGROUP = 4};
std::string Kernels[] = {"Linear", "Gaussian",
                         "Polynomial", "Laplacian", "ExpSemigroup"};

enum FileFormatType {LIBSVM_DENSE = 0, LIBSVM_SPARSE = 1,
                     HDF5_DENSE = 2, HDF5_SPARSE = 3};
std::string FileFormats[] = {"libsvm-dense", "libsvm-sparse",
                             "hdf5_dense", "hdf5_sparse"};

/** Command-line options for the skylark_ml (Hilbert) driver. */
struct hilbert_options_t {
    /* learning problem options */
    LossType lossfunction;
    RegularizerType regularizer;
    KernelType kernel;

    /* kernel parameters */
    double kernelparam;
    double kernelparam2;
    double kernelparam3;

    double lambda;

    /* optimization options */
    int MAXITER;
    double tolerance;
    double rho;

    /* randomization options */
    int seed;
    int randomfeatures;
    bool regularmap;
    bool cachetransforms;

    /* parallelization options */
    int numfeaturepartitions;
    int numthreads;
    int nummpiprocesses;

    int fileformat;

    /* input/output files */
    std::string trainfile;
    std::string modelfile;
    std::string testfile;
    std::string valfile;
    std::string str = "";   /* full command line, rebuilt in the constructor */

    /* set when --help is requested or parsing fails */
    bool exit_on_return;

    /** Parse the options from the command line; nproc is the number of
     *  MPI processes (reported by print()). */
    hilbert_options_t (int argc, char** argv, int nproc) :
        nummpiprocesses(nproc), exit_on_return(false) {

#ifndef SKYLARK_AVOID_BOOST_PO

        po::options_description desc
            ("Usage: skylark_ml [options] --trainfile trainfile --modelfile modelfile\n"
             "Usage: skylark_ml --modelfile modelfile --testfile testfile");

        desc.add_options()
            ("help,h", "produce a help message")
            ("lossfunction,l",
                po::value<int>((int*) &lossfunction)->default_value(SQUARED),
                "Loss function (0:SQUARED, 1:LAD, 2:HINGE, 3:LOGISTIC)")
            ("regularizer,r",
                po::value<int>((int*) &regularizer)->default_value(L2),
                "Regularizer (0:L2, 1:L1)")
            ("kernel,k",
                po::value<int>((int*) &kernel)->default_value(LINEAR),
                "Kernel (0:LINEAR, 1:GAUSSIAN, 2:POLYNOMIAL, "
                "3:LAPLACIAN, 4:EXPSEMIGROUP)")
            ("kernelparam,g",
                po::value<double>(&kernelparam)->default_value(DEFAULT_KERPARAM),
                "Kernel Parameter")
            ("kernelparam2,x",
                po::value<double>(&kernelparam2)->default_value(0),
                "If Applicable - Second Kernel Parameter (Polynomial Kernel: c)")
            ("kernelparam3,y",
                po::value<double>(&kernelparam3)->default_value(1),
                "If Applicable - Third Kernel Parameter (Polynomial Kernel: gamma)")
            ("lambda,c",
                po::value<double>(&lambda)->default_value(DEFAULT_LAMBDA),
                "Regularization Parameter")
            ("tolerance,e",
                po::value<double>(&tolerance)->default_value(DEFAULT_TOL),
                "Tolerance")
            ("rho",
                po::value<double>(&rho)->default_value(DEFAULT_RHO),
                "ADMM rho parameter")
            ("seed,s",
                po::value<int>(&seed)->default_value(DEFAULT_SEED),
                "Seed for Random Number Generator")
            ("randomfeatures,f",
                po::value<int>(&randomfeatures)->default_value(DEFAULT_RF),
                "Number of Random Features (default: 100)")
            ("numfeaturepartitions,n",
                po::value<int>(&numfeaturepartitions)->
                    default_value(DEFAULT_FEATURE_PARTITIONS),
                "Number of Feature Partitions (default: 1)")
            ("numthreads,t",
                po::value<int>(&numthreads)->default_value(DEFAULT_THREADS),
                "Number of Threads (default: 1)")
            ("regular",
                po::value<bool>(&regularmap)->default_value(true),
                "Default is to use 'fast' feature mapping, if available. "
                "Use this flag to force regular mapping (default: false)")
            ("cachetransforms",
                po::value<bool>(&cachetransforms)->default_value(false),
                "Default is to not cache feature transforms per iteration, "
                "but to generate them on the fly. "
                "Use this flag to force transform caching if you have enough memory (default: false)")
            ("fileformat",
                po::value<int>(&fileformat)->default_value(DEFAULT_FILEFORMAT),
                "Fileformat (default: 0 (libsvm-dense), 1 (libsvm-sparse), "
                "2 (hdf5-dense), 3 (hdf5-sparse))")
            ("MAXITER,i",
                po::value<int>(&MAXITER)->default_value(DEFAULT_MAXITER),
                "Maximum Number of Iterations (default: 100)")
            ("trainfile",
                po::value<std::string>(&trainfile)->default_value(""),
                "Training data file (required in training mode)")
            ("modelfile",
                po::value<std::string>(&modelfile)->required(),
                "Model output file")
            ("valfile",
                po::value<std::string>(&valfile)->default_value(""),
                "Validation file (optional)")
            ("testfile",
                po::value<std::string>(&testfile)->default_value(""),
                "Test file (optional in training mode; required in testing mode)")
            ; /* end options */

        po::positional_options_description positionalOptions;
        positionalOptions.add("trainfile", 1);
        positionalOptions.add("modelfile", 1);

        po::variables_map vm;
        try {
            po::store(po::command_line_parser(argc, argv)
                .options(desc).positional(positionalOptions).run(), vm);

            /* print help and stop if requested */
            if (vm.count("help")) {
                std::cout << desc;
                exit_on_return = true;
                return;
            }
            po::notify(vm); // throws on error, so do after help in case
                            // there are any problems
        }
        catch(po::error& e) {
            std::cerr << e.what() << std::endl;
            std::cerr << desc << std::endl;
            exit_on_return = true;
            return;
        }

#else
        // The following is much less robust, but should work even without
        // Boost::program_options.

        lossfunction = SQUARED;
        regularizer = L2;
        kernel = LINEAR;
        kernelparam = DEFAULT_KERPARAM;
        kernelparam2 = 0;
        kernelparam3 = 1;
        lambda = DEFAULT_LAMBDA;
        tolerance = DEFAULT_TOL;
        rho = DEFAULT_RHO;
        seed = DEFAULT_SEED;
        randomfeatures = DEFAULT_RF;
        numfeaturepartitions = DEFAULT_FEATURE_PARTITIONS;
        numthreads = DEFAULT_THREADS;
        regularmap = true;
        fileformat = DEFAULT_FILEFORMAT;
        MAXITER = DEFAULT_MAXITER;
        valfile = "";
        testfile = "";

        /* expect "--flag value" pairs; stop before reading past argv */
        for (int i = 1; i + 1 < argc; i += 2) {
            std::string flag = argv[i];
            std::string value = argv[i+1];

            if (flag == "--lossfunction" || flag == "-l")
                lossfunction =
                    static_cast<LossType>(boost::lexical_cast<int>(value));
            if (flag == "--regularizer" || flag == "-r")
                regularizer =
                    static_cast<RegularizerType>(boost::lexical_cast<int>(value));
            if (flag == "--kernel" || flag == "-k")
                kernel =
                    static_cast<KernelType>(boost::lexical_cast<int>(value));
            if (flag == "--kernelparam" || flag == "-g")
                kernelparam = boost::lexical_cast<double>(value);
            if (flag == "--kernelparam2" || flag == "-x")
                kernelparam2 = boost::lexical_cast<double>(value);
            if (flag == "--kernelparam3" || flag == "-y")
                kernelparam3 = boost::lexical_cast<double>(value);
            if (flag == "--lambda" || flag == "-c")
                lambda = boost::lexical_cast<double>(value);
            if (flag == "--tolerance" || flag == "-e")
                tolerance = boost::lexical_cast<double>(value);
            if (flag == "--rho")
                rho = boost::lexical_cast<double>(value);
            if (flag == "--seed" || flag == "-s")
                seed = boost::lexical_cast<int>(value);
            if (flag == "--randomfeatures" || flag == "-f")
                randomfeatures = boost::lexical_cast<int>(value);
            if (flag == "--numfeaturepartitions" || flag == "-n")
                numfeaturepartitions = boost::lexical_cast<int>(value);
            if (flag == "--numthreads" || flag == "-t")
                numthreads = boost::lexical_cast<int>(value);
            if (flag == "--regular")
                regularmap = (value == "on");
            if (flag == "--fileformat")
                fileformat =
                    static_cast<FileFormatType>(boost::lexical_cast<int>(value));
            if (flag == "--MAXITER" || flag == "-i")
                MAXITER = boost::lexical_cast<int>(value);
            if (flag == "--trainfile")
                trainfile = value;
            if (flag == "--modelfile")
                modelfile = value;
            if (flag == "--valfile")
                valfile = value;
            if (flag == "--testfile")
                testfile = value;
        }
#endif

        /* record the full command line for reproducibility */
        for (int i = 0; i < argc; i++) {
            str.append(argv[i]);
            if (i < argc - 1)
                str.append(" ");
        }
    }

    std::string print () const {
        std::stringstream optionstring;

        optionstring << "# Generated using libSkylark/hilbert ";
        optionstring << "using the following command-line: " << std::endl;
        optionstring << "#\t" << str << std::endl;
        optionstring << "#" << std::endl;
        optionstring << "# Training File = " << trainfile << std::endl;
        optionstring << "# Model File = " << modelfile << std::endl;
        optionstring << "# Validation File = " << valfile << std::endl;
        optionstring << "# Test File = " << testfile << std::endl;
        optionstring << "# File Format = " << fileformat << std::endl;
        optionstring << "# Loss function = " << lossfunction
                     << " (" << Losses[lossfunction] << ")" << std::endl;
        optionstring << "# Regularizer = " << regularizer
                     << " (" << Regularizers[regularizer] << ")" << std::endl;
        optionstring << "# Kernel = " << kernel
                     << " (" << Kernels[kernel] << ")" << std::endl;
        optionstring << "# Kernel Parameter = " << kernelparam << std::endl;
        if (kernelparam2 != -1)
            optionstring << "# Second Kernel Parameter = "
                         << kernelparam2 << std::endl;
        if (kernelparam3 != -1)
            optionstring << "# Third Kernel Parameter = "
                         << kernelparam3 << std::endl;
        optionstring << "# Regularization Parameter = " << lambda << std::endl;
        optionstring << "# Maximum Iterations = " << MAXITER << std::endl;
        optionstring << "# Tolerance = " << tolerance << std::endl;
        optionstring << "# rho = " << rho << std::endl;
        optionstring << "# Seed = " << seed << std::endl;
        optionstring << "# Random Features = " << randomfeatures << std::endl;
        optionstring << "# Caching Transforms = " << cachetransforms << std::endl;
        optionstring << "# Slow/Fast feature mapping = " << regularmap << std::endl;
        optionstring << "# Number of feature partitions = "
                     << numfeaturepartitions << std::endl;
        optionstring << "# Threads = " << numthreads << std::endl;
        optionstring << "# Number of MPI Processes = "
                     << nummpiprocesses << std::endl;

        return optionstring.str();
    }
};

#endif /* SKYLARK_HILBERT_OPTIONS_HPP */