Skylark (Sketching Library)  0.1
/var/lib/jenkins/jobs/Skylark/workspace/sketch/hash_transform_Mixed.hpp
#ifndef SKYLARK_HASH_TRANSFORM_MIXED_HPP
#define SKYLARK_HASH_TRANSFORM_MIXED_HPP

#include <map>
#include <set>
#include <vector>
#include <boost/serialization/map.hpp>

#include "../utility/external/combblas_comm_grid.hpp"

namespace skylark { namespace sketch {

//FIXME:
//  - Benchmark one-sided vs. col/row comm (or a midpoint scheme):
//    the best scheme most likely depends on the output Elemental distribution;
//    here we use the same comm scheme for all output types.
//  - Process the sparse matrix in blocks?
//  - MPI-3 one-sided, see: "Enabling highly-scalable remote memory access
//    programming with MPI-3 one sided", R. Gerstenberger, M. Besta, and
//    T. Hoefler.


/* Specialization: SpParMat for input, distributed Elemental for output */
template <typename IndexType,
          typename ValueType,
          elem::Distribution ColDist,
          elem::Distribution RowDist,
          template <typename> class IdxDistributionType,
          template <typename> class ValueDistribution>
struct hash_transform_t <
    SpParMat<IndexType, ValueType, SpDCCols<IndexType, ValueType> >,
    elem::DistMatrix<ValueType, ColDist, RowDist>,
    IdxDistributionType,
    ValueDistribution > :
        public hash_transform_data_t<IdxDistributionType,
                                     ValueDistribution> {
    typedef IndexType index_type;
    typedef ValueType value_type;
    typedef SpDCCols< index_type, value_type > col_t;
    typedef FullyDistVec< index_type, value_type> mpi_vector_t;
    typedef SpParMat< index_type, value_type, col_t > matrix_type;
    typedef elem::DistMatrix< value_type, ColDist, RowDist > output_matrix_type;
    typedef hash_transform_data_t<IdxDistributionType,
                                  ValueDistribution> data_type;


    /// Regular constructor.
    hash_transform_t (int N, int S, base::context_t& context) :
        data_type(N, S, context) {

    }

    /// Copy constructor (possibly from a transform over different matrix types).
    template <typename InputMatrixType,
              typename OutputMatrixType>
    hash_transform_t (hash_transform_t<InputMatrixType,
                                       OutputMatrixType,
                                       IdxDistributionType,
                                       ValueDistribution>& other) :
        data_type(other) {}

    /// Constructor from the underlying transform data.
    hash_transform_t (hash_transform_data_t<IdxDistributionType,
                                            ValueDistribution>& other_data) :
        data_type(other_data) {}

    template <typename Dimension>
    void apply (const matrix_type &A, output_matrix_type &sketch_of_A,
                Dimension dimension) const {
        try {
            apply_impl (A, sketch_of_A, dimension);
        } catch (const boost::mpi::exception& e) {
            SKYLARK_THROW_EXCEPTION (
                base::mpi_exception()
                    << base::error_msg(e.what()) );
        } catch (const std::string& e) {
            SKYLARK_THROW_EXCEPTION (
                base::combblas_exception()
                    << base::error_msg(e) );
        } catch (const std::logic_error& e) {
            SKYLARK_THROW_EXCEPTION (
                base::combblas_exception()
                    << base::error_msg(e.what()) );
        } catch (const std::bad_alloc&) {
            SKYLARK_THROW_EXCEPTION (
                base::skylark_exception()
                    << base::error_msg("bad_alloc: out of memory") );
        }
    }


private:
    /// Implementation of apply(): sketch the local nonzeros and scatter the
    /// results to the owning processes of the distributed output matrix.
    template <typename Dimension>
    void apply_impl (const matrix_type &A_,
        output_matrix_type &sketch_of_A,
        Dimension dist) const {

        // We are essentially doing a 'const' access to A, but the necessary
        // 'const' accessors are missing from the CombBLAS interface.
        matrix_type &A = const_cast<matrix_type&>(A_);

        const size_t rank = A.getcommgrid()->GetRank();

        // extract the local columns of the matrix
        col_t &data = A.seq();

        const size_t ncols = sketch_of_A.Width();

        const size_t my_row_offset = utility::cb_my_row_offset(A);
        const size_t my_col_offset = utility::cb_my_col_offset(A);

        size_t comm_size = A.getcommgrid()->GetSize();
        std::vector< std::set<size_t> > proc_set(comm_size);

        // pre-compute processor targets of the local sketch application
        for(typename col_t::SpColIter col = data.begcol();
            col != data.endcol(); col++) {
            for(typename col_t::SpColIter::NzIter nz = data.begnz(col);
                nz != data.endnz(col); nz++) {

                // compute the global row and column id, and compress them into
                // a single target position index
                const index_type rowid = nz.rowid()  + my_row_offset;
                const index_type colid = col.colid() + my_col_offset;
                const size_t pos       = getPos(rowid, colid, ncols, dist);

                // compute the target processor for this position index
                const size_t target =
                    sketch_of_A.Owner(pos / ncols, pos % ncols);

                if(proc_set[target].count(pos) == 0) {
                    assert(target < comm_size);
                    proc_set[target].insert(pos);
                }
            }
        }

        // construct the array holding start/end indices for one-sided access;
        // use size_t so the buffer matches the MPI datatype used in MPI_Get below
        std::vector<size_t> proc_start_idx(comm_size + 1, 0);
        for(size_t i = 1; i < comm_size + 1; ++i)
            proc_start_idx[i] = proc_start_idx[i-1] + proc_set[i-1].size();

        // total number of nnz that will result when applying the sketch locally
        std::vector<index_type> indices(proc_start_idx[comm_size], 0);
        std::vector<value_type> values(proc_start_idx[comm_size], 0);

        // Apply the sketch to all local values. Note that some of the resulting
        // values might end up on a different processor. The data structure
        // fills values (sorted by processor id) into one contiguous array.
        // Subsequently, one-sided operations can be used to access the values
        // for each processor.
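        // Illustrative example (hypothetical sizes): with 3 ranks and per-target
        // position counts {2, 3, 1}, proc_start_idx = {0, 2, 5, 6}; entries
        // [0, 2) of indices/values are destined for rank 0, [2, 5) for rank 1,
        // and [5, 6) for rank 2.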
        for(typename col_t::SpColIter col = data.begcol();
            col != data.endcol(); col++) {
            for(typename col_t::SpColIter::NzIter nz = data.begnz(col);
                nz != data.endnz(col); nz++) {

                // compute the global row and column id, and compress them into
                // a single target position index
                const index_type rowid = nz.rowid()  + my_row_offset;
                const index_type colid = col.colid() + my_col_offset;
                const size_t pos       = getPos(rowid, colid, ncols, dist);

                // compute the target processor for this position index
                const size_t proc = sketch_of_A.Owner(pos / ncols, pos % ncols);

                // get the offset in the packed array for the current element
                const size_t ar_idx = proc_start_idx[proc] +
                    std::distance(proc_set[proc].begin(), proc_set[proc].find(pos));

                indices[ar_idx] = pos;
                values[ar_idx]  += nz.value() *
                                   data_type::getValue(rowid, colid, dist);
            }
        }

        // Create windows for all relevant arrays
        boost::mpi::communicator comm = utility::get_communicator(A);

        // tell MPI that we will not use locks
        MPI_Info info;
        MPI_Info_create(&info);
        MPI_Info_set(info, "no_locks", "true");

        MPI_Win start_offset_win, idx_win, val_win;

        MPI_Win_create(&proc_start_idx[0], sizeof(size_t) * (comm_size + 1),
                       sizeof(size_t), info, comm, &start_offset_win);

        MPI_Win_create(&indices[0], sizeof(index_type) * indices.size(),
                       sizeof(index_type), info, comm, &idx_win);

        MPI_Win_create(&values[0], sizeof(value_type) * values.size(),
                       sizeof(value_type), info, comm, &val_win);

        MPI_Info_free(&info);

        // Start the access epoch: no put operations will follow (read only)
        // and no RMA calls precede this fence.
        MPI_Win_fence(MPI_MODE_NOPUT | MPI_MODE_NOPRECEDE, start_offset_win);
        MPI_Win_fence(MPI_MODE_NOPUT | MPI_MODE_NOPRECEDE, idx_win);
        MPI_Win_fence(MPI_MODE_NOPUT | MPI_MODE_NOPRECEDE, val_win);


        // accumulate values from the other processors
        for(size_t p = 0; p < comm_size; ++p) {

            // Read the start/end offsets of the entries that process p holds
            // for this rank: elements [rank, rank + 2) of p's proc_start_idx.
            std::vector<size_t> offset(2);
            MPI_Get(&(offset[0]), 2, boost::mpi::get_mpi_datatype<size_t>(),
                    p, rank, 2, boost::mpi::get_mpi_datatype<size_t>(),
                    start_offset_win);

            MPI_Win_fence(MPI_MODE_NOPUT, start_offset_win);
            size_t num_values = offset[1] - offset[0];

            // and fill the indices/values.
            std::vector<index_type> add_idx(num_values);
            std::vector<value_type> add_val(num_values);
            MPI_Get(&(add_idx[0]), num_values,
                    boost::mpi::get_mpi_datatype<index_type>(), p, offset[0],
                    num_values, boost::mpi::get_mpi_datatype<index_type>(),
                    idx_win);

            MPI_Get(&(add_val[0]), num_values,
                    boost::mpi::get_mpi_datatype<value_type>(), p, offset[0],
                    num_values, boost::mpi::get_mpi_datatype<value_type>(),
                    val_win);

            MPI_Win_fence(MPI_MODE_NOPUT, idx_win);
            MPI_Win_fence(MPI_MODE_NOPUT, val_win);

            // finally, accumulate the received data into the local output buffer
            for(size_t i = 0; i < num_values; ++i) {
                index_type lrow = sketch_of_A.LocalRow(add_idx[i] / ncols);
                index_type lcol = sketch_of_A.LocalCol(add_idx[i] % ncols);
                sketch_of_A.UpdateLocal(lrow, lcol, add_val[i]);
            }
        }

        MPI_Win_fence(MPI_MODE_NOPUT | MPI_MODE_NOSUCCEED, start_offset_win);
        MPI_Win_fence(MPI_MODE_NOPUT | MPI_MODE_NOSUCCEED, idx_win);
        MPI_Win_fence(MPI_MODE_NOPUT | MPI_MODE_NOSUCCEED, val_win);

        MPI_Win_free(&start_offset_win);
        MPI_Win_free(&idx_win);
        MPI_Win_free(&val_win);
    }

    inline index_type getPos(index_type rowid, index_type colid, size_t ncols,
        columnwise_tag) const {
        return colid + ncols * data_type::row_idx[rowid];
    }

    inline index_type getPos(index_type rowid, index_type colid, size_t ncols,
        rowwise_tag) const {
        return rowid * ncols + data_type::row_idx[colid];
    }
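    // Illustrative example (hypothetical values): for a columnwise sketch with
    // ncols = 4, a nonzero at global (rowid, colid) = (7, 2) whose row hashes to
    // row_idx[7] = 5 gets pos = 2 + 4 * 5 = 22, which the caller decodes as
    // output entry (pos / ncols, pos % ncols) = (5, 2).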
};
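
/* Usage sketch (illustrative only, not part of the library): one way a transform
   of this specialization might be constructed and applied. The index/value
   distribution parameters chosen below are assumptions for the sake of the
   example (their headers are not included by this file); the library's concrete
   hash-based sketches make their own choices. Guarded by #if 0, following the
   convention used for the disabled specialization further down. */
#if 0
inline void example_columnwise_apply(
    const SpParMat<int, double, SpDCCols<int, double> >& A,
    elem::DistMatrix<double>& sketch_of_A,
    base::context_t& context) {

    typedef hash_transform_t<
        SpParMat<int, double, SpDCCols<int, double> >,
        elem::DistMatrix<double>,
        boost::random::uniform_int_distribution,    // assumed index distribution
        utility::rademacher_distribution_t>         // assumed value distribution
        sketch_type;

    // Sketch the rows of A: N input rows are mapped to S output rows.
    sketch_type S(A.getnrow(), sketch_of_A.Height(), context);
    S.apply(A, sketch_of_A, columnwise_tag());
}
#endif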

/* Specialization: SpParMat for input, Local Elemental output */
template <typename IndexType,
          typename ValueType,
          template <typename> class IdxDistributionType,
          template <typename> class ValueDistribution>
struct hash_transform_t <
    SpParMat<IndexType, ValueType, SpDCCols<IndexType, ValueType> >,
    elem::Matrix<ValueType>,
    IdxDistributionType,
    ValueDistribution > :
        public hash_transform_data_t<IdxDistributionType,
                                     ValueDistribution> {
    typedef IndexType index_type;
    typedef ValueType value_type;
    typedef SpDCCols< index_type, value_type > col_t;
    typedef FullyDistVec< index_type, value_type> mpi_vector_t;
    typedef SpParMat< index_type, value_type, col_t > matrix_type;
    typedef elem::Matrix< value_type > output_matrix_type;
    typedef hash_transform_data_t<IdxDistributionType,
                                  ValueDistribution> data_type;


    /// Regular constructor.
    hash_transform_t (int N, int S, base::context_t& context) :
        data_type(N, S, context) {

    }

    /// Copy constructor (possibly from a transform over different matrix types).
    template <typename InputMatrixType,
              typename OutputMatrixType>
    hash_transform_t (hash_transform_t<InputMatrixType,
                                       OutputMatrixType,
                                       IdxDistributionType,
                                       ValueDistribution>& other) :
        data_type(other) {}

    /// Constructor from the underlying transform data.
    hash_transform_t (hash_transform_data_t<IdxDistributionType,
                                            ValueDistribution>& other_data) :
        data_type(other_data) {}

    template <typename Dimension>
    void apply (const matrix_type &A, output_matrix_type &sketch_of_A,
                Dimension dimension) const {
        try {
            apply_impl (A, sketch_of_A, dimension);
        } catch (const boost::mpi::exception& e) {
            SKYLARK_THROW_EXCEPTION (
                base::mpi_exception()
                    << base::error_msg(e.what()) );
        } catch (const std::string& e) {
            SKYLARK_THROW_EXCEPTION (
                base::combblas_exception()
                    << base::error_msg(e) );
        } catch (const std::logic_error& e) {
            SKYLARK_THROW_EXCEPTION (
                base::combblas_exception()
                    << base::error_msg(e.what()) );
        } catch (const std::bad_alloc&) {
            SKYLARK_THROW_EXCEPTION (
                base::skylark_exception()
                    << base::error_msg("bad_alloc: out of memory") );
        }
    }


private:
    /// Implementation of apply(): sketch the local nonzeros, then gather the
    /// results on rank 0, which populates the local output matrix.
    template <typename Dimension>
    void apply_impl (const matrix_type &A_,
        output_matrix_type &sketch_of_A,
        Dimension dist) const {

        // We are essentially doing a 'const' access to A, but the necessary
        // 'const' accessors are missing from the CombBLAS interface.
        matrix_type &A = const_cast<matrix_type&>(A_);

        const size_t rank = A.getcommgrid()->GetRank();

        // extract the local columns of the matrix
        col_t &data = A.seq();

        const size_t my_row_offset = utility::cb_my_row_offset(A);
        const size_t my_col_offset = utility::cb_my_col_offset(A);

        int n_res_cols = A.getncol();
        int n_res_rows = A.getnrow();
        data_type::get_res_size(n_res_rows, n_res_cols, dist);

        // Apply sketch for all local values. Subsequently, all values are
        // gathered on processor 0 and the local matrix is populated.
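        // The map key packs (rowid, colid) into a single index in column-major
        // order: key = colid * n_res_rows + rowid, decoded below on rank 0 as
        // row = key % n_res_rows, col = key / n_res_rows.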
        typedef std::map<index_type, value_type> col_values_t;
        col_values_t col_values;
        for(typename col_t::SpColIter col = data.begcol();
            col != data.endcol(); col++) {
            for(typename col_t::SpColIter::NzIter nz = data.begnz(col);
                nz != data.endnz(col); nz++) {

                index_type rowid = nz.rowid()  + my_row_offset;
                index_type colid = col.colid() + my_col_offset;

                const value_type value =
                    nz.value() * data_type::getValue(rowid, colid, dist);
                data_type::finalPos(rowid, colid, dist);
                col_values[colid * n_res_rows + rowid] += value;
            }
        }

        std::vector< std::map<index_type, value_type > > result;
        boost::mpi::gather(utility::get_communicator(A), col_values, result, 0);

        if(rank == 0) {
            typedef typename std::map<index_type, value_type>::iterator itr_t;
            for(size_t i = 0; i < result.size(); ++i) {
                itr_t proc_itr = result[i].begin();
                for(; proc_itr != result[i].end(); proc_itr++) {
                    int row = proc_itr->first % n_res_rows;
                    int col = proc_itr->first / n_res_rows;
                    sketch_of_A.Update(row, col, proc_itr->second);
                }
            }
        }
    }
};


#if 0
/* Specialization: SpParMat for input, Elemental[* / *] output */
template <typename IndexType,
          typename ValueType,
          template <typename> class IdxDistributionType,
          template <typename> class ValueDistribution>
struct hash_transform_t <
    SpParMat<IndexType, ValueType, SpDCCols<IndexType, ValueType> >,
    elem::DistMatrix<ValueType, elem::STAR, elem::STAR>,
    IdxDistributionType,
    ValueDistribution > :
        public hash_transform_data_t<IdxDistributionType,
                                     ValueDistribution> {
    typedef IndexType index_type;
    typedef ValueType value_type;
    typedef SpDCCols< index_type, value_type > col_t;
    typedef FullyDistVec< index_type, value_type> mpi_vector_t;
    typedef SpParMat< index_type, value_type, col_t > matrix_type;
    typedef elem::DistMatrix< value_type, elem::STAR, elem::STAR > output_matrix_type;
    typedef hash_transform_data_t<IdxDistributionType,
                                  ValueDistribution> data_type;


    /// Regular constructor.
    hash_transform_t (int N, int S, base::context_t& context) :
        data_type(N, S, context) {

    }

    /// Copy constructor (possibly from a transform over different matrix types).
    template <typename InputMatrixType,
              typename OutputMatrixType>
    hash_transform_t (hash_transform_t<InputMatrixType,
                                       OutputMatrixType,
                                       IdxDistributionType,
                                       ValueDistribution>& other) :
        data_type(other) {}

    /// Constructor from the underlying transform data.
    hash_transform_t (hash_transform_data_t<IdxDistributionType,
                                            ValueDistribution>& other_data) :
        data_type(other_data) {}

    template <typename Dimension>
    void apply (const matrix_type &A, output_matrix_type &sketch_of_A,
                Dimension dimension) const {
        try {
            apply_impl (A, sketch_of_A, dimension);
        } catch (const boost::mpi::exception& e) {
            SKYLARK_THROW_EXCEPTION (
                base::mpi_exception()
                    << base::error_msg(e.what()) );
        } catch (const std::string& e) {
            SKYLARK_THROW_EXCEPTION (
                base::combblas_exception()
                    << base::error_msg(e) );
        } catch (const std::logic_error& e) {
            SKYLARK_THROW_EXCEPTION (
                base::combblas_exception()
                    << base::error_msg(e.what()) );
        } catch (const std::bad_alloc&) {
            SKYLARK_THROW_EXCEPTION (
                base::skylark_exception()
                    << base::error_msg("bad_alloc: out of memory") );
        }
    }


private:
    /// Implementation of apply(): sketch the local nonzeros, then all-gather the
    /// results so that every processor can populate its [*, *] output copy.
    template <typename Dimension>
    void apply_impl (const matrix_type &A_,
        output_matrix_type &sketch_of_A,
        Dimension dist) const {

        // We are essentially doing a 'const' access to A, but the necessary
        // 'const' accessors are missing from the CombBLAS interface.
        matrix_type &A = const_cast<matrix_type&>(A_);

        const size_t rank = A.getcommgrid()->GetRank();

        // extract the local columns of the matrix
        col_t &data = A.seq();

        const size_t my_row_offset = utility::cb_my_row_offset(A);
        const size_t my_col_offset = utility::cb_my_col_offset(A);

        int n_res_cols = A.getncol();
        int n_res_rows = A.getnrow();
        data_type::get_res_size(n_res_rows, n_res_cols, dist);

        // Apply sketch for all local values. Subsequently, all values are
        // gathered on all processors and the "local" matrix is populated.
        typedef std::map<index_type, value_type> col_values_t;
        col_values_t col_values;
        for(typename col_t::SpColIter col = data.begcol();
            col != data.endcol(); col++) {
            for(typename col_t::SpColIter::NzIter nz = data.begnz(col);
                nz != data.endnz(col); nz++) {

                index_type rowid = nz.rowid()  + my_row_offset;
                index_type colid = col.colid() + my_col_offset;

                const value_type value =
                    nz.value() * data_type::getValue(rowid, colid, dist);
                data_type::finalPos(rowid, colid, dist);
                col_values[colid * n_res_rows + rowid] += value;
            }
        }

        std::vector< std::map<index_type, value_type > > result;
        boost::mpi::all_gather(
                utility::get_communicator(A), col_values, result);

        typedef typename std::map<index_type, value_type>::iterator itr_t;
        for(size_t i = 0; i < result.size(); ++i) {
            itr_t proc_itr = result[i].begin();
            for(; proc_itr != result[i].end(); proc_itr++) {
                int row = proc_itr->first % n_res_rows;
                int col = proc_itr->first / n_res_rows;
                sketch_of_A.Update(row, col, proc_itr->second);
            }
        }
    }
};
#endif

} } // namespace sketch, skylark

#endif // SKYLARK_HASH_TRANSFORM_MIXED_HPP