Skylark (Sketching Library)  0.1
/var/lib/jenkins/jobs/Skylark/workspace/sketch/hash_transform_local_sparse.hpp
Go to the documentation of this file.
00001 #ifndef SKYLARK_HASH_TRANSFORM_LOCAL_SPARSE_HPP
00002 #define SKYLARK_HASH_TRANSFORM_LOCAL_SPARSE_HPP
00003 
00004 #include <boost/dynamic_bitset.hpp>
00005 
00006 namespace skylark { namespace sketch {
00007 
00008 /* Specialization: local SpMat for input, output */
00009 template <typename ValueType,
00010           template <typename> class IdxDistributionType,
00011           template <typename> class ValueDistribution>
00012 struct hash_transform_t <
00013     base::sparse_matrix_t<ValueType>,
00014     base::sparse_matrix_t<ValueType>,
00015     IdxDistributionType,
00016     ValueDistribution > :
00017         public hash_transform_data_t<IdxDistributionType,
00018                                      ValueDistribution> {
00019     typedef size_t index_type;
00020     typedef ValueType value_type;
00021     typedef base::sparse_matrix_t<ValueType> matrix_type;
00022     typedef base::sparse_matrix_t<ValueType> output_matrix_type;
00023     typedef IdxDistributionType<index_type> idx_distribution_type;
00024     typedef ValueDistribution<value_type> value_distribution_type;
00025     typedef hash_transform_data_t<IdxDistributionType,
00026                                   ValueDistribution> data_type;
00027 
00028 
00032     hash_transform_t (int N, int S, base::context_t& context) :
00033         data_type(N, S, context) {
00034 
00035     }
00036 
00040     template <typename InputMatrixType,
00041               typename OutputMatrixType>
00042     hash_transform_t (hash_transform_t<InputMatrixType,
00043                                        OutputMatrixType,
00044                                        IdxDistributionType,
00045                                        ValueDistribution>& other) :
00046         data_type(other) {}
00047 
00051     hash_transform_t (hash_transform_data_t<IdxDistributionType,
00052                                             ValueDistribution>& other_data) :
00053         data_type(other_data) {}
00054 
00058     template <typename Dimension>
00059     void apply (const matrix_type &A, output_matrix_type &sketch_of_A,
00060                 Dimension dimension) const {
00061         try {
00062             apply_impl (A, sketch_of_A, dimension);
00063         } catch(boost::mpi::exception e) {
00064             SKYLARK_THROW_EXCEPTION (
00065                 base::mpi_exception()
00066                     << base::error_msg(e.what()) );
00067         } catch (std::string e) {
00068             SKYLARK_THROW_EXCEPTION (
00069                 base::combblas_exception()
00070                     << base::error_msg(e) );
00071         } catch (std::logic_error e) {
00072             SKYLARK_THROW_EXCEPTION (
00073                 base::combblas_exception()
00074                     << base::error_msg(e.what()) );
00075         }
00076     }
00077 
00078     int get_N() const { return this->_N; } 
00079     int get_S() const { return this->_S; } 
00081     const sketch_transform_data_t* get_data() const { return this; }
00082 
00083 private:
00088     void apply_impl (const matrix_type &A,
00089                      output_matrix_type &sketch_of_A,
00090                      columnwise_tag) const {
00091 
00092         index_type col_idx = 0;
00093 
00094         const int* indptr  = A.indptr();
00095         const int* indices = A.indices();
00096         const value_type* values = A.locked_values();
00097 
00098         index_type n_rows = data_type::_S;
00099         index_type n_cols = A.width();
00100 
00101         int nnz = 0;
00102         int *indptr_new = new int[n_cols + 1];
00103         std::vector<int> final_rows(A.nonzeros());
00104         std::vector<value_type> final_vals(A.nonzeros());
00105 
00106         indptr_new[0] = 0;
00107         std::vector<index_type> idx_map(n_rows, -1);
00108 
00109         for(index_type col = 0; col < A.width(); col++) {
00110 
00111 
00112             for(index_type idx = indptr[col]; idx < indptr[col + 1]; idx++) {
00113 
00114                 index_type row = indices[idx];
00115                 value_type val = values[idx] * data_type::row_value[row];
00116                 row            = data_type::row_idx[row];
00117 
00118                 //XXX: I think we should get rid of the if here...
00119                 if(idx_map[row] == -1) {
00120                     idx_map[row] = nnz;
00121                     final_rows[nnz] = row;
00122                     final_vals[nnz] = val;
00123                     nnz++;
00124                 } else {
00125                     final_vals[idx_map[row]] += val;
00126                 }
00127             }
00128 
00129             indptr_new[col + 1] = nnz;
00130 
00131             // reset idx_map
00132             for(int i = indptr_new[col]; i < nnz; ++i)
00133                 idx_map[final_rows[i]] = -1;
00134         }
00135 
00136         int *indices_new = new int[nnz];
00137         std::copy(final_rows.begin(), final_rows.begin() + nnz, indices_new);
00138 
00139         double *values_new = new double[nnz];
00140         std::copy(final_vals.begin(), final_vals.begin() + nnz, values_new);
00141 
00142         // let the sparse structure take ownership of the data
00143         sketch_of_A.attach(indptr_new, indices_new, values_new,
00144                            nnz, n_rows, n_cols, true);
00145     }
00146 
00147 
00152     void apply_impl (const matrix_type &A,
00153                      output_matrix_type &sketch_of_A,
00154                      rowwise_tag) const {
00155 
00156         index_type col_idx = 0;
00157 
00158         const int* indptr = A.indptr();
00159         const int* indices = A.indices();
00160         const value_type* values = A.locked_values();
00161 
00162         // target size
00163         index_type n_rows = A.height();
00164         index_type n_cols = data_type::_S;
00165 
00166         int nnz = 0;
00167         int *indptr_new = new int[n_cols + 1];
00168         std::vector<int> final_rows(A.nonzeros());
00169         std::vector<value_type> final_vals(A.nonzeros());
00170 
00171         indptr_new[0] = 0;
00172 
00173         // we adapt transversal order for this case
00174         //XXX: or transpose A (maybe better for cache)
00175         std::vector< std::vector<int> > inv_mapping(data_type::_S);
00176         for(int idx = 0; idx < data_type::row_idx.size(); ++idx) {
00177             inv_mapping[data_type::row_idx[idx]].push_back(idx);
00178         }
00179 
00180         std::vector<index_type> idx_map(n_rows, -1);
00181 
00182         for(index_type target_col = 0; target_col < data_type::_S;
00183             ++target_col) {
00184 
00185             std::vector<int>::iterator itr;
00186             for(itr = inv_mapping[target_col].begin();
00187                 itr != inv_mapping[target_col].end(); itr++) {
00188 
00189                 int col = *itr;
00190 
00191                 for(index_type idx = indptr[col]; idx < indptr[col + 1]; idx++) {
00192 
00193                     index_type row = indices[idx];
00194                     value_type val = values[idx] * data_type::row_value[col];
00195 
00196                     //XXX: I think we should get rid of the if here...
00197                     if(idx_map[row] == -1) {
00198                         idx_map[row] = nnz;
00199                         final_rows[nnz] = row;
00200                         final_vals[nnz] = val;
00201                         nnz++;
00202                     } else {
00203                         final_vals[idx_map[row]] += val;
00204                     }
00205                 }
00206             }
00207 
00208             indptr_new[target_col + 1] = nnz;
00209 
00210             // reset idx_map
00211             for(int i = indptr_new[target_col]; i < nnz; ++i)
00212                 idx_map[final_rows[i]] = -1;
00213         }
00214 
00215         int *indices_new = new int[nnz];
00216         std::copy(final_rows.begin(), final_rows.begin() + nnz, indices_new);
00217 
00218         double *values_new = new double[nnz];
00219         std::copy(final_vals.begin(), final_vals.begin() + nnz, values_new);
00220 
00221         sketch_of_A.attach(indptr_new, indices_new, values_new,
00222                            nnz, n_rows, n_cols, true);
00223     }
00224 };
00225 
00226 } } 
00228 #endif // SKYLARK_HASH_TRANSFORM_LOCAL_SPARSE_HPP