Skylark (Sketching Library), version 0.1
00001 #ifndef SKYLARK_SPARSE_MATRIX_HPP 00002 #define SKYLARK_SPARSE_MATRIX_HPP 00003 00004 #include <set> 00005 #include <vector> 00006 #include <boost/tuple/tuple.hpp> 00007 #include <boost/unordered_map.hpp> 00008 00009 namespace skylark { namespace base { 00010 00019 template<typename ValueType=double> 00020 struct sparse_matrix_t { 00021 00022 typedef int index_type; 00023 typedef ValueType value_type; 00024 00025 typedef boost::tuple<index_type, index_type, value_type> coord_tuple_t; 00026 typedef std::vector<coord_tuple_t> coords_t; 00027 00028 sparse_matrix_t() 00029 : _ownindptr(false), _ownindices(false), _ownvalues(false), 00030 _dirty_struct(false), _height(0), _width(0), _nnz(0), 00031 _indptr(nullptr), _indices(nullptr), _values(nullptr) 00032 {} 00033 00034 // The following relies on C++11 00035 sparse_matrix_t(sparse_matrix_t<ValueType>&& A) : 00036 _ownindptr(A._ownindptr), _ownindices(A._ownindices), 00037 _ownvalues(A._ownvalues), _dirty_struct(A._dirty_struct), 00038 _height(A._height), _width(A._width), _nnz(A._nnz), 00039 _indptr(A._indptr), _indices(A._indices), _values(A._values) 00040 { 00041 A._ownindptr = false; 00042 A._ownindices = false; 00043 A._ownvalues = false; 00044 } 00045 00046 ~sparse_matrix_t() { 00047 _free_data(); 00048 } 00049 00050 bool struct_updated() const { return _dirty_struct; } 00051 void reset_update_flag() { _dirty_struct = false; } 00052 00056 template<typename IdxType, typename ValType> 00057 void detach(IdxType *indptr, IdxType *indices, ValType *values) const { 00058 00059 for(size_t i = 0; i <= _width; ++i) 00060 indptr[i] = static_cast<IdxType>(_indptr[i]); 00061 00062 for(size_t i = 0; i < _nnz; ++i) { 00063 indices[i] = static_cast<IdxType>(_indices[i]); 00064 values[i] = static_cast<ValType>(_values[i]); 00065 } 00066 } 00067 00071 void attach(const index_type *indptr, const index_type *indices, double *values, 00072 int nnz, int n_rows, int n_cols, bool _own = false) { 00073 attach(indptr, indices, 
values, nnz, n_rows, n_cols, _own, _own, _own); 00074 } 00075 00079 void attach(const index_type *indptr, const index_type *indices, double *values, 00080 int nnz, int n_rows, int n_cols, 00081 bool ownindptr, bool ownindices, bool ownvalues) { 00082 _free_data(); 00083 00084 _indptr = indptr; 00085 _indices = indices; 00086 _values = values; 00087 _nnz = nnz; 00088 _width = n_cols; 00089 _height = n_rows; 00090 00091 _ownindptr = ownindptr; 00092 _ownindices = ownindices; 00093 _ownvalues = ownvalues; 00094 00095 _dirty_struct = true; 00096 } 00097 00098 00099 // attaching a coordinate structure facilitates going from distributed 00100 // input to local output. 00101 void set(coords_t coords, int n_rows = 0, int n_cols = 0) { 00102 00103 sort(coords.begin(), coords.end(), &sparse_matrix_t::_sort_coords); 00104 00105 n_cols = std::max(n_cols, boost::get<1>(coords.back()) + 1); 00106 index_type *indptr = new index_type[n_cols + 1]; 00107 00108 // Count non-zeros 00109 int nnz = 0; 00110 for(size_t i = 0; i < coords.size(); ++i) { 00111 nnz++; 00112 index_type cur_row = boost::get<0>(coords[i]); 00113 index_type cur_col = boost::get<1>(coords[i]); 00114 while(i + 1 < coords.size() && 00115 cur_row == boost::get<0>(coords[i + 1]) && 00116 cur_col == boost::get<1>(coords[i + 1])) 00117 i++; 00118 } 00119 00120 index_type *indices = new index_type[nnz]; 00121 value_type *values = new value_type[nnz]; 00122 00123 nnz = 0; 00124 int indptr_idx = 0; 00125 indptr[indptr_idx] = 0; 00126 for(size_t i = 0; i < coords.size(); ++i) { 00127 index_type cur_row = boost::get<0>(coords[i]); 00128 index_type cur_col = boost::get<1>(coords[i]); 00129 value_type cur_val = boost::get<2>(coords[i]); 00130 00131 for(; indptr_idx < cur_col; ++indptr_idx) 00132 indptr[indptr_idx + 1] = nnz; 00133 nnz++; 00134 00135 // sum duplicates 00136 while(i + 1 < coords.size() && 00137 cur_row == boost::get<0>(coords[i + 1]) && 00138 cur_col == boost::get<1>(coords[i + 1])) { 00139 00140 cur_val += 
boost::get<2>(coords[i + 1]); 00141 i++; 00142 } 00143 00144 indices[nnz - 1] = cur_row; 00145 values[nnz - 1] = cur_val; 00146 00147 n_rows = std::max(cur_row + 1, n_rows); 00148 } 00149 00150 for(; indptr_idx < n_cols; ++indptr_idx) 00151 indptr[indptr_idx + 1] = nnz; 00152 00153 attach(indptr, indices, values, nnz, n_rows, n_cols, true); 00154 } 00155 00156 int height() const { 00157 return _height; 00158 } 00159 00160 int width() const { 00161 return _width; 00162 } 00163 00164 int nonzeros() const { 00165 return _nnz; 00166 } 00167 00168 const index_type* indptr() const { 00169 return _indptr; 00170 } 00171 00172 const index_type* indices() const { 00173 return _indices; 00174 } 00175 00176 value_type* values() { 00177 return _values; 00178 } 00179 00180 const value_type* locked_values() const { 00181 return _values; 00182 } 00183 00184 bool operator==(const sparse_matrix_t &rhs) const { 00185 00186 // column pointer arrays have to be exactly the same 00187 if (std::vector<int>(_indptr, _indptr+_width) != 00188 std::vector<int>(rhs._indptr, rhs._indptr + rhs._width)) 00189 return false; 00190 00191 // check more carefully for unordered row indices 00192 const int* indptr = _indptr; 00193 const int* indices = _indices; 00194 const double* values = _values; 00195 00196 const int* indices_rhs = rhs.indices(); 00197 const double* values_rhs = rhs.locked_values(); 00198 00199 for(int col = 0; col < width(); col++) { 00200 00201 boost::unordered_map<int, double> col_values; 00202 00203 for(int idx = indptr[col]; idx < indptr[col + 1]; idx++) 00204 col_values.insert(std::make_pair(indices[idx], values[idx])); 00205 00206 for(int idx = indptr[col]; idx < indptr[col + 1]; idx++) { 00207 if(col_values[indices_rhs[idx]] != values_rhs[idx]) 00208 return false; 00209 } 00210 } 00211 00212 return true; 00213 } 00214 00215 private: 00216 bool _ownindptr; 00217 bool _ownindices; 00218 bool _ownvalues; 00219 00220 bool _dirty_struct; 00221 00222 int _height; 00223 int _width; 
00224 int _nnz; 00225 00226 const index_type* _indptr; 00227 const index_type* _indices; 00228 value_type* _values; 00229 00230 // TODO add the following 00231 sparse_matrix_t(const sparse_matrix_t&); 00232 void operator=(const sparse_matrix_t&); 00233 00234 void _free_data() { 00235 if (_ownindptr) 00236 delete[] _indptr; 00237 if (_ownindices) 00238 delete[] _indices; 00239 if (_ownvalues) 00240 delete[] _values; 00241 } 00242 00243 static bool _sort_coords(coord_tuple_t lhs, coord_tuple_t rhs) { 00244 if(boost::get<1>(lhs) != boost::get<1>(rhs)) 00245 return boost::get<1>(lhs) < boost::get<1>(rhs); 00246 else 00247 return boost::get<0>(lhs) < boost::get<0>(rhs); 00248 } 00249 }; 00250 00251 template<typename T> 00252 void Transpose(const sparse_matrix_t<T>& A, sparse_matrix_t<T>& B) { 00253 const int* aindptr = A.indptr(); 00254 const int* aindices = A.indices(); 00255 const double* avalues = A.locked_values(); 00256 00257 int m = A.width(); 00258 int n = A.height(); 00259 int nnz = A.nonzeros(); 00260 00261 int *indptr = new int[n + 1]; 00262 int *indices = new int[nnz]; 00263 double *values = new double[nnz]; 00264 00265 // Count nonzeros in each row 00266 int *nzrow = new int[n]; 00267 std::fill(nzrow, nzrow + n, 0); 00268 for(int col = 0; col < m; col++) 00269 for(int idx = aindptr[col]; idx < aindptr[col + 1]; idx++) 00270 nzrow[aindices[idx]]++; 00271 00272 // Set indptr 00273 indptr[0] = 0; 00274 for(int col = 1; col <= n; col++) 00275 indptr[col] = indptr[col - 1] + nzrow[col - 1]; 00276 00277 // Fill values 00278 std::fill(nzrow, nzrow + n, 0); 00279 for(int col = 0; col < m; col++) 00280 for(int idx = aindptr[col]; idx < aindptr[col + 1]; idx++) { 00281 int row = aindices[idx]; 00282 double val = avalues[idx]; 00283 indices[indptr[row] + nzrow[row]] = col; 00284 values[indptr[row] + nzrow[row]] = val; 00285 nzrow[row]++; 00286 } 00287 00288 delete[] nzrow; 00289 00290 B.attach(indptr, indices, values, nnz, m, n, true); 00291 } 00292 00293 } } 00294 
00295 #endif