summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorspiros <andyspiros@gmail.com>2011-07-23 13:45:29 +0200
committerspiros <andyspiros@gmail.com>2011-07-23 13:45:29 +0200
commitc51172bc46f4b95af6282d8782e4b145911c7afe (patch)
tree324fdb1ec70ca8a50f7bd6ffa09fa2658dd3d98a
parentAdded cholesky. Solved some issues. (diff)
downloadauto-numerical-bench-c51172bc46f4b95af6282d8782e4b145911c7afe.tar.gz
auto-numerical-bench-c51172bc46f4b95af6282d8782e4b145911c7afe.tar.bz2
auto-numerical-bench-c51172bc46f4b95af6282d8782e4b145911c7afe.zip
Added working QR decomposition; added working symm_ev (but some negative
MFlops).
-rw-r--r--btl/actions/action_parallel_cholesky.hh3
-rw-r--r--btl/actions/action_parallel_lu_decomp.hh3
-rw-r--r--btl/actions/action_parallel_qr_decomp.hh100
-rw-r--r--btl/actions/action_parallel_symm_ev.hh121
-rw-r--r--btl/libs/PBLAS/main.cpp14
-rw-r--r--btl/libs/PBLAS/pblas.h8
-rw-r--r--btl/libs/PBLAS/pblas_interface_impl.hh54
-rw-r--r--pblas.py3
8 files changed, 295 insertions, 11 deletions
diff --git a/btl/actions/action_parallel_cholesky.hh b/btl/actions/action_parallel_cholesky.hh
index f89eb98..05ef3ef 100644
--- a/btl/actions/action_parallel_cholesky.hh
+++ b/btl/actions/action_parallel_cholesky.hh
@@ -39,7 +39,8 @@ public :
Global_A_stl.push_back(temp_stl[r][c]);
}
- Interface::scatter_matrix(Global_A_stl, Local_A_stl, desc, size, size, 64, 64);
+ const int blocksize = std::max(std::min(size/4, 64), 2);
+ Interface::scatter_matrix(Global_A_stl, Local_A_stl, desc, size, size, blocksize, blocksize);
LocalRows = desc[8];
LocalCols = Local_A_stl.size()/desc[8];
diff --git a/btl/actions/action_parallel_lu_decomp.hh b/btl/actions/action_parallel_lu_decomp.hh
index 18b4ac7..d3dc620 100644
--- a/btl/actions/action_parallel_lu_decomp.hh
+++ b/btl/actions/action_parallel_lu_decomp.hh
@@ -29,7 +29,8 @@ public :
init_vector<pseudo_random>(Global_A_stl, size*size);
}
- Interface::scatter_matrix(Global_A_stl, Local_A_stl, desc, size, size, 64, 64);
+ const int blocksize = std::max(std::min(size/4, 64), 2);
+ Interface::scatter_matrix(Global_A_stl, Local_A_stl, desc, size, size, blocksize, blocksize);
LocalRows = desc[8];
LocalCols = Local_A_stl.size()/desc[8];
diff --git a/btl/actions/action_parallel_qr_decomp.hh b/btl/actions/action_parallel_qr_decomp.hh
new file mode 100644
index 0000000..a41414c
--- /dev/null
+++ b/btl/actions/action_parallel_qr_decomp.hh
@@ -0,0 +1,100 @@
+#ifndef ACTION_PARALLEL_QR_DECOMP_HH_
+#define ACTION_PARALLEL_QR_DECOMP_HH_
+
+#include "utilities.h"
+#include "init/init_function.hh"
+#include "init/init_vector.hh"
+
+#include "lapack_interface.hh"
+#include "STL_interface.hh"
+
+#include <string>
+#include <algorithm>
+
+template<class Interface>
+class Action_parallel_qr_decomp {
+
+public :
+
+ // Constructor
+ BTL_DONT_INLINE Action_parallel_qr_decomp( int size ) : _size(size)
+ {
+ MESSAGE("Action_parallel_qr_decomp Ctor");
+
+ int myid, procnum;
+ blacs_pinfo_(&myid, &procnum);
+ iamroot = (myid == 0);
+
+ // STL matrix and vector initialization
+ if (iamroot) {
+ init_vector<pseudo_random>(Global_A_stl, size*size);
+ }
+
+ const int blocksize = std::max(std::min(size/4, 64), 2);
+ Interface::scatter_matrix(Global_A_stl, Local_A_stl, desc, size, size, blocksize, blocksize);
+ LocalRows = desc[8];
+ LocalCols = Local_A_stl.size()/desc[8];
+
+ // Generic local matrix and vectors initialization
+ Interface::matrix_from_stl(Local_A_ref, Local_A_stl);
+ Interface::matrix_from_stl(Local_A , Local_A_stl);
+
+ _cost = 2.0*size*size*size;
+ }
+
+
+ // Invalidate copy constructor
+ Action_parallel_qr_decomp(const Action_parallel_qr_decomp&)
+ {
+ INFOS("illegal call to Action_parallel_qr_decomp copy constructor");
+ exit(1);
+ }
+
+ // Destructor
+ ~Action_parallel_qr_decomp()
+ {
+ MESSAGE("Action_parallel_qr_decomp destructor");
+
+ // Deallocation
+ Interface::free_matrix(Local_A_ref, Local_A_stl.size());
+ Interface::free_matrix(Local_A , Local_A_stl.size());
+ }
+
+ // Action name
+ static inline std::string name()
+ {
+ return "qr_decomp_" + Interface::name();
+ }
+
+ double nb_op_base()
+ {
+ return _cost;
+ }
+
+ BTL_DONT_INLINE void initialize()
+ {
+ Interface::copy_matrix(Local_A_ref, Local_A, Local_A_stl.size());
+ }
+
+ BTL_DONT_INLINE void calculate()
+ {
+ Interface::parallel_qr_decomp(Local_A, desc);
+ }
+
+ BTL_DONT_INLINE void check_result()
+ {
+ }
+
+private:
+ int _size, desc[9], LocalRows, LocalCols;
+ double _cost;
+ bool iamroot;
+
+ typename Interface::stl_matrix Global_A_stl;
+ typename Interface::stl_matrix Local_A_stl;
+ typename Interface::gene_matrix Local_A_ref;
+ typename Interface::gene_matrix Local_A;
+};
+
+
+#endif /* ACTION_PARALLEL_QR_DECOMP_HH_ */
diff --git a/btl/actions/action_parallel_symm_ev.hh b/btl/actions/action_parallel_symm_ev.hh
new file mode 100644
index 0000000..f0af0e3
--- /dev/null
+++ b/btl/actions/action_parallel_symm_ev.hh
@@ -0,0 +1,121 @@
+#ifndef ACTION_PARALLEL_SYMM_EV_HH_
+#define ACTION_PARALLEL_SYMM_EV_HH_
+
+#include "utilities.h"
+#include "init/init_function.hh"
+#include "init/init_vector.hh"
+
+#include "lapack_interface.hh"
+#include "STL_interface.hh"
+
+#include <string>
+
+template<class Interface>
+class Action_parallel_symm_ev {
+
+public :
+
+ // Constructor
+ BTL_DONT_INLINE Action_parallel_symm_ev( int size ) : _size(size)
+ {
+ MESSAGE("Action_parallel_symm_ev constructor");
+
+ int myid, procnum;
+ blacs_pinfo_(&myid, &procnum);
+ iamroot = (myid == 0);
+
+ // STL matrix and vector initialization
+ if (iamroot) {
+ init_vector<pseudo_random>(Global_A_stl, size*size);
+ init_vector<null_function>(Global_Z_stl, size*size);
+ }
+ init_vector<null_function>(Local_w_stl, size);
+
+ const int blocksize = std::max(std::min(size/4, 64), 2);
+ Interface::scatter_matrix(Global_A_stl, Local_A_stl, descA, size, size, blocksize, blocksize);
+ Interface::scatter_matrix(Global_Z_stl, Local_Z_stl, descZ, size, size, blocksize, blocksize);
+ LocalRows = descA[8];
+ LocalCols = Local_A_stl.size()/descA[8];
+
+ // Generic local matrix and vectors initialization
+ Interface::matrix_from_stl(Local_A_ref, Local_A_stl);
+ Interface::matrix_from_stl(Local_A , Local_A_stl);
+ Interface::matrix_from_stl(Local_Z_ref, Local_Z_stl);
+ Interface::matrix_from_stl(Local_Z , Local_Z_stl);
+ Interface::vector_from_stl(Local_w , Local_w_stl);
+ Interface::vector_from_stl(Local_w_ref, Local_w_stl);
+
+ _cost = size*size*size;
+ }
+
+
+ // Invalidate copy constructor
+ Action_parallel_symm_ev(const Action_parallel_symm_ev&)
+ {
+ INFOS("illegal call to Action_parallel_symm_ev copy constructor");
+ exit(1);
+ }
+
+ // Destructor
+ ~Action_parallel_symm_ev()
+ {
+ MESSAGE("Action_parallel_lu_decomp destructor");
+
+ // Deallocation
+ Interface::free_matrix(Local_A_ref, Local_A_stl.size());
+ Interface::free_matrix(Local_A , Local_A_stl.size());
+ Interface::free_matrix(Local_Z_ref, Local_Z_stl.size());
+ Interface::free_matrix(Local_Z , Local_Z_stl.size());
+ Interface::free_vector(Local_w_ref);
+ Interface::free_vector(Local_w );
+ }
+
+ // Action name
+ static inline std::string name()
+ {
+ return "symm_ev_" + Interface::name();
+ }
+
+ double nb_op_base()
+ {
+ return _cost;
+ }
+
+ BTL_DONT_INLINE void initialize()
+ {
+ Interface::copy_matrix(Local_A_ref, Local_A, Local_A_stl.size());
+ Interface::copy_matrix(Local_Z_ref, Local_Z, Local_Z_stl.size());
+ Interface::copy_vector(Local_w_ref, Local_w, Local_w_stl.size());
+ }
+
+ BTL_DONT_INLINE void calculate()
+ {
+ Interface::parallel_symm_ev(Local_A, descA, Local_w, Local_Z, descZ);
+ }
+
+ BTL_DONT_INLINE void check_result()
+ {
+ }
+
+private:
+ int _size, descA[9], descZ[9], LocalRows, LocalCols;
+ double _cost;
+ bool iamroot;
+
+ typename Interface::stl_matrix Global_A_stl;
+ typename Interface::stl_matrix Local_A_stl;
+ typename Interface::gene_matrix Local_A_ref;
+ typename Interface::gene_matrix Local_A;
+
+ typename Interface::stl_matrix Global_Z_stl;
+ typename Interface::stl_matrix Local_Z_stl;
+ typename Interface::gene_matrix Local_Z_ref;
+ typename Interface::gene_matrix Local_Z;
+
+ typename Interface::stl_vector Local_w_stl;
+ typename Interface::gene_vector Local_w_ref;
+ typename Interface::gene_vector Local_w;
+};
+
+
+#endif /* ACTION_PARALLEL_LU_DECOMP_HH_ */
diff --git a/btl/libs/PBLAS/main.cpp b/btl/libs/PBLAS/main.cpp
index e7b636b..c209afe 100644
--- a/btl/libs/PBLAS/main.cpp
+++ b/btl/libs/PBLAS/main.cpp
@@ -14,6 +14,8 @@
#include "action_parallel_matrix_vector_product.hh"
#include "action_parallel_lu_decomp.hh"
#include "action_parallel_cholesky.hh"
+#include "action_parallel_qr_decomp.hh"
+#include "action_parallel_symm_ev.hh"
#include <string>
@@ -24,7 +26,7 @@ int main(int argc, char **argv)
bool iamroot = blacsinit(&argc, &argv);
bool
- general_solve=false, least_squares=false, lu_decomp=false, cholesky=false,
+ general_solve=false, qr_decomp=false, lu_decomp=false, cholesky=false,
symm_ev=false
;
@@ -32,7 +34,7 @@ int main(int argc, char **argv)
for (int i = 1; i < argc; ++i) {
std::string arg = argv[i];
if (arg == "general_solve") general_solve = true;
- else if (arg == "least_squares") least_squares = true;
+ else if (arg == "qr_decomp") qr_decomp = true;
else if (arg == "lu_decomp") lu_decomp = true;
else if (arg == "cholesky") cholesky = true;
else if (arg == "symm_ev") symm_ev = true;
@@ -42,8 +44,8 @@ int main(int argc, char **argv)
// if (general_solve)
// distr_bench<Action_general_solve<lapack_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT, !iamroot);
-// if (least_squares)
-// distr_bench<Action_least_squares<lapack_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT, !iamroot);
+ if (qr_decomp)
+ distr_bench<Action_parallel_qr_decomp<pblas_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT, !iamroot);
if (lu_decomp)
distr_bench<Action_parallel_lu_decomp<pblas_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT, !iamroot);
@@ -51,8 +53,8 @@ int main(int argc, char **argv)
if (cholesky)
distr_bench<Action_parallel_cholesky<pblas_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT, !iamroot);
-// if (symm_ev)
-// distr_bench<Action_symm_ev<lapack_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT, !iamroot);
+ if (symm_ev)
+ distr_bench<Action_parallel_symm_ev<pblas_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT, !iamroot);
int iZERO = 0;
diff --git a/btl/libs/PBLAS/pblas.h b/btl/libs/PBLAS/pblas.h
index 973b91c..a6cbeb2 100644
--- a/btl/libs/PBLAS/pblas.h
+++ b/btl/libs/PBLAS/pblas.h
@@ -54,6 +54,14 @@ extern "C" {
void pspotrf_(const char*, const int*, float*, const int*, const int*, const int*, int*);
void pdpotrf_(const char*, const int*, double*, const int*, const int*, const int*, int*);
+ // qr_decomp
+ void psgeqpf_(const int*, const int*, float*, const int*, const int*, const int*, int*, float*, float*, const int*, int*);
+ void pdgeqpf_(const int*, const int*, double*, const int*, const int*, const int*, int*, double*, double*, const int*, int*);
+
+ // symm_ev
+ void pssyevd_(const char*, const char*, const int*, float*, const int*, const int*, const int*, float*, float*, const int*, const int*, const int*, float*, const int*, int*, const int*, int*);
+ void pdsyevd_(const char*, const char*, const int*, double*, const int*, const int*, const int*, double*, double*, const int*, const int*, const int*, double*, const int*, int*, const int*, int*);
+
#ifdef __cplusplus
}
diff --git a/btl/libs/PBLAS/pblas_interface_impl.hh b/btl/libs/PBLAS/pblas_interface_impl.hh
index 1dbf3b9..4522946 100644
--- a/btl/libs/PBLAS/pblas_interface_impl.hh
+++ b/btl/libs/PBLAS/pblas_interface_impl.hh
@@ -61,8 +61,58 @@ public:
const char UPLO = 'U';
int info;
PBLAS_FUNC(potrf)(&UPLO, &N, X, &iONE, &iONE, desc, &info);
+ if (info != 0)
+ cerr << " { cholesky error : " << info << " } ";
+ }
+
+ static inline void parallel_qr_decomp(gene_matrix& X, const int* desc)
+ {
+ const int GlobalRows = desc[2], GlobalCols = desc[3],
+ BlockRows = desc[4], BlockCols = desc[5],
+ ctxt = desc[1];
+
+ int myrow, mycol, nprow, npcol, lwork;
+ SCALAR lworkd;
+ blacs_gridinfo_(&ctxt, &nprow, &npcol, &myrow, &mycol);
+
+ const int iONE = 1, iZERO = 0, imONE = -1,
+ ipivdim = numroc_(&GlobalCols, &BlockCols, &mycol, &iZERO, &npcol);
+ int info;
+ std::vector<int> ipiv(ipivdim);
+ std::vector<SCALAR> tau(ipivdim);
+
+ // Retrieve LWORK
+ PBLAS_FUNC(geqpf)(&GlobalRows, &GlobalCols, X, &iONE, &iONE, desc, &ipiv[0], &tau[0], &lworkd, &imONE, &info);
+ lwork = static_cast<int>(lworkd);
+// if (info != 0)
+// cerr << " { qr_decomp lwork error } ";
+
+ std::vector<SCALAR> work(lwork);
+ PBLAS_FUNC(geqpf)(&GlobalRows, &GlobalCols, X, &iONE, &iONE, desc, &ipiv[0], &tau[0], &work[0], &lwork, &info);
// if (info != 0)
-// cerr << " { cholesky error : " << info << " } ";
+// cerr << " { qr_decomp computation error } ";
}
-};
+ static inline void parallel_symm_ev(gene_matrix& A, const int* descA, gene_vector& w, gene_matrix& Z, const int* descZ)
+ {
+ const char jobz = 'V', uplo = 'u';
+ const int N = descA[2], iONE = 1, iZERO = 0, imONE = -1;
+ std::vector<SCALAR> work;
+ std::vector<int> iwork;
+ int lwork, liwork, info;
+ SCALAR lworkd;
+
+ // Retrieve l(i)work
+ PBLAS_FUNC(syevd)(&jobz, &uplo, &N, A, &iONE, &iONE, descA, w,
+ Z, &iONE, &iONE, descZ, &lworkd, &imONE, &liwork, &imONE, &info);
+ lwork = static_cast<int>(lworkd);
+ work.resize(lwork); iwork.resize(liwork);
+// if (info != 0)
+// cerr << " { symm_ev l(i)work error } ";
+
+ PBLAS_FUNC(syevd)(&jobz, &uplo, &N, A, &iONE, &iONE, descA, w,
+ Z, &iONE, &iONE, descZ, &work[0], &lwork, &iwork[0], &liwork, &info);
+// if (info != 0)
+// cerr << " { symm_ev computation error } ";
+ }
+};
diff --git a/pblas.py b/pblas.py
index 9cd087e..792f343 100644
--- a/pblas.py
+++ b/pblas.py
@@ -5,7 +5,8 @@ numproc = 4
class Module(btlbase.BTLBase):
def _initialize(self):
self.libname = "scalapack"
- self.avail = ['axpy', 'matrix_vector', 'lu_decomp', 'cholesky']
+ self.avail = ['axpy', 'matrix_vector', 'lu_decomp', 'cholesky',
+ 'qr_decomp', 'symm_ev']
def _parse_args(self, args):
# Parse arguments