public inbox for gentoo-commits@lists.gentoo.org
 help / color / mirror / Atom feed
From: "Andrea Arteaga" <andyspiros@gmail.com>
To: gentoo-commits@lists.gentoo.org
Subject: [gentoo-commits] proj/auto-numerical-bench:unstable commit in: /, btl/actions/, btl/libs/PBLAS/
Date: Sat, 23 Jul 2011 11:46:48 +0000 (UTC)	[thread overview]
Message-ID: <c51172bc46f4b95af6282d8782e4b145911c7afe.spiros@gentoo> (raw)
Message-ID: <20110723114648.sGiraHAapWrdWWDTBHEXRjBITRP147fWmGNZYBi9HlI@z> (raw)

commit:     c51172bc46f4b95af6282d8782e4b145911c7afe
Author:     spiros <andyspiros <AT> gmail <DOT> com>
AuthorDate: Sat Jul 23 11:45:29 2011 +0000
Commit:     Andrea Arteaga <andyspiros <AT> gmail <DOT> com>
CommitDate: Sat Jul 23 11:45:29 2011 +0000
URL:        http://git.overlays.gentoo.org/gitweb/?p=proj/auto-numerical-bench.git;a=commit;h=c51172bc

Added working QR decomposition; added working symm_ev (but some negative
MFlops).

---
 btl/actions/action_parallel_cholesky.hh            |    3 +-
 btl/actions/action_parallel_lu_decomp.hh           |    3 +-
 ...el_cholesky.hh => action_parallel_qr_decomp.hh} |   48 +++-----
 btl/actions/action_parallel_symm_ev.hh             |  121 ++++++++++++++++++++
 btl/libs/PBLAS/main.cpp                            |   14 ++-
 btl/libs/PBLAS/pblas.h                             |    8 ++
 btl/libs/PBLAS/pblas_interface_impl.hh             |   54 +++++++++-
 pblas.py                                           |    3 +-
 8 files changed, 213 insertions(+), 41 deletions(-)

diff --git a/btl/actions/action_parallel_cholesky.hh b/btl/actions/action_parallel_cholesky.hh
index f89eb98..05ef3ef 100644
--- a/btl/actions/action_parallel_cholesky.hh
+++ b/btl/actions/action_parallel_cholesky.hh
@@ -39,7 +39,8 @@ public :
               Global_A_stl.push_back(temp_stl[r][c]);
     }
 
-    Interface::scatter_matrix(Global_A_stl, Local_A_stl, desc, size, size, 64, 64);
+    const int blocksize = std::max(std::min(size/4, 64), 2);
+    Interface::scatter_matrix(Global_A_stl, Local_A_stl, desc, size, size, blocksize, blocksize);
     LocalRows = desc[8];
     LocalCols = Local_A_stl.size()/desc[8];
 

diff --git a/btl/actions/action_parallel_lu_decomp.hh b/btl/actions/action_parallel_lu_decomp.hh
index 18b4ac7..d3dc620 100644
--- a/btl/actions/action_parallel_lu_decomp.hh
+++ b/btl/actions/action_parallel_lu_decomp.hh
@@ -29,7 +29,8 @@ public :
       init_vector<pseudo_random>(Global_A_stl, size*size);
     }
 
-    Interface::scatter_matrix(Global_A_stl, Local_A_stl, desc, size, size, 64, 64);
+    const int blocksize = std::max(std::min(size/4, 64), 2);
+    Interface::scatter_matrix(Global_A_stl, Local_A_stl, desc, size, size, blocksize, blocksize);
     LocalRows = desc[8];
     LocalCols = Local_A_stl.size()/desc[8];
 

diff --git a/btl/actions/action_parallel_cholesky.hh b/btl/actions/action_parallel_qr_decomp.hh
similarity index 55%
copy from btl/actions/action_parallel_cholesky.hh
copy to btl/actions/action_parallel_qr_decomp.hh
index f89eb98..a41414c 100644
--- a/btl/actions/action_parallel_cholesky.hh
+++ b/btl/actions/action_parallel_qr_decomp.hh
@@ -1,5 +1,5 @@
-#ifndef ACTION_PARALLEL_CHOLESKY_HH_
-#define ACTION_PARALLEL_CHOLESKY_HH_
+#ifndef ACTION_PARALLEL_QR_DECOMP_HH_
+#define ACTION_PARALLEL_QR_DECOMP_HH_
 
 #include "utilities.h"
 #include "init/init_function.hh"
@@ -9,17 +9,17 @@
 #include "STL_interface.hh"
 
 #include <string>
+#include <algorithm>
 
 template<class Interface>
-class Action_parallel_cholesky {
-  typedef lapack_interface<typename Interface::real_type> LapackInterface;
+class Action_parallel_qr_decomp {
 
 public :
 
   // Constructor
-  BTL_DONT_INLINE Action_parallel_cholesky( int size ) : _size(size)
+  BTL_DONT_INLINE Action_parallel_qr_decomp( int size ) : _size(size)
   {
-    MESSAGE("Action_parallel_cholesky Ctor");
+    MESSAGE("Action_parallel_qr_decomp Ctor");
 
     int myid, procnum;
     blacs_pinfo_(&myid, &procnum);
@@ -27,19 +27,11 @@ public :
 
     // STL matrix and vector initialization
     if (iamroot) {
-        typename LapackInterface::stl_matrix temp_stl;
-        init_matrix_symm<pseudo_random>(temp_stl, size);
-        Global_A_stl.reserve(size*size);
-        const double add = 5000./size;
-        for (int r = 0; r < size; ++r)
-          for (int c = 0; c < size; ++c)
-            if (r==c)
-              Global_A_stl.push_back((std::abs(temp_stl[r][c])+add)*size);
-            else
-              Global_A_stl.push_back(temp_stl[r][c]);
+      init_vector<pseudo_random>(Global_A_stl, size*size);
     }
 
-    Interface::scatter_matrix(Global_A_stl, Local_A_stl, desc, size, size, 64, 64);
+    const int blocksize = std::max(std::min(size/4, 64), 2);
+    Interface::scatter_matrix(Global_A_stl, Local_A_stl, desc, size, size, blocksize, blocksize);
     LocalRows = desc[8];
     LocalCols = Local_A_stl.size()/desc[8];
 
@@ -47,25 +39,21 @@ public :
     Interface::matrix_from_stl(Local_A_ref, Local_A_stl);
     Interface::matrix_from_stl(Local_A    , Local_A_stl);
 
-    _cost = 0;
-    for (int j=0; j<_size; ++j) {
-      double r = std::max(_size - j -1,0);
-      _cost += 2*(r*j+r+j);
-    }
+    _cost = 2.0*size*size*size;
   }
 
 
   // Invalidate copy constructor
-  Action_parallel_cholesky(const Action_parallel_cholesky&)
+  Action_parallel_qr_decomp(const Action_parallel_qr_decomp&)
   {
-    INFOS("illegal call to Action_parallel_cholesky copy constructor");
+    INFOS("illegal call to Action_parallel_qr_decomp copy constructor");
     exit(1);
   }
 
   // Destructor
-  ~Action_parallel_cholesky()
+  ~Action_parallel_qr_decomp()
   {
-    MESSAGE("Action_parallel_cholesky destructor");
+    MESSAGE("Action_parallel_qr_decomp destructor");
 
     // Deallocation
     Interface::free_matrix(Local_A_ref, Local_A_stl.size());
@@ -75,7 +63,7 @@ public :
   // Action name
   static inline std::string name()
   {
-    return "cholesky_" + Interface::name();
+    return "qr_decomp_" + Interface::name();
   }
 
   double nb_op_base()
@@ -90,14 +78,13 @@ public :
 
   BTL_DONT_INLINE void calculate()
   {
-    Interface::parallel_cholesky(Local_A, desc);
+    Interface::parallel_qr_decomp(Local_A, desc);
   }
 
   BTL_DONT_INLINE void check_result()
   {
   }
 
-
 private:
   int _size, desc[9], LocalRows, LocalCols;
   double _cost;
@@ -109,4 +96,5 @@ private:
   typename Interface::gene_matrix Local_A;
 };
 
-#endif /* ACTION_PARALLEL_CHOLESKY_HH_ */
+
+#endif /* ACTION_PARALLEL_QR_DECOMP_HH_ */

diff --git a/btl/actions/action_parallel_symm_ev.hh b/btl/actions/action_parallel_symm_ev.hh
new file mode 100644
index 0000000..f0af0e3
--- /dev/null
+++ b/btl/actions/action_parallel_symm_ev.hh
@@ -0,0 +1,121 @@
+#ifndef ACTION_PARALLEL_SYMM_EV_HH_
+#define ACTION_PARALLEL_SYMM_EV_HH_
+
+#include "utilities.h"
+#include "init/init_function.hh"
+#include "init/init_vector.hh"
+
+#include "lapack_interface.hh"
+#include "STL_interface.hh"
+
+#include <string>
+
+template<class Interface>
+class Action_parallel_symm_ev {
+  // BTL action that benchmarks Interface::parallel_symm_ev on a size x size matrix scattered over the process grid.
+public :
+
+  // Constructor: sets up the global (root only) and local buffers for a size x size problem and the operation count
+  BTL_DONT_INLINE Action_parallel_symm_ev( int size ) : _size(size)
+  {
+    MESSAGE("Action_parallel_symm_ev constructor");
+
+    int myid, procnum;
+    blacs_pinfo_(&myid, &procnum);
+    iamroot = (myid == 0);
+
+    // STL matrix and vector initialization (the global A and Z are filled on the root process only)
+    if (iamroot) {
+      init_vector<pseudo_random>(Global_A_stl, size*size);
+      init_vector<null_function>(Global_Z_stl, size*size);
+    }
+    init_vector<null_function>(Local_w_stl, size);
+
+    const int blocksize = std::max(std::min(size/4, 64), 2);  // size/4 clamped to [2, 64]
+    Interface::scatter_matrix(Global_A_stl, Local_A_stl, descA, size, size, blocksize, blocksize);
+    Interface::scatter_matrix(Global_Z_stl, Local_Z_stl, descZ, size, size, blocksize, blocksize);
+    LocalRows = descA[8];
+    LocalCols = Local_A_stl.size()/descA[8];
+
+    // Generic local matrix and vectors initialization (a working copy and a *_ref copy of each)
+    Interface::matrix_from_stl(Local_A_ref, Local_A_stl);
+    Interface::matrix_from_stl(Local_A    , Local_A_stl);
+    Interface::matrix_from_stl(Local_Z_ref, Local_Z_stl);
+    Interface::matrix_from_stl(Local_Z    , Local_Z_stl);
+    Interface::vector_from_stl(Local_w    , Local_w_stl);
+    Interface::vector_from_stl(Local_w_ref, Local_w_stl);
+
+    _cost = 1.0*size*size*size;  // evaluated in double: the int product overflows for size > 1290 (32-bit int) and turned the count negative
+  }
+
+
+  // Invalidate copy constructor
+  Action_parallel_symm_ev(const Action_parallel_symm_ev&)
+  {
+    INFOS("illegal call to Action_parallel_symm_ev copy constructor");
+    exit(1);
+  }
+
+  // Destructor
+  ~Action_parallel_symm_ev()
+  {
+    MESSAGE("Action_parallel_symm_ev destructor");
+
+    // Deallocation
+    Interface::free_matrix(Local_A_ref, Local_A_stl.size());
+    Interface::free_matrix(Local_A    , Local_A_stl.size());
+    Interface::free_matrix(Local_Z_ref, Local_Z_stl.size());
+    Interface::free_matrix(Local_Z    , Local_Z_stl.size());
+    Interface::free_vector(Local_w_ref);
+    Interface::free_vector(Local_w    );
+  }
+
+  // Action name
+  static inline std::string name()
+  {
+    return "symm_ev_" + Interface::name();
+  }
+
+  double nb_op_base()  // operation count computed once in the constructor
+  {
+    return _cost;
+  }
+
+  BTL_DONT_INLINE void initialize()  // presumably restores the working buffers from the *_ref copies - confirm copy_* argument order
+  {
+    Interface::copy_matrix(Local_A_ref, Local_A, Local_A_stl.size());
+    Interface::copy_matrix(Local_Z_ref, Local_Z, Local_Z_stl.size());
+    Interface::copy_vector(Local_w_ref, Local_w, Local_w_stl.size());
+  }
+
+  BTL_DONT_INLINE void calculate()  // the benchmarked call
+  {
+    Interface::parallel_symm_ev(Local_A, descA, Local_w, Local_Z, descZ);
+  }
+
+  BTL_DONT_INLINE void check_result()  // intentionally empty: no verification is performed
+  {
+  }
+
+private:
+  int _size, descA[9], descZ[9], LocalRows, LocalCols;
+  double _cost;
+  bool iamroot;
+
+  typename Interface::stl_matrix Global_A_stl;
+  typename Interface::stl_matrix Local_A_stl;
+  typename Interface::gene_matrix Local_A_ref;
+  typename Interface::gene_matrix Local_A;
+
+  typename Interface::stl_matrix Global_Z_stl;
+  typename Interface::stl_matrix Local_Z_stl;
+  typename Interface::gene_matrix Local_Z_ref;
+  typename Interface::gene_matrix Local_Z;
+
+  typename Interface::stl_vector Local_w_stl;
+  typename Interface::gene_vector Local_w_ref;
+  typename Interface::gene_vector Local_w;
+};
+
+
+#endif /* ACTION_PARALLEL_SYMM_EV_HH_ */

diff --git a/btl/libs/PBLAS/main.cpp b/btl/libs/PBLAS/main.cpp
index e7b636b..c209afe 100644
--- a/btl/libs/PBLAS/main.cpp
+++ b/btl/libs/PBLAS/main.cpp
@@ -14,6 +14,8 @@
 #include "action_parallel_matrix_vector_product.hh"
 #include "action_parallel_lu_decomp.hh"
 #include "action_parallel_cholesky.hh"
+#include "action_parallel_qr_decomp.hh"
+#include "action_parallel_symm_ev.hh"
 
 #include <string>
 
@@ -24,7 +26,7 @@ int main(int argc, char **argv)
   bool iamroot = blacsinit(&argc, &argv);
 
   bool
-  general_solve=false, least_squares=false, lu_decomp=false, cholesky=false,
+  general_solve=false, qr_decomp=false, lu_decomp=false, cholesky=false,
   symm_ev=false
   ;
 
@@ -32,7 +34,7 @@ int main(int argc, char **argv)
   for (int i = 1; i < argc; ++i) {
           std::string arg = argv[i];
           if (arg == "general_solve") general_solve = true;
-          else if (arg == "least_squares") least_squares = true;
+          else if (arg == "qr_decomp") qr_decomp = true;
           else if (arg == "lu_decomp") lu_decomp = true;
           else if (arg == "cholesky") cholesky = true;
           else if (arg == "symm_ev") symm_ev = true;
@@ -42,8 +44,8 @@ int main(int argc, char **argv)
 //  if (general_solve)
 //  distr_bench<Action_general_solve<lapack_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT, !iamroot);
 
-//  if (least_squares)
-//  distr_bench<Action_least_squares<lapack_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT, !iamroot);
+  if (qr_decomp)
+  distr_bench<Action_parallel_qr_decomp<pblas_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT, !iamroot);
 
   if (lu_decomp)
   distr_bench<Action_parallel_lu_decomp<pblas_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT, !iamroot);
@@ -51,8 +53,8 @@ int main(int argc, char **argv)
   if (cholesky)
     distr_bench<Action_parallel_cholesky<pblas_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT, !iamroot);
 
-//  if (symm_ev)
-//  distr_bench<Action_symm_ev<lapack_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT, !iamroot);
+  if (symm_ev)
+  distr_bench<Action_parallel_symm_ev<pblas_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT, !iamroot);
 
 
   int iZERO = 0;

diff --git a/btl/libs/PBLAS/pblas.h b/btl/libs/PBLAS/pblas.h
index 973b91c..a6cbeb2 100644
--- a/btl/libs/PBLAS/pblas.h
+++ b/btl/libs/PBLAS/pblas.h
@@ -54,6 +54,14 @@ extern "C" {
   void pspotrf_(const char*, const int*,  float*, const int*, const int*, const int*, int*);
   void pdpotrf_(const char*, const int*, double*, const int*, const int*, const int*, int*);
 
+  // qr_decomp: p?geqpf prototypes (float and double variants)
+  void psgeqpf_(const int*, const int*,  float*, const int*, const int*, const int*, int*,  float*,  float*, const int*, int*);
+  void pdgeqpf_(const int*, const int*, double*, const int*, const int*, const int*, int*, double*, double*, const int*, int*);
+
+  // symm_ev: p?syevd prototypes (float and double variants)
+  void pssyevd_(const char*, const char*, const int*,  float*, const int*, const int*, const int*,  float*,  float*, const int*, const int*, const int*,  float*, const int*, int*, const int*, int*);
+  void pdsyevd_(const char*, const char*, const int*, double*, const int*, const int*, const int*, double*, double*, const int*, const int*, const int*, double*, const int*, int*, const int*, int*);
+
 
 #ifdef __cplusplus
 }

diff --git a/btl/libs/PBLAS/pblas_interface_impl.hh b/btl/libs/PBLAS/pblas_interface_impl.hh
index 1dbf3b9..4522946 100644
--- a/btl/libs/PBLAS/pblas_interface_impl.hh
+++ b/btl/libs/PBLAS/pblas_interface_impl.hh
@@ -61,8 +61,58 @@ public:
     const char UPLO = 'U';
     int info;
     PBLAS_FUNC(potrf)(&UPLO, &N, X, &iONE, &iONE, desc, &info);
+    if (info != 0)
+      cerr << " { cholesky error : " << info << " } ";
+  }
+
+  static inline void parallel_qr_decomp(gene_matrix& X, const int* desc)
+  { // Calls PBLAS_FUNC(geqpf) twice on X: first with LWORK = -1 to retrieve the workspace size, then with a work array of that size.
+    const int GlobalRows = desc[2], GlobalCols = desc[3],
+              BlockRows = desc[4], BlockCols = desc[5],  // BlockRows is read but not used below
+              ctxt = desc[1];
+
+    int myrow, mycol, nprow, npcol, lwork;
+    SCALAR lworkd;
+    blacs_gridinfo_(&ctxt, &nprow, &npcol, &myrow, &mycol);
+
+    const int iONE = 1, iZERO = 0, imONE = -1,
+             ipivdim = numroc_(&GlobalCols, &BlockCols, &mycol, &iZERO, &npcol);  // presumably the local column count on this process - confirm
+    int info;
+    std::vector<int> ipiv(ipivdim);
+    std::vector<SCALAR> tau(ipivdim);
+
+    // Retrieve LWORK: the size comes back in lworkd (a SCALAR), hence the cast below
+    PBLAS_FUNC(geqpf)(&GlobalRows, &GlobalCols, X, &iONE, &iONE, desc, &ipiv[0], &tau[0], &lworkd, &imONE, &info);
+    lwork = static_cast<int>(lworkd);
+//    if (info != 0)
+//      cerr << " { qr_decomp lwork error } ";
+
+    std::vector<SCALAR> work(lwork);  // workspace sized by the query above
+    PBLAS_FUNC(geqpf)(&GlobalRows, &GlobalCols, X, &iONE, &iONE, desc, &ipiv[0], &tau[0], &work[0], &lwork, &info);
 //    if (info != 0)
-//      cerr << " { cholesky error : " << info << " } ";
+//      cerr << " { qr_decomp computation error } ";
   }
-};
 
+  static inline void parallel_symm_ev(gene_matrix& A, const int* descA, gene_vector& w, gene_matrix& Z, const int* descZ)
+  { // Calls PBLAS_FUNC(syevd) twice on A/w/Z: first to retrieve the work sizes, then with buffers of those sizes.
+    const char jobz = 'V', uplo = 'u';  // presumably: compute eigenvectors into Z, use the upper triangle - confirm against p?syevd docs
+    const int N = descA[2], iONE = 1, iZERO = 0, imONE = -1;  // iZERO is not used in this function
+    std::vector<SCALAR> work;
+    std::vector<int> iwork;
+    int lwork, liwork, info;
+    SCALAR lworkd;
+
+    // Retrieve l(i)work: both size arguments are -1; the sizes come back in lworkd and liwork
+    PBLAS_FUNC(syevd)(&jobz, &uplo, &N, A, &iONE, &iONE, descA, w,
+        Z, &iONE, &iONE, descZ, &lworkd, &imONE, &liwork, &imONE, &info);
+    lwork = static_cast<int>(lworkd);  // lworkd is a SCALAR, the buffer length must be an int
+    work.resize(lwork); iwork.resize(liwork);
+//    if (info != 0)
+//      cerr << " { symm_ev l(i)work error } ";
+    // Second call with the buffers sized above; info is currently not inspected
+    PBLAS_FUNC(syevd)(&jobz, &uplo, &N, A, &iONE, &iONE, descA, w,
+        Z, &iONE, &iONE, descZ, &work[0], &lwork, &iwork[0], &liwork, &info);
+//    if (info != 0)
+//      cerr << " { symm_ev computation error } ";
+  }
+};

diff --git a/pblas.py b/pblas.py
index 9cd087e..792f343 100644
--- a/pblas.py
+++ b/pblas.py
@@ -5,7 +5,8 @@ numproc = 4
 class Module(btlbase.BTLBase):
     def _initialize(self):
         self.libname = "scalapack"
-        self.avail = ['axpy', 'matrix_vector', 'lu_decomp', 'cholesky']
+        self.avail = ['axpy', 'matrix_vector', 'lu_decomp', 'cholesky',
+          'qr_decomp', 'symm_ev']
     
     def _parse_args(self, args):     
         # Parse arguments



             reply	other threads:[~2011-07-23 11:47 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-08-02 18:45 Andrea Arteaga [this message]
2011-07-23 11:46 ` [gentoo-commits] proj/auto-numerical-bench:unstable commit in: /, btl/actions/, btl/libs/PBLAS/ Andrea Arteaga
  -- strict thread matches above, loose matches on Subject: below --
2011-07-23 22:59 Andrea Arteaga
2011-08-02 18:45 ` Andrea Arteaga

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=c51172bc46f4b95af6282d8782e4b145911c7afe.spiros@gentoo \
    --to=andyspiros@gmail.com \
    --cc=gentoo-commits@lists.gentoo.org \
    --cc=gentoo-dev@lists.gentoo.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox