* [gentoo-commits] proj/auto-numerical-bench:unstable commit in: btl/generic_bench/, btl/actions/, btl/generic_bench/timers/, btl/libs/PBLAS/
@ 2011-07-18 0:41 Andrea Arteaga
0 siblings, 0 replies; only message in thread
From: Andrea Arteaga @ 2011-07-18 0:41 UTC (permalink / raw)
To: gentoo-commits
commit: 63a0b1731ec14427c3f7e5dfbb63e5e6724e69f9
Author: spiros <andyspiros <AT> gmail <DOT> com>
AuthorDate: Mon Jul 18 00:36:22 2011 +0000
Commit: Andrea Arteaga <andyspiros <AT> gmail <DOT> com>
CommitDate: Mon Jul 18 00:36:22 2011 +0000
URL: http://git.overlays.gentoo.org/gitweb/?p=proj/auto-numerical-bench.git;a=commit;h=63a0b173
Much work on distributed-memory BTL.
---
.../action_parallel_matrix_vector_product.hh | 11 ++-
btl/generic_bench/bench.hh | 112 ++++----------------
.../timers/distributed_perf_analyzer_node.hh | 78 ++++++++++++++
.../timers/distributed_perf_analyzer_root.hh | 94 ++++++++++++++++
btl/generic_bench/timers/portable_perf_analyzer.hh | 10 +--
btl/libs/PBLAS/main.cpp | 4 +-
6 files changed, 204 insertions(+), 105 deletions(-)
diff --git a/btl/actions/action_parallel_matrix_vector_product.hh b/btl/actions/action_parallel_matrix_vector_product.hh
index c166e01..07886a2 100644
--- a/btl/actions/action_parallel_matrix_vector_product.hh
+++ b/btl/actions/action_parallel_matrix_vector_product.hh
@@ -22,6 +22,7 @@
#include "utilities.h"
#include "STL_interface.hh"
#include <string>
+#include <algorithm>
#include "init/init_function.hh"
#include "init/init_vector.hh"
#include "init/init_matrix.hh"
@@ -76,9 +77,13 @@ public :
// Descinit
int context = Interface::context();
int info;
- descinit_(descA, &GlobalRows, &GlobalCols, &BlockRows, &BlockCols, &iZERO, &iZERO, &context, &LocalRows, &info);
- descinit_(descX, &GlobalCols, &iONE, &BlockRows, &BlockCols, &iZERO, &iZERO, &context, &LocalXRows, &info);
- descinit_(descY, &GlobalRows, &iONE, &BlockRows, &BlockCols, &iZERO, &iZERO, &context, &LocalYRows, &info);
+ int LDA, LDX, LDY;
+ LDA = std::max(1, LocalRows);
+ LDX = std::max(1, LocalXRows);
+ LDY = std::max(1, LocalYRows);
+ descinit_(descA, &GlobalRows, &GlobalCols, &BlockRows, &BlockCols, &iZERO, &iZERO, &context, &LDA, &info);
+ descinit_(descX, &GlobalCols, &iONE, &BlockRows, &BlockCols, &iZERO, &iZERO, &context, &LDX, &info);
+ descinit_(descY, &GlobalRows, &iONE, &BlockRows, &BlockCols, &iZERO, &iZERO, &context, &LDY, &info);
}
// invalidate copy ctor
diff --git a/btl/generic_bench/bench.hh b/btl/generic_bench/bench.hh
index d9906a4..2a5ba36 100644
--- a/btl/generic_bench/bench.hh
+++ b/btl/generic_bench/bench.hh
@@ -29,21 +29,19 @@
#include <vector>
#include <string>
#include "timers/portable_perf_analyzer.hh"
+#include "timers/distributed_perf_analyzer_root.hh"
+#include "timers/distributed_perf_analyzer_node.hh"
// #include "timers/mixed_perf_analyzer.hh"
// #include "timers/x86_perf_analyzer.hh"
// #include "timers/STL_perf_analyzer.hh"
#ifdef HAVE_MKL
extern "C" void cblas_saxpy(const int, const float, const float*, const int, float *, const int);
#endif
-using namespace std;
template <template<class> class Perf_Analyzer, class Action>
BTL_DONT_INLINE void bench( int size_min, int size_max, int nb_point, bool silent = false )
{
- if (BtlConfig::skipAction(Action::name()))
- return;
-
- string filename="bench_"+Action::name()+".dat";
+ std::string filename = "bench_"+Action::name()+".dat";
if (!silent) { INFOS("starting " <<filename); }
@@ -55,116 +53,44 @@ BTL_DONT_INLINE void bench( int size_min, int size_max, int nb_point, bool silen
// matrices and vector size calculations
size_lin_log(nb_point,size_min,size_max,tab_sizes);
- std::vector<int> oldSizes;
- std::vector<double> oldFlops;
- bool hasOldResults = read_xy_file(filename, oldSizes, oldFlops, true);
- int oldi = oldSizes.size() - 1;
-
// loop on matrix size
Perf_Analyzer<Action> perf_action;
- for (int i=nb_point-1;i>=0;i--)
+ for (int i=nb_point-1; i>=0; i--)
{
- //INFOS("size=" <<tab_sizes[i]<<" ("<<nb_point-i<<"/"<<nb_point<<")");
if (!silent)
std::cout << " " << "size = " << tab_sizes[i] << " " << std::flush;
BTL_DISABLE_SSE_EXCEPTIONS();
- #ifdef HAVE_MKL
- {
- float dummy;
- cblas_saxpy(1,0,&dummy,1,&dummy,1);
- }
- #endif
tab_mflops[i] = perf_action.eval_mflops(tab_sizes[i], silent);
- if (!silent) std::cout << tab_mflops[i];
-
- if (hasOldResults)
- {
- while (oldi>=0 && oldSizes[oldi]>tab_sizes[i])
- --oldi;
- if (oldi>=0 && oldSizes[oldi]==tab_sizes[i] && !silent)
- {
- if (oldFlops[oldi]<tab_mflops[i])
- std::cout << "\t > ";
- else
- std::cout << "\t < ";
- std::cout << oldFlops[oldi];
- }
- --oldi;
- }
if (!silent)
- std::cout << " MFlops (" << nb_point-i << "/" << nb_point << ")" << std::endl;
- }
-
- if (!BtlConfig::Instance.overwriteResults)
- {
- if (hasOldResults)
- {
- // merge the two data
- std::vector<int> newSizes;
- std::vector<double> newFlops;
- int i=0;
- int j=0;
- while (i<tab_sizes.size() && j<oldSizes.size())
- {
- if (tab_sizes[i] == oldSizes[j])
- {
- newSizes.push_back(tab_sizes[i]);
- newFlops.push_back(std::max(tab_mflops[i], oldFlops[j]));
- ++i;
- ++j;
- }
- else if (tab_sizes[i] < oldSizes[j])
- {
- newSizes.push_back(tab_sizes[i]);
- newFlops.push_back(tab_mflops[i]);
- ++i;
- }
- else
- {
- newSizes.push_back(oldSizes[j]);
- newFlops.push_back(oldFlops[j]);
- ++j;
- }
- }
- while (i<tab_sizes.size())
- {
- newSizes.push_back(tab_sizes[i]);
- newFlops.push_back(tab_mflops[i]);
- ++i;
- }
- while (j<oldSizes.size())
- {
- newSizes.push_back(oldSizes[j]);
- newFlops.push_back(oldFlops[j]);
- ++j;
- }
- tab_mflops = newFlops;
- tab_sizes = newSizes;
- }
+ std::cout << tab_mflops[i] << " MFlops (" << nb_point-i << "/" << nb_point << ")" << std::endl;
}
// dump the result in a file :
- if (!silent) dump_xy_file(tab_sizes,tab_mflops,filename);
+ if (!silent) dump_xy_file(tab_sizes, tab_mflops, filename);
}
// default Perf Analyzer
template <class Action>
-BTL_DONT_INLINE void bench( int size_min, int size_max, int nb_point, bool silent ){
-
- // if the rdtsc is not available :
+BTL_DONT_INLINE void bench( int size_min, int size_max, int nb_point, bool silent = false)
+{
bench<Portable_Perf_Analyzer,Action>(size_min,size_max,nb_point,silent);
- // if the rdtsc is available :
-// bench<Mixed_Perf_Analyzer,Action>(size_min,size_max,nb_point);
-
+}
- // Only for small problem size. Otherwize it will be too long
-// bench<X86_Perf_Analyzer,Action>(size_min,size_max,nb_point);
-// bench<STL_Perf_Analyzer,Action>(size_min,size_max,nb_point);
+// distributed Perf Analyzer
+template <class Action>
+BTL_DONT_INLINE void distr_bench( int size_min, int size_max, int nb_point, bool silent = false) // runs bench() with the Root or the Node analyzer depending on the id reported by blacs_pinfo_
+{
+ int myid, nproc; // nproc is filled in below but not used afterwards
+ blacs_pinfo_(&myid, &nproc);
+ if (myid) // non-zero id: this process uses the Node analyzer
+ bench<Distributed_Perf_Analyzer_Node, Action>(size_min, size_max, nb_point, silent);
+ else // id 0: this process uses the Root analyzer
+ bench<Distributed_Perf_Analyzer_Root, Action>(size_min, size_max, nb_point, silent);
}
#endif
diff --git a/btl/generic_bench/timers/distributed_perf_analyzer_node.hh b/btl/generic_bench/timers/distributed_perf_analyzer_node.hh
new file mode 100644
index 0000000..7399d30
--- /dev/null
+++ b/btl/generic_bench/timers/distributed_perf_analyzer_node.hh
@@ -0,0 +1,78 @@
+#ifndef _PORTABLE_PERF_ANALYZER_NODE_HH
+#define _PORTABLE_PERF_ANALYZER_NODE_HH
+
+#include "utilities.h"
+#include "timers/portable_timer.hh"
+#include "blacs.h"
+
+template <class Action>
+class Distributed_Perf_Analyzer_Node{ // analyzer selected by distr_bench() for processes with a non-zero id: repeats the work dictated by received broadcasts and measures nothing
+public:
+ Distributed_Perf_Analyzer_Node( ):_nb_calc(0){
+ MESSAGE("Distributed_Perf_Analyzer_Node Ctor");
+ int temp, what = 0;
+ blacs_get_(&temp, &what, &context); // fills `context` via blacs_get_ with what = 0; presumably the default system context -- confirm against the BLACS documentation
+ };
+ Distributed_Perf_Analyzer_Node( const Distributed_Perf_Analyzer_Node& ){
+ INFOS("Copy Ctor not implemented");
+ exit(0); // copying terminates the process; note that the exit status is 0
+ };
+ ~Distributed_Perf_Analyzer_Node(){
+ MESSAGE("Distributed_Perf_Analyzer_Node Dtor");
+ };
+
+ BTL_DONT_INLINE double eval_mflops(int size, bool silent = false) // size is forwarded to the Action constructor; silent is not used in this function; always returns 0.
+ {
+ Action action(size);
+
+ /* Find best _nb_calc_ */
+ int bcast_receive, iZERO = 0, iONE = 1;
+ igebr2d_(&context, "A", " ", &iONE, &iONE, &bcast_receive, &iONE, &iZERO, &iZERO); // receive one int; the Root analyzer sends the matching value with igebs2d_
+ while (bcast_receive > 0) { // a positive value is a repetition count to mirror; a non-positive value ends this phase
+ _nb_calc = bcast_receive;
+ action.initialize();
+ time_calculate(action);
+ igebr2d_(&context, "A", " ", &iONE, &iONE, &bcast_receive, &iONE, &iZERO, &iZERO);
+ }
+ int tries = -bcast_receive; // the terminating value is negated to obtain the number of tries
+
+ /* Optimize */
+ for (int i = 1; i < tries; ++i) { // starts at 1, so the body runs tries-1 times, each on a fresh Action
+ Action _action(size);
+ _action.initialize();
+ time_calculate(_action);
+ }
+
+ /* Check */
+ int do_check;
+ igebr2d_(&context, "A", " ", &iONE, &iONE, &do_check, &iONE, &iZERO, &iZERO); // received flag; a value > 0 requests the result check below
+ if (do_check > 0) {
+ action.initialize();
+ action.calculate();
+ action.check_result();
+ }
+
+ /* Return a void value */
+ return 0.; // constant placeholder: no time is measured in this class
+ }
+
+ BTL_DONT_INLINE void time_calculate(Action & action) // performs 1 + _nb_calc calls to action.calculate() without timing them
+ {
+ // no need for time measurement
+ action.calculate(); // one call before the loop
+ for (int i = 0; i < _nb_calc; ++i) // int index compared against the unsigned long long _nb_calc
+ action.calculate();
+ }
+
+ unsigned long long get_nb_calc() // returns the current repetition count
+ {
+ return _nb_calc;
+ }
+
+
+private:
+ int context; // set by blacs_get_ in the constructor and passed to every igebr2d_ call
+ unsigned long long _nb_calc; // repetitions per time_calculate() call, taken from the last positive received value
+};
+
+#endif //_PORTABLE_PERF_ANALYZER_NODE_HH
diff --git a/btl/generic_bench/timers/distributed_perf_analyzer_root.hh b/btl/generic_bench/timers/distributed_perf_analyzer_root.hh
new file mode 100644
index 0000000..ca59738
--- /dev/null
+++ b/btl/generic_bench/timers/distributed_perf_analyzer_root.hh
@@ -0,0 +1,94 @@
+#ifndef _PORTABLE_PERF_ANALYZER_ROOT_HH
+#define _PORTABLE_PERF_ANALYZER_ROOT_HH
+
+#include "utilities.h"
+#include "timers/portable_timer.hh"
+#include "blacs.h"
+
+template <class Action>
+class Distributed_Perf_Analyzer_Root{ // analyzer selected by distr_bench() for the process with id 0: chooses the repetition count, broadcasts it with igebs2d_ and does the timing
+public:
+ Distributed_Perf_Analyzer_Root( ):_nb_calc(0), m_time_action(0), _chronos(){
+ MESSAGE("Distributed_Perf_Analyzer_Root Ctor");
+ int temp, what = 0;
+ blacs_get_(&temp, &what, &context); // fills `context` via blacs_get_ with what = 0; presumably the default system context -- confirm against the BLACS documentation
+ };
+ Distributed_Perf_Analyzer_Root( const Distributed_Perf_Analyzer_Root & ){
+ INFOS("Copy Ctor not implemented");
+ exit(0); // copying terminates the process; note that the exit status is 0
+ };
+ ~Distributed_Perf_Analyzer_Root(){
+ MESSAGE("Distributed_Perf_Analyzer_Root Dtor");
+ };
+
+ BTL_DONT_INLINE double eval_mflops(int size, bool silent = false) // size is forwarded to the Action constructor; silent suppresses the per-try output; returns nb_op_base()/(time per call * 1e6)
+ {
+ Action action(size);
+ m_time_action = 0; // both members are reset on every call, so each size is searched from scratch
+ _nb_calc = 0;
+
+ /* Find best _nb_calc_ */
+ int bcast_send = _nb_calc; // converts the unsigned long long count to int for the broadcast
+ int iONE = 1;
+ while (m_time_action < MIN_TIME) { // repeat until one timed run lasts at least MIN_TIME
+ _nb_calc = _nb_calc ? 2*_nb_calc : 1; // 1 on the first pass, then doubled each time
+ bcast_send = _nb_calc;
+ igebs2d_(&context, "A", " ", &iONE, &iONE, &bcast_send, &iONE); // send the new count before running it locally
+ action.initialize();
+ m_time_action = time_calculate(action);
+ }
+ int tries = BtlConfig::Instance.tries;
+ bcast_send = -tries; // a negative value ends the search phase on the receiving side and carries the tries count
+ igebs2d_(&context, "A", " ", &iONE, &iONE, &bcast_send, &iONE);
+
+ /* Optimize */
+ for (int i = 1; i < tries; ++i) { // starts at 1, so the body runs tries-1 times, each on a fresh Action
+ Action _action(size);
+ if (!silent)
+ std::cout << " " << _action.nb_op_base()*_nb_calc/(m_time_action*1e6) << " "; // rate computed from the best time so far, printed before this try runs
+ _action.initialize();
+ m_time_action = std::min(m_time_action, time_calculate(_action)); // keep the smallest measured time
+ }
+ double time_action = m_time_action / (double(_nb_calc)); // time per single calculate() call
+
+ /* Check */
+ int do_check = (BtlConfig::Instance.checkResults && size<128) ? 1 : 0; // check only when enabled in BtlConfig and size < 128
+ igebs2d_(&context, "A", " ", &iONE, &iONE, &do_check, &iONE); // the flag is sent in both cases
+ if (do_check > 0) {
+ action.initialize();
+ action.calculate();
+ action.check_result();
+ }
+
+ return action.nb_op_base()/(time_action*1e6); // this is the value bench() prints with the "MFlops" label
+ }
+
+ BTL_DONT_INLINE double time_calculate(Action & action) // returns _chronos.user_time() for _nb_calc calls to action.calculate()
+ {
+ // time measurement
+ action.calculate(); // one call before the timer is started
+ _chronos.start();
+ for (int ii=0; ii<_nb_calc; ii++) // int index compared against the unsigned long long _nb_calc
+ {
+ action.calculate();
+ }
+ _chronos.stop();
+ return _chronos.user_time();
+ }
+
+ unsigned long long get_nb_calc() // returns the current repetition count
+ {
+ return _nb_calc;
+ }
+
+
+private:
+ int context; // set by blacs_get_ in the constructor and passed to every igebs2d_ call
+ unsigned long long _nb_calc; // repetitions per timed run, chosen in eval_mflops()
+ double m_time_action; // smallest time measured so far for _nb_calc repetitions
+ Portable_Timer _chronos; // timer started and stopped around the loop in time_calculate()
+
+};
+
+#endif //_PORTABLE_PERF_ANALYZER_ROOT_HH
+
diff --git a/btl/generic_bench/timers/portable_perf_analyzer.hh b/btl/generic_bench/timers/portable_perf_analyzer.hh
index 161992f..a8c261f 100644
--- a/btl/generic_bench/timers/portable_perf_analyzer.hh
+++ b/btl/generic_bench/timers/portable_perf_analyzer.hh
@@ -42,12 +42,8 @@ public:
{
Action action(size);
-// action.initialize();
-// time_action = time_calculate(action);
- while (m_time_action < MIN_TIME)
- {
- if(_nb_calc==0) _nb_calc = 1;
- else _nb_calc *= 2;
+ while (m_time_action < MIN_TIME) {
+ _nb_calc = _nb_calc ? 2*_nb_calc : 1;
action.initialize();
m_time_action = time_calculate(action);
}
@@ -79,7 +75,7 @@ public:
// time measurement
action.calculate();
_chronos.start();
- for (int ii=0;ii<_nb_calc;ii++)
+ for (int ii=0; ii<_nb_calc; ii++)
{
action.calculate();
}
diff --git a/btl/libs/PBLAS/main.cpp b/btl/libs/PBLAS/main.cpp
index 888f123..bca245c 100644
--- a/btl/libs/PBLAS/main.cpp
+++ b/btl/libs/PBLAS/main.cpp
@@ -3,7 +3,7 @@
#include "bench.hh"
#include <iostream>
-using namespace std;
+//using namespace std;
#include "pblas_interface.hh"
#include "action_parallel_matrix_vector_product.hh"
@@ -22,7 +22,7 @@ int main(int argc, char **argv)
blacs_gridinit_(&context, "Row-major", &procrows, &proccols);
bool iamroot = (myid == 0);
- bench<Action_parallel_matrix_vector_product<pblas_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT,!iamroot);
+ distr_bench<Action_parallel_matrix_vector_product<pblas_interface<REAL_TYPE> > >(10,MAX_MV,NB_POINT,!iamroot);
// blacs_exit_(&iZERO);
MPI_Finalize();
^ permalink raw reply related [flat|nested] only message in thread
only message in thread, other threads:[~2011-07-18 0:41 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-07-18 0:41 [gentoo-commits] proj/auto-numerical-bench:unstable commit in: btl/generic_bench/, btl/actions/, btl/generic_bench/timers/, btl/libs/PBLAS/ Andrea Arteaga
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox