dune-fem  2.4.1-rc
flops.hh
Go to the documentation of this file.
1 #ifndef DUNE_FEM_FLOPS_HH
2 #define DUNE_FEM_FLOPS_HH
3 
4 #if HAVE_PAPI
5 #include <papi.h>
6 #endif
7 
8 //- system includes
9 #include <iostream>
10 #include <vector>
11 #include <cassert>
12 
13 //- dune-fem includes
17 
18 namespace Dune {
19 
20  namespace Fem {
21 
22  // FlopCounter
23  // -----------
24 
32  {
33  typedef std::vector< float > values_t ;
36  ThreadSafeValue< int > stopped_;
37 
38  // call PAPI_flops for given values
// Thin wrapper around PAPI_flops.
//
// With HAVE_PAPI set, the four arguments are handed to PAPI_flops (note
// that PAPI_flops takes the flop count before the mflops rate) and a
// diagnostic is written to std::cerr if the returned status is below
// PAPI_OK.  Without HAVE_PAPI the function body is empty and all four
// arguments are left untouched.
//
// \param realTime  forwarded as first argument of PAPI_flops
// \param procTime  forwarded as second argument of PAPI_flops
// \param mFlops    forwarded as fourth argument of PAPI_flops
// \param flop      forwarded as third argument of PAPI_flops
void evaluateCounters( float& realTime,
                       float& procTime,
                       float& mFlops,
                       long long& flop )
{
#if HAVE_PAPI
  const int status = PAPI_flops( &realTime, &procTime, &flop, &mFlops );
  const bool failed = ( status < PAPI_OK );
  if( failed )
    std::cerr << "ERROR: PAPI_FP_OPS event is not available, check papi_avail!" << std::endl;
#endif
}
52 
53  // constructor
54  FlopCounter ()
55  : values_( values_t(3, float(0.0)) ),
56  stopped_( 0 )
57  {
58  }
59 
60  static unsigned long threadId ()
61  {
62  return ThreadManager :: thread();
63  }
64 
65  // initialize counters
// Prepares PAPI (only when HAVE_PAPI is set), performs one call of
// evaluateCounters() whose results are discarded, and resets the
// stopped flag to 0.
66  void startCounter()
67  {
// NOTE(review): in the visible code this brace block is entered
// unconditionally; the listing numbering skips a line right before it,
// so a guarding condition may be missing from this view -- confirm
// against the original header.
69  {
70 #if HAVE_PAPI
// hand the threadId callback to PAPI and register the calling thread
71  PAPI_thread_init( threadId );
72  PAPI_register_thread();
73 #endif
74  }
// The locals only serve as targets for evaluateCounters(); they are not
// read afterwards.  Presumably this first call starts the measurement
// -- confirm against the PAPI_flops documentation.
75  float realtime, proctime, mflops;
76  long long flop ;
77  evaluateCounters( realtime, proctime, mflops, flop );
78  // mark as not stopped
79  *stopped_ = 0;
80  }
81 
82  // stop counters and store values
83  void stopCounter()
84  {
85  if( *stopped_ == 0 )
86  {
87  // get reference to thread local value
88  values_t& values = *values_;
89  long long& flop = *flop_;
90  evaluateCounters( values[ 0 ], values[ 1 ], values[ 2 ], flop );
91 
92  // mark thread as stopped
93  *stopped_ = 1 ;
94  }
95  }
96 
97  // print values to given ostream, all values are gathered to
98  // the master rank
// Combines the stored values of all threads, reduces them with the
// communicator returned by MPIManager::comm() (max, min and sum) and
// lets rank 0 write one header line plus three result lines to out.
//
// \param out  stream that receives the output on rank 0
//
// Throws InvalidStateException (via DUNE_THROW) unless the stopped_
// entries of all ThreadManager::maxThreads() threads sum up to the
// number of threads.
99  void printCounter( std::ostream& out ) const
100  {
101  // make sure this method is called in single thread mode only
// NOTE(review): no check follows the comment above in the visible code;
// the listing numbering skips a line here, so a statement may be
// missing from this view -- confirm against the original header.
103 
104  int allStopped = 0 ;
105  const int threads = ThreadManager :: maxThreads ();
// add up the stopped flags of all threads
106  for( int i=0; i<threads; ++i )
107  {
108  allStopped += stopped_[ i ];
109  }
110 
111  // make sure all other threads have been stopped, otherwise
112  // the results won't be correct
113  if( allStopped != threads )
114  DUNE_THROW(InvalidStateException,"Not all thread have been stopped");
115 
// layout of values: [0], [1], [2] are the three entries stored by
// stopCounter() (real time, proc time, mflops), [3] is the flop count
// and [4] is the ratio computed further below
116  typedef std::vector< double > result_t ;
117  result_t values( 5, 0.0 );
118 
// start with the entries of thread 0
119  for( int i=0; i<3; ++i )
120  values[ i ] = values_[ 0 ][ i ];
121  values[ 3 ] = flop_[ 0 ];
122 
123  // take maximum for times and sum mflops and flops for all threads
124  for( int i=1; i<threads; ++i )
125  {
126  values[ 0 ] = std::max( values[ 0 ], double(values_[ i ][ 0 ]) );
127  values[ 1 ] = std::max( values[ 1 ], double(values_[ i ][ 1 ]) );
128  values[ 2 ] += values_[ i ][ 2 ];
129  values[ 3 ] += flop_[ i ];
130  }
131  // convert to GFLOP
132  values[ 3 ] /= 1.0e9 ;
133  // compute flop rate ourselves (flop count divided by real time)
// NOTE(review): values[ 3 ] has already been divided by 1.0e9 above, so
// this ratio is in units of 1e9 flop per unit of values[ 0 ], not in
// mflops.  There is also no guard against values[ 0 ] == 0, which is
// the value the constructor stores and which stays in place when
// evaluateCounters() is compiled without HAVE_PAPI.
134  values[ 4 ] = values[ 3 ] / values[ 0 ];
135 
// three copies of the local result, one per reduction
136  result_t max( values );
137  result_t min( values );
138  result_t sum( values );
139 
140  typedef MPIManager :: CollectiveCommunication CollectiveCommunication;
141  const CollectiveCommunication& comm = MPIManager :: comm();
142 
143  const int size = max.size();
144  // compute max, min, and sum of all five values
// (presumably reductions over all ranks of the communicator -- confirm
// against the CollectiveCommunication documentation)
145  comm.max( &max[ 0 ], size );
146  comm.min( &min[ 0 ], size );
147  comm.sum( &sum[ 0 ], size );
148 
// only rank 0 writes the header and the three result lines
149  if( comm.rank() == 0 )
150  {
151  out << "FlopCounter::typ: real proc mflops flop flop/real " << std::endl;
152  printValues( out, "FlopCounter::sum: ", sum );
153  printValues( out, "FlopCounter::max: ", max );
154  printValues( out, "FlopCounter::min: ", min );
155  }
156  }
157 
158  static FlopCounter& instance()
159  {
160  static FlopCounter counter;
161  return counter;
162  }
163 
164  public:
170  static void start( )
171  {
172  instance().startCounter();
173  }
174 
176  static void stop( )
177  {
178  instance().stopCounter();
179  }
180 
184  static void print( std::ostream& out )
185  {
186  instance().printCounter( out );
187  }
188 
189  protected:
190  template <class vec_t>
191  void printValues( std::ostream& out, const std::string name, const vec_t& values ) const
192  {
193  out << name << " ";
194  for( unsigned int i=0; i<values.size(); ++i )
195  {
196  out << values[ i ] << " ";
197  }
198  out << std::endl;
199  }
200  };
201 
202  } // namespace Fem
203 } // namespace Dune
204 #endif
void printValues(std::ostream &out, const std::string name, const vec_t &values) const
Definition: flops.hh:191
static void stop()
stop counters
Definition: flops.hh:176
static const CollectiveCommunication & comm()
Definition: mpimanager.hh:108
static constexpr T max(T a)
Definition: utility.hh:65
static constexpr T sum(T a)
Definition: utility.hh:33
A class wrapper for the function PAPI_flops from the package PAPI. The results are CPU time...
Definition: flops.hh:31
static double max(const Double &v, const double p)
Definition: double.hh:387
static void start()
Start counters.
Definition: flops.hh:170
static int thread()
return thread number
Definition: threadmanager.hh:208
Definition: coordinate.hh:4
static void print(std::ostream &out)
print values to given ostream, all values are gathered to the master rank before printing ...
Definition: flops.hh:184
static int maxThreads()
return maximal number of threads possible in the current run
Definition: threadmanager.hh:202
Dune::CollectiveCommunication< MPIHelper::MPICommunicator > CollectiveCommunication
Definition: mpimanager.hh:22
static bool singleThreadMode()
returns true if program is operating on one thread currently
Definition: threadmanager.hh:217
static double min(const Double &v, const double p)
Definition: double.hh:375