StrumpackParameters.hpp
Go to the documentation of this file.
1 /*
2  * STRUMPACK -- STRUctured Matrices PACKage, Copyright (c) 2014, The
3  * Regents of the University of California, through Lawrence Berkeley
4  * National Laboratory (subject to receipt of any required approvals
5  * from the U.S. Dept. of Energy). All rights reserved.
6  *
7  * If you have questions about your rights to use or distribute this
8  * software, please contact Berkeley Lab's Technology Transfer
9  * Department at TTD@lbl.gov.
10  *
11  * NOTICE. This software is owned by the U.S. Department of Energy. As
12  * such, the U.S. Government has been granted for itself and others
13  * acting on its behalf a paid-up, nonexclusive, irrevocable,
14  * worldwide license in the Software to reproduce, prepare derivative
15  * works, and perform publicly and display publicly. Beginning five
16  * (5) years after the date permission to assert copyright is obtained
17  * from the U.S. Department of Energy, and subject to any subsequent
18  * five (5) year renewals, the U.S. Government is granted for itself
19  * and others acting on its behalf a paid-up, nonexclusive,
20  * irrevocable, worldwide license in the Software to reproduce,
21  * prepare derivative works, distribute copies to the public, perform
22  * publicly and display publicly, and to permit others to do so.
23  *
24  * Developers: Pieter Ghysels, Francois-Henry Rouet, Xiaoye S. Li.
25  * (Lawrence Berkeley National Lab, Computational Research
26  * Division).
27  */
32 #ifndef STRUMPACK_PARAMETERS_HPP
33 #define STRUMPACK_PARAMETERS_HPP
34 #include <atomic>
35 #include <string>
36 #include <cmath>
37 #include <iostream>
38 #include <fstream>
39 #if defined(_OPENMP)
40 #include <omp.h>
41 #endif
42 #include "StrumpackConfig.hpp"
43 
44 namespace strumpack { // these are all global variables
45 
/**
 * Enumeration for the possible return codes.
 *
 * The enumerator order matters: the underlying values are assigned
 * implicitly (SUCCESS == 0, then increasing by one), so new codes
 * should only be appended at the end.
 */
enum class ReturnCode {
  SUCCESS,
  MATRIX_NOT_SET,
  REORDERING_ERROR,
  ZERO_PIVOT,
  NO_CONVERGENCE,
  INACCURATE_INERTIA
};

/**
 * Write the name of a ReturnCode to an output stream.
 *
 * The code is taken by const reference so that const objects and
 * temporaries (for instance `os << ReturnCode::SUCCESS`) can be
 * printed as well as non-const lvalues.
 *
 * \param os stream to write to
 * \param e return code to print
 * \return the stream os; nothing is written if e does not hold one
 * of the enumerators listed in ReturnCode
 */
inline std::ostream& operator<<(std::ostream& os, const ReturnCode& e) {
  // No default label on purpose: this lets the compiler warn when an
  // enumerator is added to ReturnCode without a matching case here.
  switch (e) {
  case ReturnCode::SUCCESS: os << "SUCCESS"; break;
  case ReturnCode::MATRIX_NOT_SET: os << "MATRIX_NOT_SET"; break;
  case ReturnCode::REORDERING_ERROR: os << "REORDERING_ERROR"; break;
  case ReturnCode::ZERO_PIVOT: os << "ZERO_PIVOT"; break;
  case ReturnCode::NO_CONVERGENCE: os << "NO_CONVERGENCE"; break;
  case ReturnCode::INACCURATE_INERTIA: os << "INACCURATE_INERTIA"; break;
  }
  return os;
}
70 
namespace params {

#ifndef DOXYGEN_SHOULD_SKIP_THIS

// Everything in this namespace is only declared here (extern); the
// definitions are not part of this header.

// Plain integer settings.
extern int num_threads, task_recursion_cutoff_level;

// Global counters and memory statistics. These are the objects the
// STRUMPACK_FLOPS / STRUMPACK_BYTES / STRUMPACK_*_MEMORY macros
// update when STRUMPACK_COUNT_FLOPS is defined.
extern std::atomic<long long>
  flops,
  bytes_moved,
  memory,
  peak_memory,
  device_memory,
  peak_device_memory;

// Per-category flop counters, updated by the matching
// STRUMPACK_<NAME>_FLOPS macros.
extern std::atomic<long long>
  CB_sample_flops,
  sparse_sample_flops,
  extraction_flops,
  ULV_factor_flops,
  schur_flops,
  full_rank_flops,
  random_flops,
  ID_flops,
  ortho_flops,
  QR_flops,
  reduce_sample_flops,
  update_sample_flops,
  hss_solve_flops;

// Counters updated by the STRUMPACK_HODLR_*_FILL_FLOPS macros.
extern std::atomic<long long>
  f11_fill_flops,
  f12_fill_flops,
  f21_fill_flops,
  f22_fill_flops;

// Counters updated by the STRUMPACK_HODLR_*_MULT_FLOPS macros.
extern std::atomic<long long>
  f21_mult_flops,
  invf11_mult_flops,
  f12_mult_flops;

#endif // DOXYGEN_SHOULD_SKIP_THIS

} // end namespace params
111 
#ifndef DOXYGEN_SHOULD_SKIP_THIS

// Instrumentation macros.
//
// When STRUMPACK_COUNT_FLOPS is defined, each macro below adds (or,
// for the *_SUB_* macros, subtracts) its argument to/from the
// matching std::atomic counter in namespace strumpack::params.
// Otherwise every macro expands to the no-op statement `void(0);`.
//
// Conventions shared by all macros in both branches:
//  - the expansion already ends in `;` (or is a `{ ... }` block), so
//    a call site may be written with or without a trailing semicolon;
//    as a consequence a macro call must not be used as the unbraced
//    body of an `if` that is followed by `else`;
//  - the argument is parenthesized in the expansion, so any
//    expression can be passed.

#if defined(STRUMPACK_COUNT_FLOPS)

#define STRUMPACK_FLOPS(n) \
  strumpack::params::flops += (n);
#define STRUMPACK_BYTES(n) \
  strumpack::params::bytes_moved += (n);
#define STRUMPACK_ID_FLOPS(n) \
  strumpack::params::ID_flops += (n);
#define STRUMPACK_QR_FLOPS(n) \
  strumpack::params::QR_flops += (n);
#define STRUMPACK_ORTHO_FLOPS(n) \
  strumpack::params::ortho_flops += (n);
#define STRUMPACK_REDUCE_SAMPLE_FLOPS(n) \
  strumpack::params::reduce_sample_flops += (n);
#define STRUMPACK_UPDATE_SAMPLE_FLOPS(n) \
  strumpack::params::update_sample_flops += (n);
#define STRUMPACK_RANDOM_FLOPS(n) \
  strumpack::params::random_flops += (n);
#define STRUMPACK_SPARSE_SAMPLE_FLOPS(n) \
  strumpack::params::sparse_sample_flops += (n);
#define STRUMPACK_FULL_RANK_FLOPS(n) \
  strumpack::params::full_rank_flops += (n);
#define STRUMPACK_EXTRACTION_FLOPS(n) \
  strumpack::params::extraction_flops += (n);
#define STRUMPACK_ULV_FACTOR_FLOPS(n) \
  strumpack::params::ULV_factor_flops += (n);
#define STRUMPACK_SCHUR_FLOPS(n) \
  strumpack::params::schur_flops += (n);
#define STRUMPACK_CB_SAMPLE_FLOPS(n) \
  strumpack::params::CB_sample_flops += (n);
#define STRUMPACK_HSS_SOLVE_FLOPS(n) \
  strumpack::params::hss_solve_flops += (n);

#define STRUMPACK_HODLR_F11_FILL_FLOPS(n) \
  strumpack::params::f11_fill_flops += (n);
#define STRUMPACK_HODLR_F12_FILL_FLOPS(n) \
  strumpack::params::f12_fill_flops += (n);
#define STRUMPACK_HODLR_F21_FILL_FLOPS(n) \
  strumpack::params::f21_fill_flops += (n);
#define STRUMPACK_HODLR_F22_FILL_FLOPS(n) \
  strumpack::params::f22_fill_flops += (n);

#define STRUMPACK_HODLR_F21_MULT_FLOPS(n) \
  strumpack::params::f21_mult_flops += (n);
#define STRUMPACK_HODLR_INVF11_MULT_FLOPS(n) \
  strumpack::params::invf11_mult_flops += (n);
#define STRUMPACK_HODLR_F12_MULT_FLOPS(n) \
  strumpack::params::f12_mult_flops += (n);

// Add n to the memory counter and raise peak_memory if the new total
// exceeds it. The value returned by the atomic `+=` is the total
// right after this addition, so the peak candidate cannot be skewed
// by other threads updating the counter in between. The
// compare-exchange loop retries until either the peak was raised to
// that total, or the stored peak is already at least as large
// (old_peak_ is refreshed by every failed exchange).
#define STRUMPACK_ADD_MEMORY(n) { \
    auto new_peak_ = (strumpack::params::memory += (n)); \
    auto old_peak_ = strumpack::params::peak_memory.load(); \
    while (new_peak_ > old_peak_ && \
           !strumpack::params::peak_memory.compare_exchange_weak \
           (old_peak_, new_peak_)) { } \
  }
// Same as STRUMPACK_ADD_MEMORY, but for device_memory and
// peak_device_memory.
#define STRUMPACK_ADD_DEVICE_MEMORY(n) { \
    auto new_peak_ = (strumpack::params::device_memory += (n)); \
    auto old_peak_ = strumpack::params::peak_device_memory.load(); \
    while (new_peak_ > old_peak_ && \
           !strumpack::params::peak_device_memory.compare_exchange_weak \
           (old_peak_, new_peak_)) { } \
  }

// Subtract n from the current memory counters; the peaks are left
// untouched.
#define STRUMPACK_SUB_MEMORY(n) \
  strumpack::params::memory -= (n);
#define STRUMPACK_SUB_DEVICE_MEMORY(n) \
  strumpack::params::device_memory -= (n);

#else

// Counting disabled: every macro is a no-op and its argument is not
// evaluated.
#define STRUMPACK_FLOPS(n) void(0);
#define STRUMPACK_BYTES(n) void(0);
#define STRUMPACK_ID_FLOPS(n) void(0);
#define STRUMPACK_QR_FLOPS(n) void(0);
#define STRUMPACK_ORTHO_FLOPS(n) void(0);
#define STRUMPACK_REDUCE_SAMPLE_FLOPS(n) void(0);
#define STRUMPACK_UPDATE_SAMPLE_FLOPS(n) void(0);
#define STRUMPACK_RANDOM_FLOPS(n) void(0);
#define STRUMPACK_SPARSE_SAMPLE_FLOPS(n) void(0);
#define STRUMPACK_FULL_RANK_FLOPS(n) void(0);
#define STRUMPACK_EXTRACTION_FLOPS(n) void(0);
#define STRUMPACK_ULV_FACTOR_FLOPS(n) void(0);
#define STRUMPACK_SCHUR_FLOPS(n) void(0);
#define STRUMPACK_CB_SAMPLE_FLOPS(n) void(0);
#define STRUMPACK_HSS_SOLVE_FLOPS(n) void(0);

#define STRUMPACK_HODLR_F11_FILL_FLOPS(n) void(0);
#define STRUMPACK_HODLR_F12_FILL_FLOPS(n) void(0);
#define STRUMPACK_HODLR_F21_FILL_FLOPS(n) void(0);
#define STRUMPACK_HODLR_F22_FILL_FLOPS(n) void(0);

#define STRUMPACK_HODLR_F21_MULT_FLOPS(n) void(0);
#define STRUMPACK_HODLR_INVF11_MULT_FLOPS(n) void(0);
#define STRUMPACK_HODLR_F12_MULT_FLOPS(n) void(0);

#define STRUMPACK_ADD_MEMORY(n) void(0);
#define STRUMPACK_SUB_MEMORY(n) void(0);
#define STRUMPACK_ADD_DEVICE_MEMORY(n) void(0);
#define STRUMPACK_SUB_DEVICE_MEMORY(n) void(0);

#endif
#endif // DOXYGEN_SHOULD_SKIP_THIS
219 
220 } //end namespace strumpack
221 
222 #endif // STRUMPACK_PARAMETERS_HPP
Definition: StrumpackOptions.hpp:43
ReturnCode
Enumeration for the possible return codes.
Definition: StrumpackParameters.hpp:50