32 #ifndef STRUMPACK_PARAMETERS_HPP
33 #define STRUMPACK_PARAMETERS_HPP
42 #include "StrumpackConfig.hpp"
59 inline std::ostream& operator<<(std::ostream& os,
ReturnCode& e) {
73 #ifndef DOXYGEN_SHOULD_SKIP_THIS
// Declarations only (extern): the objects themselves are defined elsewhere.
// The STRUMPACK_* macros later in this file refer to the atomic counters as
// strumpack::params::<name>.
// NOTE(review): the embedded listing numbers skip lines around this block, so
// the enclosing namespace declaration is not visible here -- confirm against
// the original header.

// Plain int settings; not referenced by any macro visible in this listing.
// NOTE(review): presumably threading configuration -- confirm at the
// definition site.
75 extern int num_threads;
76 extern int task_recursion_cutoff_level;
// Updated by STRUMPACK_FLOPS and STRUMPACK_BYTES respectively.
78 extern std::atomic<long long int> flops;
79 extern std::atomic<long long int> bytes_moved;
// memory / device_memory are adjusted by the STRUMPACK_{ADD,SUB}_MEMORY and
// STRUMPACK_{ADD,SUB}_DEVICE_MEMORY macros; the peak_* counters are only
// raised, by the ADD variants.
80 extern std::atomic<long long int> memory;
81 extern std::atomic<long long int> peak_memory;
82 extern std::atomic<long long int> device_memory;
83 extern std::atomic<long long int> peak_device_memory;
// Per-category flop counters; each has a matching STRUMPACK_<NAME>(n) macro
// that adds n to it (e.g. STRUMPACK_CB_SAMPLE_FLOPS -> CB_sample_flops).
85 extern std::atomic<long long int> CB_sample_flops;
86 extern std::atomic<long long int> sparse_sample_flops;
87 extern std::atomic<long long int> extraction_flops;
88 extern std::atomic<long long int> ULV_factor_flops;
89 extern std::atomic<long long int> schur_flops;
90 extern std::atomic<long long int> full_rank_flops;
91 extern std::atomic<long long int> random_flops;
92 extern std::atomic<long long int> ID_flops;
93 extern std::atomic<long long int> ortho_flops;
94 extern std::atomic<long long int> QR_flops;
95 extern std::atomic<long long int> reduce_sample_flops;
96 extern std::atomic<long long int> update_sample_flops;
97 extern std::atomic<long long int> hss_solve_flops;
// Counters incremented by the STRUMPACK_HODLR_*_FILL_FLOPS macros.
99 extern std::atomic<long long int> f11_fill_flops;
100 extern std::atomic<long long int> f12_fill_flops;
101 extern std::atomic<long long int> f21_fill_flops;
102 extern std::atomic<long long int> f22_fill_flops;
// Counters incremented by the STRUMPACK_HODLR_*_MULT_FLOPS macros.
104 extern std::atomic<long long int> f21_mult_flops;
105 extern std::atomic<long long int> invf11_mult_flops;
106 extern std::atomic<long long int> f12_mult_flops;
112 #ifndef DOXYGEN_SHOULD_SKIP_THIS
113 #if defined(STRUMPACK_COUNT_FLOPS)
// Counting hooks: each macro adds n to the atomic counter of the same name in
// strumpack::params. Every expansion already ends in ';', so a call site may
// be written with or without its own trailing semicolon.
#define STRUMPACK_FLOPS(n) strumpack::params::flops += n;
#define STRUMPACK_BYTES(n) strumpack::params::bytes_moved += n;
#define STRUMPACK_ID_FLOPS(n) strumpack::params::ID_flops += n;
#define STRUMPACK_QR_FLOPS(n) strumpack::params::QR_flops += n;
#define STRUMPACK_ORTHO_FLOPS(n) strumpack::params::ortho_flops += n;
#define STRUMPACK_REDUCE_SAMPLE_FLOPS(n) strumpack::params::reduce_sample_flops += n;
#define STRUMPACK_UPDATE_SAMPLE_FLOPS(n) strumpack::params::update_sample_flops += n;
#define STRUMPACK_RANDOM_FLOPS(n) strumpack::params::random_flops += n;
#define STRUMPACK_SPARSE_SAMPLE_FLOPS(n) strumpack::params::sparse_sample_flops += n;
#define STRUMPACK_FULL_RANK_FLOPS(n) strumpack::params::full_rank_flops += n;
#define STRUMPACK_EXTRACTION_FLOPS(n) strumpack::params::extraction_flops += n;
#define STRUMPACK_ULV_FACTOR_FLOPS(n) strumpack::params::ULV_factor_flops += n;
#define STRUMPACK_SCHUR_FLOPS(n) strumpack::params::schur_flops += n;
#define STRUMPACK_CB_SAMPLE_FLOPS(n) strumpack::params::CB_sample_flops += n;
#define STRUMPACK_HSS_SOLVE_FLOPS(n) strumpack::params::hss_solve_flops += n;
// HODLR flop-counting hooks: each macro adds n to the atomic counter of the
// same name in strumpack::params.
//
// Every expansion ends in ';', matching the other counting macros in this
// file and the `void(0);` no-op variants. A call site therefore compiles
// identically in both configurations whether or not it supplies its own
// trailing semicolon (an extra ';' is just an empty statement).
#define STRUMPACK_HODLR_F11_FILL_FLOPS(n)       \
  strumpack::params::f11_fill_flops += n;
#define STRUMPACK_HODLR_F12_FILL_FLOPS(n)       \
  strumpack::params::f12_fill_flops += n;
#define STRUMPACK_HODLR_F21_FILL_FLOPS(n)       \
  strumpack::params::f21_fill_flops += n;
#define STRUMPACK_HODLR_F22_FILL_FLOPS(n)       \
  strumpack::params::f22_fill_flops += n;

#define STRUMPACK_HODLR_F21_MULT_FLOPS(n)       \
  strumpack::params::f21_mult_flops += n;
#define STRUMPACK_HODLR_INVF11_MULT_FLOPS(n)    \
  strumpack::params::invf11_mult_flops += n;
#define STRUMPACK_HODLR_F12_MULT_FLOPS(n)       \
  strumpack::params::f12_mult_flops += n;
// Adds n to strumpack::params::memory, then raises
// strumpack::params::peak_memory if the new total exceeds it.
//
// The peak is published with a compare-exchange loop: when the exchange
// fails, compare_exchange_weak reloads old_peak_ with the value currently
// stored, and the loop stops as soon as that value is no longer below
// new_peak_ (or the exchange succeeds).
//
// The expansion is a complete brace-enclosed block; the closing brace ends
// the macro so that the following #define is not spliced into it.
#define STRUMPACK_ADD_MEMORY(n) {                                       \
    strumpack::params::memory += n;                                     \
    auto new_peak_ = std::max(strumpack::params::memory.load(),         \
                              strumpack::params::peak_memory.load());   \
    auto old_peak_ = strumpack::params::peak_memory.load();             \
    while (new_peak_ > old_peak_ &&                                     \
           !strumpack::params::peak_memory.compare_exchange_weak        \
           (old_peak_, new_peak_)) { }                                  \
  }
// Adds n to strumpack::params::device_memory, then raises
// strumpack::params::peak_device_memory if the new total exceeds it.
//
// The peak is published with a compare-exchange loop: when the exchange
// fails, compare_exchange_weak reloads old_peak_ with the value currently
// stored, and the loop stops as soon as that value is no longer below
// new_peak_ (or the exchange succeeds).
//
// The expansion is a complete brace-enclosed block; the closing brace ends
// the macro so that the following #define is not spliced into it.
#define STRUMPACK_ADD_DEVICE_MEMORY(n) {                                    \
    strumpack::params::device_memory += n;                                  \
    auto new_peak_ = std::max(strumpack::params::device_memory.load(),      \
                              strumpack::params::peak_device_memory.load());\
    auto old_peak_ = strumpack::params::peak_device_memory.load();          \
    while (new_peak_ > old_peak_ &&                                         \
           !strumpack::params::peak_device_memory.compare_exchange_weak     \
           (old_peak_, new_peak_)) { }                                      \
  }
// Subtract n from the memory / device_memory counters in strumpack::params.
// Only the running totals change; the peak_* counters are not touched here.
#define STRUMPACK_SUB_MEMORY(n) strumpack::params::memory -= n;
#define STRUMPACK_SUB_DEVICE_MEMORY(n) strumpack::params::device_memory -= n;
// No-op versions of the counting macros: each expands to `void(0);`, so the
// argument n is dropped by the preprocessor and never evaluated.
// NOTE(review): presumably the #else branch of the
// `#if defined(STRUMPACK_COUNT_FLOPS)` check above -- the embedded listing
// numbers jump from 183 to 187, so that directive is not visible here;
// confirm against the original header.
187 #define STRUMPACK_FLOPS(n) void(0);
188 #define STRUMPACK_BYTES(n) void(0);
189 #define STRUMPACK_ID_FLOPS(n) void(0);
190 #define STRUMPACK_QR_FLOPS(n) void(0);
191 #define STRUMPACK_ORTHO_FLOPS(n) void(0);
192 #define STRUMPACK_REDUCE_SAMPLE_FLOPS(n) void(0);
193 #define STRUMPACK_UPDATE_SAMPLE_FLOPS(n) void(0);
194 #define STRUMPACK_RANDOM_FLOPS(n) void(0);
195 #define STRUMPACK_SPARSE_SAMPLE_FLOPS(n) void(0);
196 #define STRUMPACK_FULL_RANK_FLOPS(n) void(0);
197 #define STRUMPACK_EXTRACTION_FLOPS(n) void(0);
198 #define STRUMPACK_ULV_FACTOR_FLOPS(n) void(0);
199 #define STRUMPACK_SCHUR_FLOPS(n) void(0);
200 #define STRUMPACK_CB_SAMPLE_FLOPS(n) void(0);
201 #define STRUMPACK_HSS_SOLVE_FLOPS(n) void(0);
// No-op versions of the HODLR counting macros: each expands to `void(0);`,
// so the argument n is dropped by the preprocessor and never evaluated.
//
// The set of names mirrors the counting definitions exactly (four *_FILL_*
// and three *_MULT_* macros), so code using any of them builds in both
// configurations.
#define STRUMPACK_HODLR_F11_FILL_FLOPS(n) void(0);
#define STRUMPACK_HODLR_F12_FILL_FLOPS(n) void(0);
#define STRUMPACK_HODLR_F21_FILL_FLOPS(n) void(0);
#define STRUMPACK_HODLR_F22_FILL_FLOPS(n) void(0);

#define STRUMPACK_HODLR_F21_MULT_FLOPS(n) void(0);
#define STRUMPACK_HODLR_INVF11_MULT_FLOPS(n) void(0);
#define STRUMPACK_HODLR_F12_MULT_FLOPS(n) void(0);
// No-op versions of the memory-tracking macros: each expands to `void(0);`,
// so n is never evaluated and no counter is touched.
212 #define STRUMPACK_ADD_MEMORY(n) void(0);
213 #define STRUMPACK_SUB_MEMORY(n) void(0);
214 #define STRUMPACK_ADD_DEVICE_MEMORY(n) void(0);
215 #define STRUMPACK_SUB_DEVICE_MEMORY(n) void(0);
Definition: StrumpackOptions.hpp:43
ReturnCode
Enumeration for the possible return codes.
Definition: StrumpackParameters.hpp:50