32 #ifndef STRUMPACK_PARAMETERS_HPP
33 #define STRUMPACK_PARAMETERS_HPP
#include <algorithm>
#include <atomic>

#include "StrumpackConfig.hpp"
/**
 * Version query. Only the declaration lives in this header; the
 * definition is provided elsewhere.
 *
 * All three arguments are taken by non-const reference. Presumably
 * they are output arguments that receive the components of the
 * library version (the definition is not visible here -- confirm).
 *
 * \param major reference argument, presumably set to the major version
 * \param minor reference argument, presumably set to the minor version
 * \param patch reference argument, presumably set to the patch level
 */
void get_version(int& major, int& minor, int& patch);
#ifndef DOXYGEN_SHOULD_SKIP_THIS
/*
 * Declarations of the global parameters and statistics counters.
 *
 * Everything here is declared `extern`, so this header only announces
 * the variables; their definitions live in another translation unit.
 *
 * The instrumentation macros in this header (STRUMPACK_FLOPS,
 * STRUMPACK_ADD_MEMORY, ...) refer to the counters by their fully
 * qualified name strumpack::params::<counter>, so the declarations
 * must sit in that namespace for those macros to compile.
 */
namespace strumpack {
  namespace params {

    // NOTE(review): these two plain ints are not referenced anywhere in
    // this header; their meaning is only suggested by their names
    // (thread count, cutoff level for task recursion) -- confirm
    // against the code that uses them.
    extern int num_threads;
    extern int task_recursion_cutoff_level;

    // Updated by STRUMPACK_FLOPS / STRUMPACK_BYTES.
    extern std::atomic<long long int> flops;
    extern std::atomic<long long int> bytes_moved;

    // memory / device_memory are adjusted by the STRUMPACK_ADD_* and
    // STRUMPACK_SUB_* memory macros; the peak_* variables are raised by
    // the ADD macros whenever the running value exceeds them.
    extern std::atomic<long long int> memory;
    extern std::atomic<long long int> peak_memory;
    extern std::atomic<long long int> device_memory;
    extern std::atomic<long long int> peak_device_memory;

    // One counter per STRUMPACK_<NAME>_FLOPS macro.
    extern std::atomic<long long int> CB_sample_flops;
    extern std::atomic<long long int> sparse_sample_flops;
    extern std::atomic<long long int> extraction_flops;
    extern std::atomic<long long int> ULV_factor_flops;
    extern std::atomic<long long int> schur_flops;
    extern std::atomic<long long int> full_rank_flops;
    extern std::atomic<long long int> random_flops;
    extern std::atomic<long long int> ID_flops;
    extern std::atomic<long long int> ortho_flops;
    extern std::atomic<long long int> QR_flops;
    extern std::atomic<long long int> reduce_sample_flops;
    extern std::atomic<long long int> update_sample_flops;
    extern std::atomic<long long int> hss_solve_flops;

    // Updated by the STRUMPACK_HODLR_*_FILL_FLOPS macros.
    extern std::atomic<long long int> f11_fill_flops;
    extern std::atomic<long long int> f12_fill_flops;
    extern std::atomic<long long int> f21_fill_flops;
    extern std::atomic<long long int> f22_fill_flops;

    // Updated by the STRUMPACK_HODLR_*_MULT_FLOPS macros.
    extern std::atomic<long long int> f21_mult_flops;
    extern std::atomic<long long int> invf11_mult_flops;
    extern std::atomic<long long int> f12_mult_flops;

  } // end namespace params
} // end namespace strumpack
#endif //DOXYGEN_SHOULD_SKIP_THIS
#ifndef DOXYGEN_SHOULD_SKIP_THIS
/*
 * Instrumentation macros.
 *
 * When STRUMPACK_COUNT_FLOPS is defined, each macro adds its argument
 * to (or, for the SUB variants, subtracts it from) the matching
 * std::atomic counter strumpack::params::<counter>. Otherwise every
 * macro expands to the no-op statement `void(0);`.
 *
 * In both configurations the expansion is a complete statement: the
 * single-counter macros and all no-op variants carry their own
 * trailing semicolon, and the counting ADD_*MEMORY macros expand to a
 * braced block. A semicolon written after the macro at the call site
 * is therefore just an extra empty statement. For the same reason the
 * macros cannot be the unbraced body of an `if` that has an `else`.
 *
 * The argument is parenthesized in the expansion so that any
 * expression can be passed safely.
 */
#if defined(STRUMPACK_COUNT_FLOPS)

#define STRUMPACK_FLOPS(n)                      \
  strumpack::params::flops += (n);
#define STRUMPACK_BYTES(n)                      \
  strumpack::params::bytes_moved += (n);
#define STRUMPACK_ID_FLOPS(n)                   \
  strumpack::params::ID_flops += (n);
#define STRUMPACK_QR_FLOPS(n)                   \
  strumpack::params::QR_flops += (n);
#define STRUMPACK_ORTHO_FLOPS(n)                \
  strumpack::params::ortho_flops += (n);
#define STRUMPACK_REDUCE_SAMPLE_FLOPS(n)        \
  strumpack::params::reduce_sample_flops += (n);
#define STRUMPACK_UPDATE_SAMPLE_FLOPS(n)        \
  strumpack::params::update_sample_flops += (n);
#define STRUMPACK_RANDOM_FLOPS(n)               \
  strumpack::params::random_flops += (n);
#define STRUMPACK_SPARSE_SAMPLE_FLOPS(n)        \
  strumpack::params::sparse_sample_flops += (n);
#define STRUMPACK_FULL_RANK_FLOPS(n)            \
  strumpack::params::full_rank_flops += (n);
#define STRUMPACK_EXTRACTION_FLOPS(n)           \
  strumpack::params::extraction_flops += (n);
#define STRUMPACK_ULV_FACTOR_FLOPS(n)           \
  strumpack::params::ULV_factor_flops += (n);
#define STRUMPACK_SCHUR_FLOPS(n)                \
  strumpack::params::schur_flops += (n);
#define STRUMPACK_CB_SAMPLE_FLOPS(n)            \
  strumpack::params::CB_sample_flops += (n);
#define STRUMPACK_HSS_SOLVE_FLOPS(n)            \
  strumpack::params::hss_solve_flops += (n);

#define STRUMPACK_HODLR_F11_FILL_FLOPS(n)       \
  strumpack::params::f11_fill_flops += (n);
#define STRUMPACK_HODLR_F12_FILL_FLOPS(n)       \
  strumpack::params::f12_fill_flops += (n);
#define STRUMPACK_HODLR_F21_FILL_FLOPS(n)       \
  strumpack::params::f21_fill_flops += (n);
#define STRUMPACK_HODLR_F22_FILL_FLOPS(n)       \
  strumpack::params::f22_fill_flops += (n);

#define STRUMPACK_HODLR_F21_MULT_FLOPS(n)       \
  strumpack::params::f21_mult_flops += (n);
#define STRUMPACK_HODLR_INVF11_MULT_FLOPS(n)    \
  strumpack::params::invf11_mult_flops += (n);
#define STRUMPACK_HODLR_F12_MULT_FLOPS(n)       \
  strumpack::params::f12_mult_flops += (n);

// Add n to the running memory counter, then raise peak_memory if the
// running value now exceeds it. compare_exchange_weak stores the
// currently held peak into old_peak_ when it fails, so the loop stops
// as soon as either the exchange succeeds or the stored peak is no
// longer smaller than new_peak_.
#define STRUMPACK_ADD_MEMORY(n) {                                       \
    strumpack::params::memory += (n);                                   \
    auto new_peak_ = std::max(strumpack::params::memory.load(),         \
                              strumpack::params::peak_memory.load());   \
    auto old_peak_ = strumpack::params::peak_memory.load();             \
    while (new_peak_ > old_peak_ &&                                     \
           !strumpack::params::peak_memory.compare_exchange_weak        \
           (old_peak_, new_peak_)) { }                                  \
  }
// Same as STRUMPACK_ADD_MEMORY, for device_memory / peak_device_memory.
#define STRUMPACK_ADD_DEVICE_MEMORY(n) {                                \
    strumpack::params::device_memory += (n);                            \
    auto new_peak_ =                                                    \
      std::max(strumpack::params::device_memory.load(),                 \
               strumpack::params::peak_device_memory.load());           \
    auto old_peak_ = strumpack::params::peak_device_memory.load();      \
    while (new_peak_ > old_peak_ &&                                     \
           !strumpack::params::peak_device_memory.compare_exchange_weak \
           (old_peak_, new_peak_)) { }                                  \
  }

// Releasing memory only lowers the running counters; the peaks are
// left untouched.
#define STRUMPACK_SUB_MEMORY(n)                 \
  strumpack::params::memory -= (n);
#define STRUMPACK_SUB_DEVICE_MEMORY(n)          \
  strumpack::params::device_memory -= (n);

#else // !defined(STRUMPACK_COUNT_FLOPS): every macro is a no-op

#define STRUMPACK_FLOPS(n) void(0);
#define STRUMPACK_BYTES(n) void(0);
#define STRUMPACK_ID_FLOPS(n) void(0);
#define STRUMPACK_QR_FLOPS(n) void(0);
#define STRUMPACK_ORTHO_FLOPS(n) void(0);
#define STRUMPACK_REDUCE_SAMPLE_FLOPS(n) void(0);
#define STRUMPACK_UPDATE_SAMPLE_FLOPS(n) void(0);
#define STRUMPACK_RANDOM_FLOPS(n) void(0);
#define STRUMPACK_SPARSE_SAMPLE_FLOPS(n) void(0);
#define STRUMPACK_FULL_RANK_FLOPS(n) void(0);
#define STRUMPACK_EXTRACTION_FLOPS(n) void(0);
#define STRUMPACK_ULV_FACTOR_FLOPS(n) void(0);
#define STRUMPACK_SCHUR_FLOPS(n) void(0);
#define STRUMPACK_CB_SAMPLE_FLOPS(n) void(0);
#define STRUMPACK_HSS_SOLVE_FLOPS(n) void(0);

#define STRUMPACK_HODLR_F11_FILL_FLOPS(n) void(0);
#define STRUMPACK_HODLR_F12_FILL_FLOPS(n) void(0);
#define STRUMPACK_HODLR_F21_FILL_FLOPS(n) void(0);
#define STRUMPACK_HODLR_F22_FILL_FLOPS(n) void(0);

#define STRUMPACK_HODLR_F21_MULT_FLOPS(n) void(0);
#define STRUMPACK_HODLR_INVF11_MULT_FLOPS(n) void(0);
#define STRUMPACK_HODLR_F12_MULT_FLOPS(n) void(0);

#define STRUMPACK_ADD_MEMORY(n) void(0);
#define STRUMPACK_SUB_MEMORY(n) void(0);
#define STRUMPACK_ADD_DEVICE_MEMORY(n) void(0);
#define STRUMPACK_SUB_DEVICE_MEMORY(n) void(0);

#endif // STRUMPACK_COUNT_FLOPS
#endif // DOXYGEN_SHOULD_SKIP_THIS
217 #endif // STRUMPACK_PARAMETERS_HPP