33 #ifndef STRUMPACK_MPI_WRAPPER_HPP
34 #define STRUMPACK_MPI_WRAPPER_HPP
44 #define OMPI_SKIP_MPICXX 1
48 #include "Triplet.hpp"
76 template<>
inline MPI_Datatype mpi_type<std::complex<float>>() {
return MPI_CXX_FLOAT_COMPLEX; }
78 template<>
inline MPI_Datatype mpi_type<std::complex<double>>() {
return MPI_CXX_DOUBLE_COMPLEX; }
80 template<>
inline MPI_Datatype mpi_type<std::pair<int,int>>() {
return MPI_2INT; }
83 template<>
inline MPI_Datatype mpi_type<std::pair<long int,long int>>() {
84 static MPI_Datatype l_l_mpi_type = MPI_DATATYPE_NULL;
85 if (l_l_mpi_type == MPI_DATATYPE_NULL) {
87 (2, strumpack::mpi_type<long int>(), &l_l_mpi_type);
88 MPI_Type_commit(&l_l_mpi_type);
93 template<>
inline MPI_Datatype mpi_type<std::pair<long long int,long long int>>() {
94 static MPI_Datatype ll_ll_mpi_type = MPI_DATATYPE_NULL;
95 if (ll_ll_mpi_type == MPI_DATATYPE_NULL) {
98 MPI_Type_commit(&ll_ll_mpi_type);
100 return ll_ll_mpi_type;
125 req_ = std::unique_ptr<MPI_Request>(
new MPI_Request());
152 void wait() { MPI_Wait(req_.get(), MPI_STATUS_IGNORE); }
155 std::unique_ptr<MPI_Request> req_;
168 inline void wait_all(std::vector<MPIRequest>& reqs) {
169 for (
auto& r : reqs) r.wait();
173 inline void wait_all(std::vector<MPI_Request>& reqs) {
174 MPI_Waitall(reqs.size(), reqs.data(), MPI_STATUSES_IGNORE);
228 if (comm_ != MPI_COMM_NULL && comm_ != MPI_COMM_WORLD)
229 MPI_Comm_free(&comm_);
238 if (
this != &c) duplicate(c.
comm());
250 c.comm_ = MPI_COMM_NULL;
257 MPI_Comm
comm()
const {
return comm_; }
262 bool is_null()
const {
return comm_ == MPI_COMM_NULL; }
268 assert(comm_ != MPI_COMM_NULL);
270 MPI_Comm_rank(comm_, &r);
279 assert(comm_ != MPI_COMM_NULL);
281 MPI_Comm_size(comm_, &nprocs);
297 template<
typename T>
void
298 broadcast(std::vector<T>& sbuf)
const {
299 MPI_Bcast(sbuf.data(), sbuf.size(), mpi_type<T>(), 0, comm_);
301 template<
typename T>
void
302 broadcast_from(std::vector<T>& sbuf,
int src)
const {
303 MPI_Bcast(sbuf.data(), sbuf.size(), mpi_type<T>(), src, comm_);
306 template<
typename T, std::
size_t N>
void
307 broadcast(std::array<T,N>& sbuf)
const {
308 MPI_Bcast(sbuf.data(), sbuf.size(), mpi_type<T>(), 0, comm_);
311 template<
typename T>
void
312 broadcast(T& data)
const {
313 MPI_Bcast(&data, 1, mpi_type<T>(), 0, comm_);
315 template<
typename T>
void
316 broadcast_from(T& data,
int src)
const {
317 MPI_Bcast(&data, 1, mpi_type<T>(), src, comm_);
319 template<
typename T>
void
320 broadcast(T* sbuf, std::size_t ssize)
const {
321 MPI_Bcast(sbuf, ssize, mpi_type<T>(), 0, comm_);
325 void all_gather(T* buf, std::size_t rsize)
const {
327 (MPI_IN_PLACE, 0, MPI_DATATYPE_NULL,
328 buf, rsize, mpi_type<T>(), comm_);
332 void all_gather_v(T* buf,
const int* rcnts,
const int* displs)
const {
334 (MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, buf, rcnts, displs,
335 mpi_type<T>(), comm_);
356 MPI_Isend(
const_cast<T*
>(sbuf.data()), sbuf.size(), mpi_type<T>(),
357 dest, tag, comm_, req.req_.get());
374 void isend(
const std::vector<T>& sbuf,
int dest,
int tag,
375 MPI_Request* req)
const {
377 MPI_Isend(
const_cast<T*
>(sbuf.data()), sbuf.size(), mpi_type<T>(),
378 dest, tag, comm_, req);
382 void isend(
const T* sbuf, std::size_t ssize,
int dest,
383 int tag, MPI_Request* req)
const {
385 MPI_Isend(
const_cast<T*
>(sbuf), ssize, mpi_type<T>(),
386 dest, tag, comm_, req);
390 void isend(
const T& buf,
int dest,
int tag, MPI_Request* req)
const {
392 MPI_Isend(
const_cast<T*
>(&buf), 1, mpi_type<T>(),
393 dest, tag, comm_, req);
410 void send(
const std::vector<T>& sbuf,
int dest,
int tag)
const {
412 MPI_Send(
const_cast<T*
>(sbuf.data()), sbuf.size(), mpi_type<T>(), dest, tag, comm_);
427 template<
typename T> std::vector<T>
recv(
int src,
int tag)
const {
429 MPI_Probe(src, tag, comm_, &stat);
431 MPI_Get_count(&stat, mpi_type<T>(), &msgsize);
433 std::vector<T> rbuf(msgsize);
434 MPI_Recv(rbuf.data(), msgsize, mpi_type<T>(), src, tag,
435 comm_, MPI_STATUS_IGNORE);
440 std::pair<int,std::vector<T>> recv_any_src(
int tag)
const {
442 MPI_Probe(MPI_ANY_SOURCE, tag, comm_, &stat);
444 MPI_Get_count(&stat, mpi_type<T>(), &msgsize);
445 std::vector<T> rbuf(msgsize);
446 MPI_Recv(rbuf.data(), msgsize, mpi_type<T>(), stat.MPI_SOURCE,
447 tag, comm_, MPI_STATUS_IGNORE);
448 return {stat.MPI_SOURCE, std::move(rbuf)};
451 template<
typename T>
T recv_one(
int src,
int tag)
const {
453 MPI_Recv(&t, 1, mpi_type<T>(), src, tag, comm_, MPI_STATUS_IGNORE);
458 void irecv(
const T* rbuf, std::size_t rsize,
int src,
459 int tag, MPI_Request* req)
const {
461 MPI_Irecv(
const_cast<T*
>(rbuf), rsize, mpi_type<T>(),
462 src, tag, comm_, req);
480 MPI_Allreduce(MPI_IN_PLACE, &t, 1, mpi_type<T>(), op, comm_);
498 template<
typename T>
T reduce(T t, MPI_Op op)
const {
500 MPI_Reduce(MPI_IN_PLACE, &t, 1, mpi_type<T>(), op, 0, comm_);
501 else MPI_Reduce(&t, &t, 1, mpi_type<T>(), op, 0, comm_);
520 template<
typename T>
void all_reduce(T* t,
int ssize, MPI_Op op)
const {
521 MPI_Allreduce(MPI_IN_PLACE, t, ssize, mpi_type<T>(), op, comm_);
524 template<
typename T>
void all_reduce(std::vector<T>& t, MPI_Op op)
const {
543 template<
typename T>
void reduce(T* t,
int ssize, MPI_Op op)
const {
545 MPI_Reduce(MPI_IN_PLACE, t, ssize, mpi_type<T>(), op, 0, comm_);
546 else MPI_Reduce(t, t, ssize, mpi_type<T>(), op, 0, comm_);
550 void all_to_all(
const T* sbuf,
int scnt, T* rbuf)
const {
552 (sbuf, scnt, mpi_type<T>(), rbuf, scnt, mpi_type<T>(), comm_);
555 template<
typename T,
typename A=std::allocator<T>> std::vector<T,A>
556 all_to_allv(
const T* sbuf,
int* scnts,
int* sdispls,
557 int* rcnts,
int* rdispls)
const {
558 std::size_t rsize = 0;
559 for (
int p=0; p<
size(); p++)
561 std::vector<T,A> rbuf(rsize);
563 (sbuf, scnts, sdispls, mpi_type<T>(),
564 rbuf.data(), rcnts, rdispls, mpi_type<T>(), comm_);
568 template<
typename T>
void
569 all_to_allv(
const T* sbuf,
int* scnts,
int* sdispls,
570 T* rbuf,
int* rcnts,
int* rdispls)
const {
572 (sbuf, scnts, sdispls, mpi_type<T>(),
573 rbuf, rcnts, rdispls, mpi_type<T>(), comm_);
593 template<
typename T,
typename A=std::allocator<T>>
void
594 all_to_all_v(std::vector<std::vector<T>>& sbuf, std::vector<T,A>& rbuf,
595 std::vector<T*>& pbuf)
const {
612 template<
typename T,
typename A=std::allocator<T>> std::vector<T,A>
614 std::vector<T,A> rbuf;
615 std::vector<T*> pbuf;
637 template<
typename T,
typename A=std::allocator<T>>
void
638 all_to_all_v(std::vector<std::vector<T>>& sbuf, std::vector<T,A>& rbuf,
639 std::vector<T*>& pbuf,
const MPI_Datatype Ttype)
const {
640 assert(sbuf.size() == std::size_t(
size()));
642 std::unique_ptr<int[]> iwork(
new int[4*P]);
643 auto ssizes = iwork.get();
644 auto rsizes = ssizes + P;
645 auto sdispl = ssizes + 2*P;
646 auto rdispl = ssizes + 3*P;
647 for (
int p=0; p<P; p++) {
649 static_cast<std::size_t
>(std::numeric_limits<int>::max())) {
650 std::cerr <<
"# ERROR: 32bit integer overflow in all_to_all_v!!"
654 ssizes[p] = sbuf[p].size();
658 std::size_t totssize = std::accumulate(ssizes, ssizes+P, std::size_t(0)),
659 totrsize = std::accumulate(rsizes, rsizes+P, std::size_t(0));
661 static_cast<std::size_t
>(std::numeric_limits<int>::max()) ||
663 static_cast<std::size_t
>(std::numeric_limits<int>::max())) {
669 rbuf.resize(totrsize);
670 std::unique_ptr<MPI_Request[]> reqs(
new MPI_Request[2*P]);
671 std::size_t displ = 0;
673 for (
int p=0; p<P; p++) {
674 pbuf[p] = rbuf.data() + displ;
675 MPI_Irecv(pbuf[p], rsizes[p], Ttype, p, 0, comm_, reqs.get()+p);
678 for (
int p=0; p<P; p++)
680 (sbuf[p].data(), ssizes[p], Ttype, p, 0, comm_, reqs.get()+P+p);
681 MPI_Waitall(2*P, reqs.get(), MPI_STATUSES_IGNORE);
682 std::vector<std::vector<T>>().swap(sbuf);
684 std::unique_ptr<T[]> sendbuf_(
new T[totssize]);
685 auto sendbuf = sendbuf_.get();
686 sdispl[0] = rdispl[0] = 0;
687 for (
int p=1; p<P; p++) {
688 sdispl[p] = sdispl[p-1] + ssizes[p-1];
689 rdispl[p] = rdispl[p-1] + rsizes[p-1];
691 for (
int p=0; p<P; p++)
692 std::copy(sbuf[p].begin(), sbuf[p].end(), sendbuf+sdispl[p]);
693 std::vector<std::vector<T>>().swap(sbuf);
694 rbuf.resize(totrsize);
695 MPI_Alltoallv(sendbuf, ssizes, sdispl, Ttype,
696 rbuf.data(), rsizes, rdispl, Ttype, comm_);
698 for (
int p=0; p<P; p++)
699 pbuf[p] = rbuf.data() + rdispl[p];
724 assert(P0 + P <=
size());
726 std::vector<int> sub_ranks(P);
727 for (
int i=0; i<P; i++)
728 sub_ranks[i] = P0 + i*stride;
729 MPI_Group group, sub_group;
730 MPI_Comm_group(comm_, &group);
731 MPI_Group_incl(group, P, sub_ranks.data(), &sub_group);
732 MPI_Comm_create(comm_, sub_group, &sub_comm.comm_);
733 MPI_Group_free(&group);
734 MPI_Group_free(&sub_group);
751 MPI_Group group, sub_group;
752 MPI_Comm_group(comm_, &group);
753 MPI_Group_incl(group, 1, &p, &sub_group);
754 MPI_Comm_create(comm_, sub_group, &c0.comm_);
755 MPI_Group_free(&group);
756 MPI_Group_free(&sub_group);
764 MPI_Pcontrol(1, name.c_str());
770 MPI_Pcontrol(-1, name.c_str());
773 static bool initialized() {
775 MPI_Initialized(&flag);
776 return static_cast<bool>(flag);
780 MPI_Comm comm_ = MPI_COMM_WORLD;
782 void duplicate(MPI_Comm c) {
783 if (c == MPI_COMM_NULL) comm_ = c;
784 else MPI_Comm_dup(c, &comm_);
797 assert(c != MPI_COMM_NULL);
799 MPI_Comm_rank(c, &rank);
811 assert(c != MPI_COMM_NULL);
813 MPI_Comm_size(c, &nprocs);
819 #endif // STRUMPACK_MPI_WRAPPER_HPP