211 if (comm_ != MPI_COMM_NULL && comm_ != MPI_COMM_WORLD)
212 MPI_Comm_free(&comm_);
// Self-assignment guard: only when the other object is a different object
// does this one take a duplicate of the other's communicator handle.
// NOTE(review): duplicate(), as written further down in this file, stores the
// new handle into comm_ without freeing whatever comm_ held before. If this
// object already owns a duplicated communicator, that handle looks leaked
// here — confirm.
221 if (
this != &c) duplicate(c.
comm());
// Reset the other object's handle to MPI_COMM_NULL.
// Presumably this follows a transfer of that handle into *this, so the other
// object no longer refers to it — the transfer itself is not on this line.
233 c.comm_ = MPI_COMM_NULL;
240 MPI_Comm
comm()
const {
return comm_; }
245 bool is_null()
const {
return comm_ == MPI_COMM_NULL; }
// Precondition (checked only when assertions are enabled): the wrapped
// communicator must not be MPI_COMM_NULL.
251 assert(comm_ != MPI_COMM_NULL);
// Query the calling process's rank within comm_; MPI writes it into r.
253 MPI_Comm_rank(comm_, &r);
// Precondition (checked only when assertions are enabled): the wrapped
// communicator must not be MPI_COMM_NULL.
262 assert(comm_ != MPI_COMM_NULL);
// Query the number of processes in comm_; MPI writes it into nprocs.
264 MPI_Comm_size(comm_, &nprocs);
// Broadcast the contents of a vector from rank 0 to every rank of comm_.
//
// sbuf is the send buffer on rank 0 and the receive buffer everywhere else.
// The element count handed to MPI_Bcast is sbuf.size() on the calling rank;
// the vector is not resized here.
// assumes every rank has already sized sbuf identically — TODO confirm with
// callers.
// NOTE(review): sbuf.size() is a std::size_t, while MPI_Bcast takes an int
// count, so very large vectors would be narrowed.
280 template<
typename T>
void
281 broadcast(std::vector<T>& sbuf)
const {
282 MPI_Bcast(sbuf.data(), sbuf.size(), mpi_type<T>(), 0, comm_);
// Broadcast the contents of a vector from rank src to every rank of comm_.
//
// sbuf is the send buffer on rank src and the receive buffer everywhere else.
// src is the root rank of the broadcast.
// The element count is sbuf.size() on the calling rank; no resizing is done.
// assumes every rank has already sized sbuf identically — TODO confirm with
// callers.
285 template<
typename T>
void
286 broadcast_from(std::vector<T>& sbuf,
int src)
const {
287 MPI_Bcast(sbuf.data(), sbuf.size(), mpi_type<T>(), src, comm_);
// Broadcast a fixed-size std::array from rank 0 to every rank of comm_.
// The element count is N (sbuf.size()), so it is the same on every rank by
// construction.
290 template<
typename T, std::
size_t N>
void
291 broadcast(std::array<T,N>& sbuf)
const {
292 MPI_Bcast(sbuf.data(), sbuf.size(), mpi_type<T>(), 0, comm_);
// Broadcast a single value of type T from rank 0 to every rank of comm_.
// data is read on rank 0 and overwritten on every other rank.
295 template<
typename T>
void broadcast(
T& data)
const {
296 MPI_Bcast(&data, 1, mpi_type<T>(), 0, comm_);
// Broadcast a single value of type T from rank src to every rank of comm_.
// data is read on rank src and overwritten on every other rank.
298 template<
typename T>
void broadcast_from(
T& data,
int src)
const {
299 MPI_Bcast(&data, 1, mpi_type<T>(), src, comm_);
// Broadcast ssize elements starting at sbuf from rank 0 to every rank of
// comm_.
// sbuf must point to at least ssize elements of T on every rank.
// NOTE(review): ssize is a std::size_t, while MPI_Bcast takes an int count.
301 template<
typename T>
void
302 broadcast(
T* sbuf, std::size_t ssize)
const {
303 MPI_Bcast(sbuf, ssize, mpi_type<T>(), 0, comm_);
// Broadcast ssize elements starting at sbuf from rank src to every rank of
// comm_.
// sbuf must point to at least ssize elements of T on every rank.
// NOTE(review): ssize is a std::size_t, while MPI_Bcast takes an int count.
305 template<
typename T>
void
306 broadcast_from(
T* sbuf, std::size_t ssize,
int src)
const {
307 MPI_Bcast(sbuf, ssize, mpi_type<T>(), src, comm_);
// In-place gather over all ranks of comm_.
// MPI_IN_PLACE is given as the send buffer, so each rank's own contribution
// is taken from buf itself; the send count 0 and MPI_DATATYPE_NULL are
// placeholders. rsize elements of mpi_type<T>() are received per rank.
// assumes buf holds rsize elements for every rank of comm_ — TODO confirm.
311 void all_gather(
T* buf, std::size_t rsize)
const {
313 (MPI_IN_PLACE, 0, MPI_DATATYPE_NULL,
314 buf, rsize, mpi_type<T>(), comm_);
// In-place gather over all ranks for buffers of std::pair<long int,long int>.
// A temporary MPI datatype made of two contiguous long ints is created and
// committed, used as the receive type, and freed again before returning.
// assumes the pair is laid out as two adjacent long ints with no padding —
// TODO confirm.
316 void all_gather(std::pair<long int,long int>* buf,
317 std::size_t rsize)
const {
318 MPI_Datatype l_l_mpi_type;
319 MPI_Type_contiguous(2, mpi_type<long int>(), &l_l_mpi_type);
320 MPI_Type_commit(&l_l_mpi_type);
// MPI_IN_PLACE: each rank's own contribution is read from buf itself.
322 (MPI_IN_PLACE, 0, MPI_DATATYPE_NULL,
323 buf, rsize, l_l_mpi_type, comm_);
324 MPI_Type_free(&l_l_mpi_type);
// In-place gather over all ranks for buffers of
// std::pair<long long int,long long int>.
// A temporary MPI datatype ll_ll_mpi_type is committed, used as the receive
// type, and freed again before returning.
// NOTE(review): the statement that constructs ll_ll_mpi_type is not among the
// lines below; presumably it mirrors the long int overload
// (MPI_Type_contiguous of two long long ints) — confirm it is present before
// the commit.
326 void all_gather(std::pair<long long int,long long int>* buf,
327 std::size_t rsize)
const {
328 MPI_Datatype ll_ll_mpi_type;
330 MPI_Type_commit(&ll_ll_mpi_type);
// MPI_IN_PLACE: each rank's own contribution is read from buf itself.
332 (MPI_IN_PLACE, 0, MPI_DATATYPE_NULL,
333 buf, rsize, ll_ll_mpi_type, comm_);
334 MPI_Type_free(&ll_ll_mpi_type);
// In-place variable-count gather over all ranks of comm_.
// buf is both source (via MPI_IN_PLACE) and destination.
// rcnts and displs are passed straight through as the per-rank receive counts
// and displacements, in units of mpi_type<T>().
338 void all_gather_v(
T* buf,
const int* rcnts,
const int* displs)
const {
340 (MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, buf, rcnts, displs,
341 mpi_type<T>(), comm_);
// Rooted gather: each rank contributes ssize elements from sbuf, and rank
// root receives rsize elements per rank into rbuf.
// NOTE(review): rbuf is declared int*, yet the receive datatype passed along
// is mpi_type<T>(). For any T other than int, the buffer type and the MPI
// datatype disagree; rbuf likely should be T*. Confirm against callers before
// touching the signature.
345 void gather(
T* sbuf,
int ssize,
int* rbuf,
int rsize,
int root)
const {
347 (sbuf, ssize, mpi_type<T>(), rbuf,
348 rsize, mpi_type<T>(), root, comm_);
// Rooted variable-count gather: each rank contributes scnts elements from
// sbuf; rank root receives into rbuf using per-rank counts rcnts and
// displacements displs, both in units of mpi_type<T>().
352 void gather_v(
T* sbuf,
int scnts,
T* rbuf,
const int* rcnts,
353 const int* displs,
int root)
const {
355 (sbuf, scnts, mpi_type<T>(), rbuf, rcnts, displs,
356 mpi_type<T>(), root, comm_);
// Start a non-blocking send of the whole vector to rank dest with tag tag.
// The const_cast only satisfies MPI bindings that take a non-const buffer.
// The request is written through req.req_.get(), so the wrapper object req
// owns the MPI_Request storage.
// sbuf must stay alive and unmodified until the request completes.
377 MPI_Isend(
const_cast<T*
>(sbuf.data()), sbuf.size(), mpi_type<T>(),
378 dest, tag, comm_, req.req_.get());
// Non-blocking send of a whole vector to rank dest with tag tag.
// The request handle is written to *req; the caller must complete it (wait or
// test) and keep sbuf alive and unmodified until then.
// The const_cast only satisfies MPI bindings that take a non-const buffer.
395 void isend(
const std::vector<T>& sbuf,
int dest,
int tag,
396 MPI_Request* req)
const {
398 MPI_Isend(
const_cast<T*
>(sbuf.data()), sbuf.size(), mpi_type<T>(),
399 dest, tag, comm_, req);
// Non-blocking send of ssize elements starting at sbuf to rank dest with tag
// tag.
// The request handle is written to *req; the caller must complete it and keep
// the buffer alive and unmodified until then.
403 void isend(
const T* sbuf, std::size_t ssize,
int dest,
404 int tag, MPI_Request* req)
const {
406 MPI_Isend(
const_cast<T*
>(sbuf), ssize, mpi_type<T>(),
407 dest, tag, comm_, req);
// Blocking send of ssize elements starting at sbuf to rank dest with tag tag.
// The const_cast only satisfies MPI bindings that take a non-const buffer.
410 void send(
const T* sbuf, std::size_t ssize,
int dest,
int tag)
const {
412 MPI_Send(
const_cast<T*
>(sbuf), ssize, mpi_type<T>(), dest, tag, comm_);
// Non-blocking send of a single value to rank dest with tag tag.
// The address of buf is handed to MPI, so the referenced object must outlive
// the request written to *req.
416 void isend(
const T& buf,
int dest,
int tag, MPI_Request* req)
const {
418 MPI_Isend(
const_cast<T*
>(&buf), 1, mpi_type<T>(),
419 dest, tag, comm_, req);
// Blocking send of a whole vector to rank dest with tag tag.
// The const_cast only satisfies MPI bindings that take a non-const buffer.
436 void send(
const std::vector<T>& sbuf,
int dest,
int tag)
const {
438 MPI_Send(
const_cast<T*
>(sbuf.data()), sbuf.size(),
439 mpi_type<T>(), dest, tag, comm_);
// Blocking receive of a message whose length is not known in advance.
// Works in three steps: probe for a matching message, ask MPI how many
// elements of mpi_type<T>() it contains, then receive into a vector of
// exactly that size.
// NOTE(review): the receive uses src and tag as passed, not the values
// recorded in stat. If a wildcard is passed, the receive could match a
// different message than the one probed — confirm callers pass concrete
// values.
454 template<
typename T> std::vector<T>
recv(
int src,
int tag)
const {
456 MPI_Probe(src, tag, comm_, &stat);
458 MPI_Get_count(&stat, mpi_type<T>(), &msgsize);
460 std::vector<T> rbuf(msgsize);
461 MPI_Recv(rbuf.data(), msgsize, mpi_type<T>(), src, tag,
462 comm_, MPI_STATUS_IGNORE);
// Blocking receive, from any rank, of a message with tag tag and unknown
// length.
// Probes with MPI_ANY_SOURCE, sizes the buffer from the probed message, then
// receives specifically from the probed sender (stat.MPI_SOURCE), so the
// message received is from the same rank that was probed.
// Returns {sender rank, received data}.
467 std::pair<int,std::vector<T>> recv_any_src(
int tag)
const {
469 MPI_Probe(MPI_ANY_SOURCE, tag, comm_, &stat);
471 MPI_Get_count(&stat, mpi_type<T>(), &msgsize);
472 std::vector<T> rbuf(msgsize);
473 MPI_Recv(rbuf.data(), msgsize, mpi_type<T>(), stat.MPI_SOURCE,
474 tag, comm_, MPI_STATUS_IGNORE);
475 return {stat.MPI_SOURCE, std::move(rbuf)};
// Blocking receive of exactly one element of type T from rank src with tag
// tag. The element is received into the local t; the status is ignored.
478 template<
typename T>
T recv_one(
int src,
int tag)
const {
480 MPI_Recv(&t, 1, mpi_type<T>(), src, tag, comm_, MPI_STATUS_IGNORE);
// Non-blocking receive of up to rsize elements from rank src with tag tag.
// The request handle is written to *req and must be completed by the caller.
// NOTE(review): rbuf is declared const T*, but it is the receive destination:
// constness is cast away and MPI writes through the pointer. Passing a
// pointer to a truly const object would be undefined behaviour; the parameter
// probably should be T*.
485 void irecv(
const T* rbuf, std::size_t rsize,
int src,
486 int tag, MPI_Request* req)
const {
488 MPI_Irecv(
const_cast<T*
>(rbuf), rsize, mpi_type<T>(),
489 src, tag, comm_, req);
// Blocking receive of up to rsize elements from rank src with tag tag.
// The receive status is stored in the local stat.
// NOTE(review): rbuf is declared const T*, but it is the receive destination:
// constness is cast away and MPI writes through the pointer. The parameter
// probably should be T*.
493 void recv(
const T* rbuf, std::size_t rsize,
int src,
int tag)
const {
496 MPI_Recv(
const_cast<T*
>(rbuf), rsize, mpi_type<T>(),
497 src, tag, comm_, &stat);
// In-place all-reduce of the single value t with operation op over comm_.
// MPI_IN_PLACE makes t both the input and the output on every rank.
515 MPI_Allreduce(MPI_IN_PLACE, &t, 1, mpi_type<T>(), op, comm_);
// Reduce a single value with operation op onto rank 0 of comm_.
// On the rank for which is_root() holds, MPI_IN_PLACE is used so the local
// copy t serves as both input and result. All other ranks send &t.
// The hard-coded root argument is 0.
// presumably is_root() means rank() == 0 — TODO confirm, since the two must
// agree.
533 template<
typename T>
T reduce(
T t, MPI_Op op)
const {
534 MPI_Reduce(
is_root() ? MPI_IN_PLACE : &t, &t, 1,
535 mpi_type<T>(), op, 0, comm_);
// In-place all-reduce of ssize elements starting at t with operation op.
// On return every rank of comm_ holds the reduced values in t.
554 template<
typename T>
void all_reduce(
T* t,
int ssize, MPI_Op op)
const {
555 MPI_Allreduce(MPI_IN_PLACE, t, ssize, mpi_type<T>(), op, comm_);
// Vector overload of all_reduce with operation op.
// Judging by the sibling pointer overload, this presumably reduces the
// elements of t in place across comm_ — TODO confirm against the body.
558 template<
typename T>
void all_reduce(std::vector<T>& t, MPI_Op op)
const {
// Reduce ssize elements starting at t with operation op onto rank dest
// (default 0).
// On rank dest, MPI_IN_PLACE is used so t is both input and result. On all
// other ranks t is the send buffer.
578 template<
typename T>
void
579 reduce(
T* t,
int ssize, MPI_Op op,
int dest=0)
const {
580 MPI_Reduce(
rank() == dest ? MPI_IN_PLACE : t, t, ssize,
581 mpi_type<T>(), op, dest, comm_);
// Fixed-count all-to-all exchange over comm_.
// The same count scnt, in units of mpi_type<T>(), is used on both the send
// side and the receive side.
// assumes sbuf and rbuf each hold scnt elements per rank of comm_ — TODO
// confirm.
585 void all_to_all(
const T* sbuf,
int scnt,
T* rbuf)
const {
587 (sbuf, scnt, mpi_type<T>(), rbuf, scnt, mpi_type<T>(), comm_);
// Variable-count all-to-all exchange that allocates and returns the receive
// buffer.
// scnts/sdispls and rcnts/rdispls are per-rank counts and displacements for
// the send and receive sides, in units of mpi_type<T>().
// rsize is computed in a loop over all ranks before rbuf is allocated with
// that many elements.
// presumably rsize is the sum of rcnts[p] — TODO confirm against the loop
// body.
590 template<
typename T,
typename A=std::allocator<T>> std::vector<T,A>
591 all_to_allv(
const T* sbuf,
int* scnts,
int* sdispls,
592 int* rcnts,
int* rdispls)
const {
593 std::size_t rsize = 0;
594 for (
int p=0; p<
size(); p++)
596 std::vector<T,A> rbuf(rsize);
598 (sbuf, scnts, sdispls, mpi_type<T>(),
599 rbuf.data(), rcnts, rdispls, mpi_type<T>(), comm_);
// Variable-count all-to-all exchange into a caller-provided receive buffer.
// scnts/sdispls describe the send side and rcnts/rdispls the receive side,
// as per-rank counts and displacements in units of mpi_type<T>().
// assumes rbuf is large enough for every rdispls[p] + rcnts[p] — TODO
// confirm.
603 template<
typename T>
void
604 all_to_allv(
const T* sbuf,
int* scnts,
int* sdispls,
605 T* rbuf,
int* rcnts,
int* rdispls)
const {
607 (sbuf, scnts, sdispls, mpi_type<T>(),
608 rbuf, rcnts, rdispls, mpi_type<T>(), comm_);
// all_to_all_v overload without an explicit MPI datatype.
// sbuf holds one outgoing vector per destination rank. rbuf receives the
// incoming data, and pbuf receives one pointer per rank.
// presumably this forwards to the overload taking a datatype, using
// mpi_type<T>(), and pbuf[p] points to the data from rank p inside rbuf —
// TODO confirm against the body.
628 template<
typename T,
typename A=std::allocator<T>>
void
629 all_to_all_v(std::vector<std::vector<T>>& sbuf, std::vector<T,A>& rbuf,
630 std::vector<T*>& pbuf)
const {
// Value-returning exchange variant: declares a local receive buffer rbuf and
// a local per-rank pointer table pbuf.
// presumably these are filled by another all_to_all_v overload and rbuf is
// returned — TODO confirm; neither the signature nor the call is visible on
// these lines.
647 template<
typename T,
typename A=std::allocator<T>> std::vector<T,A>
649 std::vector<T,A> rbuf;
650 std::vector<T*> pbuf;
// Personalised all-to-all exchange of variable-length vectors using an
// explicit MPI datatype.
//
// sbuf:  one outgoing vector per destination rank; it must have exactly
//        size() entries (asserted). It is emptied, and its storage released,
//        on both paths below.
// rbuf:  resized to the total number of incoming elements and filled.
// pbuf:  pbuf[p] is set to the start, inside rbuf, of the data received from
//        rank p. assumes pbuf already has at least P entries — TODO confirm,
//        no resize is visible here.
// Ttype: MPI datatype used for both sending and receiving.
//
// Two strategies are visible: a point-to-point path (one MPI_Irecv and one
// MPI_Isend per peer) and a single MPI_Alltoallv.
// presumably the point-to-point path is chosen when a total exceeds the int
// range, which MPI_Alltoallv displacements cannot express — TODO confirm
// against the full condition.
672 template<
typename T,
typename A=std::allocator<T>>
void
673 all_to_all_v(std::vector<std::vector<T>>& sbuf, std::vector<T,A>& rbuf,
674 std::vector<T*>& pbuf,
const MPI_Datatype Ttype)
const {
675 assert(sbuf.size() == std::size_t(
size()));
// One allocation of 4*P ints, carved into four P-length arrays:
// send sizes, receive sizes, send displacements, receive displacements.
677 std::unique_ptr<int[]> iwork(
new int[4*P]);
678 auto ssizes = iwork.get();
679 auto rsizes = ssizes + P;
680 auto sdispl = ssizes + 2*P;
681 auto rdispl = ssizes + 3*P;
// Per-destination sizes are stored as int; a size compared against INT_MAX
// triggers the overflow error message below.
682 for (
int p=0; p<P; p++) {
684 static_cast<std::size_t
>(std::numeric_limits<int>::max())) {
685 std::cerr <<
"# ERROR: 32bit integer overflow in all_to_all_v!!"
689 ssizes[p] = sbuf[p].size();
// Totals are accumulated as std::size_t, so the sum of the per-rank int
// counts cannot wrap an int.
693 std::size_t totssize = std::accumulate(ssizes, ssizes+P, std::size_t(0)),
694 totrsize = std::accumulate(rsizes, rsizes+P, std::size_t(0));
705 static_cast<std::size_t
>(std::numeric_limits<int>::max()) ||
707 static_cast<std::size_t
>(std::numeric_limits<int>::max())) {
// Point-to-point path. Requests [0,P) are receives and [P,2P) are sends,
// both indexed by peer rank.
714 rbuf.resize(totrsize);
715 std::unique_ptr<MPI_Request[]> reqs(
new MPI_Request[2*P]);
716 std::size_t displ = 0;
// Peers are visited in the rotated order r, r+1, ... (mod P).
// displ advances in that visiting order, so in this path the blocks in rbuf
// are laid out starting with the block for rank r, not rank 0.
// pbuf[dst] still locates each rank's block.
719 for (
int p=0; p<P; p++) {
720 auto dst = (r + p) % P;
721 pbuf[dst] = rbuf.data() + displ;
722 MPI_Irecv(pbuf[dst], rsizes[dst], Ttype, dst, 0, comm_, reqs.get()+dst);
723 displ += rsizes[dst];
725 for (
int p=0; p<P; p++) {
726 auto dst = (r + p) % P;
728 (sbuf[dst].data(), ssizes[dst], Ttype, dst, 0, comm_, reqs.get()+P+dst);
// Wait for all 2*P requests before releasing the send storage.
730 MPI_Waitall(2*P, reqs.get(), MPI_STATUSES_IGNORE);
// Swapping with an empty temporary frees sbuf's memory, not just its size.
731 std::vector<std::vector<T>>().swap(sbuf);
// Collective path: pack all outgoing vectors into one contiguous send
// buffer, ordered by destination rank.
733 std::unique_ptr<T[]> sendbuf_(
new T[totssize]);
734 auto sendbuf = sendbuf_.get();
// Displacements are exclusive prefix sums of the sizes.
735 sdispl[0] = rdispl[0] = 0;
736 for (
int p=1; p<P; p++) {
737 sdispl[p] = sdispl[p-1] + ssizes[p-1];
738 rdispl[p] = rdispl[p-1] + rsizes[p-1];
740 for (
int p=0; p<P; p++)
741 std::copy(sbuf[p].begin(), sbuf[p].end(), sendbuf+sdispl[p]);
// The data now lives in sendbuf, so sbuf's storage is released before rbuf
// is grown.
742 std::vector<std::vector<T>>().swap(sbuf);
743 rbuf.resize(totrsize);
744 MPI_Alltoallv(sendbuf, ssizes, sdispl, Ttype,
745 rbuf.data(), rsizes, rdispl, Ttype, comm_);
// In this path the blocks in rbuf are in rank order.
747 for (
int p=0; p<P; p++)
748 pbuf[p] = rbuf.data() + rdispl[p];
// Build a sub-communicator from P ranks of comm_: P0, P0+stride,
// P0+2*stride, ...
// NOTE(review): the assert only checks P0 + P <= size(), but the largest rank
// selected is P0 + (P-1)*stride. For stride > 1 the assert can pass while
// some selected ranks lie outside comm_ — confirm the intended bound.
769 assert(P0 + P <=
size());
771 std::vector<int> sub_ranks(P);
772 for (
int i=0; i<P; i++)
773 sub_ranks[i] = P0 + i*stride;
// Derive a group holding only the selected ranks, create the communicator
// from it, then release both temporary group handles.
// MPI_Comm_create is collective over comm_.
774 MPI_Group group, sub_group;
775 MPI_Comm_group(comm_, &group);
776 MPI_Group_incl(group, P, sub_ranks.data(), &sub_group);
777 MPI_Comm_create(comm_, sub_group, &sub_comm.comm_);
778 MPI_Group_free(&group);
779 MPI_Group_free(&sub_group);
// Build a communicator containing only rank p of comm_.
// A one-member group is derived from comm_'s group and used to create
// c0.comm_; both temporary group handles are then released.
// MPI_Comm_create is collective over comm_.
796 MPI_Group group, sub_group;
797 MPI_Comm_group(comm_, &group);
798 MPI_Group_incl(group, 1, &p, &sub_group);
799 MPI_Comm_create(comm_, sub_group, &c0.comm_);
800 MPI_Group_free(&group);
801 MPI_Group_free(&sub_group);
// Pass level 1 and the label name to MPI_Pcontrol. The effect depends on the
// profiling layer in use.
// presumably this marks the start of a named region — TODO confirm for the
// profiler used.
809 MPI_Pcontrol(1, name.c_str());
// Pass level -1 and the label name to MPI_Pcontrol. The effect depends on the
// profiling layer in use.
// presumably this marks the end of a named region — TODO confirm for the
// profiler used.
815 MPI_Pcontrol(-1, name.c_str());
// Reports whether MPI has been initialised.
// MPI_Initialized writes its answer into flag, which is then converted to
// bool. Static: no communicator object is needed.
818 static bool initialized() {
820 MPI_Initialized(&flag);
821 return static_cast<bool>(flag);
// The wrapped MPI communicator handle. Its default member initialiser is
// MPI_COMM_WORLD.
825 MPI_Comm comm_ = MPI_COMM_WORLD;
// Make comm_ refer to a duplicate of communicator c.
// MPI_COMM_NULL is stored as-is, since a null communicator cannot be
// duplicated. Any other handle is duplicated with MPI_Comm_dup, which is
// collective over c.
// NOTE(review): the handle previously held in comm_ is overwritten without
// being freed. That is harmless while comm_ is still the default
// MPI_COMM_WORLD, but it looks like a leak if this runs on an object that
// already owns a duplicated communicator — confirm.
827 void duplicate(MPI_Comm c) {
828 if (c == MPI_COMM_NULL) comm_ = c;
829 else MPI_Comm_dup(c, &comm_);