Skip to content

v2.x: fix MPI_Alltoallw() with zero size messages #3487

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Aug 9, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 10 additions & 7 deletions ompi/mca/coll/base/coll_base_alltoallv.c
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
* Copyright (c) 2013 Los Alamos National Security, LLC. All Rights
* reserved.
* Copyright (c) 2013 FUJITSU LIMITED. All rights reserved.
* Copyright (c) 2014-2016 Research Organization for Information Science
* Copyright (c) 2014-2017 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
*
Expand Down Expand Up @@ -45,17 +45,16 @@ mca_coll_base_alltoallv_intra_basic_inplace(const void *rbuf, const int *rcounts
int i, j, size, rank, err=MPI_SUCCESS;
ompi_request_t *req;
char *allocated_buffer, *tmp_buffer;
size_t max_size, rdtype_size;
OPAL_PTRDIFF_TYPE ext, gap;
size_t max_size;
OPAL_PTRDIFF_TYPE ext, gap = 0;

/* Initialize. */

size = ompi_comm_size(comm);
rank = ompi_comm_rank(comm);
ompi_datatype_type_size(rdtype, &rdtype_size);

/* If only one process, we're done. */
if (1 == size || 0 == rdtype_size) {
if (1 == size) {
return MPI_SUCCESS;
}

Expand All @@ -67,6 +66,10 @@ mca_coll_base_alltoallv_intra_basic_inplace(const void *rbuf, const int *rcounts
}
/* The gap will always be the same as we are working on the same datatype */

if (OPAL_UNLIKELY(0 == max_size)) {
return MPI_SUCCESS;
}

/* Allocate a temporary buffer */
allocated_buffer = calloc (max_size, 1);
if (NULL == allocated_buffer) {
Expand All @@ -78,7 +81,7 @@ mca_coll_base_alltoallv_intra_basic_inplace(const void *rbuf, const int *rcounts
/* in-place alltoallv slow algorithm (but works) */
for (i = 0 ; i < size ; ++i) {
for (j = i+1 ; j < size ; ++j) {
if (i == rank && rcounts[j]) {
if (i == rank && 0 != rcounts[j]) {
/* Copy the data into the temporary buffer */
err = ompi_datatype_copy_content_same_ddt (rdtype, rcounts[j],
tmp_buffer, (char *) rbuf + rdisps[j] * ext);
Expand All @@ -93,7 +96,7 @@ mca_coll_base_alltoallv_intra_basic_inplace(const void *rbuf, const int *rcounts
j, MCA_COLL_BASE_TAG_ALLTOALLV, MCA_PML_BASE_SEND_STANDARD,
comm));
if (MPI_SUCCESS != err) { goto error_hndl; }
} else if (j == rank && rcounts[i]) {
} else if (j == rank && 0 != rcounts[i]) {
/* Copy the data into the temporary buffer */
err = ompi_datatype_copy_content_same_ddt (rdtype, rcounts[i],
tmp_buffer, (char *) rbuf + rdisps[i] * ext);
Expand Down
3 changes: 2 additions & 1 deletion ompi/mca/coll/libnbc/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
# Copyright (c) 2013 Los Alamos National Security, LLC. All rights
# reserved.
# Copyright (c) 2017 Research Organization for Information Science
# and Technology (RIST). All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
Expand All @@ -37,7 +39,6 @@ sources = \
nbc_ialltoallw.c \
nbc_ibarrier.c \
nbc_ibcast.c \
nbc_ibcast_inter.c \
nbc_iexscan.c \
nbc_igather.c \
nbc_igatherv.c \
Expand Down
21 changes: 20 additions & 1 deletion ompi/mca/coll/libnbc/nbc.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
* rights reserved.
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2015-2016 Research Organization for Information Science
* Copyright (c) 2015-2017 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
*
* Author(s): Torsten Hoefler <htor@cs.indiana.edu>
Expand Down Expand Up @@ -709,6 +709,25 @@ int NBC_Start(NBC_Handle *handle, NBC_Schedule *schedule) {
return OMPI_SUCCESS;
}

/*
 * Create a handle for an already-built schedule, start the schedule, and
 * hand the handle back to the caller as a request.
 *
 * schedule  schedule passed on to NBC_Start()
 * comm      communicator passed to NBC_Init_handle()
 * module    libnbc module passed to NBC_Init_handle()
 * request   [out] receives the handle, cast to ompi_request_t *; written
 *           only when the function succeeds
 * tmpbuf    stored in the handle's tmpbuf field before the schedule is
 *           started (may be NULL)
 *
 * Returns OMPI_SUCCESS on success, otherwise the error code reported by
 * NBC_Init_handle() or NBC_Start().
 *
 * NOTE(review): if NBC_Start() fails the handle is given back through
 * NBC_Return_handle(); whether that call also disposes of the schedule
 * and/or tmpbuf is not visible from here -- confirm before releasing
 * either of them again in a caller's error path.
 */
int NBC_Schedule_request(NBC_Schedule *schedule, ompi_communicator_t *comm, ompi_coll_libnbc_module_t *module, ompi_request_t **request, void *tmpbuf) {
  NBC_Handle *nbc_handle;
  int rc = NBC_Init_handle (comm, &nbc_handle, module);

  if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
    /* no handle was obtained, so there is nothing to give back */
    return rc;
  }

  /* attach the caller's scratch buffer before any round is started */
  nbc_handle->tmpbuf = tmpbuf;

  rc = NBC_Start (nbc_handle, schedule);
  if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
    /* start failed: return the handle and leave *request untouched */
    NBC_Return_handle (nbc_handle);
  } else {
    *request = (ompi_request_t *) nbc_handle;
  }

  /* rc is OMPI_SUCCESS here exactly when the schedule was started */
  return rc;
}

#ifdef NBC_CACHE_SCHEDULE
void NBC_SchedCache_args_delete_key_dummy(void *k) {
/* do nothing because the key and the data element are identical :-)
Expand Down
22 changes: 2 additions & 20 deletions ompi/mca/coll/libnbc/nbc_iallgather.c
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,6 @@ int ompi_coll_libnbc_iallgather(const void* sendbuf, int sendcount, MPI_Datatype
#ifdef NBC_CACHE_SCHEDULE
NBC_Allgather_args *args, *found, search;
#endif
NBC_Handle *handle;
ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module;

NBC_IN_PLACE(sendbuf, recvbuf, inplace);
Expand Down Expand Up @@ -147,20 +146,12 @@ int ompi_coll_libnbc_iallgather(const void* sendbuf, int sendcount, MPI_Datatype
}
#endif

res = NBC_Init_handle (comm, &handle, libnbc_module);
res = NBC_Schedule_request(schedule, comm, libnbc_module, request, NULL);
if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
OBJ_RELEASE(schedule);
return res;
}

res = NBC_Start (handle, schedule);
if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
OMPI_COLL_LIBNBC_REQUEST_RETURN(handle);
return res;
}

*request = (ompi_request_t *) handle;

return OMPI_SUCCESS;
}

Expand All @@ -172,7 +163,6 @@ int ompi_coll_libnbc_iallgather_inter(const void* sendbuf, int sendcount, MPI_Da
MPI_Aint rcvext;
NBC_Schedule *schedule;
char *rbuf;
NBC_Handle *handle;
ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module;

res = ompi_datatype_type_extent(recvtype, &rcvext);
Expand Down Expand Up @@ -213,19 +203,11 @@ int ompi_coll_libnbc_iallgather_inter(const void* sendbuf, int sendcount, MPI_Da
return res;
}

res = NBC_Init_handle (comm, &handle, libnbc_module);
res = NBC_Schedule_request(schedule, comm, libnbc_module, request, NULL);
if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
OBJ_RELEASE(schedule);
return res;
}

res = NBC_Start (handle, schedule);
if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
OMPI_COLL_LIBNBC_REQUEST_RETURN(handle);
return res;
}

*request = (ompi_request_t *) handle;

return OMPI_SUCCESS;
}
24 changes: 3 additions & 21 deletions ompi/mca/coll/libnbc/nbc_iallgatherv.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
* Copyright (c) 2012 Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2014-2016 Research Organization for Information Science
* Copyright (c) 2014-2017 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
*
*/
Expand All @@ -37,7 +37,6 @@ int ompi_coll_libnbc_iallgatherv(const void* sendbuf, int sendcount, MPI_Datatyp
MPI_Aint rcvext;
NBC_Schedule *schedule;
char *rbuf, *sbuf, inplace;
NBC_Handle *handle;
ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module;

NBC_IN_PLACE(sendbuf, recvbuf, inplace);
Expand Down Expand Up @@ -96,20 +95,12 @@ int ompi_coll_libnbc_iallgatherv(const void* sendbuf, int sendcount, MPI_Datatyp
return res;
}

res = NBC_Init_handle (comm, &handle, libnbc_module);
res = NBC_Schedule_request (schedule, comm, libnbc_module, request, NULL);
if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
OBJ_RELEASE(schedule);
return res;
}

res = NBC_Start (handle, schedule);
if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
NBC_Return_handle (handle);
return res;
}

*request = (ompi_request_t *) handle;

return OMPI_SUCCESS;
}

Expand All @@ -120,7 +111,6 @@ int ompi_coll_libnbc_iallgatherv_inter(const void* sendbuf, int sendcount, MPI_D
int res, rsize;
MPI_Aint rcvext;
NBC_Schedule *schedule;
NBC_Handle *handle;
ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module;

rsize = ompi_comm_remote_size (comm);
Expand Down Expand Up @@ -165,19 +155,11 @@ int ompi_coll_libnbc_iallgatherv_inter(const void* sendbuf, int sendcount, MPI_D
return res;
}

res = NBC_Init_handle (comm, &handle, libnbc_module);
res = NBC_Schedule_request(schedule, comm, libnbc_module, request, NULL);
if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
OBJ_RELEASE(schedule);
return res;
}

res = NBC_Start (handle, schedule);
if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
NBC_Return_handle (handle);
return res;
}

*request = (ompi_request_t *) handle;

return OMPI_SUCCESS;
}
Loading