Skip to content

Commit 99d8576

Browse files
committed
btl/openib: delay UCX warning to add_procs()
If UCX is available, pml/ucx will be used instead of pml/ob1 + btl/openib, so there is no need to warn that btl/openib does not support InfiniBand. Signed-off-by: Gilles Gouaillardet <gilles@rist.or.jp> (cherry picked from commit 0a2ce58)
1 parent 804f65f commit 99d8576

File tree

4 files changed

+171
-127
lines changed

4 files changed

+171
-127
lines changed

opal/mca/btl/openib/btl_openib.c

+70-60
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,8 @@
1919
* Copyright (c) 2009 IBM Corporation. All rights reserved.
2020
* Copyright (c) 2013-2015 Intel, Inc. All rights reserved
2121
* Copyright (c) 2013-2015 NVIDIA Corporation. All rights reserved.
22-
* Copyright (c) 2014-2015 Research Organization for Information Science
23-
* and Technology (RIST). All rights reserved.
22+
* Copyright (c) 2014-2018 Research Organization for Information Science
23+
* and Technology (RIST). All rights reserved.
2424
* Copyright (c) 2014 Bull SAS. All rights reserved
2525
* $COPYRIGHT$
2626
*
@@ -1040,6 +1040,14 @@ int mca_btl_openib_add_procs(
10401040
int btl_rank = 0;
10411041
volatile mca_btl_base_endpoint_t* endpoint;
10421042

1043+
1044+
if (! openib_btl->allowed) {
1045+
opal_bitmap_clear_all_bits(reachable);
1046+
opal_show_help("help-mpi-btl-openib.txt", "ib port not selected",
1047+
true, opal_process_info.nodename,
1048+
ibv_get_device_name(openib_btl->device->ib_dev), openib_btl->port_num);
1049+
}
1050+
10431051
btl_rank = get_openib_btl_params(openib_btl, &lcl_subnet_id_port_cnt);
10441052
if( 0 > btl_rank ){
10451053
return OPAL_ERR_NOT_FOUND;
@@ -1639,75 +1647,77 @@ static int mca_btl_openib_finalize_resources(struct mca_btl_base_module_t* btl)
16391647
return OPAL_SUCCESS;
16401648
}
16411649

1642-
/* Release all QPs */
1643-
if (NULL != openib_btl->device->endpoints) {
1644-
for (ep_index=0;
1645-
ep_index < opal_pointer_array_get_size(openib_btl->device->endpoints);
1646-
ep_index++) {
1647-
endpoint=(mca_btl_openib_endpoint_t *)opal_pointer_array_get_item(openib_btl->device->endpoints,
1650+
if (openib_btl->allowed) {
1651+
/* Release all QPs */
1652+
if (NULL != openib_btl->device->endpoints) {
1653+
for (ep_index=0;
1654+
ep_index < opal_pointer_array_get_size(openib_btl->device->endpoints);
1655+
ep_index++) {
1656+
endpoint=(mca_btl_openib_endpoint_t *)opal_pointer_array_get_item(openib_btl->device->endpoints,
16481657
ep_index);
1649-
if(!endpoint) {
1650-
BTL_VERBOSE(("In finalize, got another null endpoint"));
1651-
continue;
1652-
}
1653-
if(endpoint->endpoint_btl != openib_btl) {
1654-
continue;
1655-
}
1656-
for(i = 0; i < openib_btl->device->eager_rdma_buffers_count; i++) {
1657-
if(openib_btl->device->eager_rdma_buffers[i] == endpoint) {
1658-
openib_btl->device->eager_rdma_buffers[i] = NULL;
1659-
OBJ_RELEASE(endpoint);
1658+
if(!endpoint) {
1659+
BTL_VERBOSE(("In finalize, got another null endpoint"));
1660+
continue;
16601661
}
1662+
if(endpoint->endpoint_btl != openib_btl) {
1663+
continue;
1664+
}
1665+
for(i = 0; i < openib_btl->device->eager_rdma_buffers_count; i++) {
1666+
if(openib_btl->device->eager_rdma_buffers[i] == endpoint) {
1667+
openib_btl->device->eager_rdma_buffers[i] = NULL;
1668+
OBJ_RELEASE(endpoint);
1669+
}
1670+
}
1671+
opal_pointer_array_set_item(openib_btl->device->endpoints,
1672+
ep_index, NULL);
1673+
assert(((opal_object_t*)endpoint)->obj_reference_count == 1);
1674+
OBJ_RELEASE(endpoint);
16611675
}
1662-
opal_pointer_array_set_item(openib_btl->device->endpoints,
1663-
ep_index, NULL);
1664-
assert(((opal_object_t*)endpoint)->obj_reference_count == 1);
1665-
OBJ_RELEASE(endpoint);
16661676
}
1667-
}
1668-
1669-
/* Release SRQ resources */
1670-
for(qp = 0; qp < mca_btl_openib_component.num_qps; qp++) {
1671-
if(!BTL_OPENIB_QP_TYPE_PP(qp)) {
1672-
MCA_BTL_OPENIB_CLEAN_PENDING_FRAGS(
1673-
&openib_btl->qps[qp].u.srq_qp.pending_frags[0]);
1674-
MCA_BTL_OPENIB_CLEAN_PENDING_FRAGS(
1675-
&openib_btl->qps[qp].u.srq_qp.pending_frags[1]);
1676-
if (NULL != openib_btl->qps[qp].u.srq_qp.srq) {
1677-
opal_mutex_t *lock =
1678-
&mca_btl_openib_component.srq_manager.lock;
16791677

1680-
opal_hash_table_t *srq_addr_table =
1681-
&mca_btl_openib_component.srq_manager.srq_addr_table;
1682-
1683-
opal_mutex_lock(lock);
1684-
if (OPAL_SUCCESS !=
1685-
opal_hash_table_remove_value_ptr(srq_addr_table,
1686-
&openib_btl->qps[qp].u.srq_qp.srq,
1687-
sizeof(struct ibv_srq *))) {
1688-
BTL_VERBOSE(("Failed to remove SRQ %d entry from hash table.", qp));
1689-
rc = OPAL_ERROR;
1690-
}
1691-
opal_mutex_unlock(lock);
1692-
if (0 != ibv_destroy_srq(openib_btl->qps[qp].u.srq_qp.srq)) {
1693-
BTL_VERBOSE(("Failed to close SRQ %d", qp));
1694-
rc = OPAL_ERROR;
1678+
/* Release SRQ resources */
1679+
for(qp = 0; qp < mca_btl_openib_component.num_qps; qp++) {
1680+
if(!BTL_OPENIB_QP_TYPE_PP(qp)) {
1681+
MCA_BTL_OPENIB_CLEAN_PENDING_FRAGS(
1682+
&openib_btl->qps[qp].u.srq_qp.pending_frags[0]);
1683+
MCA_BTL_OPENIB_CLEAN_PENDING_FRAGS(
1684+
&openib_btl->qps[qp].u.srq_qp.pending_frags[1]);
1685+
if (NULL != openib_btl->qps[qp].u.srq_qp.srq) {
1686+
opal_mutex_t *lock =
1687+
&mca_btl_openib_component.srq_manager.lock;
1688+
1689+
opal_hash_table_t *srq_addr_table =
1690+
&mca_btl_openib_component.srq_manager.srq_addr_table;
1691+
1692+
opal_mutex_lock(lock);
1693+
if (OPAL_SUCCESS !=
1694+
opal_hash_table_remove_value_ptr(srq_addr_table,
1695+
&openib_btl->qps[qp].u.srq_qp.srq,
1696+
sizeof(struct ibv_srq *))) {
1697+
BTL_VERBOSE(("Failed to remove SRQ %d entry from hash table.", qp));
1698+
rc = OPAL_ERROR;
1699+
}
1700+
opal_mutex_unlock(lock);
1701+
if (0 != ibv_destroy_srq(openib_btl->qps[qp].u.srq_qp.srq)) {
1702+
BTL_VERBOSE(("Failed to close SRQ %d", qp));
1703+
rc = OPAL_ERROR;
1704+
}
16951705
}
1696-
}
16971706

1698-
OBJ_DESTRUCT(&openib_btl->qps[qp].u.srq_qp.pending_frags[0]);
1699-
OBJ_DESTRUCT(&openib_btl->qps[qp].u.srq_qp.pending_frags[1]);
1707+
OBJ_DESTRUCT(&openib_btl->qps[qp].u.srq_qp.pending_frags[0]);
1708+
OBJ_DESTRUCT(&openib_btl->qps[qp].u.srq_qp.pending_frags[1]);
1709+
}
17001710
}
1701-
}
17021711

1703-
/* Finalize the CPC modules on this openib module */
1704-
for (i = 0; i < openib_btl->num_cpcs; ++i) {
1705-
if (NULL != openib_btl->cpcs[i]->cbm_finalize) {
1706-
openib_btl->cpcs[i]->cbm_finalize(openib_btl, openib_btl->cpcs[i]);
1712+
/* Finalize the CPC modules on this openib module */
1713+
for (i = 0; i < openib_btl->num_cpcs; ++i) {
1714+
if (NULL != openib_btl->cpcs[i]->cbm_finalize) {
1715+
openib_btl->cpcs[i]->cbm_finalize(openib_btl, openib_btl->cpcs[i]);
1716+
}
1717+
free(openib_btl->cpcs[i]);
17071718
}
1708-
free(openib_btl->cpcs[i]);
1719+
free(openib_btl->cpcs);
17091720
}
1710-
free(openib_btl->cpcs);
17111721

17121722
/* Release device if there are no more users */
17131723
if(!(--openib_btl->device->btls)) {

opal/mca/btl/openib/btl_openib.h

+7-2
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,8 @@
1818
* Copyright (c) 2009-2010 Oracle and/or its affiliates. All rights reserved.
1919
* Copyright (c) 2013-2014 NVIDIA Corporation. All rights reserved.
2020
* Copyright (c) 2014 Bull SAS. All rights reserved.
21-
* Copyright (c) 2015-2016 Research Organization for Information Science
22-
* and Technology (RIST). All rights reserved.
21+
* Copyright (c) 2015-2018 Research Organization for Information Science
22+
* and Technology (RIST). All rights reserved.
2323
* $COPYRIGHT$
2424
*
2525
* Additional copyrights may follow
@@ -164,6 +164,9 @@ struct mca_btl_openib_component_t {
164164
int ib_num_btls;
165165
/**< number of devices available to the openib component */
166166

167+
int ib_allowed_btls;
168+
/**< number of devices allowed to the openib component */
169+
167170
struct mca_btl_openib_module_t **openib_btls;
168171
/**< array of available BTLs */
169172

@@ -501,6 +504,8 @@ struct mca_btl_openib_module_t {
501504
int local_procs; /** number of local procs */
502505

503506
bool atomic_ops_be; /** atomic result is big endian */
507+
508+
bool allowed; /** is this port allowed */
504509
};
505510
typedef struct mca_btl_openib_module_t mca_btl_openib_module_t;
506511

0 commit comments

Comments
 (0)