Skip to content

Commit 79e4488

Browse files
authored
Merge pull request #10397 from drwootton/prot-report-ucx
Implement reporting of transport/device names used by MPI application when using UCX
2 parents 528ff5d + fc1d1cd commit 79e4488

File tree

5 files changed

+280
-61
lines changed

5 files changed

+280
-61
lines changed

config/ompi_check_ucx.m4

+3
Original file line number | Diff line number | Diff line change
@@ -111,6 +111,9 @@ AC_DEFUN([OMPI_CHECK_UCX],[
111111
[AC_DEFINE([HAVE_UCP_ATTR_MEMORY_TYPES], [1],
112112
[have memory types attribute])], [],
113113
[#include <ucp/api/ucp.h>])
114+
AC_CHECK_DECLS([UCP_EP_ATTR_FIELD_TRANSPORTS],
115+
[], [],
116+
[#include <ucp/api/ucp.h>])
114117
AC_CHECK_DECLS([ucp_tag_send_nbx,
115118
ucp_tag_send_sync_nbx,
116119
ucp_tag_recv_nbx],

ompi/mca/hook/comm_method/hook_comm_method_fns.c

+150-37
Original file line number | Diff line number | Diff line change
@@ -25,9 +25,10 @@
2525
// For converting comm_method strings to comm_method id# and back.
2626
// This starts as our local set of strings, but gets Allreduced into
2727
// a global mapping so all the strings at all the ranks are represented.
28-
// If an MCA's name is more than 15 chars it gets truncated.
29-
#define COMM_METHOD_STRING_SIZE 16
30-
#define MAX_COMM_METHODS 50
28+
#define COMM_METHOD_STRING_SIZE 200
29+
#define MAX_COMM_METHODS 1000
30+
#define UCX_TAG "ucx="
31+
3132
typedef struct {
3233
int n;
3334
char str[MAX_COMM_METHODS][COMM_METHOD_STRING_SIZE];
@@ -87,27 +88,69 @@ lookup_btl_name_for_send(ompi_communicator_t* comm, int rank) {
8788
static char *
8889
comm_method_string(MPI_Comm comm, int rank, int *comm_mode) {
8990
char *p, *btl;
90-
char *string = malloc(COMM_METHOD_STRING_SIZE);
91-
92-
if (!string) { return NULL; }
93-
94-
p = lookup_pml_name();
95-
if (p && 0==strncmp("ob1", p, 4)) { // BTL
96-
if (comm_mode) { *comm_mode = MODE_IS_BTL; }
97-
btl = lookup_btl_name_for_send(comm, rank);
98-
if (NULL == btl) {
99-
strncpy(string, "n/a", COMM_METHOD_STRING_SIZE);
100-
} else {
101-
strncpy(string, btl, COMM_METHOD_STRING_SIZE);
91+
char *string, *comma_delim = "";
92+
mca_pml_transports_t *transports = NULL;
93+
int name_length;
94+
unsigned int i;
95+
if (NULL != mca_pml.pml_get_transports) {
96+
transports = mca_pml.pml_get_transports(comm, rank);
97+
}
98+
if (NULL == transports) {
99+
string = malloc(COMM_METHOD_STRING_SIZE);
100+
if (!string) {
101+
return NULL;
102+
}
103+
p = lookup_pml_name();
104+
if (p && 0==strncmp("ob1", p, 4)) { // BTL
105+
if (comm_mode) { *comm_mode = MODE_IS_BTL; }
106+
btl = lookup_btl_name_for_send(comm, rank);
107+
if (NULL == btl) {
108+
strncpy(string, "n/a", COMM_METHOD_STRING_SIZE);
109+
} else {
110+
strncpy(string, btl, COMM_METHOD_STRING_SIZE);
111+
}
112+
}
113+
else if (p && 0==strncmp("cm", p, 3)) { // MTL
114+
if (comm_mode) { *comm_mode = MODE_IS_MTL; }
115+
strncpy(string, lookup_mtl_name(), COMM_METHOD_STRING_SIZE);
116+
} else { // PML
117+
if (comm_mode) { *comm_mode = MODE_IS_PML; }
118+
if (p) {
119+
strncpy(string, p, COMM_METHOD_STRING_SIZE);
120+
}
121+
else {
122+
strncpy(string, "n/a", COMM_METHOD_STRING_SIZE);
123+
}
102124
}
103125
}
104-
else if (p && 0==strncmp("cm", p, 3)) { // MTL
105-
if (comm_mode) { *comm_mode = MODE_IS_MTL; }
106-
strncpy(string, lookup_mtl_name(), COMM_METHOD_STRING_SIZE);
107-
} else { // PML
108-
if (comm_mode) { *comm_mode = MODE_IS_PML; }
109-
strncpy(string, p, COMM_METHOD_STRING_SIZE);
126+
else {
127+
/* Determine how much memory is needed to store UCX transport info */
128+
char *s = UCX_TAG;
129+
name_length = strlen(s);
130+
for (i = 0; i < transports->count; i++) {
131+
name_length = name_length + strlen(transports->entries[i].transport_name) +
132+
strlen(transports->entries[i].device_name) + 2;
133+
}
134+
/* Allocate storage to store UCX transport info then build the info string */
135+
string = malloc(name_length);
136+
if (!string) {
137+
return NULL;
138+
}
139+
strcpy(string, s);
140+
for (i = 0; i < transports->count; i++) {
141+
strcat(string, comma_delim);
142+
comma_delim = ",";
143+
strcat(string, transports->entries[i].transport_name);
144+
strcat(string, ";");
145+
strcat(string, transports->entries[i].device_name);
146+
}
147+
}
148+
if (comm_mode) {
149+
// UCX is used for PML mode only
150+
*comm_mode = MODE_IS_PML;
110151
}
152+
free(transports->entries);
153+
free(transports);
111154
return string;
112155
}
113156

@@ -135,7 +178,7 @@ lookup_string_in_conversion_struct(comm_method_string_conversion_t *data, char *
135178
{
136179
int i;
137180
for (i=0; i<data->n; ++i) {
138-
if (0==strncmp(data->str[i], string, COMM_METHOD_STRING_SIZE)) {
181+
if (0==strcmp(data->str[i], string)) {
139182
return i;
140183
}
141184
}
@@ -160,7 +203,6 @@ add_string_to_conversion_struct(comm_method_string_conversion_t *data, char *str
160203
++(data->n);
161204
}
162205
}
163-
qsort(&data->str[1], data->n - 1, COMM_METHOD_STRING_SIZE, &mycompar);
164206
}
165207

166208
// For MPI_Allreduce of a comm_method_string_conversion_t
@@ -174,7 +216,6 @@ static void myfn(void* invec, void* inoutvec, int *len, MPI_Datatype *dt) {
174216
for (j=0; j<b->n; ++j) { // for each entry j in 'b', add it to 'a'
175217
add_string_to_conversion_struct(a, b->str[j]);
176218
}
177-
qsort(&a->str[1], a->n - 1, COMM_METHOD_STRING_SIZE, &mycompar);
178219
}
179220
}
180221

@@ -321,14 +362,15 @@ abbreviate_list_into_string(char *str, int max, int *list, int nlist)
321362
static void
322363
ompi_report_comm_methods(int called_from_location)
323364
{
324-
int numhosts, i, j, k;
365+
int numhosts, i, j, k, n;
325366
int max2Dprottable = 12;
326367
int max2D1Cprottable = 36;
327368
int hpmp_myrank;
328369
int mylocalrank, nlocalranks, myleaderrank, nleaderranks;
329370
int ret;
330371
ompi_communicator_t *local_comm, *leader_comm;
331372
int *method;
373+
unsigned char *methods_used;
332374
char *hoststring;
333375
char **allhoststrings;
334376
int comm_mode; // MODE_IS_BTL / MTL / PML
@@ -423,17 +465,16 @@ ompi_report_comm_methods(int called_from_location)
423465

424466
// If we're running during init, establish connections between all peers
425467
// (in leader_comm, which is all the ranks that are here at this point)
426-
if (CALLED_FROM_MPI_INIT == called_from_location) {
468+
if (called_from_location == 1) {
469+
int speer = (myleaderrank + 1) % nleaderranks;
470+
int rpeer = (myleaderrank - 1 + nleaderranks) % nleaderranks;
427471
for (i=0; i<=nleaderranks/2; ++i) {
428472
// (Examples to show why the loop is i<=nleaderranks/2)
429473
// np4 : 0 1 2 3 i=0 0c0 i=1 0c0&1&3 i=2 0c0&1&3&2
430474
// np5 : 0 1 2 3 4 i=0 0c0 i=1 0c0&1&4 i=2 0c0&1&4&2&3
431475
MPI_Request sreq, rreq;
432476
MPI_Status status;
433477
int sbuf, rbuf;
434-
int speer = (myleaderrank + 1) % nleaderranks;
435-
int rpeer = (myleaderrank - 1 + nleaderranks) % nleaderranks;
436-
437478
sbuf = rbuf = 0;
438479
MCA_PML_CALL(isend(&sbuf, 1, MPI_INT, speer, 99,
439480
MCA_PML_BASE_SEND_STANDARD,
@@ -442,6 +483,11 @@ ompi_report_comm_methods(int called_from_location)
442483
leader_comm, &rreq));
443484
ompi_request_wait(&sreq, &status);
444485
ompi_request_wait(&rreq, &status);
486+
speer = (speer + 1) % nleaderranks;
487+
rpeer = (rpeer - 1) % nleaderranks;
488+
if (rpeer < 0) {
489+
rpeer = nleaderranks - 1;
490+
}
445491
}
446492
}
447493

@@ -471,19 +517,26 @@ ompi_report_comm_methods(int called_from_location)
471517
MPI_Op_free(&myop);
472518
MPI_Type_free(&mydt);
473519

520+
// Sort communication method string arrays after reduction
521+
qsort(&comm_method_string_conversion.str[1],
522+
comm_method_string_conversion.n - 1, COMM_METHOD_STRING_SIZE, &mycompar);
523+
474524
// Each host leader fills in a "numhosts" sized array method[] of
475525
// how it communicates with each peer.
526+
// Use a bitmap to keep track of which communication methods are used
527+
n = ((comm_method_string_conversion.n + 7) / 8) * sizeof(unsigned char);
528+
methods_used = malloc(n);
529+
memset(methods_used, 0, n);
530+
476531
for (i=0; i<nleaderranks; ++i) {
477532
method[i] = comm_method(leader_comm, i);
478533

479534
// For looking at our own local host though, we don't really want "self"
480535
// unless there's only one rank and "self" is the best answer. So if
481536
// there's more than one rank on our host, we get our local-host's
482537
// communication method for a neighbor on this host.
483-
if (i == myleaderrank) {
484-
if (nlocalranks > 1) {
485-
method[i] = comm_method(local_comm, 1);
486-
}
538+
if ((i == myleaderrank) && (nlocalranks > 1)) {
539+
method[i] = comm_method(local_comm, 1);
487540
}
488541
}
489542

@@ -493,6 +546,8 @@ ompi_report_comm_methods(int called_from_location)
493546
{
494547
int len, *lens, *disps;
495548

549+
// First get the array of host strings (host names and task lists)
550+
// for all nodes.
496551
len = strlen(hoststring) + 1;
497552
if (myleaderrank == 0) {
498553
lens = malloc(nleaderranks * sizeof(int));
@@ -533,7 +588,9 @@ ompi_report_comm_methods(int called_from_location)
533588
free(lens);
534589
free(disps);
535590
}
536-
// and a simpler gather for the methods
591+
592+
// and a simpler gather for the arrays of communication method indices
593+
// for all nodes.
537594
leader_comm->c_coll->coll_gather(
538595
method, nleaderranks, MPI_INT,
539596
method, nleaderranks, MPI_INT,
@@ -581,14 +638,22 @@ ompi_report_comm_methods(int called_from_location)
581638
// 2: 2d table
582639
if (nleaderranks <= max2Dprottable) {
583640
char *str, *p;
584-
int tmp, per;
641+
int tmp, per, has_ucx_transport;
585642
int strlens[NUM_COMM_METHODS];
586643

587644
// characters per entry in the 2d table, must be large enough
588645
// for the digits needed for host numbers, and for whatever is
589646
// the longest string used in the table, plus a space.
590647
for (i=0; i<NUM_COMM_METHODS; ++i) {
591-
strlens[i] = strlen(comm_method_to_string(i));
648+
p = comm_method_to_string(i);
649+
if (0 == strncmp(p, UCX_TAG, strlen(UCX_TAG))) {
650+
// Assume no more than 1000 UCX transport strings
651+
// See PML_UCX_MAX_TRANSPORT_ENTRIES in pml_ucx.c
652+
strlens[i] = strlen("ucx[000]");
653+
}
654+
else {
655+
strlens[i] = strlen(p);
656+
}
592657
}
593658
per = 2;
594659
tmp = nleaderranks;
@@ -610,19 +675,38 @@ ompi_report_comm_methods(int called_from_location)
610675
p[j] = 0;
611676
p += j;
612677
}
678+
// Use a bitmap to trace which UCX transport strings are used.
679+
n = (nleaderranks + 7) / 8;
680+
methods_used = malloc(n * sizeof(unsigned char));
681+
memset(methods_used, 0, n);
613682
tmp = (int)strlen(str);
614683
--p;
615684
while (p>=str && ((*p)==' ')) { *(p--)=0; }
616685
printf(" host | %s\n", str);
617686
memset(str, (int)'=', tmp);
618687
str[tmp] = 0;
619688
printf("======|=%s\n", str);
689+
has_ucx_transport = 0;
620690

621691
for (i=0; i<nleaderranks; ++i) {
622692
str[0] = 0;
623693
p = str;
624694
for (k=0; k<nleaderranks; ++k) {
625-
strcat(p, comm_method_to_string(method[i * nleaderranks + k]));
695+
char *method_string;
696+
char ucx_label[10];
697+
698+
method_string = comm_method_to_string(method[i * nleaderranks + k]);
699+
if (0 == strncmp(method_string, UCX_TAG, strlen(UCX_TAG))) {
700+
n = lookup_string_in_conversion_struct(&comm_method_string_conversion,
701+
method_string);
702+
sprintf(ucx_label, "ucx[%3d]", n);
703+
strcat(p, ucx_label);
704+
methods_used[n / 8] |= (1 << (n % 8));
705+
has_ucx_transport = 1;
706+
}
707+
else {
708+
strcat(p, method_string);
709+
}
626710
for (j=(int)strlen(p); j<per; ++j) {
627711
p[j] = ' ';
628712
}
@@ -635,6 +719,35 @@ ompi_report_comm_methods(int called_from_location)
635719
}
636720
printf("\n");
637721
free(str);
722+
if (has_ucx_transport) {
723+
printf("UCX Transport/Device\n");
724+
for (i = 0; i < comm_method_string_conversion.n; i++) {
725+
// Check bitmap to check if method was used
726+
if (methods_used[i / 8] & (1 << (i % 8))) {
727+
p = comm_method_to_string(i);
728+
if (0 == strncmp(p, UCX_TAG, strlen(UCX_TAG))) {
729+
char *temp_str, *token;
730+
n = lookup_string_in_conversion_struct(&comm_method_string_conversion, p);
731+
printf("ucx[%3d]:\n", n);
732+
temp_str = strdup(p + 4);
733+
token = strtok(temp_str, ",");
734+
while (NULL != token) {
735+
p = strchr(token, ';');
736+
if (NULL == p) {
737+
printf(" %-16s\n", token);
738+
}
739+
else {
740+
*p = '\0';
741+
printf(" %-16s %-16s\n", token, p + 1);
742+
}
743+
token = strtok(NULL, ",");
744+
}
745+
free(temp_str);
746+
}
747+
}
748+
}
749+
}
750+
free(methods_used);
638751
}
639752
else if (nleaderranks <= max2D1Cprottable) {
640753
char *str, *p;

ompi/mca/pml/base/pml_base_frame.c

+2-1
Original file line number | Diff line number | Diff line change
@@ -88,7 +88,8 @@ mca_pml_base_module_t mca_pml = {
8888
NULL, /* pml_dump */
8989
0, /* pml_max_contextid */
9090
0, /* pml_max_tag */
91-
0 /* pml_flags */
91+
0, /* pml_flags */
92+
NULL /* pml_get_transports */
9293
};
9394

9495
mca_pml_base_component_t mca_pml_base_selected_component = {{0}};

0 commit comments

Comments
 (0)