// For converting comm_method strings to comm_method id# and back.
// This starts as our local set of strings, but gets Allreduced into
// a global mapping so all the strings at all the ranks are represented.
//
// Each slot of the string table is COMM_METHOD_STRING_SIZE bytes, so a
// stored string holds at most COMM_METHOD_STRING_SIZE - 1 characters plus
// the terminating NUL. The table has room for MAX_COMM_METHODS strings.
#define COMM_METHOD_STRING_SIZE 200
#define MAX_COMM_METHODS 1000
// Prefix of the method strings that are built from a PML transport list
// ("ucx=<transport>;<device>,<transport>;<device>,...").
#define UCX_TAG "ucx="

31
32
typedef struct {
32
33
int n ;
33
34
char str [MAX_COMM_METHODS ][COMM_METHOD_STRING_SIZE ];
@@ -87,27 +88,69 @@ lookup_btl_name_for_send(ompi_communicator_t* comm, int rank) {
87
88
static char *
88
89
comm_method_string (MPI_Comm comm , int rank , int * comm_mode ) {
89
90
char * p , * btl ;
90
- char * string = malloc (COMM_METHOD_STRING_SIZE );
91
-
92
- if (!string ) { return NULL ; }
93
-
94
- p = lookup_pml_name ();
95
- if (p && 0 == strncmp ("ob1" , p , 4 )) { // BTL
96
- if (comm_mode ) { * comm_mode = MODE_IS_BTL ; }
97
- btl = lookup_btl_name_for_send (comm , rank );
98
- if (NULL == btl ) {
99
- strncpy (string , "n/a" , COMM_METHOD_STRING_SIZE );
100
- } else {
101
- strncpy (string , btl , COMM_METHOD_STRING_SIZE );
91
+ char * string , * comma_delim = "" ;
92
+ mca_pml_transports_t * transports = NULL ;
93
+ int name_length ;
94
+ unsigned int i ;
95
+ if (NULL != mca_pml .pml_get_transports ) {
96
+ transports = mca_pml .pml_get_transports (comm , rank );
97
+ }
98
+ if (NULL == transports ) {
99
+ string = malloc (COMM_METHOD_STRING_SIZE );
100
+ if (!string ) {
101
+ return NULL ;
102
+ }
103
+ p = lookup_pml_name ();
104
+ if (p && 0 == strncmp ("ob1" , p , 4 )) { // BTL
105
+ if (comm_mode ) { * comm_mode = MODE_IS_BTL ; }
106
+ btl = lookup_btl_name_for_send (comm , rank );
107
+ if (NULL == btl ) {
108
+ strncpy (string , "n/a" , COMM_METHOD_STRING_SIZE );
109
+ } else {
110
+ strncpy (string , btl , COMM_METHOD_STRING_SIZE );
111
+ }
112
+ }
113
+ else if (p && 0 == strncmp ("cm" , p , 3 )) { // MTL
114
+ if (comm_mode ) { * comm_mode = MODE_IS_MTL ; }
115
+ strncpy (string , lookup_mtl_name (), COMM_METHOD_STRING_SIZE );
116
+ } else { // PML
117
+ if (comm_mode ) { * comm_mode = MODE_IS_PML ; }
118
+ if (p ) {
119
+ strncpy (string , p , COMM_METHOD_STRING_SIZE );
120
+ }
121
+ else {
122
+ strncpy (string , "n/a" , COMM_METHOD_STRING_SIZE );
123
+ }
102
124
}
103
125
}
104
- else if (p && 0 == strncmp ("cm" , p , 3 )) { // MTL
105
- if (comm_mode ) { * comm_mode = MODE_IS_MTL ; }
106
- strncpy (string , lookup_mtl_name (), COMM_METHOD_STRING_SIZE );
107
- } else { // PML
108
- if (comm_mode ) { * comm_mode = MODE_IS_PML ; }
109
- strncpy (string , p , COMM_METHOD_STRING_SIZE );
126
+ else {
127
+ /* Determine how much memory is needed to store UCX transport info */
128
+ char * s = UCX_TAG ;
129
+ name_length = strlen (s );
130
+ for (i = 0 ; i < transports -> count ; i ++ ) {
131
+ name_length = name_length + strlen (transports -> entries [i ].transport_name ) +
132
+ strlen (transports -> entries [i ].device_name ) + 2 ;
133
+ }
134
+ /* Allocate storage to store UCX transport info then build the info string */
135
+ string = malloc (name_length );
136
+ if (!string ) {
137
+ return NULL ;
138
+ }
139
+ strcpy (string , s );
140
+ for (i = 0 ; i < transports -> count ; i ++ ) {
141
+ strcat (string , comma_delim );
142
+ comma_delim = "," ;
143
+ strcat (string , transports -> entries [i ].transport_name );
144
+ strcat (string , ";" );
145
+ strcat (string , transports -> entries [i ].device_name );
146
+ }
147
+ }
148
+ if (comm_mode ) {
149
+ // UCX is used for PML mode only
150
+ * comm_mode = MODE_IS_PML ;
110
151
}
152
+ free (transports -> entries );
153
+ free (transports );
111
154
return string ;
112
155
}
113
156
@@ -135,7 +178,7 @@ lookup_string_in_conversion_struct(comm_method_string_conversion_t *data, char *
135
178
{
136
179
int i ;
137
180
for (i = 0 ; i < data -> n ; ++ i ) {
138
- if (0 == strncmp (data -> str [i ], string , COMM_METHOD_STRING_SIZE )) {
181
+ if (0 == strcmp (data -> str [i ], string )) {
139
182
return i ;
140
183
}
141
184
}
@@ -160,7 +203,6 @@ add_string_to_conversion_struct(comm_method_string_conversion_t *data, char *str
160
203
++ (data -> n );
161
204
}
162
205
}
163
- qsort (& data -> str [1 ], data -> n - 1 , COMM_METHOD_STRING_SIZE , & mycompar );
164
206
}
165
207
166
208
// For MPI_Allreduce of a comm_method_string_conversion_t
@@ -174,7 +216,6 @@ static void myfn(void* invec, void* inoutvec, int *len, MPI_Datatype *dt) {
174
216
for (j = 0 ; j < b -> n ; ++ j ) { // for each entry j in 'b', add it to 'a'
175
217
add_string_to_conversion_struct (a , b -> str [j ]);
176
218
}
177
- qsort (& a -> str [1 ], a -> n - 1 , COMM_METHOD_STRING_SIZE , & mycompar );
178
219
}
179
220
}
180
221
@@ -321,14 +362,15 @@ abbreviate_list_into_string(char *str, int max, int *list, int nlist)
321
362
static void
322
363
ompi_report_comm_methods (int called_from_location )
323
364
{
324
- int numhosts , i , j , k ;
365
+ int numhosts , i , j , k , n ;
325
366
int max2Dprottable = 12 ;
326
367
int max2D1Cprottable = 36 ;
327
368
int hpmp_myrank ;
328
369
int mylocalrank , nlocalranks , myleaderrank , nleaderranks ;
329
370
int ret ;
330
371
ompi_communicator_t * local_comm , * leader_comm ;
331
372
int * method ;
373
+ unsigned char * methods_used ;
332
374
char * hoststring ;
333
375
char * * allhoststrings ;
334
376
int comm_mode ; // MODE_IS_BTL / MTL / PML
@@ -423,17 +465,16 @@ ompi_report_comm_methods(int called_from_location)
423
465
424
466
// If we're running during init, establish connections between all peers
425
467
// (in leader_comm, which is all the ranks that are here at this point)
426
- if (CALLED_FROM_MPI_INIT == called_from_location ) {
468
+ if (called_from_location == 1 ) {
469
+ int speer = (myleaderrank + 1 ) % nleaderranks ;
470
+ int rpeer = (myleaderrank - 1 + nleaderranks ) % nleaderranks ;
427
471
for (i = 0 ; i <=nleaderranks /2 ; ++ i ) {
428
472
// (Examples to show why the loop is i<=nleaderranks/2)
429
473
// np4 : 0 1 2 3 i=0 0c0 i=1 0c0&1&3 i=2 0c0&1&3&2
430
474
// np5 : 0 1 2 3 4 i=0 0c0 i=1 0c0&1&4 i=2 0c0&1&4&2&3
431
475
MPI_Request sreq , rreq ;
432
476
MPI_Status status ;
433
477
int sbuf , rbuf ;
434
- int speer = (myleaderrank + 1 ) % nleaderranks ;
435
- int rpeer = (myleaderrank - 1 + nleaderranks ) % nleaderranks ;
436
-
437
478
sbuf = rbuf = 0 ;
438
479
MCA_PML_CALL (isend (& sbuf , 1 , MPI_INT , speer , 99 ,
439
480
MCA_PML_BASE_SEND_STANDARD ,
@@ -442,6 +483,11 @@ ompi_report_comm_methods(int called_from_location)
442
483
leader_comm , & rreq ));
443
484
ompi_request_wait (& sreq , & status );
444
485
ompi_request_wait (& rreq , & status );
486
+ speer = (speer + 1 ) % nleaderranks ;
487
+ rpeer = (rpeer - 1 ) % nleaderranks ;
488
+ if (rpeer < 0 ) {
489
+ rpeer = nleaderranks - 1 ;
490
+ }
445
491
}
446
492
}
447
493
@@ -471,19 +517,26 @@ ompi_report_comm_methods(int called_from_location)
471
517
MPI_Op_free (& myop );
472
518
MPI_Type_free (& mydt );
473
519
520
+ // Sort communication method string arrays after reduction
521
+ qsort (& comm_method_string_conversion .str [1 ],
522
+ comm_method_string_conversion .n - 1 , COMM_METHOD_STRING_SIZE , & mycompar );
523
+
474
524
// Each host leader fills in a "numhosts" sized array method[] of
475
525
// how it communicates with each peer.
526
+ // Use a bitmap to keep track of which communication methods are used
527
+ n = ((comm_method_string_conversion .n + 7 ) / 8 ) * sizeof (unsigned char );
528
+ methods_used = malloc (n );
529
+ memset (methods_used , 0 , n );
530
+
476
531
for (i = 0 ; i < nleaderranks ; ++ i ) {
477
532
method [i ] = comm_method (leader_comm , i );
478
533
479
534
// For looking at our own local host though, we don't really want "self"
480
535
// unless there's only one rank and "self" is the best answer. So if
481
536
// there's more than one rank on our host, we get our local-host's
482
537
// communication method for a neighbor on this host.
483
- if (i == myleaderrank ) {
484
- if (nlocalranks > 1 ) {
485
- method [i ] = comm_method (local_comm , 1 );
486
- }
538
+ if ((i == myleaderrank ) && (nlocalranks > 1 )) {
539
+ method [i ] = comm_method (local_comm , 1 );
487
540
}
488
541
}
489
542
@@ -493,6 +546,8 @@ ompi_report_comm_methods(int called_from_location)
493
546
{
494
547
int len , * lens , * disps ;
495
548
549
+ // First get the array of host strings (host names and task lists)
550
+ // for all nodes.
496
551
len = strlen (hoststring ) + 1 ;
497
552
if (myleaderrank == 0 ) {
498
553
lens = malloc (nleaderranks * sizeof (int ));
@@ -533,7 +588,9 @@ ompi_report_comm_methods(int called_from_location)
533
588
free (lens );
534
589
free (disps );
535
590
}
536
- // and a simpler gather for the methods
591
+
592
+ // and a simpler gather for the arrays of communication method indices
593
+ // for all nodes.
537
594
leader_comm -> c_coll -> coll_gather (
538
595
method , nleaderranks , MPI_INT ,
539
596
method , nleaderranks , MPI_INT ,
@@ -581,14 +638,22 @@ ompi_report_comm_methods(int called_from_location)
581
638
// 2: 2d table
582
639
if (nleaderranks <= max2Dprottable ) {
583
640
char * str , * p ;
584
- int tmp , per ;
641
+ int tmp , per , has_ucx_transport ;
585
642
int strlens [NUM_COMM_METHODS ];
586
643
587
644
// characters per entry in the 2d table, must be large enough
588
645
// for the digits needed for host numbers, and for whatever is
589
646
// the longest string used in the table, plus a space.
590
647
for (i = 0 ; i < NUM_COMM_METHODS ; ++ i ) {
591
- strlens [i ] = strlen (comm_method_to_string (i ));
648
+ p = comm_method_to_string (i );
649
+ if (0 == strncmp (p , UCX_TAG , strlen (UCX_TAG ))) {
650
+ // Assume no more than 1000 UCX transport strings
651
+ // See PML_UCX_MAX_TRANSPORT_ENTRIES in pml_ucx.c
652
+ strlens [i ] = strlen ("ucx[000]" );
653
+ }
654
+ else {
655
+ strlens [i ] = strlen (p );
656
+ }
592
657
}
593
658
per = 2 ;
594
659
tmp = nleaderranks ;
@@ -610,19 +675,38 @@ ompi_report_comm_methods(int called_from_location)
610
675
p [j ] = 0 ;
611
676
p += j ;
612
677
}
678
+ // Use a bitmap to trace which UCX transport strings are used.
679
+ n = (nleaderranks + 7 ) / 8 ;
680
+ methods_used = malloc (n * sizeof (unsigned char ));
681
+ memset (methods_used , 0 , n );
613
682
tmp = (int )strlen (str );
614
683
-- p ;
615
684
while (p >=str && ((* p )== ' ' )) { * (p -- )= 0 ; }
616
685
printf (" host | %s\n" , str );
617
686
memset (str , (int )'=' , tmp );
618
687
str [tmp ] = 0 ;
619
688
printf ("======|=%s\n" , str );
689
+ has_ucx_transport = 0 ;
620
690
621
691
for (i = 0 ; i < nleaderranks ; ++ i ) {
622
692
str [0 ] = 0 ;
623
693
p = str ;
624
694
for (k = 0 ; k < nleaderranks ; ++ k ) {
625
- strcat (p , comm_method_to_string (method [i * nleaderranks + k ]));
695
+ char * method_string ;
696
+ char ucx_label [10 ];
697
+
698
+ method_string = comm_method_to_string (method [i * nleaderranks + k ]);
699
+ if (0 == strncmp (method_string , UCX_TAG , strlen (UCX_TAG ))) {
700
+ n = lookup_string_in_conversion_struct (& comm_method_string_conversion ,
701
+ method_string );
702
+ sprintf (ucx_label , "ucx[%3d]" , n );
703
+ strcat (p , ucx_label );
704
+ methods_used [n / 8 ] |= (1 << (n % 8 ));
705
+ has_ucx_transport = 1 ;
706
+ }
707
+ else {
708
+ strcat (p , method_string );
709
+ }
626
710
for (j = (int )strlen (p ); j < per ; ++ j ) {
627
711
p [j ] = ' ' ;
628
712
}
@@ -635,6 +719,35 @@ ompi_report_comm_methods(int called_from_location)
635
719
}
636
720
printf ("\n" );
637
721
free (str );
722
+ if (has_ucx_transport ) {
723
+ printf ("UCX Transport/Device\n" );
724
+ for (i = 0 ; i < comm_method_string_conversion .n ; i ++ ) {
725
+ // Check bitmap to check if method was used
726
+ if (methods_used [i / 8 ] & (1 << (i % 8 ))) {
727
+ p = comm_method_to_string (i );
728
+ if (0 == strncmp (p , UCX_TAG , strlen (UCX_TAG ))) {
729
+ char * temp_str , * token ;
730
+ n = lookup_string_in_conversion_struct (& comm_method_string_conversion , p );
731
+ printf ("ucx[%3d]:\n" , n );
732
+ temp_str = strdup (p + 4 );
733
+ token = strtok (temp_str , "," );
734
+ while (NULL != token ) {
735
+ p = strchr (token , ';' );
736
+ if (NULL == p ) {
737
+ printf (" %-16s\n" , token );
738
+ }
739
+ else {
740
+ * p = '\0' ;
741
+ printf (" %-16s %-16s\n" , token , p + 1 );
742
+ }
743
+ token = strtok (NULL , "," );
744
+ }
745
+ free (temp_str );
746
+ }
747
+ }
748
+ }
749
+ }
750
+ free (methods_used );
638
751
}
639
752
else if (nleaderranks <= max2D1Cprottable ) {
640
753
char * str , * p ;
0 commit comments