Skip to content

Commit 8b09010

Browse files
committed
opal/fifo: fix 128-bit atomic fifo on Power9
This commit updates the atomic fifo code to fix a consistency issue observed on Power9 systems when builtin atomics are used. The cause was two things: 1) a missing write memory barrier in fifo push, and 2) a read ordering issue when reading the fifo head non-atomically. This commit fixes both issues and appears to correct then inconsistency. Signed-off-by: Nathan Hjelm <hjelmn@lanl.gov>
1 parent 4c0e0c3 commit 8b09010

File tree

2 files changed

+36
-25
lines changed

2 files changed

+36
-25
lines changed

opal/class/opal_fifo.h

+10-10
Original file line numberDiff line numberDiff line change
@@ -86,8 +86,11 @@ static inline opal_list_item_t *opal_fifo_push_atomic (opal_fifo_t *fifo,
8686
opal_list_item_t *item)
8787
{
8888
opal_counted_pointer_t tail = {.value = fifo->opal_fifo_tail.value};
89+
const opal_list_item_t * const ghost = &fifo->opal_fifo_ghost;
8990

90-
item->opal_list_next = &fifo->opal_fifo_ghost;
91+
item->opal_list_next = (opal_list_item_t *) ghost;
92+
93+
opal_atomic_wmb ();
9194

9295
do {
9396
if (opal_update_counted_pointer (&fifo->opal_fifo_tail, &tail, item)) {
@@ -97,7 +100,7 @@ static inline opal_list_item_t *opal_fifo_push_atomic (opal_fifo_t *fifo,
97100

98101
opal_atomic_wmb ();
99102

100-
if (&fifo->opal_fifo_ghost == tail.data.item) {
103+
if (ghost == tail.data.item) {
101104
/* update the head */
102105
opal_counted_pointer_t head = {.value = fifo->opal_fifo_head.value};
103106
opal_update_counted_pointer (&fifo->opal_fifo_head, &head, item);
@@ -115,7 +118,9 @@ static inline opal_list_item_t *opal_fifo_push_atomic (opal_fifo_t *fifo,
115118
static inline opal_list_item_t *opal_fifo_pop_atomic (opal_fifo_t *fifo)
116119
{
117120
opal_list_item_t *item, *next, *ghost = &fifo->opal_fifo_ghost;
118-
opal_counted_pointer_t head = {.value = fifo->opal_fifo_head.value}, tail;
121+
opal_counted_pointer_t head, tail;
122+
123+
opal_read_counted_pointer (&fifo->opal_fifo_head, &head);
119124

120125
do {
121126
tail.value = fifo->opal_fifo_tail.value;
@@ -130,7 +135,7 @@ static inline opal_list_item_t *opal_fifo_pop_atomic (opal_fifo_t *fifo)
130135

131136
/* the head or next pointer are in an inconsistent state. keep looping. */
132137
if (tail.data.item != item && ghost != tail.data.item && ghost == next) {
133-
head.value = fifo->opal_fifo_head.value;
138+
opal_read_counted_pointer (&fifo->opal_fifo_head, &head);
134139
continue;
135140
}
136141

@@ -160,12 +165,7 @@ static inline opal_list_item_t *opal_fifo_pop_atomic (opal_fifo_t *fifo)
160165
* will be attempting to update the head until after it has been updated
161166
* with the next pointer. push will not see an empty list and other pop
162167
* operations will loop until the head is consistent. */
163-
head.value = fifo->opal_fifo_head.value;
164-
next = (opal_list_item_t *) item->opal_list_next;
165-
166-
assert (ghost == head.data.item);
167-
168-
fifo->opal_fifo_head.data.item = next;
168+
fifo->opal_fifo_head.data.item = (opal_list_item_t *) item->opal_list_next;
169169
opal_atomic_wmb ();
170170
}
171171
}

opal/class/opal_lifo.h

+26-15
Original file line numberDiff line numberDiff line change
@@ -74,8 +74,33 @@ static inline bool opal_update_counted_pointer (volatile opal_counted_pointer_t
7474
return opal_atomic_compare_exchange_strong_128 (&addr->value, &old->value, new_p.value);
7575
}
7676

77+
__opal_attribute_always_inline__
78+
static inline void opal_read_counted_pointer (volatile opal_counted_pointer_t *addr, opal_counted_pointer_t *value)
79+
{
80+
/* most platforms do not read the value atomically so make sure we read the counted pointer in a specific order */
81+
value->data.counter = addr->data.counter;
82+
opal_atomic_rmb ();
83+
value->data.item = addr->data.item;
84+
}
85+
7786
#endif
7887

88+
/**
89+
* @brief Helper function for lifo/fifo to sleep this thread if excessive contention is detected
90+
*/
91+
static inline void _opal_lifo_release_cpu (void)
92+
{
93+
/* NTH: there are many ways to cause the current thread to be suspended. This one
94+
* should work well in most cases. Another approach would be to use poll (NULL, 0, ) but
95+
* the interval will be forced to be in ms (instead of ns or us). Note that there
96+
* is a performance improvement for the lifo test when this call is made on detection
97+
* of contention but it may not translate into actually MPI or application performance
98+
* improvements. */
99+
static struct timespec interval = { .tv_sec = 0, .tv_nsec = 100 };
100+
nanosleep (&interval, NULL);
101+
}
102+
103+
79104
/* Atomic Last In First Out lists. If we are in a multi-threaded environment then the
80105
* atomicity is insured via the compare-and-swap operation, if not we simply do a read
81106
* and/or a write.
@@ -141,9 +166,7 @@ static inline opal_list_item_t *opal_lifo_pop_atomic (opal_lifo_t* lifo)
141166
opal_counted_pointer_t old_head;
142167
opal_list_item_t *item;
143168

144-
old_head.data.counter = lifo->opal_lifo_head.data.counter;
145-
opal_atomic_rmb ();
146-
old_head.data.item = (opal_list_item_t *) lifo->opal_lifo_head.data.item;
169+
opal_read_counted_pointer (&lifo->opal_lifo_head, &old_head);
147170

148171
do {
149172
item = (opal_list_item_t *) old_head.data.item;
@@ -189,18 +212,6 @@ static inline opal_list_item_t *opal_lifo_push_atomic (opal_lifo_t *lifo,
189212

190213
#if OPAL_HAVE_ATOMIC_LLSC_PTR
191214

192-
static inline void _opal_lifo_release_cpu (void)
193-
{
194-
/* NTH: there are many ways to cause the current thread to be suspended. This one
195-
* should work well in most cases. Another approach would be to use poll (NULL, 0, ) but
196-
* the interval will be forced to be in ms (instead of ns or us). Note that there
197-
* is a performance improvement for the lifo test when this call is made on detection
198-
* of contention but it may not translate into actually MPI or application performance
199-
* improvements. */
200-
static struct timespec interval = { .tv_sec = 0, .tv_nsec = 100 };
201-
nanosleep (&interval, NULL);
202-
}
203-
204215
/* Retrieve one element from the LIFO. If we reach the ghost element then the LIFO
205216
* is empty so we return NULL.
206217
*/

0 commit comments

Comments
 (0)