Skip to content

Commit 236af1a

Browse files
devreal and Phuong Nguyen committed
Add stream operations to accelerator components
- Default stream - Stream-based alloc and free - Stream-based memmove - Wait for stream to complete Also, enable querying for number of devices and memory bandwidth. These operations are needed for operation device offloading. Co-authored-by: Phuong Nguyen <phuong.nguyen@icl.utk.edu> Signed-off-by: Joseph Schuchart <schuchart@icl.utk.edu>
1 parent 980eb50 commit 236af1a

9 files changed

+824
-65
lines changed

opal/mca/accelerator/accelerator.h

+119-1
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,9 @@
55
* Copyright (c) Amazon.com, Inc. or its affiliates.
66
* All Rights reserved.
77
* Copyright (c) 2023 Advanced Micro Devices, Inc. All Rights reserved.
8+
* Copyright (c) 2024 The University of Tennessee and The University
9+
* of Tennessee Research Foundation. All rights
10+
* reserved.
811
*
912
* $COPYRIGHT$
1013
*
@@ -184,6 +187,19 @@ typedef int (*opal_accelerator_base_module_check_addr_fn_t)(
184187
typedef int (*opal_accelerator_base_module_create_stream_fn_t)(
185188
int dev_id, opal_accelerator_stream_t **stream);
186189

190+
191+
/**
192+
* Query the default stream.
193+
*
194+
* @param[IN] dev_id Associated device for the stream or
195+
* MCA_ACCELERATOR_NO_DEVICE_ID
196+
* @param[OUT] stream Set to the default stream.
197+
*
198+
* @return OPAL_SUCCESS or error status on failure
199+
*/
200+
typedef int (*opal_accelerator_base_get_default_stream_fn_t)(
201+
int dev_id, opal_accelerator_stream_t **stream);
202+
187203
/**
188204
* Creates an event. An event is a synchronization marker that can be
189205
* appended to a stream to monitor device progress or synchronize the
@@ -193,7 +209,7 @@ typedef int (*opal_accelerator_base_module_create_stream_fn_t)(
193209
* @param[IN] dev_id Associated device for the event or
194210
* MCA_ACCELERATOR_NO_DEVICE_ID
195211
* @param[OUT] event Event to create
196-
* @param[IN] enable_ipc support inter-process tracking of the event
212+
* @param[IN] enable_ipc support inter-process tracking of the event
197213
*
198214
* @return OPAL_SUCCESS or error status on failure.
199215
*/
@@ -310,6 +326,31 @@ typedef int (*opal_accelerator_base_module_memmove_fn_t)(
310326
int dest_dev_id, int src_dev_id, void *dest, const void *src, size_t size,
311327
opal_accelerator_transfer_type_t type);
312328

329+
330+
/**
331+
* Copies memory asynchronously from src to dest. Memory of dest and src
332+
* may overlap. Optionally can specify the transfer type to
333+
* avoid pointer detection for performance. The operations will be enqueued
334+
* into the provided stream but are not guaranteed to be complete upon return.
335+
*
336+
* @param[IN] dest_dev_id Associated device to copy to or
337+
* MCA_ACCELERATOR_NO_DEVICE_ID
338+
* @param[IN] src_dev_id Associated device to copy from or
339+
* MCA_ACCELERATOR_NO_DEVICE_ID
340+
* @param[IN] dest Destination to copy memory to
341+
* @param[IN] src Source to copy memory from
342+
* @param[IN] size Size of memory to copy
343+
* @param[IN] stream Stream to perform asynchronous move on
344+
* @param[IN] type Transfer type field for performance
345+
* Can be set to MCA_ACCELERATOR_TRANSFER_UNSPEC
346+
* if caller is unsure of the transfer direction.
347+
*
348+
* @return OPAL_SUCCESS or error status on failure
349+
*/
350+
typedef int (*opal_accelerator_base_module_memmove_async_fn_t)(
351+
int dest_dev_id, int src_dev_id, void *dest, const void *src, size_t size,
352+
opal_accelerator_stream_t *stream, opal_accelerator_transfer_type_t type);
353+
313354
/**
314355
* Allocates size bytes memory from the device and sets ptr to the
315356
* pointer of the allocated memory. The memory is not initialized.
@@ -340,6 +381,46 @@ typedef int (*opal_accelerator_base_module_mem_alloc_fn_t)(
340381
typedef int (*opal_accelerator_base_module_mem_release_fn_t)(
341382
int dev_id, void *ptr);
342383

384+
385+
/**
386+
* Allocates size bytes memory from the device and sets ptr to the
387+
* pointer of the allocated memory. The memory is not initialized.
388+
* The allocation request is placed into the stream object.
389+
* Any use of the memory must follow the completion of this
390+
* operation on the stream.
391+
*
392+
* @param[IN] dev_id Associated device for the allocation or
393+
* MCA_ACCELERATOR_NO_DEVICE_ID
394+
* @param[OUT] ptr Returns pointer to allocated memory
395+
* @param[IN] size Size of memory to allocate
396+
* @param[IN] stream Stream into which to insert the allocation request
397+
*
398+
* @return OPAL_SUCCESS or error status on failure
399+
*/
400+
typedef int (*opal_accelerator_base_module_mem_alloc_stream_fn_t)(
401+
int dev_id, void **ptr, size_t size, opal_accelerator_stream_t *stream);
402+
403+
/**
404+
* Frees the memory space pointed to by ptr which has been returned by
405+
* a previous call to an opal_accelerator_base_module_mem_alloc_stream_fn_t().
406+
* If the function is called on a ptr that has already been freed,
407+
* undefined behavior occurs. If ptr is NULL, no operation is performed,
408+
* and the function returns OPAL_SUCCESS.
409+
* The release of the memory will be inserted into the stream and occurs after
410+
* all previous operations have completed.
411+
*
412+
* @param[IN] dev_id Associated device for the allocation or
413+
* MCA_ACCELERATOR_NO_DEVICE_ID
414+
* @param[IN] ptr Pointer to free
415+
* @param[IN] stream Stream into which to insert the free operation
416+
*
417+
* @return OPAL_SUCCESS or error status on failure
418+
*/
419+
typedef int (*opal_accelerator_base_module_mem_release_stream_fn_t)(
420+
int dev_id, void *ptr, opal_accelerator_stream_t *stream);
421+
422+
423+
343424
/**
344425
* Retrieves the base address and/or size of a memory allocation of the
345426
* device.
@@ -557,11 +638,41 @@ typedef int (*opal_accelerator_base_module_device_can_access_peer_fn_t)(
557638
typedef int (*opal_accelerator_base_module_get_buffer_id_fn_t)(
558639
int dev_id, const void *addr, opal_accelerator_buffer_id_t *buf_id);
559640

641+
/**
642+
* Wait for the completion of all operations inserted into the stream.
643+
*
644+
* @param[IN] stream The stream to wait for.
645+
*
646+
* @return OPAL_SUCCESS or error status on failure
647+
*/
648+
typedef int (*opal_accelerator_base_module_wait_stream_fn_t)(opal_accelerator_stream_t *stream);
649+
650+
/**
651+
* Get the number of devices available.
652+
*
653+
* @param[OUT] num_devices Number of devices.
654+
*
655+
* @return OPAL_SUCCESS or error status on failure
656+
*/
657+
typedef int (*opal_accelerator_base_module_get_num_devices_fn_t)(int *num_devices);
658+
659+
/**
660+
* Get the memory bandwidth of the device.
661+
*
662+
* @param[IN] device The device to query.
663+
* @param[OUT] bw The returned bandwidth for the device.
664+
*
665+
* @return OPAL_SUCCESS or error status on failure
666+
*/
667+
typedef int (*opal_accelerator_base_module_get_mem_bw_fn_t)(int device, float *bw);
668+
669+
560670
/*
561671
* the standard public API data structure
562672
*/
563673
typedef struct {
564674
/* accelerator function table */
675+
opal_accelerator_base_get_default_stream_fn_t get_default_stream;
565676
opal_accelerator_base_module_check_addr_fn_t check_addr;
566677

567678
opal_accelerator_base_module_create_stream_fn_t create_stream;
@@ -572,10 +683,13 @@ typedef struct {
572683

573684
opal_accelerator_base_module_memcpy_async_fn_t mem_copy_async;
574685
opal_accelerator_base_module_memcpy_fn_t mem_copy;
686+
opal_accelerator_base_module_memmove_async_fn_t mem_move_async;
575687
opal_accelerator_base_module_memmove_fn_t mem_move;
576688

577689
opal_accelerator_base_module_mem_alloc_fn_t mem_alloc;
578690
opal_accelerator_base_module_mem_release_fn_t mem_release;
691+
opal_accelerator_base_module_mem_alloc_stream_fn_t mem_alloc_stream;
692+
opal_accelerator_base_module_mem_release_stream_fn_t mem_release_stream;
579693
opal_accelerator_base_module_get_address_range_fn_t get_address_range;
580694

581695
opal_accelerator_base_module_is_ipc_enabled_fn_t is_ipc_enabled;
@@ -595,6 +709,10 @@ typedef struct {
595709
opal_accelerator_base_module_device_can_access_peer_fn_t device_can_access_peer;
596710

597711
opal_accelerator_base_module_get_buffer_id_fn_t get_buffer_id;
712+
713+
opal_accelerator_base_module_wait_stream_fn_t wait_stream;
714+
opal_accelerator_base_module_get_num_devices_fn_t num_devices;
715+
opal_accelerator_base_module_get_mem_bw_fn_t get_mem_bw;
598716
} opal_accelerator_base_module_t;
599717

600718
/**

0 commit comments

Comments
 (0)