Skip to content

Commit 1d23d06

Browse files
devrealPhuong Nguyen
and
Phuong Nguyen
committed
Add stream operations to accelerator components
- Stream-based alloc and free - Stream-based memmove - Wait for stream to complete. Also, enable querying for the number of devices and the memory bandwidth. These operations are needed for operation device offloading. Co-authored-by: Phuong Nguyen <phuong.nguyen@icl.utk.edu> Signed-off-by: Joseph Schuchart <schuchart@icl.utk.edu>
1 parent 1438a79 commit 1d23d06

9 files changed

+723
-83
lines changed

opal/mca/accelerator/accelerator.h

+106-1
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,9 @@
55
* Copyright (c) Amazon.com, Inc. or its affiliates.
66
* All Rights reserved.
77
* Copyright (c) 2023 Advanced Micro Devices, Inc. All Rights reserved.
8+
* Copyright (c) 2024 The University of Tennessee and The University
9+
* of Tennessee Research Foundation. All rights
10+
* reserved.
811
*
912
* $COPYRIGHT$
1013
*
@@ -184,6 +187,16 @@ typedef int (*opal_accelerator_base_module_check_addr_fn_t)(
184187
typedef int (*opal_accelerator_base_module_create_stream_fn_t)(
185188
int dev_id, opal_accelerator_stream_t **stream);
186189

190+
/**
191+
* Wait for the completion of all operations inserted into the stream.
192+
*
193+
* @param[IN] stream The stream to wait for.
194+
*
195+
* @return OPAL_SUCCESS or error status on failure
196+
*/
197+
typedef int (*opal_accelerator_base_module_sync_stream_fn_t)(
198+
opal_accelerator_stream_t *stream);
199+
187200
/**
188201
* Creates an event. An event is a synchronization marker that can be
189202
* appended to a stream to monitor device progress or synchronize the
@@ -193,7 +206,7 @@ typedef int (*opal_accelerator_base_module_create_stream_fn_t)(
193206
* @param[IN] dev_id Associated device for the event or
194207
* MCA_ACCELERATOR_NO_DEVICE_ID
195208
* @param[OUT] event Event to create
196-
* @param[IN] enable_ipc support inter-process tracking of the event
209+
* @param[IN] enable_ipc support inter-process tracking of the event
197210
*
198211
* @return OPAL_SUCCESS or error status on failure.
199212
*/
@@ -310,6 +323,31 @@ typedef int (*opal_accelerator_base_module_memmove_fn_t)(
310323
int dest_dev_id, int src_dev_id, void *dest, const void *src, size_t size,
311324
opal_accelerator_transfer_type_t type);
312325

326+
327+
/**
328+
* Copies memory asynchronously from src to dest. Memory of dest and src
329+
* may overlap. Optionally can specify the transfer type to
330+
* avoid pointer detection for performance. The operations will be enqueued
331+
* into the provided stream but are not guaranteed to be complete upon return.
332+
*
333+
* @param[IN] dest_dev_id Associated device to copy to or
334+
* MCA_ACCELERATOR_NO_DEVICE_ID
335+
* @param[IN] src_dev_id Associated device to copy from or
336+
* MCA_ACCELERATOR_NO_DEVICE_ID
337+
* @param[IN] dest Destination to copy memory to
338+
* @param[IN] src Source to copy memory from
339+
* @param[IN] size Size of memory to copy
340+
* @param[IN] stream Stream to perform asynchronous move on
341+
* @param[IN] type Transfer type field for performance
342+
* Can be set to MCA_ACCELERATOR_TRANSFER_UNSPEC
343+
* if caller is unsure of the transfer direction.
344+
*
345+
* @return OPAL_SUCCESS or error status on failure
346+
*/
347+
typedef int (*opal_accelerator_base_module_memmove_async_fn_t)(
348+
int dest_dev_id, int src_dev_id, void *dest, const void *src, size_t size,
349+
opal_accelerator_stream_t *stream, opal_accelerator_transfer_type_t type);
350+
313351
/**
314352
* Allocates size bytes memory from the device and sets ptr to the
315353
* pointer of the allocated memory. The memory is not initialized.
@@ -340,6 +378,46 @@ typedef int (*opal_accelerator_base_module_mem_alloc_fn_t)(
340378
typedef int (*opal_accelerator_base_module_mem_release_fn_t)(
341379
int dev_id, void *ptr);
342380

381+
382+
/**
383+
* Allocates size bytes memory from the device and sets ptr to the
384+
* pointer of the allocated memory. The memory is not initialized.
385+
* The allocation request is placed into the stream object.
386+
* Any use of the memory must occur after the completion of this
387+
* operation on the stream.
388+
*
389+
* @param[IN] dev_id Associated device for the allocation or
390+
* MCA_ACCELERATOR_NO_DEVICE_ID
391+
* @param[OUT] ptr Returns pointer to allocated memory
392+
* @param[IN] size Size of memory to allocate
393+
* @param[IN] stream Stream into which to insert the allocation request
394+
*
395+
* @return OPAL_SUCCESS or error status on failure
396+
*/
397+
typedef int (*opal_accelerator_base_module_mem_alloc_stream_fn_t)(
398+
int dev_id, void **ptr, size_t size, opal_accelerator_stream_t *stream);
399+
400+
/**
401+
* Frees the memory space pointed to by ptr which has been returned by
402+
* a previous call to an opal_accelerator_base_module_mem_alloc_stream_fn_t().
403+
* If the function is called on a ptr that has already been freed,
404+
* undefined behavior occurs. If ptr is NULL, no operation is performed,
405+
* and the function returns OPAL_SUCCESS.
406+
* The release of the memory will be inserted into the stream and occurs after
407+
* all previous operations have completed.
408+
*
409+
* @param[IN] dev_id Associated device for the allocation or
410+
* MCA_ACCELERATOR_NO_DEVICE_ID
411+
* @param[IN] ptr Pointer to free
412+
* @param[IN] stream Stream into which to insert the free operation
413+
*
414+
* @return OPAL_SUCCESS or error status on failure
415+
*/
416+
typedef int (*opal_accelerator_base_module_mem_release_stream_fn_t)(
417+
int dev_id, void *ptr, opal_accelerator_stream_t *stream);
418+
419+
420+
343421
/**
344422
* Retrieves the base address and/or size of a memory allocation of the
345423
* device.
@@ -557,6 +635,26 @@ typedef int (*opal_accelerator_base_module_device_can_access_peer_fn_t)(
557635
typedef int (*opal_accelerator_base_module_get_buffer_id_fn_t)(
558636
int dev_id, const void *addr, opal_accelerator_buffer_id_t *buf_id);
559637

638+
/**
639+
* Get the number of devices available.
640+
*
641+
* @param[OUT] num_devices Number of devices.
642+
*
643+
* @return OPAL_SUCCESS or error status on failure
644+
*/
645+
typedef int (*opal_accelerator_base_module_get_num_devices_fn_t)(int *num_devices);
646+
647+
/**
648+
* Get the memory bandwidth of the device.
649+
*
650+
* @param[IN] device The device to query.
651+
* @param[OUT] bw The returned bandwidth for the device.
652+
*
653+
* @return OPAL_SUCCESS or error status on failure
654+
*/
655+
typedef int (*opal_accelerator_base_module_get_mem_bw_fn_t)(int device, float *bw);
656+
657+
560658
/*
561659
* the standard public API data structure
562660
*/
@@ -565,17 +663,21 @@ typedef struct {
565663
opal_accelerator_base_module_check_addr_fn_t check_addr;
566664

567665
opal_accelerator_base_module_create_stream_fn_t create_stream;
666+
opal_accelerator_base_module_sync_stream_fn_t sync_stream;
568667
opal_accelerator_base_module_create_event_fn_t create_event;
569668
opal_accelerator_base_module_record_event_fn_t record_event;
570669
opal_accelerator_base_module_query_event_fn_t query_event;
571670
opal_accelerator_base_module_wait_event_fn_t wait_event;
572671

573672
opal_accelerator_base_module_memcpy_async_fn_t mem_copy_async;
574673
opal_accelerator_base_module_memcpy_fn_t mem_copy;
674+
opal_accelerator_base_module_memmove_async_fn_t mem_move_async;
575675
opal_accelerator_base_module_memmove_fn_t mem_move;
576676

577677
opal_accelerator_base_module_mem_alloc_fn_t mem_alloc;
578678
opal_accelerator_base_module_mem_release_fn_t mem_release;
679+
opal_accelerator_base_module_mem_alloc_stream_fn_t mem_alloc_stream;
680+
opal_accelerator_base_module_mem_release_stream_fn_t mem_release_stream;
579681
opal_accelerator_base_module_get_address_range_fn_t get_address_range;
580682

581683
opal_accelerator_base_module_is_ipc_enabled_fn_t is_ipc_enabled;
@@ -595,6 +697,9 @@ typedef struct {
595697
opal_accelerator_base_module_device_can_access_peer_fn_t device_can_access_peer;
596698

597699
opal_accelerator_base_module_get_buffer_id_fn_t get_buffer_id;
700+
701+
opal_accelerator_base_module_get_num_devices_fn_t num_devices;
702+
opal_accelerator_base_module_get_mem_bw_fn_t get_mem_bw;
598703
} opal_accelerator_base_module_t;
599704

600705
/**

0 commit comments

Comments
 (0)