5
5
* Copyright (c) Amazon.com, Inc. or its affiliates.
6
6
* All Rights reserved.
7
7
* Copyright (c) 2023 Advanced Micro Devices, Inc. All Rights reserved.
8
+ * Copyright (c) 2024 The University of Tennessee and The University
9
+ * of Tennessee Research Foundation. All rights
10
+ * reserved.
8
11
*
9
12
* $COPYRIGHT$
10
13
*
@@ -184,6 +187,16 @@ typedef int (*opal_accelerator_base_module_check_addr_fn_t)(
184
187
typedef int (* opal_accelerator_base_module_create_stream_fn_t )(
185
188
int dev_id , opal_accelerator_stream_t * * stream );
186
189
190
+ /**
191
+ * Wait for the completion of all operations inserted into the stream.
192
+ *
193
+ * @param[IN] stream The stream to wait for.
194
+ *
195
+ * @return OPAL_SUCCESS or error status on failure
196
+ */
197
+ typedef int (* opal_accelerator_base_module_sync_stream_fn_t )(
198
+ opal_accelerator_stream_t * stream );
199
+
187
200
/**
188
201
* Creates an event. An event is a synchronization marker that can be
189
202
* appended to a stream to monitor device progress or synchronize the
@@ -193,7 +206,7 @@ typedef int (*opal_accelerator_base_module_create_stream_fn_t)(
193
206
* @param[IN] dev_id Associated device for the event or
194
207
* MCA_ACCELERATOR_NO_DEVICE_ID
195
208
* @param[OUT] event Event to create
196
- * @param[IN] enable_ipc support inter-process tracking of the event
209
+ * @param[IN] enable_ipc support inter-process tracking of the event
197
210
*
198
211
* @return OPAL_SUCCESS or error status on failure.
199
212
*/
@@ -310,6 +323,31 @@ typedef int (*opal_accelerator_base_module_memmove_fn_t)(
310
323
int dest_dev_id , int src_dev_id , void * dest , const void * src , size_t size ,
311
324
opal_accelerator_transfer_type_t type );
312
325
326
+
327
+ /**
328
+ * Copies memory asynchronously from src to dest. Memory of dest and src
329
+ * may overlap. Optionally can specify the transfer type to
330
+ * avoid pointer detection for performance. The operations will be enqueued
331
+ * into the provided stream but are not guaranteed to be complete upon return.
332
+ *
333
+ * @param[IN] dest_dev_id Associated device to copy to or
334
+ * MCA_ACCELERATOR_NO_DEVICE_ID
335
+ * @param[IN] src_dev_id Associated device to copy from or
336
+ * MCA_ACCELERATOR_NO_DEVICE_ID
337
+ * @param[IN] dest Destination to copy memory to
338
+ * @param[IN] src Source to copy memory from
339
+ * @param[IN] size Size of memory to copy
340
+ * @param[IN] stream Stream to perform asynchronous move on
341
+ * @param[IN] type Transfer type field for performance
342
+ * Can be set to MCA_ACCELERATOR_TRANSFER_UNSPEC
343
+ * if caller is unsure of the transfer direction.
344
+ *
345
+ * @return OPAL_SUCCESS or error status on failure
346
+ */
347
+ typedef int (* opal_accelerator_base_module_memmove_async_fn_t )(
348
+ int dest_dev_id , int src_dev_id , void * dest , const void * src , size_t size ,
349
+ opal_accelerator_stream_t * stream , opal_accelerator_transfer_type_t type );
350
+
313
351
/**
314
352
* Allocates size bytes memory from the device and sets ptr to the
315
353
* pointer of the allocated memory. The memory is not initialized.
@@ -340,6 +378,46 @@ typedef int (*opal_accelerator_base_module_mem_alloc_fn_t)(
340
378
typedef int (* opal_accelerator_base_module_mem_release_fn_t )(
341
379
int dev_id , void * ptr );
342
380
381
+
382
+ /**
383
+ * Allocates size bytes memory from the device and sets ptr to the
384
+ * pointer of the allocated memory. The memory is not initialized.
385
+ * The allocation request is placed into the stream object.
386
+ * Any use of the memory must follow the completion of this
387
+ * operation on the stream.
388
+ *
389
+ * @param[IN] dev_id Associated device for the allocation or
390
+ * MCA_ACCELERATOR_NO_DEVICE_ID
391
+ * @param[OUT] ptr Returns pointer to allocated memory
392
+ * @param[IN] size Size of memory to allocate
393
+ * @param[IN] stream Stream into which to insert the allocation request
394
+ *
395
+ * @return OPAL_SUCCESS or error status on failure
396
+ */
397
+ typedef int (* opal_accelerator_base_module_mem_alloc_stream_fn_t )(
398
+ int dev_id , void * * ptr , size_t size , opal_accelerator_stream_t * stream );
399
+
400
+ /**
401
+ * Frees the memory space pointed to by ptr which has been returned by
402
+ * a previous call to an opal_accelerator_base_module_mem_alloc_stream_fn_t().
403
+ * If the function is called on a ptr that has already been freed,
404
+ * undefined behavior occurs. If ptr is NULL, no operation is performed,
405
+ * and the function returns OPAL_SUCCESS.
406
+ * The release of the memory will be inserted into the stream and occurs after
407
+ * all previous operations have completed.
408
+ *
409
+ * @param[IN] dev_id Associated device for the allocation or
410
+ * MCA_ACCELERATOR_NO_DEVICE_ID
411
+ * @param[IN] ptr Pointer to free
412
+ * @param[IN] stream Stream into which to insert the free operation
413
+ *
414
+ * @return OPAL_SUCCESS or error status on failure
415
+ */
416
+ typedef int (* opal_accelerator_base_module_mem_release_stream_fn_t )(
417
+ int dev_id , void * ptr , opal_accelerator_stream_t * stream );
418
+
419
+
420
+
343
421
/**
344
422
* Retrieves the base address and/or size of a memory allocation of the
345
423
* device.
@@ -557,6 +635,26 @@ typedef int (*opal_accelerator_base_module_device_can_access_peer_fn_t)(
557
635
typedef int (* opal_accelerator_base_module_get_buffer_id_fn_t )(
558
636
int dev_id , const void * addr , opal_accelerator_buffer_id_t * buf_id );
559
637
638
+ /**
639
+ * Get the number of devices available.
640
+ *
641
+ * @param[OUT] num_devices Number of devices.
642
+ *
643
+ * @return OPAL_SUCCESS or error status on failure
644
+ */
645
+ typedef int (* opal_accelerator_base_module_get_num_devices_fn_t )(int * num_devices );
646
+
647
+ /**
648
+ * Get the memory bandwidth of the device.
649
+ *
650
+ * @param[IN] device The device to query.
651
+ * @param[OUT] bw The returned bandwidth for the device.
652
+ *
653
+ * @return OPAL_SUCCESS or error status on failure
654
+ */
655
+ typedef int (* opal_accelerator_base_module_get_mem_bw_fn_t )(int device , float * bw );
656
+
657
+
560
658
/*
561
659
* the standard public API data structure
562
660
*/
@@ -565,17 +663,21 @@ typedef struct {
565
663
opal_accelerator_base_module_check_addr_fn_t check_addr ;
566
664
567
665
opal_accelerator_base_module_create_stream_fn_t create_stream ;
666
+ opal_accelerator_base_module_sync_stream_fn_t sync_stream ;
568
667
opal_accelerator_base_module_create_event_fn_t create_event ;
569
668
opal_accelerator_base_module_record_event_fn_t record_event ;
570
669
opal_accelerator_base_module_query_event_fn_t query_event ;
571
670
opal_accelerator_base_module_wait_event_fn_t wait_event ;
572
671
573
672
opal_accelerator_base_module_memcpy_async_fn_t mem_copy_async ;
574
673
opal_accelerator_base_module_memcpy_fn_t mem_copy ;
674
+ opal_accelerator_base_module_memmove_async_fn_t mem_move_async ;
575
675
opal_accelerator_base_module_memmove_fn_t mem_move ;
576
676
577
677
opal_accelerator_base_module_mem_alloc_fn_t mem_alloc ;
578
678
opal_accelerator_base_module_mem_release_fn_t mem_release ;
679
+ opal_accelerator_base_module_mem_alloc_stream_fn_t mem_alloc_stream ;
680
+ opal_accelerator_base_module_mem_release_stream_fn_t mem_release_stream ;
579
681
opal_accelerator_base_module_get_address_range_fn_t get_address_range ;
580
682
581
683
opal_accelerator_base_module_is_ipc_enabled_fn_t is_ipc_enabled ;
@@ -595,6 +697,9 @@ typedef struct {
595
697
opal_accelerator_base_module_device_can_access_peer_fn_t device_can_access_peer ;
596
698
597
699
opal_accelerator_base_module_get_buffer_id_fn_t get_buffer_id ;
700
+
701
+ opal_accelerator_base_module_get_num_devices_fn_t num_devices ;
702
+ opal_accelerator_base_module_get_mem_bw_fn_t get_mem_bw ;
598
703
} opal_accelerator_base_module_t ;
599
704
600
705
/**
0 commit comments