@@ -389,19 +389,20 @@ pi_result enqueueEventWait(pi_queue queue, pi_event event) {
389
389
}
390
390
391
391
/// Builds an empty program bound to \p ctxt.
///
/// The program starts with no loaded module, no binary image (null pointer,
/// zero size) and a reference count of one. The context is retained here via
/// cuda_piContextRetain.
///
/// \param ctxt Context the program is associated with.
_pi_program::_pi_program(pi_context ctxt)
    : module_(nullptr),
      binary_(),
      binarySizeInBytes_(0),
      refCount_(1),
      context_(ctxt) {
  cuda_piContextRetain(context_);
}
397
396
398
397
/// Destroys the program and releases its context through
/// cuda_piContextRelease, balancing the retain done by the constructor.
_pi_program::~_pi_program() {
  cuda_piContextRelease(context_);
}
401
400
402
- pi_result _pi_program::create_from_source (const char *source, size_t length) {
403
- source_ = source;
404
- sourceLength_ = length;
401
+ pi_result _pi_program::set_binary (const char *source, size_t length) {
402
+ assert ((binary_ == nullptr && binarySizeInBytes_ == 0 ) &&
403
+ " Re-setting program binary data which has already been set" );
404
+ binary_ = source;
405
+ binarySizeInBytes_ = length;
405
406
return PI_SUCCESS;
406
407
}
407
408
@@ -427,9 +428,9 @@ pi_result _pi_program::build_program(const char *build_options) {
427
428
options[3 ] = CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES;
428
429
optionVals[3 ] = (void *)(long )MAX_LOG_SIZE;
429
430
430
- auto result = PI_CHECK_ERROR (cuModuleLoadDataEx (
431
- &module_, static_cast <const void *>(source_), numberOfOptions, options ,
432
- optionVals));
431
+ auto result = PI_CHECK_ERROR (
432
+ cuModuleLoadDataEx ( &module_, static_cast <const void *>(binary_) ,
433
+ numberOfOptions, options, optionVals));
433
434
434
435
const auto success = (result == PI_SUCCESS);
435
436
@@ -446,8 +447,8 @@ pi_result _pi_program::build_program(const char *build_options) {
446
447
// / has_kernel method, so an alternative would be to move the has_kernel
447
448
// / query to PI and use cuModuleGetFunction to check for a kernel.
448
449
std::string getKernelNames (pi_program program) {
449
- std::string source (program->source_ ,
450
- program->source_ + program->sourceLength_ );
450
+ std::string source (program->binary_ ,
451
+ program->binary_ + program->binarySizeInBytes_ );
451
452
std::regex entries_pattern (" .entry\\ s+([^\\ ([:s:]]*)" );
452
453
std::string names (" " );
453
454
std::smatch match;
@@ -2172,41 +2173,15 @@ pi_result cuda_piMemRetain(pi_mem mem) {
2172
2173
return PI_SUCCESS;
2173
2174
}
2174
2175
2175
- // / Constructs a PI program from a list of PTX or CUBIN binaries.
2176
- // / Note: No calls to CUDA driver API in this function, only store binaries
2177
- // / for later.
2178
- // /
2179
- // / \TODO Implement more than one input image
2180
- // / \TODO SYCL RT should use cuda_piclprogramCreateWithBinary instead
2176
+ // / Not used as CUDA backend only creates programs from binary.
2177
+ // / See \ref cuda_piclProgramCreateWithBinary.
2181
2178
// /
2182
2179
pi_result cuda_piclProgramCreateWithSource (pi_context context, pi_uint32 count,
2183
2180
const char **strings,
2184
2181
const size_t *lengths,
2185
2182
pi_program *program) {
2186
-
2187
- assert (context != nullptr );
2188
- assert (strings != nullptr );
2189
- assert (program != nullptr );
2190
-
2191
- pi_result retErr = PI_SUCCESS;
2192
-
2193
- if (count == 0 ) {
2194
- retErr = PI_INVALID_PROGRAM;
2195
- return retErr;
2196
- }
2197
-
2198
- assert (count == 1 );
2199
-
2200
- std::unique_ptr<_pi_program> retProgram{new _pi_program{context}};
2201
-
2202
- auto has_length = (lengths != nullptr );
2203
- size_t length = has_length ? lengths[0 ] : strlen (strings[0 ]) + 1 ;
2204
-
2205
- retProgram->create_from_source (strings[0 ], length);
2206
-
2207
- *program = retProgram.release ();
2208
-
2209
- return retErr;
2183
+ cl::sycl::detail::pi::die (" cuda_piclProgramCreateWithSource not implemented" );
2184
+ return {};
2210
2185
}
2211
2186
2212
2187
// / Loads the images from a PI program into a CUmodule that can be
@@ -2244,13 +2219,41 @@ pi_result cuda_piProgramCreate(pi_context context, const void *il,
2244
2219
return {};
2245
2220
}
2246
2221
2247
- // / \TODO Not implemented. See \ref cuda_piclProgramCreateWithSource
2222
+ // / Loads images from a list of PTX or CUBIN binaries.
2223
+ // / Note: No calls to CUDA driver API in this function, only store binaries
2224
+ // / for later.
2225
+ // /
2226
+ // / Note: Only supports one device
2227
+ // /
2248
2228
pi_result cuda_piclProgramCreateWithBinary (
2249
2229
pi_context context, pi_uint32 num_devices, const pi_device *device_list,
2250
2230
const size_t *lengths, const unsigned char **binaries,
2251
- pi_int32 *binary_status, pi_program *errcode_ret) {
2252
- cl::sycl::detail::pi::die (" cuda_piclProgramCreateWithBinary not implemented" );
2253
- return {};
2231
+ pi_int32 *binary_status, pi_program *program) {
2232
+ assert (context != nullptr );
2233
+ assert (binaries != nullptr );
2234
+ assert (program != nullptr );
2235
+ assert (device_list != nullptr );
2236
+ assert (num_devices == 1 && " CUDA contexts are for a single device" );
2237
+ assert ((context->get_device ()->get () == device_list[0 ]->get ()) &&
2238
+ " Mismatch between devices context and passed context when creating "
2239
+ " program from binary" );
2240
+
2241
+ pi_result retError = PI_SUCCESS;
2242
+
2243
+ std::unique_ptr<_pi_program> retProgram{new _pi_program{context}};
2244
+
2245
+ const bool has_length = (lengths != nullptr );
2246
+ size_t length = has_length
2247
+ ? lengths[0 ]
2248
+ : strlen (reinterpret_cast <const char *>(binaries[0 ])) + 1 ;
2249
+
2250
+ assert (length != 0 );
2251
+
2252
+ retProgram->set_binary (reinterpret_cast <const char *>(binaries[0 ]), length);
2253
+
2254
+ *program = retProgram.release ();
2255
+
2256
+ return retError;
2254
2257
}
2255
2258
2256
2259
pi_result cuda_piProgramGetInfo (pi_program program, pi_program_info param_name,
@@ -2272,13 +2275,13 @@ pi_result cuda_piProgramGetInfo(pi_program program, pi_program_info param_name,
2272
2275
&program->context_ ->deviceId_ );
2273
2276
case PI_PROGRAM_INFO_SOURCE:
2274
2277
return getInfo (param_value_size, param_value, param_value_size_ret,
2275
- program->source_ );
2278
+ program->binary_ );
2276
2279
case PI_PROGRAM_INFO_BINARY_SIZES:
2277
2280
return getInfoArray (1 , param_value_size, param_value, param_value_size_ret,
2278
- &program->sourceLength_ );
2281
+ &program->binarySizeInBytes_ );
2279
2282
case PI_PROGRAM_INFO_BINARIES:
2280
2283
return getInfoArray (1 , param_value_size, param_value, param_value_size_ret,
2281
- &program->source_ );
2284
+ &program->binary_ );
2282
2285
case PI_PROGRAM_INFO_KERNEL_NAMES: {
2283
2286
return getInfo (param_value_size, param_value, param_value_size_ret,
2284
2287
getKernelNames (program).c_str ());
@@ -2320,15 +2323,15 @@ pi_result cuda_piProgramLink(pi_context context, pi_uint32 num_devices,
2320
2323
for (size_t i = 0 ; i < num_input_programs; ++i) {
2321
2324
pi_program program = input_programs[i];
2322
2325
retError = PI_CHECK_ERROR (cuLinkAddData (
2323
- state, CU_JIT_INPUT_PTX, const_cast <char *>(program->source_ ),
2324
- program->sourceLength_ , nullptr , 0 , nullptr , nullptr ));
2326
+ state, CU_JIT_INPUT_PTX, const_cast <char *>(program->binary_ ),
2327
+ program->binarySizeInBytes_ , nullptr , 0 , nullptr , nullptr ));
2325
2328
}
2326
2329
void *cubin = nullptr ;
2327
2330
size_t cubinSize = 0 ;
2328
2331
retError = PI_CHECK_ERROR (cuLinkComplete (state, &cubin, &cubinSize));
2329
2332
2330
- retError = retProgram-> create_from_source (
2331
- static_cast <const char *>(cubin), cubinSize);
2333
+ retError =
2334
+ retProgram-> set_binary ( static_cast <const char *>(cubin), cubinSize);
2332
2335
2333
2336
if (retError != PI_SUCCESS) {
2334
2337
return retError;
0 commit comments