|
43 | 43 | "name": "stdout",
|
44 | 44 | "output_type": "stream",
|
45 | 45 | "text": [
|
46 |
| - "#ifndef EXAMPLE_DUMMY_H_\r\n", |
47 |
| - "#define EXAMPLE_DUMMY_H_\r\n", |
48 |
| - "\r\n", |
49 |
| - "#include <vector>\r\n", |
50 |
| - "\r\n", |
51 |
| - "#include \"dali/pipeline/operator/operator.h\"\r\n", |
52 |
| - "\r\n", |
53 |
| - "namespace other_ns {\r\n", |
54 |
| - "\r\n", |
55 |
| - "template <typename Backend>\r\n", |
56 |
| - "class Dummy : public ::dali::Operator<Backend> {\r\n", |
57 |
| - " public:\r\n", |
58 |
| - " inline explicit Dummy(const ::dali::OpSpec &spec) :\r\n", |
59 |
| - " ::dali::Operator<Backend>(spec) {}\r\n", |
60 |
| - "\r\n", |
61 |
| - " virtual inline ~Dummy() = default;\r\n", |
62 |
| - "\r\n", |
63 |
| - " Dummy(const Dummy&) = delete;\r\n", |
64 |
| - " Dummy& operator=(const Dummy&) = delete;\r\n", |
65 |
| - " Dummy(Dummy&&) = delete;\r\n", |
66 |
| - " Dummy& operator=(Dummy&&) = delete;\r\n", |
67 |
| - "\r\n", |
68 |
| - " protected:\r\n", |
69 |
| - " bool CanInferOutputs() const override {\r\n", |
70 |
| - " return true;\r\n", |
71 |
| - " }\r\n", |
72 |
| - "\r\n", |
73 |
| - " bool SetupImpl(std::vector<::dali::OutputDesc> &output_desc,\r\n", |
74 |
| - " const ::dali::Workspace &ws) override {\r\n", |
75 |
| - " const auto &input = ws.Input<Backend>(0);\r\n", |
76 |
| - " output_desc.resize(1);\r\n", |
77 |
| - " output_desc[0] = {input.shape(), input.type()};\r\n", |
78 |
| - " return true;\r\n", |
79 |
| - " }\r\n", |
80 |
| - "\r\n", |
81 |
| - " void RunImpl(::dali::Workspace &ws) override;\r\n", |
82 |
| - "};\r\n", |
83 |
| - "\r\n", |
84 |
| - "} // namespace other_ns\r\n", |
85 |
| - "\r\n", |
86 |
| - "#endif // EXAMPLE_DUMMY_H_\r\n" |
| 46 | + "#ifndef EXAMPLE_DUMMY_H_\n", |
| 47 | + "#define EXAMPLE_DUMMY_H_\n", |
| 48 | + "\n", |
| 49 | + "#include <vector>\n", |
| 50 | + "\n", |
| 51 | + "#include \"dali/pipeline/operator/operator.h\"\n", |
| 52 | + "\n", |
| 53 | + "namespace other_ns {\n", |
| 54 | + "\n", |
| 55 | + "template <typename Backend>\n", |
| 56 | + "class Dummy : public ::dali::Operator<Backend> {\n", |
| 57 | + " public:\n", |
| 58 | + " inline explicit Dummy(const ::dali::OpSpec &spec) :\n", |
| 59 | + " ::dali::Operator<Backend>(spec) {}\n", |
| 60 | + "\n", |
| 61 | + " virtual inline ~Dummy() = default;\n", |
| 62 | + "\n", |
| 63 | + " Dummy(const Dummy&) = delete;\n", |
| 64 | + " Dummy& operator=(const Dummy&) = delete;\n", |
| 65 | + " Dummy(Dummy&&) = delete;\n", |
| 66 | + " Dummy& operator=(Dummy&&) = delete;\n", |
| 67 | + "\n", |
| 68 | + " protected:\n", |
| 69 | + " bool CanInferOutputs() const override {\n", |
| 70 | + " return true;\n", |
| 71 | + " }\n", |
| 72 | + "\n", |
| 73 | + " bool SetupImpl(std::vector<::dali::OutputDesc> &output_desc,\n", |
| 74 | + " const ::dali::Workspace &ws) override {\n", |
| 75 | + " const auto &input = ws.Input<Backend>(0);\n", |
| 76 | + " output_desc.resize(1);\n", |
| 77 | + " output_desc[0] = {input.shape(), input.type()};\n", |
| 78 | + " return true;\n", |
| 79 | + " }\n", |
| 80 | + "\n", |
| 81 | + " void RunImpl(::dali::Workspace &ws) override;\n", |
| 82 | + "};\n", |
| 83 | + "\n", |
| 84 | + "} // namespace other_ns\n", |
| 85 | + "\n", |
| 86 | + "#endif // EXAMPLE_DUMMY_H_\n" |
87 | 87 | ]
|
88 | 88 | }
|
89 | 89 | ],
|
|
115 | 115 | "name": "stdout",
|
116 | 116 | "output_type": "stream",
|
117 | 117 | "text": [
|
118 |
| - "#include \"dummy.h\"\r\n", |
119 |
| - "\r\n", |
120 |
| - "namespace other_ns {\r\n", |
121 |
| - "\r\n", |
122 |
| - "template <>\r\n", |
123 |
| - "void Dummy<::dali::CPUBackend>::RunImpl(::dali::Workspace &ws) {\r\n", |
124 |
| - " const auto &input = ws.Input<::dali::CPUBackend>(0);\r\n", |
125 |
| - " auto &output = ws.Output<::dali::CPUBackend>(0);\r\n", |
126 |
| - "\r\n", |
127 |
| - " ::dali::TypeInfo type = input.type_info();\r\n", |
128 |
| - " auto &tp = ws.GetThreadPool();\r\n", |
129 |
| - " const auto &in_shape = input.shape();\r\n", |
130 |
| - " for (int sample_id = 0; sample_id < in_shape.num_samples(); sample_id++) {\r\n", |
131 |
| - " tp.AddWork(\r\n", |
132 |
| - " [&, sample_id](int thread_id) {\r\n", |
133 |
| - " type.Copy<::dali::CPUBackend, ::dali::CPUBackend>(\r\n", |
134 |
| - " output.raw_mutable_tensor(sample_id),\r\n", |
135 |
| - " input.raw_tensor(sample_id),\r\n", |
136 |
| - " in_shape.tensor_size(sample_id), 0);\r\n", |
137 |
| - " },\r\n", |
138 |
| - " in_shape.tensor_size(sample_id));\r\n", |
139 |
| - " }\r\n", |
140 |
| - " tp.RunAll();\r\n", |
141 |
| - "}\r\n", |
142 |
| - "\r\n", |
143 |
| - "} // namespace other_ns\r\n", |
144 |
| - "\r\n", |
145 |
| - "DALI_REGISTER_OPERATOR(CustomDummy, ::other_ns::Dummy<::dali::CPUBackend>,\r\n", |
146 |
| - " ::dali::CPU);\r\n", |
147 |
| - "\r\n", |
148 |
| - "DALI_SCHEMA(CustomDummy)\r\n", |
149 |
| - " .DocStr(\"Make a copy of the input tensor\")\r\n", |
150 |
| - " .NumInput(1)\r\n", |
151 |
| - " .NumOutput(1);\r\n" |
| 118 | + "#include \"dummy.h\"\n", |
| 119 | + "\n", |
| 120 | + "namespace other_ns {\n", |
| 121 | + "\n", |
| 122 | + "template <>\n", |
| 123 | + "void Dummy<::dali::CPUBackend>::RunImpl(::dali::Workspace &ws) {\n", |
| 124 | + " const auto &input = ws.Input<::dali::CPUBackend>(0);\n", |
| 125 | + " auto &output = ws.Output<::dali::CPUBackend>(0);\n", |
| 126 | + "\n", |
| 127 | + " ::dali::TypeInfo type = input.type_info();\n", |
| 128 | + " auto &tp = ws.GetThreadPool();\n", |
| 129 | + " const auto &in_shape = input.shape();\n", |
| 130 | + " for (int sample_id = 0; sample_id < in_shape.num_samples(); sample_id++) {\n", |
| 131 | + " tp.AddWork(\n", |
| 132 | + " [&, sample_id](int thread_id) {\n", |
| 133 | + " type.Copy<::dali::CPUBackend, ::dali::CPUBackend>(\n", |
| 134 | + " output.raw_mutable_tensor(sample_id),\n", |
| 135 | + " input.raw_tensor(sample_id),\n", |
| 136 | + " in_shape.tensor_size(sample_id), 0);\n", |
| 137 | + " },\n", |
| 138 | + " in_shape.tensor_size(sample_id));\n", |
| 139 | + " }\n", |
| 140 | + " tp.RunAll();\n", |
| 141 | + "}\n", |
| 142 | + "\n", |
| 143 | + "} // namespace other_ns\n", |
| 144 | + "\n", |
| 145 | + "DALI_REGISTER_OPERATOR(CustomDummy, ::other_ns::Dummy<::dali::CPUBackend>,\n", |
| 146 | + " ::dali::CPU);\n", |
| 147 | + "\n", |
| 148 | + "DALI_SCHEMA(CustomDummy)\n", |
| 149 | + " .DocStr(\"Make a copy of the input tensor\")\n", |
| 150 | + " .NumInput(1)\n", |
| 151 | + " .NumOutput(1);\n" |
152 | 152 | ]
|
153 | 153 | }
|
154 | 154 | ],
|
|
180 | 180 | "name": "stdout",
|
181 | 181 | "output_type": "stream",
|
182 | 182 | "text": [
|
183 |
| - "#include <cuda_runtime_api.h>\r\n", |
184 |
| - "#include \"dummy.h\"\r\n", |
185 |
| - "\r\n", |
186 |
| - "namespace other_ns {\r\n", |
187 |
| - "\r\n", |
188 |
| - "template<>\r\n", |
189 |
| - "void Dummy<::dali::GPUBackend>::RunImpl(::dali::Workspace &ws) {\r\n", |
190 |
| - " const auto &input = ws.Input<::dali::GPUBackend>(0);\r\n", |
191 |
| - " const auto &shape = input.shape();\r\n", |
192 |
| - " auto &output = ws.Output<::dali::GPUBackend>(0);\r\n", |
193 |
| - " for (int sample_idx = 0; sample_idx < shape.num_samples(); sample_idx++) {\r\n", |
194 |
| - " CUDA_CALL(cudaMemcpyAsync(\r\n", |
195 |
| - " output.raw_mutable_tensor(sample_idx),\r\n", |
196 |
| - " input.raw_tensor(sample_idx),\r\n", |
197 |
| - " shape[sample_idx].num_elements() * input.type_info().size(),\r\n", |
198 |
| - " cudaMemcpyDeviceToDevice,\r\n", |
199 |
| - " ws.stream()));\r\n", |
200 |
| - " }\r\n", |
201 |
| - "}\r\n", |
202 |
| - "\r\n", |
203 |
| - "} // namespace other_ns\r\n", |
204 |
| - "\r\n", |
205 |
| - "DALI_REGISTER_OPERATOR(CustomDummy, ::other_ns::Dummy<::dali::GPUBackend>,\r\n", |
206 |
| - " ::dali::GPU);\r\n" |
| 183 | + "#include <cuda_runtime_api.h>\n", |
| 184 | + "#include \"dummy.h\"\n", |
| 185 | + "\n", |
| 186 | + "namespace other_ns {\n", |
| 187 | + "\n", |
| 188 | + "template<>\n", |
| 189 | + "void Dummy<::dali::GPUBackend>::RunImpl(::dali::Workspace &ws) {\n", |
| 190 | + " const auto &input = ws.Input<::dali::GPUBackend>(0);\n", |
| 191 | + " const auto &shape = input.shape();\n", |
| 192 | + " auto &output = ws.Output<::dali::GPUBackend>(0);\n", |
| 193 | + " for (int sample_idx = 0; sample_idx < shape.num_samples(); sample_idx++) {\n", |
| 194 | + " CUDA_CALL(cudaMemcpyAsync(\n", |
| 195 | + " output.raw_mutable_tensor(sample_idx),\n", |
| 196 | + " input.raw_tensor(sample_idx),\n", |
| 197 | + " shape[sample_idx].num_elements() * input.type_info().size(),\n", |
| 198 | + " cudaMemcpyDeviceToDevice,\n", |
| 199 | + " ws.stream()));\n", |
| 200 | + " }\n", |
| 201 | + "}\n", |
| 202 | + "\n", |
| 203 | + "} // namespace other_ns\n", |
| 204 | + "\n", |
| 205 | + "DALI_REGISTER_OPERATOR(CustomDummy, ::other_ns::Dummy<::dali::GPUBackend>,\n", |
| 206 | + " ::dali::GPU);\n" |
207 | 207 | ]
|
208 | 208 | }
|
209 | 209 | ],
|
|
239 | 239 | "name": "stdout",
|
240 | 240 | "output_type": "stream",
|
241 | 241 | "text": [
|
242 |
| - "/home/jlisiecki/Dali/dali/compile/dali/python/nvidia/dali/include\n" |
| 242 | + "/usr/local/lib/python3.8/dist-packages/nvidia/dali/include\n" |
243 | 243 | ]
|
244 | 244 | }
|
245 | 245 | ],
|
|
256 | 256 | "name": "stdout",
|
257 | 257 | "output_type": "stream",
|
258 | 258 | "text": [
|
259 |
| - "/home/jlisiecki/Dali/dali/compile/dali/python/nvidia/dali\n" |
| 259 | + "/usr/local/lib/python3.8/dist-packages/nvidia/dali\n" |
260 | 260 | ]
|
261 | 261 | }
|
262 | 262 | ],
|
|
273 | 273 | "name": "stdout",
|
274 | 274 | "output_type": "stream",
|
275 | 275 | "text": [
|
276 |
| - "['-I/home/jlisiecki/Dali/dali/compile/dali/python/nvidia/dali/include', '-D_GLIBCXX_USE_CXX11_ABI=1']\n" |
| 276 | + "['-I/usr/local/lib/python3.8/dist-packages/nvidia/dali/include', '-D_GLIBCXX_USE_CXX11_ABI=1']\n" |
277 | 277 | ]
|
278 | 278 | }
|
279 | 279 | ],
|
|
290 | 290 | "name": "stdout",
|
291 | 291 | "output_type": "stream",
|
292 | 292 | "text": [
|
293 |
| - "['-L/home/jlisiecki/Dali/dali/compile/dali/python/nvidia/dali', '-ldali']\n" |
| 293 | + "['-L/usr/local/lib/python3.8/dist-packages/nvidia/dali', '-ldali']\n" |
294 | 294 | ]
|
295 | 295 | }
|
296 | 296 | ],
|
|
317 | 317 | "name": "stdout",
|
318 | 318 | "output_type": "stream",
|
319 | 319 | "text": [
|
320 |
| - "cmake_minimum_required(VERSION 3.10)\r\n", |
321 |
| - "set(CMAKE_CUDA_ARCHITECTURES \"50;60;70;80;90\")\r\n", |
322 |
| - "\r\n", |
323 |
| - "project(custom_dummy_plugin LANGUAGES CUDA CXX C)\r\n", |
324 |
| - "\r\n", |
325 |
| - "set(CMAKE_CXX_STANDARD 17)\r\n", |
326 |
| - "set(CMAKE_CXX_STANDARD_REQUIRED ON)\r\n", |
327 |
| - "set(CMAKE_CXX_EXTENSIONS OFF)\r\n", |
328 |
| - "set(CMAKE_C_STANDARD 11)\r\n", |
329 |
| - "\r\n", |
330 |
| - "# TODO(klecki): When the test container gets a CMake that supports C++17 as a proper option,\r\n", |
331 |
| - "# swap those lines\r\n", |
332 |
| - "# set(CMAKE_CUDA_STANDARD 17)\r\n", |
333 |
| - "# set(CMAKE_CUDA_STANDARD_REQUIRED ON)\r\n", |
334 |
| - "set(CMAKE_CUDA_FLAGS \"${CMAKE_CUDA_FLAGS} -std=c++17\")\r\n", |
335 |
| - "\r\n", |
336 |
| - "include_directories(SYSTEM \"${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}\")\r\n", |
337 |
| - "\r\n", |
338 |
| - "execute_process(\r\n", |
339 |
| - " COMMAND python -c \"import nvidia.dali as dali; \\\r\n", |
340 |
| - " print(dali.sysconfig.get_lib_dir())\"\r\n", |
341 |
| - " OUTPUT_VARIABLE DALI_LIB_DIR)\r\n", |
342 |
| - "string(STRIP ${DALI_LIB_DIR} DALI_LIB_DIR)\r\n", |
343 |
| - "\r\n", |
344 |
| - "execute_process(\r\n", |
345 |
| - " COMMAND python -c \"import nvidia.dali as dali; print(\\\" \\\r\n", |
346 |
| - " \\\".join(dali.sysconfig.get_compile_flags()))\"\r\n", |
347 |
| - " OUTPUT_VARIABLE DALI_COMPILE_FLAGS)\r\n", |
348 |
| - "string(STRIP ${DALI_COMPILE_FLAGS} DALI_COMPILE_FLAGS)\r\n", |
349 |
| - "\r\n", |
350 |
| - "set(CMAKE_CXX_FLAGS \"${CMAKE_CXX_FLAGS} ${DALI_COMPILE_FLAGS} \")\r\n", |
351 |
| - "set(CMAKE_CUDA_FLAGS \"${CMAKE_CUDA_FLAGS} ${DALI_COMPILE_FLAGS} \")\r\n", |
352 |
| - "link_directories(\"${DALI_LIB_DIR}\")\r\n", |
353 |
| - "\r\n", |
354 |
| - "add_library(dali_customdummy SHARED dummy.cc dummy.cu)\r\n", |
355 |
| - "target_link_libraries(dali_customdummy dali)\r\n" |
| 320 | + "cmake_minimum_required(VERSION 3.10)\n", |
| 321 | + "set(CMAKE_CUDA_ARCHITECTURES \"50;60;70;80;90\")\n", |
| 322 | + "\n", |
| 323 | + "project(custom_dummy_plugin LANGUAGES CUDA CXX C)\n", |
| 324 | + "\n", |
| 325 | + "set(CMAKE_CXX_STANDARD 17)\n", |
| 326 | + "set(CMAKE_CXX_STANDARD_REQUIRED ON)\n", |
| 327 | + "set(CMAKE_CXX_EXTENSIONS OFF)\n", |
| 328 | + "set(CMAKE_C_STANDARD 11)\n", |
| 329 | + "\n", |
| 330 | + "# TODO(klecki): When the test container gets a CMake that supports C++17 as a proper option,\n", |
| 331 | + "# swap those lines\n", |
| 332 | + "# set(CMAKE_CUDA_STANDARD 17)\n", |
| 333 | + "# set(CMAKE_CUDA_STANDARD_REQUIRED ON)\n", |
| 334 | + "set(CMAKE_CUDA_FLAGS \"${CMAKE_CUDA_FLAGS} -std=c++17\")\n", |
| 335 | + "\n", |
| 336 | + "include_directories(SYSTEM \"${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}\")\n", |
| 337 | + "\n", |
| 338 | + "execute_process(\n", |
| 339 | + " COMMAND python -c \"import nvidia.dali as dali; \\\n", |
| 340 | + " print(dali.sysconfig.get_lib_dir())\"\n", |
| 341 | + " OUTPUT_VARIABLE DALI_LIB_DIR)\n", |
| 342 | + "string(STRIP ${DALI_LIB_DIR} DALI_LIB_DIR)\n", |
| 343 | + "\n", |
| 344 | + "execute_process(\n", |
| 345 | + " COMMAND python -c \"import nvidia.dali as dali; print(\\\" \\\n", |
| 346 | + " \\\".join(dali.sysconfig.get_compile_flags()))\"\n", |
| 347 | + " OUTPUT_VARIABLE DALI_COMPILE_FLAGS)\n", |
| 348 | + "string(STRIP ${DALI_COMPILE_FLAGS} DALI_COMPILE_FLAGS)\n", |
| 349 | + "\n", |
| 350 | + "set(CMAKE_CXX_FLAGS \"${CMAKE_CXX_FLAGS} ${DALI_COMPILE_FLAGS} \")\n", |
| 351 | + "set(CMAKE_CUDA_FLAGS \"${CMAKE_CUDA_FLAGS} ${DALI_COMPILE_FLAGS} \")\n", |
| 352 | + "link_directories(\"${DALI_LIB_DIR}\")\n", |
| 353 | + "\n", |
| 354 | + "add_library(dali_customdummy SHARED dummy.cc dummy.cu)\n", |
| 355 | + "target_link_libraries(dali_customdummy dali)\n" |
356 | 356 | ]
|
357 | 357 | }
|
358 | 358 | ],
|
|
376 | 376 | "name": "stdout",
|
377 | 377 | "output_type": "stream",
|
378 | 378 | "text": [
|
379 |
| - "-- The CUDA compiler identification is NVIDIA 12.5.40\n", |
380 |
| - "-- The CXX compiler identification is GNU 12.3.0\n", |
381 |
| - "-- The C compiler identification is GNU 12.3.0\n", |
| 379 | + "-- The CUDA compiler identification is NVIDIA 12.4.131\n", |
| 380 | + "-- The CXX compiler identification is GNU 9.4.0\n", |
| 381 | + "-- The C compiler identification is GNU 9.4.0\n", |
382 | 382 | "-- Detecting CUDA compiler ABI info\n",
|
383 | 383 | "-- Detecting CUDA compiler ABI info - done\n",
|
384 | 384 | "-- Check for working CUDA compiler: /usr/local/cuda/bin/nvcc - skipped\n",
|
|
394 | 394 | "-- Check for working C compiler: /usr/bin/cc - skipped\n",
|
395 | 395 | "-- Detecting C compile features\n",
|
396 | 396 | "-- Detecting C compile features - done\n",
|
397 |
| - "-- Configuring done (6.0s)\n", |
| 397 | + "-- Configuring done (5.2s)\n", |
398 | 398 | "-- Generating done (0.0s)\n",
|
399 |
| - "-- Build files have been written to: /home/jlisiecki/Dali/dali/docs/examples/custom_operations/custom_operator/customdummy/build\n", |
| 399 | + "-- Build files have been written to: /dali/docs/examples/custom_operations/custom_operator/customdummy/build\n", |
400 | 400 | "[ 33%] \u001b[32mBuilding CXX object CMakeFiles/dali_customdummy.dir/dummy.cc.o\u001b[0m\n",
|
401 | 401 | "[ 66%] \u001b[32mBuilding CUDA object CMakeFiles/dali_customdummy.dir/dummy.cu.o\u001b[0m\n",
|
402 | 402 | "[100%] \u001b[32m\u001b[1mLinking CXX shared library libdali_customdummy.so\u001b[0m\n",
|
|
428 | 428 | "name": "stdout",
|
429 | 429 | "output_type": "stream",
|
430 | 430 | "text": [
|
431 |
| - "customdummy/build/libdali_customdummy.so\r\n" |
| 431 | + "customdummy/build/libdali_customdummy.so\n" |
432 | 432 | ]
|
433 | 433 | }
|
434 | 434 | ],
|
|
574 | 574 | "name": "python",
|
575 | 575 | "nbconvert_exporter": "python",
|
576 | 576 | "pygments_lexer": "ipython3",
|
577 |
| - "version": "3.10.12" |
| 577 | + "version": "3.8.10" |
578 | 578 | }
|
579 | 579 | },
|
580 | 580 | "nbformat": 4,
|
|
0 commit comments