Kokkos_CudaSpace.hpp
/*
//@HEADER
// ************************************************************************
//
//                        Kokkos v. 3.0
//       Copyright (2020) National Technology & Engineering
//               Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/

#ifndef KOKKOS_CUDASPACE_HPP
#define KOKKOS_CUDASPACE_HPP

#include <Kokkos_Macros.hpp>
#if defined(KOKKOS_ENABLE_CUDA)

#include <Kokkos_Core_fwd.hpp>

#include <iosfwd>
#include <typeinfo>
#include <string>
#include <memory>

#include <Kokkos_HostSpace.hpp>
#include <impl/Kokkos_SharedAlloc.hpp>

#include <impl/Kokkos_Profiling_Interface.hpp>

#include <Cuda/Kokkos_Cuda_abort.hpp>

#ifdef KOKKOS_IMPL_DEBUG_CUDA_PIN_UVM_TO_HOST
extern "C" bool kokkos_impl_cuda_pin_uvm_to_host();
extern "C" void kokkos_impl_cuda_set_pin_uvm_to_host(bool);
#endif
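
/* Example (illustrative sketch, not part of the original header): when
   Kokkos is configured with KOKKOS_IMPL_DEBUG_CUDA_PIN_UVM_TO_HOST, the
   extern "C" hooks above let an application request that subsequent
   CudaUVMSpace allocations stay resident in host memory.  The call
   ordering shown here is an assumption for illustration.

   \code
   #include <Kokkos_Core.hpp>

   int main(int argc, char* argv[]) {
     kokkos_impl_cuda_set_pin_uvm_to_host(true);  // pin UVM pages to host
     Kokkos::initialize(argc, argv);
     {
       // CudaUVMSpace allocations made here are kept host-resident.
     }
     Kokkos::finalize();
     return kokkos_impl_cuda_pin_uvm_to_host() ? 0 : 1;
   }
   \endcode
*/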

/*--------------------------------------------------------------------------*/

namespace Kokkos {

/** \brief  Cuda on-device memory management */
class CudaSpace {
 public:
  //! Tag this class as a kokkos memory space
  using memory_space    = CudaSpace;
  using execution_space = Kokkos::Cuda;
  using device_type     = Kokkos::Device<execution_space, memory_space>;

  using size_type = unsigned int;

  /*--------------------------------*/

  CudaSpace();
  CudaSpace(CudaSpace&& rhs)      = default;
  CudaSpace(const CudaSpace& rhs) = default;
  CudaSpace& operator=(CudaSpace&& rhs) = default;
  CudaSpace& operator=(const CudaSpace& rhs) = default;
  ~CudaSpace() = default;

  /**\brief  Allocate untracked memory in the cuda space */
  void* allocate(const size_t arg_alloc_size) const;
  void* allocate(const char* arg_label, const size_t arg_alloc_size,
                 const size_t arg_logical_size = 0) const;

  /**\brief  Deallocate untracked memory in the cuda space */
  void deallocate(void* const arg_alloc_ptr, const size_t arg_alloc_size) const;
  void deallocate(const char* arg_label, void* const arg_alloc_ptr,
                  const size_t arg_alloc_size,
                  const size_t arg_logical_size = 0) const;

 private:
  template <class, class, class, class>
  friend class Kokkos::Experimental::LogicalMemorySpace;

  void* impl_allocate(const char* arg_label, const size_t arg_alloc_size,
                      const size_t arg_logical_size = 0,
                      const Kokkos::Tools::SpaceHandle =
                          Kokkos::Tools::make_space_handle(name())) const;
  void impl_deallocate(const char* arg_label, void* const arg_alloc_ptr,
                       const size_t arg_alloc_size,
                       const size_t arg_logical_size = 0,
                       const Kokkos::Tools::SpaceHandle =
                           Kokkos::Tools::make_space_handle(name())) const;

 public:
  /**\brief  Return Name of the MemorySpace */
  static constexpr const char* name() { return m_name; }

  /*--------------------------------*/
  /** \brief  Error reporting for HostSpace attempt to access CudaSpace */
  KOKKOS_DEPRECATED static void access_error();
  KOKKOS_DEPRECATED static void access_error(const void* const);

 private:
  int m_device;  ///< Which Cuda device

  static constexpr const char* m_name = "Cuda";
  friend class Kokkos::Impl::SharedAllocationRecord<Kokkos::CudaSpace, void>;
};
}  // namespace Kokkos
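
/* Example (illustrative sketch, not part of the original header): raw,
   untracked device allocation through CudaSpace.  Assumes Kokkos has been
   initialized; the caller owns the pointer and must deallocate it.

   \code
   #include <Kokkos_Core.hpp>

   void raw_device_buffer() {
     Kokkos::CudaSpace space;
     void* ptr = space.allocate("example_buffer", 1 << 20);  // 1 MiB on device
     // ... hand ptr to kernels; it is NOT host-dereferenceable ...
     space.deallocate("example_buffer", ptr, 1 << 20);
   }
   \endcode
*/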

/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/

namespace Kokkos {

/** \brief  Cuda memory that is accessible to the Host execution space
 *          through Cuda's unified virtual memory (UVM) runtime.
 */
class CudaUVMSpace {
 public:
  //! Tag this class as a kokkos memory space
  using memory_space    = CudaUVMSpace;
  using execution_space = Cuda;
  using device_type     = Kokkos::Device<execution_space, memory_space>;
  using size_type       = unsigned int;

  /** \brief  If UVM capability is available */
  static bool available();

  /*--------------------------------*/
  KOKKOS_DEPRECATED static int number_of_allocations();

  /*--------------------------------*/

  /*--------------------------------*/

  CudaUVMSpace();
  CudaUVMSpace(CudaUVMSpace&& rhs)      = default;
  CudaUVMSpace(const CudaUVMSpace& rhs) = default;
  CudaUVMSpace& operator=(CudaUVMSpace&& rhs) = default;
  CudaUVMSpace& operator=(const CudaUVMSpace& rhs) = default;
  ~CudaUVMSpace() = default;

  /**\brief  Allocate untracked memory in the cuda space */
  void* allocate(const size_t arg_alloc_size) const;
  void* allocate(const char* arg_label, const size_t arg_alloc_size,
                 const size_t arg_logical_size = 0) const;

  /**\brief  Deallocate untracked memory in the cuda space */
  void deallocate(void* const arg_alloc_ptr, const size_t arg_alloc_size) const;
  void deallocate(const char* arg_label, void* const arg_alloc_ptr,
                  const size_t arg_alloc_size,
                  const size_t arg_logical_size = 0) const;

 private:
  template <class, class, class, class>
  friend class Kokkos::Experimental::LogicalMemorySpace;

  void* impl_allocate(const char* arg_label, const size_t arg_alloc_size,
                      const size_t arg_logical_size = 0,
                      const Kokkos::Tools::SpaceHandle =
                          Kokkos::Tools::make_space_handle(name())) const;
  void impl_deallocate(const char* arg_label, void* const arg_alloc_ptr,
                       const size_t arg_alloc_size,
                       const size_t arg_logical_size = 0,
                       const Kokkos::Tools::SpaceHandle =
                           Kokkos::Tools::make_space_handle(name())) const;

 public:
  /**\brief  Return Name of the MemorySpace */
  static constexpr const char* name() { return m_name; }

#ifdef KOKKOS_IMPL_DEBUG_CUDA_PIN_UVM_TO_HOST
  static bool cuda_pin_uvm_to_host();
  static void cuda_set_pin_uvm_to_host(bool val);
#endif
  /*--------------------------------*/

 private:
  int m_device;  ///< Which Cuda device

#ifdef KOKKOS_IMPL_DEBUG_CUDA_PIN_UVM_TO_HOST
  static bool kokkos_impl_cuda_pin_uvm_to_host_v;
#endif
  static constexpr const char* m_name = "CudaUVM";
};

}  // namespace Kokkos
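
/* Example (illustrative sketch, not part of the original header): a View
   placed in CudaUVMSpace is reachable from both device kernels and host
   code, since the UVM runtime migrates pages on demand.  Assumes Kokkos
   is initialized; a fence separates device writes from host reads.

   \code
   #include <Kokkos_Core.hpp>

   void uvm_example() {
     Kokkos::View<double*, Kokkos::CudaUVMSpace> v("uvm_values", 100);
     Kokkos::parallel_for(
         "fill", Kokkos::RangePolicy<Kokkos::Cuda>(0, 100),
         KOKKOS_LAMBDA(int i) { v(i) = 2.0 * i; });
     Kokkos::fence();      // make device writes visible before host access
     double first = v(0);  // legal: UVM memory is host-accessible
     (void)first;
   }
   \endcode
*/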

/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/

namespace Kokkos {

/** \brief  Host memory that is accessible to Cuda execution space
 *          through Cuda's host-pinned memory allocation.
 */
class CudaHostPinnedSpace {
 public:
  /** \brief  Memory is in HostSpace so use the HostSpace::execution_space */
  using execution_space = HostSpace::execution_space;
  using memory_space    = CudaHostPinnedSpace;
  using device_type     = Kokkos::Device<execution_space, memory_space>;
  using size_type       = unsigned int;

  /*--------------------------------*/

  CudaHostPinnedSpace();
  CudaHostPinnedSpace(CudaHostPinnedSpace&& rhs)      = default;
  CudaHostPinnedSpace(const CudaHostPinnedSpace& rhs) = default;
  CudaHostPinnedSpace& operator=(CudaHostPinnedSpace&& rhs) = default;
  CudaHostPinnedSpace& operator=(const CudaHostPinnedSpace& rhs) = default;
  ~CudaHostPinnedSpace() = default;

  /**\brief  Allocate untracked memory in the space */
  void* allocate(const size_t arg_alloc_size) const;
  void* allocate(const char* arg_label, const size_t arg_alloc_size,
                 const size_t arg_logical_size = 0) const;

  /**\brief  Deallocate untracked memory in the space */
  void deallocate(void* const arg_alloc_ptr, const size_t arg_alloc_size) const;
  void deallocate(const char* arg_label, void* const arg_alloc_ptr,
                  const size_t arg_alloc_size,
                  const size_t arg_logical_size = 0) const;

 private:
  template <class, class, class, class>
  friend class Kokkos::Experimental::LogicalMemorySpace;

  void* impl_allocate(const char* arg_label, const size_t arg_alloc_size,
                      const size_t arg_logical_size = 0,
                      const Kokkos::Tools::SpaceHandle =
                          Kokkos::Tools::make_space_handle(name())) const;
  void impl_deallocate(const char* arg_label, void* const arg_alloc_ptr,
                       const size_t arg_alloc_size,
                       const size_t arg_logical_size = 0,
                       const Kokkos::Tools::SpaceHandle =
                           Kokkos::Tools::make_space_handle(name())) const;

 public:
  /**\brief  Return Name of the MemorySpace */
  static constexpr const char* name() { return m_name; }

 private:
  static constexpr const char* m_name = "CudaHostPinned";

  /*--------------------------------*/
};

}  // namespace Kokkos
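
/* Example (illustrative sketch, not part of the original header):
   CudaHostPinnedSpace memory lives on the host but is page-locked, which
   makes device transfers faster and async-capable.  A typical staging
   pattern, assuming Kokkos is initialized:

   \code
   #include <Kokkos_Core.hpp>

   void pinned_staging() {
     Kokkos::View<float*, Kokkos::CudaSpace> dev("device_data", 1000);
     Kokkos::View<float*, Kokkos::CudaHostPinnedSpace> stage("staging", 1000);
     for (int i = 0; i < 1000; ++i) stage(i) = static_cast<float>(i);  // host fill
     Kokkos::deep_copy(dev, stage);  // pinned-host -> device transfer
   }
   \endcode
*/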

/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/

namespace Kokkos {
namespace Impl {

cudaStream_t cuda_get_deep_copy_stream();

const std::unique_ptr<Kokkos::Cuda>& cuda_get_deep_copy_space(
    bool initialize = true);

static_assert(Kokkos::Impl::MemorySpaceAccess<Kokkos::CudaSpace,
                                              Kokkos::CudaSpace>::assignable,
              "");
static_assert(Kokkos::Impl::MemorySpaceAccess<Kokkos::CudaUVMSpace,
                                              Kokkos::CudaUVMSpace>::assignable,
              "");
static_assert(
    Kokkos::Impl::MemorySpaceAccess<Kokkos::CudaHostPinnedSpace,
                                    Kokkos::CudaHostPinnedSpace>::assignable,
    "");

//----------------------------------------

template <>
struct MemorySpaceAccess<Kokkos::HostSpace, Kokkos::CudaSpace> {
  enum : bool { assignable = false };
  enum : bool { accessible = false };
  enum : bool { deepcopy = true };
};

template <>
struct MemorySpaceAccess<Kokkos::HostSpace, Kokkos::CudaUVMSpace> {
  // HostSpace::execution_space != CudaUVMSpace::execution_space
  enum : bool { assignable = false };
  enum : bool { accessible = true };
  enum : bool { deepcopy = true };
};

template <>
struct MemorySpaceAccess<Kokkos::HostSpace, Kokkos::CudaHostPinnedSpace> {
  // HostSpace::execution_space == CudaHostPinnedSpace::execution_space
  enum : bool { assignable = true };
  enum : bool { accessible = true };
  enum : bool { deepcopy = true };
};

//----------------------------------------

template <>
struct MemorySpaceAccess<Kokkos::CudaSpace, Kokkos::HostSpace> {
  enum : bool { assignable = false };
  enum : bool { accessible = false };
  enum : bool { deepcopy = true };
};

template <>
struct MemorySpaceAccess<Kokkos::CudaSpace, Kokkos::CudaUVMSpace> {
  // CudaSpace::execution_space == CudaUVMSpace::execution_space
  enum : bool { assignable = true };
  enum : bool { accessible = true };
  enum : bool { deepcopy = true };
};

template <>
struct MemorySpaceAccess<Kokkos::CudaSpace, Kokkos::CudaHostPinnedSpace> {
  // CudaSpace::execution_space != CudaHostPinnedSpace::execution_space
  enum : bool { assignable = false };
  enum : bool { accessible = true };  // CudaSpace::execution_space
  enum : bool { deepcopy = true };
};

//----------------------------------------
// CudaUVMSpace::execution_space == Cuda
// CudaUVMSpace accessible to both Cuda and Host

template <>
struct MemorySpaceAccess<Kokkos::CudaUVMSpace, Kokkos::HostSpace> {
  enum : bool { assignable = false };
  enum : bool { accessible = false };  // Cuda cannot access HostSpace
  enum : bool { deepcopy = true };
};

template <>
struct MemorySpaceAccess<Kokkos::CudaUVMSpace, Kokkos::CudaSpace> {
  // CudaUVMSpace::execution_space == CudaSpace::execution_space
  // Can access CudaUVMSpace from Host but cannot access CudaSpace from Host
  enum : bool { assignable = false };

  // CudaUVMSpace::execution_space can access CudaSpace
  enum : bool { accessible = true };
  enum : bool { deepcopy = true };
};

template <>
struct MemorySpaceAccess<Kokkos::CudaUVMSpace, Kokkos::CudaHostPinnedSpace> {
  // CudaUVMSpace::execution_space != CudaHostPinnedSpace::execution_space
  enum : bool { assignable = false };
  enum : bool { accessible = true };  // CudaUVMSpace::execution_space
  enum : bool { deepcopy = true };
};

//----------------------------------------
// CudaHostPinnedSpace::execution_space == HostSpace::execution_space
// CudaHostPinnedSpace accessible to both Cuda and Host

template <>
struct MemorySpaceAccess<Kokkos::CudaHostPinnedSpace, Kokkos::HostSpace> {
  enum : bool { assignable = false };  // Cannot access from Cuda
  enum : bool { accessible = true };   // CudaHostPinnedSpace::execution_space
  enum : bool { deepcopy = true };
};

template <>
struct MemorySpaceAccess<Kokkos::CudaHostPinnedSpace, Kokkos::CudaSpace> {
  enum : bool { assignable = false };  // Cannot access from Host
  enum : bool { accessible = false };
  enum : bool { deepcopy = true };
};

template <>
struct MemorySpaceAccess<Kokkos::CudaHostPinnedSpace, Kokkos::CudaUVMSpace> {
  enum : bool { assignable = false };  // different execution_space
  enum : bool { accessible = true };   // same accessibility
  enum : bool { deepcopy = true };
};

//----------------------------------------

}  // namespace Impl
}  // namespace Kokkos
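
/* Example (illustrative sketch, not part of the original header): the
   MemorySpaceAccess specializations above are compile-time facts and can
   be checked with static_assert.  Application code would normally query
   the public trait Kokkos::SpaceAccessibility rather than this
   Impl-namespace detail.

   \code
   #include <Kokkos_Core.hpp>

   static_assert(Kokkos::Impl::MemorySpaceAccess<
                     Kokkos::HostSpace, Kokkos::CudaUVMSpace>::accessible,
                 "host code may dereference UVM allocations");
   static_assert(!Kokkos::Impl::MemorySpaceAccess<
                     Kokkos::HostSpace, Kokkos::CudaSpace>::accessible,
                 "host code may not dereference device-only allocations");
   \endcode
*/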

/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/

namespace Kokkos {
namespace Impl {

void DeepCopyAsyncCuda(void* dst, const void* src, size_t n);

template <>
struct DeepCopy<CudaSpace, CudaSpace, Cuda> {
  DeepCopy(void* dst, const void* src, size_t);
  DeepCopy(const Cuda&, void* dst, const void* src, size_t);
};

template <>
struct DeepCopy<CudaSpace, HostSpace, Cuda> {
  DeepCopy(void* dst, const void* src, size_t);
  DeepCopy(const Cuda&, void* dst, const void* src, size_t);
};

template <>
struct DeepCopy<HostSpace, CudaSpace, Cuda> {
  DeepCopy(void* dst, const void* src, size_t);
  DeepCopy(const Cuda&, void* dst, const void* src, size_t);
};

template <>
struct DeepCopy<CudaUVMSpace, CudaUVMSpace, Cuda> {
  DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(dst, src, n);
  }
  DeepCopy(const Cuda& instance, void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(instance, dst, src, n);
  }
};

template <>
struct DeepCopy<CudaUVMSpace, HostSpace, Cuda> {
  DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, HostSpace, Cuda>(dst, src, n);
  }
  DeepCopy(const Cuda& instance, void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, HostSpace, Cuda>(instance, dst, src, n);
  }
};

template <>
struct DeepCopy<HostSpace, CudaUVMSpace, Cuda> {
  DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<HostSpace, CudaSpace, Cuda>(dst, src, n);
  }
  DeepCopy(const Cuda& instance, void* dst, const void* src, size_t n) {
    (void)DeepCopy<HostSpace, CudaSpace, Cuda>(instance, dst, src, n);
  }
};

template <>
struct DeepCopy<CudaHostPinnedSpace, CudaHostPinnedSpace, Cuda> {
  DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(dst, src, n);
  }
  DeepCopy(const Cuda& instance, void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(instance, dst, src, n);
  }
};

template <>
struct DeepCopy<CudaHostPinnedSpace, HostSpace, Cuda> {
  DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, HostSpace, Cuda>(dst, src, n);
  }
  DeepCopy(const Cuda& instance, void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, HostSpace, Cuda>(instance, dst, src, n);
  }
};

template <>
struct DeepCopy<HostSpace, CudaHostPinnedSpace, Cuda> {
  DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<HostSpace, CudaSpace, Cuda>(dst, src, n);
  }
  DeepCopy(const Cuda& instance, void* dst, const void* src, size_t n) {
    (void)DeepCopy<HostSpace, CudaSpace, Cuda>(instance, dst, src, n);
  }
};

template <>
struct DeepCopy<CudaUVMSpace, CudaSpace, Cuda> {
  DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(dst, src, n);
  }
  DeepCopy(const Cuda& instance, void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(instance, dst, src, n);
  }
};

template <>
struct DeepCopy<CudaSpace, CudaUVMSpace, Cuda> {
  DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(dst, src, n);
  }
  DeepCopy(const Cuda& instance, void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(instance, dst, src, n);
  }
};

template <>
struct DeepCopy<CudaUVMSpace, CudaHostPinnedSpace, Cuda> {
  DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(dst, src, n);
  }
  DeepCopy(const Cuda& instance, void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(instance, dst, src, n);
  }
};

template <>
struct DeepCopy<CudaHostPinnedSpace, CudaUVMSpace, Cuda> {
  DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(dst, src, n);
  }
  DeepCopy(const Cuda& instance, void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(instance, dst, src, n);
  }
};

template <>
struct DeepCopy<CudaSpace, CudaHostPinnedSpace, Cuda> {
  DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(dst, src, n);
  }
  DeepCopy(const Cuda& instance, void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(instance, dst, src, n);
  }
};

template <>
struct DeepCopy<CudaHostPinnedSpace, CudaSpace, Cuda> {
  DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(dst, src, n);
  }
  DeepCopy(const Cuda& instance, void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(instance, dst, src, n);
  }
};

template <class ExecutionSpace>
struct DeepCopy<CudaSpace, CudaSpace, ExecutionSpace> {
  inline DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(dst, src, n);
  }

  inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
                  size_t n) {
    exec.fence();
    DeepCopyAsyncCuda(dst, src, n);
  }
};

template <class ExecutionSpace>
struct DeepCopy<CudaSpace, HostSpace, ExecutionSpace> {
  inline DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, HostSpace, Cuda>(dst, src, n);
  }

  inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
                  size_t n) {
    exec.fence();
    DeepCopyAsyncCuda(dst, src, n);
  }
};

template <class ExecutionSpace>
struct DeepCopy<HostSpace, CudaSpace, ExecutionSpace> {
  inline DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<HostSpace, CudaSpace, Cuda>(dst, src, n);
  }

  inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
                  size_t n) {
    exec.fence();
    DeepCopyAsyncCuda(dst, src, n);
  }
};

template <class ExecutionSpace>
struct DeepCopy<CudaSpace, CudaUVMSpace, ExecutionSpace> {
  inline DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(dst, src, n);
  }

  inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
                  size_t n) {
    exec.fence();
    DeepCopyAsyncCuda(dst, src, n);
  }
};

template <class ExecutionSpace>
struct DeepCopy<CudaSpace, CudaHostPinnedSpace, ExecutionSpace> {
  inline DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, HostSpace, Cuda>(dst, src, n);
  }

  inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
                  size_t n) {
    exec.fence();
    DeepCopyAsyncCuda(dst, src, n);
  }
};

template <class ExecutionSpace>
struct DeepCopy<CudaUVMSpace, CudaSpace, ExecutionSpace> {
  inline DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(dst, src, n);
  }

  inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
                  size_t n) {
    exec.fence();
    DeepCopyAsyncCuda(dst, src, n);
  }
};

template <class ExecutionSpace>
struct DeepCopy<CudaUVMSpace, CudaUVMSpace, ExecutionSpace> {
  inline DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(dst, src, n);
  }

  inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
                  size_t n) {
    exec.fence();
    DeepCopyAsyncCuda(dst, src, n);
  }
};

template <class ExecutionSpace>
struct DeepCopy<CudaUVMSpace, CudaHostPinnedSpace, ExecutionSpace> {
  inline DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, HostSpace, Cuda>(dst, src, n);
  }

  inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
                  size_t n) {
    exec.fence();
    DeepCopyAsyncCuda(dst, src, n);
  }
};

template <class ExecutionSpace>
struct DeepCopy<CudaUVMSpace, HostSpace, ExecutionSpace> {
  inline DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, HostSpace, Cuda>(dst, src, n);
  }

  inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
                  size_t n) {
    exec.fence();
    DeepCopyAsyncCuda(dst, src, n);
  }
};

template <class ExecutionSpace>
struct DeepCopy<CudaHostPinnedSpace, CudaSpace, ExecutionSpace> {
  inline DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<HostSpace, CudaSpace, Cuda>(dst, src, n);
  }

  inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
                  size_t n) {
    exec.fence();
    DeepCopyAsyncCuda(dst, src, n);
  }
};

template <class ExecutionSpace>
struct DeepCopy<CudaHostPinnedSpace, CudaUVMSpace, ExecutionSpace> {
  inline DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<HostSpace, CudaSpace, Cuda>(dst, src, n);
  }

  inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
                  size_t n) {
    exec.fence();
    DeepCopyAsyncCuda(dst, src, n);
  }
};

template <class ExecutionSpace>
struct DeepCopy<CudaHostPinnedSpace, CudaHostPinnedSpace, ExecutionSpace> {
  inline DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<HostSpace, HostSpace, Cuda>(dst, src, n);
  }

  inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
                  size_t n) {
    exec.fence();
    DeepCopyAsyncCuda(dst, src, n);
  }
};

template <class ExecutionSpace>
struct DeepCopy<CudaHostPinnedSpace, HostSpace, ExecutionSpace> {
  inline DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<HostSpace, HostSpace, Cuda>(dst, src, n);
  }

  inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
                  size_t n) {
    exec.fence();
    DeepCopyAsyncCuda(dst, src, n);
  }
};

template <class ExecutionSpace>
struct DeepCopy<HostSpace, CudaUVMSpace, ExecutionSpace> {
  inline DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<HostSpace, CudaSpace, Cuda>(dst, src, n);
  }

  inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
                  size_t n) {
    exec.fence();
    DeepCopyAsyncCuda(dst, src, n);
  }
};

template <class ExecutionSpace>
struct DeepCopy<HostSpace, CudaHostPinnedSpace, ExecutionSpace> {
  inline DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<HostSpace, HostSpace, Cuda>(dst, src, n);
  }

  inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
                  size_t n) {
    exec.fence();
    DeepCopyAsyncCuda(dst, src, n);
  }
};

}  // namespace Impl
}  // namespace Kokkos
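
/* Example (illustrative sketch, not part of the original header): the
   DeepCopy specializations above are the backend of the public
   Kokkos::deep_copy front end.  Copying a device View to its host mirror:

   \code
   #include <Kokkos_Core.hpp>

   void copy_example() {
     Kokkos::View<int*, Kokkos::CudaSpace> dev("dev", 10);
     auto host = Kokkos::create_mirror_view(dev);  // lands in HostSpace

     Kokkos::deep_copy(host, dev);  // synchronous device -> host copy

     // Variant taking an execution space instance: the copy is ordered
     // with respect to work on 'exec' rather than globally fenced.
     Kokkos::Cuda exec;
     Kokkos::deep_copy(exec, host, dev);
     exec.fence();  // wait before reading 'host'
   }
   \endcode
*/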

//----------------------------------------------------------------------------
//----------------------------------------------------------------------------

namespace Kokkos {
namespace Impl {

template <>
class SharedAllocationRecord<Kokkos::CudaSpace, void>
    : public HostInaccessibleSharedAllocationRecordCommon<Kokkos::CudaSpace> {
 private:
  friend class SharedAllocationRecord<Kokkos::CudaUVMSpace, void>;
  friend class SharedAllocationRecordCommon<Kokkos::CudaSpace>;
  friend class HostInaccessibleSharedAllocationRecordCommon<Kokkos::CudaSpace>;

  using RecordBase = SharedAllocationRecord<void, void>;
  using base_t =
      HostInaccessibleSharedAllocationRecordCommon<Kokkos::CudaSpace>;

  SharedAllocationRecord(const SharedAllocationRecord&) = delete;
  SharedAllocationRecord& operator=(const SharedAllocationRecord&) = delete;

  static ::cudaTextureObject_t attach_texture_object(
      const unsigned sizeof_alias, void* const alloc_ptr,
      const size_t alloc_size);

#ifdef KOKKOS_ENABLE_DEBUG
  static RecordBase s_root_record;
#endif

  ::cudaTextureObject_t m_tex_obj = 0;
  const Kokkos::CudaSpace m_space;

 protected:
  ~SharedAllocationRecord();
  SharedAllocationRecord() = default;

  SharedAllocationRecord(
      const Kokkos::CudaSpace& arg_space, const std::string& arg_label,
      const size_t arg_alloc_size,
      const RecordBase::function_type arg_dealloc = &base_t::deallocate);

 public:
  template <typename AliasType>
  inline ::cudaTextureObject_t attach_texture_object() {
    static_assert((std::is_same<AliasType, int>::value ||
                   std::is_same<AliasType, ::int2>::value ||
                   std::is_same<AliasType, ::int4>::value),
                  "Cuda texture fetch only supported for alias types of int, "
                  "::int2, or ::int4");

    if (m_tex_obj == 0) {
      m_tex_obj = attach_texture_object(sizeof(AliasType),
                                        (void*)RecordBase::m_alloc_ptr,
                                        RecordBase::m_alloc_size);
    }

    return m_tex_obj;
  }

  template <typename AliasType>
  inline int attach_texture_object_offset(const AliasType* const ptr) {
    // Texture object is attached to the entire allocation range
    return ptr - reinterpret_cast<AliasType*>(RecordBase::m_alloc_ptr);
  }
};

template <>
class SharedAllocationRecord<Kokkos::CudaUVMSpace, void>
    : public SharedAllocationRecordCommon<Kokkos::CudaUVMSpace> {
 private:
  friend class SharedAllocationRecordCommon<Kokkos::CudaUVMSpace>;

  using base_t     = SharedAllocationRecordCommon<Kokkos::CudaUVMSpace>;
  using RecordBase = SharedAllocationRecord<void, void>;

  SharedAllocationRecord(const SharedAllocationRecord&) = delete;
  SharedAllocationRecord& operator=(const SharedAllocationRecord&) = delete;

  static RecordBase s_root_record;

  ::cudaTextureObject_t m_tex_obj = 0;
  const Kokkos::CudaUVMSpace m_space;

 protected:
  ~SharedAllocationRecord();
  SharedAllocationRecord() = default;

  SharedAllocationRecord(
      const Kokkos::CudaUVMSpace& arg_space, const std::string& arg_label,
      const size_t arg_alloc_size,
      const RecordBase::function_type arg_dealloc = &base_t::deallocate);

 public:
  template <typename AliasType>
  inline ::cudaTextureObject_t attach_texture_object() {
    static_assert((std::is_same<AliasType, int>::value ||
                   std::is_same<AliasType, ::int2>::value ||
                   std::is_same<AliasType, ::int4>::value),
                  "Cuda texture fetch only supported for alias types of int, "
                  "::int2, or ::int4");

    if (m_tex_obj == 0) {
      m_tex_obj = SharedAllocationRecord<Kokkos::CudaSpace, void>::
          attach_texture_object(sizeof(AliasType),
                                (void*)RecordBase::m_alloc_ptr,
                                RecordBase::m_alloc_size);
    }

    return m_tex_obj;
  }

  template <typename AliasType>
  inline int attach_texture_object_offset(const AliasType* const ptr) {
    // Texture object is attached to the entire allocation range
    return ptr - reinterpret_cast<AliasType*>(RecordBase::m_alloc_ptr);
  }
};

template <>
class SharedAllocationRecord<Kokkos::CudaHostPinnedSpace, void>
    : public SharedAllocationRecordCommon<Kokkos::CudaHostPinnedSpace> {
 private:
  friend class SharedAllocationRecordCommon<Kokkos::CudaHostPinnedSpace>;

  using RecordBase = SharedAllocationRecord<void, void>;
  using base_t     = SharedAllocationRecordCommon<Kokkos::CudaHostPinnedSpace>;

  SharedAllocationRecord(const SharedAllocationRecord&) = delete;
  SharedAllocationRecord& operator=(const SharedAllocationRecord&) = delete;

  static RecordBase s_root_record;

  const Kokkos::CudaHostPinnedSpace m_space;

 protected:
  ~SharedAllocationRecord();
  SharedAllocationRecord() = default;

  SharedAllocationRecord(
      const Kokkos::CudaHostPinnedSpace& arg_space,
      const std::string& arg_label, const size_t arg_alloc_size,
      const RecordBase::function_type arg_dealloc = &deallocate);
};

}  // namespace Impl
}  // namespace Kokkos
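
/* Example (illustrative sketch, not part of the original header): the
   SharedAllocationRecord specializations above implement the reference-
   counted allocation tracking beneath Kokkos::View; user code observes it
   only indirectly, e.g. through allocation labels and use counts:

   \code
   #include <Kokkos_Core.hpp>
   #include <iostream>

   void record_example() {
     Kokkos::View<double*, Kokkos::CudaSpace> a("pressure", 64);
     auto b = a;  // b shares a's underlying allocation record
     std::cout << a.label() << " use_count=" << a.use_count() << '\n';
     // prints: pressure use_count=2
   }
   \endcode
*/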

//----------------------------------------------------------------------------
//----------------------------------------------------------------------------

#endif /* #if defined( KOKKOS_ENABLE_CUDA ) */
#endif /* #define KOKKOS_CUDASPACE_HPP */