#ifndef KOKKOS_CUDASPACE_HPP
#define KOKKOS_CUDASPACE_HPP

#include <Kokkos_Macros.hpp>
#if defined(KOKKOS_ENABLE_CUDA)

#include <Kokkos_Core_fwd.hpp>

#include <string>
#include <memory>

#include <Kokkos_HostSpace.hpp>
#include <impl/Kokkos_SharedAlloc.hpp>

#include <impl/Kokkos_Profiling_Interface.hpp>

#include <Cuda/Kokkos_Cuda_abort.hpp>

#ifdef KOKKOS_IMPL_DEBUG_CUDA_PIN_UVM_TO_HOST
extern "C" bool kokkos_impl_cuda_pin_uvm_to_host();
extern "C" void kokkos_impl_cuda_set_pin_uvm_to_host(bool);
#endif

/*--------------------------------------------------------------------------*/

namespace Kokkos {

/** \brief  Cuda on-device memory management */
class CudaSpace {
 public:
  //! Tag this class as a kokkos memory space
  using memory_space    = CudaSpace;
  using execution_space = Kokkos::Cuda;
  using device_type     = Kokkos::Device<execution_space, memory_space>;

  using size_type = unsigned int;
  /*--------------------------------*/

  CudaSpace();
  CudaSpace(CudaSpace&& rhs)      = default;
  CudaSpace(const CudaSpace& rhs) = default;
  CudaSpace& operator=(CudaSpace&& rhs) = default;
  CudaSpace& operator=(const CudaSpace& rhs) = default;
  ~CudaSpace() = default;
  /**\brief  Allocate untracked memory in the cuda space */
  void* allocate(const size_t arg_alloc_size) const;
  void* allocate(const char* arg_label, const size_t arg_alloc_size,
                 const size_t arg_logical_size = 0) const;

  /**\brief  Deallocate untracked memory in the cuda space */
  void deallocate(void* const arg_alloc_ptr, const size_t arg_alloc_size) const;
  void deallocate(const char* arg_label, void* const arg_alloc_ptr,
                  const size_t arg_alloc_size,
                  const size_t arg_logical_size = 0) const;
 private:
  template <class, class, class, class>
  friend class Kokkos::Experimental::LogicalMemorySpace;

  void* impl_allocate(const char* arg_label, const size_t arg_alloc_size,
                      const size_t arg_logical_size = 0,
                      const Kokkos::Tools::SpaceHandle =
                          Kokkos::Tools::make_space_handle(name())) const;
  void impl_deallocate(const char* arg_label, void* const arg_alloc_ptr,
                       const size_t arg_alloc_size,
                       const size_t arg_logical_size = 0,
                       const Kokkos::Tools::SpaceHandle =
                           Kokkos::Tools::make_space_handle(name())) const;
 public:
  /**\brief Return Name of the MemorySpace */
  static constexpr const char* name() { return m_name; }
  /*--------------------------------*/
  /** \brief  Error reporting for HostSpace attempt to access CudaSpace */
  KOKKOS_DEPRECATED static void access_error();
  KOKKOS_DEPRECATED static void access_error(const void* const);
 private:
  static constexpr const char* m_name = "Cuda";
  friend class Kokkos::Impl::SharedAllocationRecord<Kokkos::CudaSpace, void>;
};
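/* Illustrative sketch (not part of the original header): allocating raw,
 * untracked device memory through CudaSpace. The label and size below are
 * arbitrary; most user code should go through Kokkos::View instead, which
 * tracks its allocation via SharedAllocationRecord.
 *
 *   Kokkos::CudaSpace space;
 *   const size_t bytes = 1024 * sizeof(double);
 *   void* ptr = space.allocate("example_buffer", bytes);
 *   // ... pass ptr to device kernels ...
 *   space.deallocate("example_buffer", ptr, bytes);
 */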
/** \brief  Cuda memory that is accessible to the host execution space
 *          through Cuda's unified virtual memory (UVM).
 */
class CudaUVMSpace {
 public:
  //! Tag this class as a kokkos memory space
  using memory_space    = CudaUVMSpace;
  using execution_space = Cuda;
  using device_type     = Kokkos::Device<execution_space, memory_space>;
  using size_type       = unsigned int;
  /** \brief  CudaUVMSpace specific routine */
  KOKKOS_DEPRECATED static int number_of_allocations();
  /*--------------------------------*/

  CudaUVMSpace();
  CudaUVMSpace(CudaUVMSpace&& rhs)      = default;
  CudaUVMSpace(const CudaUVMSpace& rhs) = default;
  CudaUVMSpace& operator=(CudaUVMSpace&& rhs) = default;
  CudaUVMSpace& operator=(const CudaUVMSpace& rhs) = default;
  ~CudaUVMSpace() = default;
  /**\brief  Allocate untracked memory in the cuda UVM space */
  void* allocate(const size_t arg_alloc_size) const;
  void* allocate(const char* arg_label, const size_t arg_alloc_size,
                 const size_t arg_logical_size = 0) const;

  /**\brief  Deallocate untracked memory in the cuda UVM space */
  void deallocate(void* const arg_alloc_ptr, const size_t arg_alloc_size) const;
  void deallocate(const char* arg_label, void* const arg_alloc_ptr,
                  const size_t arg_alloc_size,
                  const size_t arg_logical_size = 0) const;
 private:
  template <class, class, class, class>
  friend class Kokkos::Experimental::LogicalMemorySpace;

  void* impl_allocate(const char* arg_label, const size_t arg_alloc_size,
                      const size_t arg_logical_size = 0,
                      const Kokkos::Tools::SpaceHandle =
                          Kokkos::Tools::make_space_handle(name())) const;
  void impl_deallocate(const char* arg_label, void* const arg_alloc_ptr,
                       const size_t arg_alloc_size,
                       const size_t arg_logical_size = 0,
                       const Kokkos::Tools::SpaceHandle =
                           Kokkos::Tools::make_space_handle(name())) const;
 public:
  /**\brief Return Name of the MemorySpace */
  static constexpr const char* name() { return m_name; }
#ifdef KOKKOS_IMPL_DEBUG_CUDA_PIN_UVM_TO_HOST
  static bool cuda_pin_uvm_to_host();
  static void cuda_set_pin_uvm_to_host(bool val);
#endif
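  // Debug-only sketch: when KOKKOS_IMPL_DEBUG_CUDA_PIN_UVM_TO_HOST is
  // defined at configure time, subsequent UVM allocations can be steered to
  // stay host-resident. Hypothetical usage:
  //
  //   Kokkos::CudaUVMSpace::cuda_set_pin_uvm_to_host(true);
  //   // ... allocations made here are pinned to the host ...
  //   Kokkos::CudaUVMSpace::cuda_set_pin_uvm_to_host(false);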
 private:
#ifdef KOKKOS_IMPL_DEBUG_CUDA_PIN_UVM_TO_HOST
  static bool kokkos_impl_cuda_pin_uvm_to_host_v;
#endif
  static constexpr const char* m_name = "CudaUVM";
};
/** \brief  Host memory that is accessible to Cuda execution space
 *          through Cuda's host-pinned memory allocation.
 */
class CudaHostPinnedSpace {
 public:
  //! Tag this class as a kokkos memory space
  /** \brief  Memory is in HostSpace so use the HostSpace::execution_space */
  using execution_space = HostSpace::execution_space;
  using memory_space    = CudaHostPinnedSpace;
  using device_type     = Kokkos::Device<execution_space, memory_space>;
  using size_type       = unsigned int;
  /*--------------------------------*/

  CudaHostPinnedSpace();
  CudaHostPinnedSpace(CudaHostPinnedSpace&& rhs)      = default;
  CudaHostPinnedSpace(const CudaHostPinnedSpace& rhs) = default;
  CudaHostPinnedSpace& operator=(CudaHostPinnedSpace&& rhs) = default;
  CudaHostPinnedSpace& operator=(const CudaHostPinnedSpace& rhs) = default;
  ~CudaHostPinnedSpace() = default;
  /**\brief  Allocate untracked memory in the space */
  void* allocate(const size_t arg_alloc_size) const;
  void* allocate(const char* arg_label, const size_t arg_alloc_size,
                 const size_t arg_logical_size = 0) const;

  /**\brief  Deallocate untracked memory in the space */
  void deallocate(void* const arg_alloc_ptr, const size_t arg_alloc_size) const;
  void deallocate(const char* arg_label, void* const arg_alloc_ptr,
                  const size_t arg_alloc_size,
                  const size_t arg_logical_size = 0) const;
 private:
  template <class, class, class, class>
  friend class Kokkos::Experimental::LogicalMemorySpace;

  void* impl_allocate(const char* arg_label, const size_t arg_alloc_size,
                      const size_t arg_logical_size = 0,
                      const Kokkos::Tools::SpaceHandle =
                          Kokkos::Tools::make_space_handle(name())) const;
  void impl_deallocate(const char* arg_label, void* const arg_alloc_ptr,
                       const size_t arg_alloc_size,
                       const size_t arg_logical_size = 0,
                       const Kokkos::Tools::SpaceHandle =
                           Kokkos::Tools::make_space_handle(name())) const;
 public:
  /**\brief Return Name of the MemorySpace */
  static constexpr const char* name() { return m_name; }

 private:
  static constexpr const char* m_name = "CudaHostPinned";
};

}  // namespace Kokkos
/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/

namespace Kokkos {
namespace Impl {

cudaStream_t cuda_get_deep_copy_stream();

const std::unique_ptr<Kokkos::Cuda>& cuda_get_deep_copy_space(
    bool initialize = true);
static_assert(Kokkos::Impl::MemorySpaceAccess<Kokkos::CudaSpace,
                                              Kokkos::CudaSpace>::assignable,
              "");
static_assert(Kokkos::Impl::MemorySpaceAccess<
                  Kokkos::CudaUVMSpace, Kokkos::CudaUVMSpace>::assignable,
              "");
static_assert(
    Kokkos::Impl::MemorySpaceAccess<Kokkos::CudaHostPinnedSpace,
                                    Kokkos::CudaHostPinnedSpace>::assignable,
    "");
//----------------------------------------

template <>
struct MemorySpaceAccess<Kokkos::HostSpace, Kokkos::CudaSpace> {
  enum : bool { assignable = false };
  enum : bool { accessible = false };
  enum : bool { deepcopy = true };
};
template <>
struct MemorySpaceAccess<Kokkos::HostSpace, Kokkos::CudaUVMSpace> {
  // HostSpace::execution_space != CudaUVMSpace::execution_space
  enum : bool { assignable = false };
  enum : bool { accessible = true };
  enum : bool { deepcopy = true };
};
template <>
struct MemorySpaceAccess<Kokkos::HostSpace, Kokkos::CudaHostPinnedSpace> {
  // HostSpace::execution_space == CudaHostPinnedSpace::execution_space
  enum : bool { assignable = true };
  enum : bool { accessible = true };
  enum : bool { deepcopy = true };
};
//----------------------------------------

template <>
struct MemorySpaceAccess<Kokkos::CudaSpace, Kokkos::HostSpace> {
  enum : bool { assignable = false };
  enum : bool { accessible = false };  // Cuda cannot access HostSpace
  enum : bool { deepcopy = true };
};
template <>
struct MemorySpaceAccess<Kokkos::CudaSpace, Kokkos::CudaUVMSpace> {
  // CudaSpace::execution_space == CudaUVMSpace::execution_space
  enum : bool { assignable = true };
  enum : bool { accessible = true };
  enum : bool { deepcopy = true };
};
template <>
struct MemorySpaceAccess<Kokkos::CudaSpace, Kokkos::CudaHostPinnedSpace> {
  // CudaSpace::execution_space != CudaHostPinnedSpace::execution_space
  enum : bool { assignable = false };
  enum : bool { accessible = true };  // CudaSpace can access CudaHostPinnedSpace
  enum : bool { deepcopy = true };
};
//----------------------------------------
// CudaUVMSpace::execution_space == Cuda
// CudaUVMSpace accessible to both Cuda and Host

template <>
struct MemorySpaceAccess<Kokkos::CudaUVMSpace, Kokkos::HostSpace> {
  enum : bool { assignable = false };
  enum : bool { accessible = false };  // Cuda cannot access HostSpace
  enum : bool { deepcopy = true };
};
template <>
struct MemorySpaceAccess<Kokkos::CudaUVMSpace, Kokkos::CudaSpace> {
  // CudaUVMSpace::execution_space == CudaSpace::execution_space
  // Can access CudaUVMSpace from Host but cannot access CudaSpace from Host
  enum : bool { assignable = false };

  // CudaUVMSpace::execution_space can access CudaSpace
  enum : bool { accessible = true };
  enum : bool { deepcopy = true };
};
template <>
struct MemorySpaceAccess<Kokkos::CudaUVMSpace, Kokkos::CudaHostPinnedSpace> {
  // CudaUVMSpace::execution_space != CudaHostPinnedSpace::execution_space
  enum : bool { assignable = false };
  enum : bool { accessible = true };  // Can access from Cuda
  enum : bool { deepcopy = true };
};
//----------------------------------------
// CudaHostPinnedSpace::execution_space == HostSpace::execution_space

template <>
struct MemorySpaceAccess<Kokkos::CudaHostPinnedSpace, Kokkos::HostSpace> {
  enum : bool { assignable = false };  // Cannot access from Cuda
  enum : bool { accessible = true };   // Can access from Host
  enum : bool { deepcopy = true };
};
template <>
struct MemorySpaceAccess<Kokkos::CudaHostPinnedSpace, Kokkos::CudaSpace> {
  enum : bool { assignable = false };  // Cannot access from Host
  enum : bool { accessible = false };
  enum : bool { deepcopy = true };
};
template <>
struct MemorySpaceAccess<Kokkos::CudaHostPinnedSpace, Kokkos::CudaUVMSpace> {
  enum : bool { assignable = false };  // Cannot access from Host
  enum : bool { accessible = true };
  enum : bool { deepcopy = true };
};
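/* The specializations above form the compile-time access matrix that Kokkos
 * consults when checking View assignments and deep copies. A minimal sketch
 * of querying it directly (user code would usually go through
 * Kokkos::SpaceAccessibility instead):
 *
 *   static_assert(Kokkos::Impl::MemorySpaceAccess<
 *                     Kokkos::HostSpace, Kokkos::CudaUVMSpace>::accessible,
 *                 "host code may dereference UVM allocations");
 *   static_assert(!Kokkos::Impl::MemorySpaceAccess<
 *                     Kokkos::HostSpace, Kokkos::CudaSpace>::accessible,
 *                 "host code may not dereference device-only allocations");
 */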
//----------------------------------------------------------------------------

void DeepCopyAsyncCuda(void* dst, const void* src, size_t n);
template <>
struct DeepCopy<CudaSpace, CudaSpace, Cuda> {
  DeepCopy(void* dst, const void* src, size_t);
  DeepCopy(const Cuda&, void* dst, const void* src, size_t);
};

template <>
struct DeepCopy<CudaSpace, HostSpace, Cuda> {
  DeepCopy(void* dst, const void* src, size_t);
  DeepCopy(const Cuda&, void* dst, const void* src, size_t);
};

template <>
struct DeepCopy<HostSpace, CudaSpace, Cuda> {
  DeepCopy(void* dst, const void* src, size_t);
  DeepCopy(const Cuda&, void* dst, const void* src, size_t);
};
template <>
struct DeepCopy<CudaUVMSpace, CudaUVMSpace, Cuda> {
  DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(dst, src, n);
  }
  DeepCopy(const Cuda& instance, void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(instance, dst, src, n);
  }
};

template <>
struct DeepCopy<CudaUVMSpace, HostSpace, Cuda> {
  DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, HostSpace, Cuda>(dst, src, n);
  }
  DeepCopy(const Cuda& instance, void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, HostSpace, Cuda>(instance, dst, src, n);
  }
};

template <>
struct DeepCopy<HostSpace, CudaUVMSpace, Cuda> {
  DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<HostSpace, CudaSpace, Cuda>(dst, src, n);
  }
  DeepCopy(const Cuda& instance, void* dst, const void* src, size_t n) {
    (void)DeepCopy<HostSpace, CudaSpace, Cuda>(instance, dst, src, n);
  }
};

template <>
struct DeepCopy<CudaHostPinnedSpace, CudaHostPinnedSpace, Cuda> {
  DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(dst, src, n);
  }
  DeepCopy(const Cuda& instance, void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(instance, dst, src, n);
  }
};

template <>
struct DeepCopy<CudaHostPinnedSpace, HostSpace, Cuda> {
  DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, HostSpace, Cuda>(dst, src, n);
  }
  DeepCopy(const Cuda& instance, void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, HostSpace, Cuda>(instance, dst, src, n);
  }
};

template <>
struct DeepCopy<HostSpace, CudaHostPinnedSpace, Cuda> {
  DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<HostSpace, CudaSpace, Cuda>(dst, src, n);
  }
  DeepCopy(const Cuda& instance, void* dst, const void* src, size_t n) {
    (void)DeepCopy<HostSpace, CudaSpace, Cuda>(instance, dst, src, n);
  }
};
template <>
struct DeepCopy<CudaUVMSpace, CudaSpace, Cuda> {
  DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(dst, src, n);
  }
  DeepCopy(const Cuda& instance, void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(instance, dst, src, n);
  }
};

template <>
struct DeepCopy<CudaSpace, CudaUVMSpace, Cuda> {
  DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(dst, src, n);
  }
  DeepCopy(const Cuda& instance, void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(instance, dst, src, n);
  }
};

template <>
struct DeepCopy<CudaUVMSpace, CudaHostPinnedSpace, Cuda> {
  DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(dst, src, n);
  }
  DeepCopy(const Cuda& instance, void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(instance, dst, src, n);
  }
};

template <>
struct DeepCopy<CudaHostPinnedSpace, CudaUVMSpace, Cuda> {
  DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(dst, src, n);
  }
  DeepCopy(const Cuda& instance, void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(instance, dst, src, n);
  }
};

template <>
struct DeepCopy<CudaSpace, CudaHostPinnedSpace, Cuda> {
  DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(dst, src, n);
  }
  DeepCopy(const Cuda& instance, void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(instance, dst, src, n);
  }
};

template <>
struct DeepCopy<CudaHostPinnedSpace, CudaSpace, Cuda> {
  DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(dst, src, n);
  }
  DeepCopy(const Cuda& instance, void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(instance, dst, src, n);
  }
};
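/* The specializations above reduce every pairing of the Cuda-related memory
 * spaces to one of the three base cases declared first. At the user level
 * they back Kokkos::deep_copy, e.g. (names are arbitrary):
 *
 *   Kokkos::View<double*, Kokkos::CudaSpace> dev("dev", 100);
 *   auto host = Kokkos::create_mirror_view(dev);   // HostSpace mirror
 *   Kokkos::deep_copy(dev, host);                  // HostSpace -> CudaSpace
 *   Kokkos::deep_copy(host, dev);                  // CudaSpace -> HostSpace
 */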
template <class ExecutionSpace>
struct DeepCopy<CudaSpace, CudaSpace, ExecutionSpace> {
  inline DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(dst, src, n);
  }

  inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
                  size_t n) {
    exec.fence();
    DeepCopyAsyncCuda(dst, src, n);
  }
};

template <class ExecutionSpace>
struct DeepCopy<CudaSpace, HostSpace, ExecutionSpace> {
  inline DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, HostSpace, Cuda>(dst, src, n);
  }

  inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
                  size_t n) {
    exec.fence();
    DeepCopyAsyncCuda(dst, src, n);
  }
};

template <class ExecutionSpace>
struct DeepCopy<HostSpace, CudaSpace, ExecutionSpace> {
  inline DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<HostSpace, CudaSpace, Cuda>(dst, src, n);
  }

  inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
                  size_t n) {
    exec.fence();
    DeepCopyAsyncCuda(dst, src, n);
  }
};
template <class ExecutionSpace>
struct DeepCopy<CudaSpace, CudaUVMSpace, ExecutionSpace> {
  inline DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(dst, src, n);
  }

  inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
                  size_t n) {
    exec.fence();
    DeepCopyAsyncCuda(dst, src, n);
  }
};

template <class ExecutionSpace>
struct DeepCopy<CudaSpace, CudaHostPinnedSpace, ExecutionSpace> {
  inline DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, HostSpace, Cuda>(dst, src, n);
  }

  inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
                  size_t n) {
    exec.fence();
    DeepCopyAsyncCuda(dst, src, n);
  }
};

template <class ExecutionSpace>
struct DeepCopy<CudaUVMSpace, CudaSpace, ExecutionSpace> {
  inline DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(dst, src, n);
  }

  inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
                  size_t n) {
    exec.fence();
    DeepCopyAsyncCuda(dst, src, n);
  }
};
template <class ExecutionSpace>
struct DeepCopy<CudaUVMSpace, CudaUVMSpace, ExecutionSpace> {
  inline DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(dst, src, n);
  }

  inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
                  size_t n) {
    exec.fence();
    DeepCopyAsyncCuda(dst, src, n);
  }
};

template <class ExecutionSpace>
struct DeepCopy<CudaUVMSpace, CudaHostPinnedSpace, ExecutionSpace> {
  inline DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, HostSpace, Cuda>(dst, src, n);
  }

  inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
                  size_t n) {
    exec.fence();
    DeepCopyAsyncCuda(dst, src, n);
  }
};

template <class ExecutionSpace>
struct DeepCopy<CudaUVMSpace, HostSpace, ExecutionSpace> {
  inline DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<CudaSpace, HostSpace, Cuda>(dst, src, n);
  }

  inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
                  size_t n) {
    exec.fence();
    DeepCopyAsyncCuda(dst, src, n);
  }
};
template <class ExecutionSpace>
struct DeepCopy<CudaHostPinnedSpace, CudaSpace, ExecutionSpace> {
  inline DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<HostSpace, CudaSpace, Cuda>(dst, src, n);
  }

  inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
                  size_t n) {
    exec.fence();
    DeepCopyAsyncCuda(dst, src, n);
  }
};

template <class ExecutionSpace>
struct DeepCopy<CudaHostPinnedSpace, CudaUVMSpace, ExecutionSpace> {
  inline DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<HostSpace, CudaSpace, Cuda>(dst, src, n);
  }

  inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
                  size_t n) {
    exec.fence();
    DeepCopyAsyncCuda(dst, src, n);
  }
};

template <class ExecutionSpace>
struct DeepCopy<CudaHostPinnedSpace, CudaHostPinnedSpace, ExecutionSpace> {
  inline DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<HostSpace, HostSpace, Cuda>(dst, src, n);
  }

  inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
                  size_t n) {
    exec.fence();
    DeepCopyAsyncCuda(dst, src, n);
  }
};
template <class ExecutionSpace>
struct DeepCopy<CudaHostPinnedSpace, HostSpace, ExecutionSpace> {
  inline DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<HostSpace, HostSpace, Cuda>(dst, src, n);
  }

  inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
                  size_t n) {
    exec.fence();
    DeepCopyAsyncCuda(dst, src, n);
  }
};

template <class ExecutionSpace>
struct DeepCopy<HostSpace, CudaUVMSpace, ExecutionSpace> {
  inline DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<HostSpace, CudaSpace, Cuda>(dst, src, n);
  }

  inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
                  size_t n) {
    exec.fence();
    DeepCopyAsyncCuda(dst, src, n);
  }
};

template <class ExecutionSpace>
struct DeepCopy<HostSpace, CudaHostPinnedSpace, ExecutionSpace> {
  inline DeepCopy(void* dst, const void* src, size_t n) {
    (void)DeepCopy<HostSpace, HostSpace, Cuda>(dst, src, n);
  }

  inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
                  size_t n) {
    exec.fence();
    DeepCopyAsyncCuda(dst, src, n);
  }
};
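/* When deep_copy is invoked with an execution space instance other than
 * Cuda, the specializations above first fence that instance and then issue
 * the copy through DeepCopyAsyncCuda on the dedicated deep-copy stream
 * (see cuda_get_deep_copy_stream above). Sketch, reusing the Views from the
 * previous example:
 *
 *   Kokkos::DefaultHostExecutionSpace host_exec;
 *   Kokkos::deep_copy(host_exec, dev, host);  // fences host_exec first
 *   Kokkos::fence();  // copy runs asynchronously; fence before reading dev
 */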
//----------------------------------------------------------------------------

template <>
class SharedAllocationRecord<Kokkos::CudaSpace, void>
    : public HostInaccessibleSharedAllocationRecordCommon<Kokkos::CudaSpace> {
 private:
  friend class SharedAllocationRecord<Kokkos::CudaUVMSpace, void>;
  friend class SharedAllocationRecordCommon<Kokkos::CudaSpace>;
  friend class HostInaccessibleSharedAllocationRecordCommon<Kokkos::CudaSpace>;

  using RecordBase = SharedAllocationRecord<void, void>;
  using base_t =
      HostInaccessibleSharedAllocationRecordCommon<Kokkos::CudaSpace>;
  SharedAllocationRecord(const SharedAllocationRecord&) = delete;
  SharedAllocationRecord& operator=(const SharedAllocationRecord&) = delete;

  static ::cudaTextureObject_t attach_texture_object(
      const unsigned sizeof_alias, void* const alloc_ptr,
      const size_t alloc_size);

#ifdef KOKKOS_ENABLE_DEBUG
  static RecordBase s_root_record;
#endif

  ::cudaTextureObject_t m_tex_obj = 0;
  const Kokkos::CudaSpace m_space;
 protected:
  ~SharedAllocationRecord();
  SharedAllocationRecord() = default;

  SharedAllocationRecord(
      const Kokkos::CudaSpace& arg_space, const std::string& arg_label,
      const size_t arg_alloc_size,
      const RecordBase::function_type arg_dealloc = &base_t::deallocate);
 public:
  template <typename AliasType>
  inline ::cudaTextureObject_t attach_texture_object() {
    static_assert((std::is_same<AliasType, int>::value ||
                   std::is_same<AliasType, ::int2>::value ||
                   std::is_same<AliasType, ::int4>::value),
                  "Cuda texture fetch only supported for alias types of int, "
                  "::int2, or ::int4");

    if (m_tex_obj == 0) {
      // Attach lazily on first use
      m_tex_obj = attach_texture_object(sizeof(AliasType),
                                        (void*)RecordBase::m_alloc_ptr,
                                        RecordBase::m_alloc_size);
    }

    return m_tex_obj;
  }
  template <typename AliasType>
  inline int attach_texture_object_offset(const AliasType* const ptr) {
    // Texture object is attached to the entire allocation range,
    // so the fetch index is the element offset from the allocation base.
    return ptr - reinterpret_cast<AliasType*>(RecordBase::m_alloc_ptr);
  }
};
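/* Illustrative note: attach_texture_object_offset() converts a raw pointer
 * into an element index relative to the allocation base, since the texture
 * object spans the whole allocation. Hypothetical record 'rec' holding an
 * int-aliased allocation:
 *
 *   const int* p   = reinterpret_cast<const int*>(rec.data()) + 4;
 *   int        off = rec.attach_texture_object_offset(p);  // == 4
 */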
template <>
class SharedAllocationRecord<Kokkos::CudaUVMSpace, void>
    : public SharedAllocationRecordCommon<Kokkos::CudaUVMSpace> {
 private:
  friend class SharedAllocationRecordCommon<Kokkos::CudaUVMSpace>;

  using base_t     = SharedAllocationRecordCommon<Kokkos::CudaUVMSpace>;
  using RecordBase = SharedAllocationRecord<void, void>;
  SharedAllocationRecord(const SharedAllocationRecord&) = delete;
  SharedAllocationRecord& operator=(const SharedAllocationRecord&) = delete;

  static RecordBase s_root_record;

  ::cudaTextureObject_t m_tex_obj = 0;
  const Kokkos::CudaUVMSpace m_space;
 protected:
  ~SharedAllocationRecord();
  SharedAllocationRecord() = default;

  SharedAllocationRecord(
      const Kokkos::CudaUVMSpace& arg_space, const std::string& arg_label,
      const size_t arg_alloc_size,
      const RecordBase::function_type arg_dealloc = &base_t::deallocate);
 public:
  template <typename AliasType>
  inline ::cudaTextureObject_t attach_texture_object() {
    static_assert((std::is_same<AliasType, int>::value ||
                   std::is_same<AliasType, ::int2>::value ||
                   std::is_same<AliasType, ::int4>::value),
                  "Cuda texture fetch only supported for alias types of int, "
                  "::int2, or ::int4");

    if (m_tex_obj == 0) {
      // UVM allocations reuse the CudaSpace texture-object machinery
      m_tex_obj = SharedAllocationRecord<Kokkos::CudaSpace, void>::
          attach_texture_object(sizeof(AliasType),
                                (void*)RecordBase::m_alloc_ptr,
                                RecordBase::m_alloc_size);
    }

    return m_tex_obj;
  }
  template <typename AliasType>
  inline int attach_texture_object_offset(const AliasType* const ptr) {
    // Texture object is attached to the entire allocation range
    return ptr - reinterpret_cast<AliasType*>(RecordBase::m_alloc_ptr);
  }
};
template <>
class SharedAllocationRecord<Kokkos::CudaHostPinnedSpace, void>
    : public SharedAllocationRecordCommon<Kokkos::CudaHostPinnedSpace> {
 private:
  friend class SharedAllocationRecordCommon<Kokkos::CudaHostPinnedSpace>;

  using RecordBase = SharedAllocationRecord<void, void>;
  using base_t     = SharedAllocationRecordCommon<Kokkos::CudaHostPinnedSpace>;
  SharedAllocationRecord(const SharedAllocationRecord&) = delete;
  SharedAllocationRecord& operator=(const SharedAllocationRecord&) = delete;

  static RecordBase s_root_record;

  const Kokkos::CudaHostPinnedSpace m_space;
 protected:
  ~SharedAllocationRecord();
  SharedAllocationRecord() = default;

  SharedAllocationRecord(
      const Kokkos::CudaHostPinnedSpace& arg_space,
      const std::string& arg_label, const size_t arg_alloc_size,
      const RecordBase::function_type arg_dealloc = &deallocate);
};

}  // namespace Impl
}  // namespace Kokkos

#endif /* defined(KOKKOS_ENABLE_CUDA) */
#endif /* #define KOKKOS_CUDASPACE_HPP */