10#if !defined(__HIPCC_RTC__) || !defined(CK_CODE_GEN_RTC)
20template <
unsigned SizeInBytes>
43 static_assert(
sizeof(bytes) <=
sizeof(value_type));
46 template <
typename InputIterator,
typename Size,
typename OutputIterator>
47 __device__
static OutputIterator copy_n(InputIterator from, Size size, OutputIterator to)
53 for(Size count = 1; count < size; ++count)
64 __device__ carrier(
const carrier& other)
noexcept
66 copy_n(other.bytes.begin(), bytes.
Size(), bytes.
begin());
70 __device__ carrier& operator=(value_type
value)
noexcept
72 copy_n(
reinterpret_cast<const ck::byte*
>(&
value), bytes.
Size(), bytes.
begin());
77 __device__
operator value_type()
const noexcept
79 ck::byte result[
sizeof(value_type)];
81 copy_n(bytes.
begin(), bytes.
Size(), result);
83 return *
reinterpret_cast<const value_type*
>(result);
95template <
unsigned SizeInBytes>
102 return __builtin_amdgcn_readfirstlane(
value);
107 return __builtin_amdgcn_readfirstlane(
value);
112 constexpr unsigned object_size =
sizeof(
int64_t);
113 constexpr unsigned second_part_offset = object_size / 2;
114 auto*
const from_obj =
reinterpret_cast<const ck::byte*
>(&
value);
115 alignas(
int64_t) ck::byte to_obj[object_size];
119 *
reinterpret_cast<Sgpr*
>(to_obj) =
121 *
reinterpret_cast<Sgpr*
>(to_obj + second_part_offset) =
124 return *
reinterpret_cast<int64_t*
>(to_obj);
127template <
typename Object,
131 using Size = unsigned;
132 constexpr Size SgprSize = 4;
133 constexpr Size ObjectSize =
sizeof(Object);
135 auto*
const from_obj =
reinterpret_cast<const ck::byte*
>(&obj);
136 alignas(Object) ck::byte to_obj[ObjectSize];
138 constexpr Size RemainedSize = ObjectSize % SgprSize;
139 constexpr Size CompleteSgprCopyBoundary = ObjectSize - RemainedSize;
140 for(Size offset = 0; offset < CompleteSgprCopyBoundary; offset += SgprSize)
144 *
reinterpret_cast<Sgpr*
>(to_obj + offset) =
148 if constexpr(0 < RemainedSize)
153 *
reinterpret_cast<const Carrier*
>(from_obj + CompleteSgprCopyBoundary));
158 return *
reinterpret_cast<Object*
>(to_obj);
Definition threadwise_tensor_slice_transfer_util.hpp:15
typename get_carrier< SizeInBytes >::type get_carrier_t
Definition amd_wave_read_first_lane.hpp:96
long int64_t
Definition data_type.hpp:464
@ Sgpr
Definition amd_address_space.hpp:19
__device__ uint32_t amd_wave_read_first_lane(uint32_t value)
Definition amd_wave_read_first_lane.hpp:100
typename std::enable_if< B, T >::type enable_if_t
Definition enable_if.hpp:27
const GenericPointer< typename T::ValueType > T2 value
Definition pointer.h:1697
unsigned short uint16_t
Definition stdint.h:125
unsigned int uint32_t
Definition stdint.h:126
signed int int32_t
Definition stdint.h:123
unsigned char uint8_t
Definition stdint.h:124
Definition utility/array.hpp:14
__host__ static __device__ constexpr index_t Size()
Definition utility/array.hpp:20
__host__ __device__ constexpr const TData * begin() const
Definition utility/array.hpp:39
uint8_t type
Definition amd_wave_read_first_lane.hpp:26
uint16_t type
Definition amd_wave_read_first_lane.hpp:32
class carrier { using value_type=uint32_t; Array< ck::byte, 3 > bytes; static_assert(sizeof(bytes)<=sizeof(value_type)); template< typename InputIterator, typename Size, typename OutputIterator > __device__ static OutputIterator copy_n(InputIterator from, Size size, OutputIterator to) { if(0< size) { *to=*from;++to; for(Size count=1;count< size;++count) { *to=*++from;++to; } } return to; } __device__ carrier(const carrier &other) noexcept { copy_n(other.bytes.begin(), bytes.Size(), bytes.begin()); } public: __device__ carrier &operator=(value_type value) noexcept { copy_n(reinterpret_cast< const ck::byte * >(&value), bytes.Size(), bytes.begin()); return *this; } __device__ operator value_type() const noexcept { ck::byte result[sizeof(value_type)]; copy_n(bytes.begin(), bytes.Size(), result); return *reinterpret_cast< const value_type * >(result); } } type
Definition amd_wave_read_first_lane.hpp:38
uint32_t type
Definition amd_wave_read_first_lane.hpp:92
Definition amd_wave_read_first_lane.hpp:21