28template <
typename SrcDatas,
32 typename ElementwiseOperation,
34 typename SliceLengths,
35 typename DimAccessOrder,
38 typename SrcResetCoordinateAfterRunFlags,
39 typename DstResetCoordinateAfterRunFlags>
52 template <
typename Descs,
54 enable_if_t<Descs::Size() == Indices::Size(),
bool> =
false>
73 const SrcDescs& src_descs,
75 const DstDescs& dst_descs,
77 const ElementwiseOperation& element_op)
80 element_op_(element_op)
83 "wrong! cannot evenly divide");
86 template <
typename Indices, enable_if_t<SrcDescs::Size() == Indices::Size(),
bool> = false>
88 const Indices& src_slice_origin_idxs)
95 template <
typename Indices, enable_if_t<DstDescs::Size() == Indices::Size(),
bool> = false>
97 const Indices& dst_slice_origin_idxs)
108 template <
typename SrcBuffers,
110 enable_if_t<SrcDescs::Size() == SrcBuffers::Size() &&
111 DstDescs::Size() == DstBuffers::Size(),
113 __device__
void Run(
const SrcDescs& src_descs,
114 const SrcBuffers& src_bufs,
115 const DstDescs& dst_descs,
118 auto generate_vectors = [&](
auto data_types) {
119 constexpr index_t num = data_types.Size();
134 auto src_vectors = generate_vectors(SrcDatas{});
135 auto dst_vectors = generate_vectors(DstDatas{});
139 using src_vector_t =
typename remove_cvref_t<
decltype(src_vectors[i])>::type;
141 const bool is_src_valid =
145 src_vectors(i).template AsType<src_vector_t>()(
I0) =
146 src_bufs[i].
template Get<src_vector_t>(src_coords_[i].GetOffset(),
155 [&](
auto iSrc) ->
const auto& {
158 return src_vectors[iSrc].template AsType<SrcData>()[i];
165 [&](
auto iDst) ->
auto& {
168 return dst_vectors(iDst).template AsType<DstData>()(i);
180 unpack2(element_op_, dst_data_refs, src_data_refs);
185 using dst_vector_t =
typename remove_cvref_t<
decltype(dst_vectors[i])>::type;
187 const bool is_dst_valid =
194 dst_bufs(i).template Update<DstInMemOp, dst_vector_t>(
195 dst_coords_[i].GetOffset(),
197 dst_vectors[i].
template AsType<dst_vector_t>()[
I0]);
201 if constexpr(iAccess.value != num_access - 1)
221 if constexpr(SrcResetCoordinateAfterRunFlags::At(i))
223 const auto src_reset_step =
231 if constexpr(DstResetCoordinateAfterRunFlags::At(i))
233 const auto dst_reset_step =
245 if constexpr(num_access == 0)
251 constexpr auto reset_step =
259 template <index_t ISrc>
262 const Index& src_slice_origin_step_idx)
265 const auto adjusted_step_idx = SrcResetCoordinateAfterRunFlags::At(iSrc)
266 ? src_slice_origin_step_idx
276 template <index_t IDst>
279 const Index& dst_slice_origin_step_idx)
282 const auto adjusted_step_idx = DstResetCoordinateAfterRunFlags::At(iDst)
283 ? dst_slice_origin_step_idx
295 const ElementwiseOperation element_op_;
typename detail::StaticallyIndexedArrayImpl< T, N >::type StaticallyIndexedArray
Definition utility/statically_indexed_array.hpp:45
int32_t index_t
Definition ck.hpp:299
__host__ __device__ constexpr auto make_tensor_coordinate_step(const TensorDesc &, const VisibleIndex &idx_diff_visible, UpdateLowerIndexHack)
Definition tensor_description/tensor_descriptor.hpp:444
__host__ __device__ constexpr void move_tensor_coordinate(const TensorDesc &tensor_desc, TensorCoord &coord, const TensorCoordStep &coord_step)
Definition tensor_description/tensor_descriptor.hpp:508
InMemoryDataOperationEnum
Definition ck.hpp:277
remove_cv_t< remove_reference_t< T > > remove_cvref_t
Definition type.hpp:297
__host__ __device__ constexpr bool coordinate_has_valid_offset_assuming_visible_index_is_valid(const TensorDesc &tensor_desc, const TensorCoord &coord)
Definition tensor_description/tensor_descriptor.hpp:560
integral_constant< index_t, N > Number
Definition number.hpp:12
typename tuple_element< I, TTuple >::type tuple_element_t
Definition utility/tuple.hpp:208
__host__ __device__ constexpr auto generate_tuple(F &&f, Number< N >)
Definition tuple_helper.hpp:21
__host__ __device__ constexpr auto generate_sequence(F, Number< N >)
Definition sequence_helper.hpp:18
typename remove_cv< T >::type remove_cv_t
Definition type.hpp:295
__host__ __device__ constexpr auto unpack2(F &&f, X &&x, Y &&y)
Definition functional4.hpp:55
typename std::enable_if< B, T >::type enable_if_t
Definition enable_if.hpp:27
__host__ __device__ constexpr auto make_tensor_coordinate(const TensorDesc &tensor_desc, const VisibleIndex &idx_visible)
Definition tensor_description/tensor_descriptor.hpp:407
__host__ __device__ constexpr auto generate_tie(F &&f, Number< N >)
Definition tuple_helper.hpp:34
Array< index_t, N > MultiIndex
Definition array_multi_index.hpp:12
typename vector_type_maker< T, N >::type vector_type_maker_t
Definition dtype_vector.hpp:54
static __device__ __host__ constexpr auto GetStepBetween(Number< AccessIdx1dBegin >, Number< AccessIdx1dEnd >)
Definition tensor_space_filling_curve.hpp:52
__host__ static __device__ constexpr index_t GetNumOfAccess()
Definition tensor_space_filling_curve.hpp:41
static __device__ __host__ constexpr auto GetForwardStep(Number< AccessIdx1d >)
Definition tensor_space_filling_curve.hpp:66
MultiIndex< nDim > Index
Definition tensor_space_filling_curve.hpp:23
__device__ void SetDstSliceOrigins(const DstDescs &dst_descs, const Indices &dst_slice_origin_idxs)
Definition threadwise_tensor_slice_transfer_v7.hpp:96
ck::ThreadwiseTensorSliceTransfer_v7< SrcDatas, DstDatas, SrcDescs, DstDescs, ElementwiseOperation, DstInMemOps, decltype(thread_slice_lengths), DimAccessOrder, VectorDim, ScalarPerVector, ThreadTransferSrcResetCoordinateAfterRunFlags, ThreadTransferDstResetCoordinateAfterRunFlags >::nDst static constexpr index_t nDst
Definition threadwise_tensor_slice_transfer_v7.hpp:47
__device__ void MoveDstSliceWindow(const DstDescs &dst_descs, Number< IDst > iDst, const Index &dst_slice_origin_step_idx)
Definition threadwise_tensor_slice_transfer_v7.hpp:277
__device__ void MoveSrcSliceWindow(const SrcDescs &src_descs, Number< ISrc > iSrc, const Index &src_slice_origin_step_idx)
Definition threadwise_tensor_slice_transfer_v7.hpp:260
ck::ThreadwiseTensorSliceTransfer_v7< SrcDatas, DstDatas, SrcDescs, DstDescs, ElementwiseOperation, DstInMemOps, decltype(thread_slice_lengths), DimAccessOrder, VectorDim, ScalarPerVector, ThreadTransferSrcResetCoordinateAfterRunFlags, ThreadTransferDstResetCoordinateAfterRunFlags >::I0 static constexpr auto I0
Definition threadwise_tensor_slice_transfer_v7.hpp:42
ck::ThreadwiseTensorSliceTransfer_v7< SrcDatas, DstDatas, SrcDescs, DstDescs, ElementwiseOperation, DstInMemOps, decltype(thread_slice_lengths), DimAccessOrder, VectorDim, ScalarPerVector, ThreadTransferSrcResetCoordinateAfterRunFlags, ThreadTransferDstResetCoordinateAfterRunFlags >::SpaceFillingCurve SpaceFillingCurve< decltype(thread_slice_lengths), DimAccessOrder, remove_cv_t< decltype(scalar_per_access)> > SpaceFillingCurve
Definition threadwise_tensor_slice_transfer_v7.hpp:69
ck::ThreadwiseTensorSliceTransfer_v7< SrcDatas, DstDatas, SrcDescs, DstDescs, ElementwiseOperation, DstInMemOps, decltype(thread_slice_lengths), DimAccessOrder, VectorDim, ScalarPerVector, ThreadTransferSrcResetCoordinateAfterRunFlags, ThreadTransferDstResetCoordinateAfterRunFlags >::DstCoords decltype(MakeCoordinates(DstDescs{}, StaticallyIndexedArray< Index, nDst >{})) DstCoords
Definition threadwise_tensor_slice_transfer_v7.hpp:62
ck::ThreadwiseTensorSliceTransfer_v7< SrcDatas, DstDatas, SrcDescs, DstDescs, ElementwiseOperation, DstInMemOps, decltype(thread_slice_lengths), DimAccessOrder, VectorDim, ScalarPerVector, ThreadTransferSrcResetCoordinateAfterRunFlags, ThreadTransferDstResetCoordinateAfterRunFlags >::nDim static constexpr index_t nDim
Definition threadwise_tensor_slice_transfer_v7.hpp:44
__device__ constexpr ThreadwiseTensorSliceTransfer_v7(const SrcDescs &src_descs, const StaticallyIndexedArray< Index, nSrc > &src_slice_origins, const DstDescs &dst_descs, const StaticallyIndexedArray< Index, nDst > &dst_slice_origins, const ElementwiseOperation &element_op)
Definition threadwise_tensor_slice_transfer_v7.hpp:72
static __device__ constexpr auto GetCoordinateResetStep()
Definition threadwise_tensor_slice_transfer_v7.hpp:241
ck::ThreadwiseTensorSliceTransfer_v7< SrcDatas, DstDatas, SrcDescs, DstDescs, ElementwiseOperation, DstInMemOps, decltype(thread_slice_lengths), DimAccessOrder, VectorDim, ScalarPerVector, ThreadTransferSrcResetCoordinateAfterRunFlags, ThreadTransferDstResetCoordinateAfterRunFlags >::Index MultiIndex< nDim > Index
Definition threadwise_tensor_slice_transfer_v7.hpp:49
ck::ThreadwiseTensorSliceTransfer_v7< SrcDatas, DstDatas, SrcDescs, DstDescs, ElementwiseOperation, DstInMemOps, decltype(thread_slice_lengths), DimAccessOrder, VectorDim, ScalarPerVector, ThreadTransferSrcResetCoordinateAfterRunFlags, ThreadTransferDstResetCoordinateAfterRunFlags >::scalar_per_access static constexpr auto scalar_per_access
Definition threadwise_tensor_slice_transfer_v7.hpp:66
ck::ThreadwiseTensorSliceTransfer_v7< SrcDatas, DstDatas, SrcDescs, DstDescs, ElementwiseOperation, DstInMemOps, decltype(thread_slice_lengths), DimAccessOrder, VectorDim, ScalarPerVector, ThreadTransferSrcResetCoordinateAfterRunFlags, ThreadTransferDstResetCoordinateAfterRunFlags >::SrcCoords decltype(MakeCoordinates(SrcDescs{}, StaticallyIndexedArray< Index, nSrc >{})) SrcCoords
Definition threadwise_tensor_slice_transfer_v7.hpp:61
static constexpr auto MakeCoordinates(const Descs &descs, const Indices &indices)
Definition threadwise_tensor_slice_transfer_v7.hpp:55
ck::ThreadwiseTensorSliceTransfer_v7< SrcDatas, DstDatas, SrcDescs, DstDescs, ElementwiseOperation, DstInMemOps, decltype(thread_slice_lengths), DimAccessOrder, VectorDim, ScalarPerVector, ThreadTransferSrcResetCoordinateAfterRunFlags, ThreadTransferDstResetCoordinateAfterRunFlags >::nSrc static constexpr index_t nSrc
Definition threadwise_tensor_slice_transfer_v7.hpp:46
__device__ void SetSrcSliceOrigins(const SrcDescs &src_descs, const Indices &src_slice_origin_idxs)
Definition threadwise_tensor_slice_transfer_v7.hpp:87
__device__ void Run(const SrcDescs &src_descs, const SrcBuffers &src_bufs, const DstDescs &dst_descs, DstBuffers dst_bufs)
Definition threadwise_tensor_slice_transfer_v7.hpp:113
Definition threadwise_tensor_slice_transfer_util.hpp:20
Definition functional2.hpp:33