device_column_to_image_impl.hpp Source File
device_column_to_image_impl.hpp
Go to the documentation of this file.
__device__ void copy(const SrcTensorType &src_tensor, DstTensorType &dst_tensor)
Perform optimized copy between two tensor partitions (threadwise copy). Tensors must have the same s...
Definition copy.hpp:36
float launch_and_time_kernel(const StreamConfig &stream_config, F kernel, dim3 grid_dim, dim3 block_dim, std::size_t lds_byte, Args... args)
Definition host_utility/kernel_launch.hpp:14
__host__ __device__ constexpr auto integer_divide_ceil(X x, Y y)
Definition utility/math.hpp:72
Definition tensor_operation/gpu/device/tensor_layout.hpp:42
Definition convolution_backward_data_specialization.hpp:8
Definition convolution_backward_data_specialization.hpp:7
Definition ck.hpp:268
__host__ __device__ constexpr auto make_pass_through_transform(const LowLength &low_length)
Definition multi_index_transform_helper.hpp:12
__host__ __device__ constexpr auto make_naive_tensor_descriptor(const Tuple< Lengths... > &lengths, const Tuple< Strides... > &strides)
Definition tensor_descriptor_helper.hpp:49
__host__ __device__ constexpr auto make_merge_transform(const LowLengths &low_lengths)
Definition multi_index_transform_helper.hpp:55
auto accumulate_n(ForwardIterator first, Size count, T init, BinaryOperation op) -> decltype(std::accumulate(first, std::next(first, count), init, op))
Definition library/utility/numeric.hpp:11
__host__ __device__ constexpr auto make_tuple(Xs &&... xs)
Definition utility/tuple.hpp:211
__host__ __device__ constexpr auto transform_tensor_descriptor(const OldTensorDescriptor &old_tensor_desc, const NewTransforms &new_transforms, NewLowerDimensionOldVisibleIdss, NewUpperDimensionNewVisibleIdss)
Definition tensor_description/tensor_descriptor.hpp:319
__global__ void kernel_tensor_rearrange(const InputGridDesc in_grid_desc, const InputDataType *__restrict__ p_in_global, const OutputGridDesc out_grid_desc, OutputDataType *__restrict__ p_out_global, const index_t batch_count, const Block2ETileMap block_2_tile_map, const ComputePtrOffsetOfStridedBatch compute_ptr_offset_of_batch)
Definition gridwise_tensor_rearrange.hpp:30
Definition ck/stream_config.hpp:10
Definition block_to_ctile_map.hpp:261
Definition gridwise_tensor_rearrange.hpp:71
static __host__ constexpr bool CheckValidity(const InputGridDesc &in_grid_desc, const OutputGridDesc &out_grid_desc)
Definition gridwise_tensor_rearrange.hpp:137
Definition utility/sequence.hpp:43
Definition tensor_operation/operator_transform/transform_conv_fwd_to_gemm.hpp:25
Definition device_base.hpp:197
BaseArgument()=default
BaseInvoker()=default
Definition device_column_to_image_impl.hpp:281
std::vector< InputGridDesc > in_grid_desc_m_k_container_
Definition device_column_to_image_impl.hpp:450
Argument(const void *p_in, void *p_out, const ck::index_t G, const ck::index_t N, const ck::index_t C, const std::array< index_t, NDimSpatial > &input_spatial_lengths, const std::array< index_t, NDimSpatial > &filter_spatial_lengths, const std::array< index_t, NDimSpatial > &output_spatial_lengths, const std::array< index_t, NDimSpatial+3 > &image_g_n_c_wis_strides, const std::array< index_t, 3 > &gemm_g_m_k_strides, const std::array< index_t, NDimSpatial > &conv_filter_strides, const std::array< index_t, NDimSpatial > &conv_filter_dilations, const std::array< index_t, NDimSpatial > &input_left_pads, const std::array< index_t, NDimSpatial > &input_right_pads)
Definition device_column_to_image_impl.hpp:282
std::vector< OutputGridDesc > out_grid_desc_m_k_container_
Definition device_column_to_image_impl.hpp:451
std::vector< OutputDataType * > p_out_container_
Definition device_column_to_image_impl.hpp:454
const std::array< index_t, NDimSpatial+3 > & image_g_n_c_wis_strides_
Definition device_column_to_image_impl.hpp:444
const std::array< index_t, NDimSpatial > & conv_filter_dilations_
Definition device_column_to_image_impl.hpp:446
const ck::index_t X_
Definition device_column_to_image_impl.hpp:439
OutputDataType * p_out_
Definition device_column_to_image_impl.hpp:442
const std::array< index_t, NDimSpatial > & input_right_pads_
Definition device_column_to_image_impl.hpp:448
const ck::index_t G_
Definition device_column_to_image_impl.hpp:437
ComputePtrOffsetOfStridedBatch compute_ptr_offset_of_batch_
Definition device_column_to_image_impl.hpp:456
void Print() const
Definition device_column_to_image_impl.hpp:428
const InputDataType * p_in_
Definition device_column_to_image_impl.hpp:441
const ck::index_t C_
Definition device_column_to_image_impl.hpp:438
std::vector< const InputDataType * > p_in_container_
Definition device_column_to_image_impl.hpp:453
const std::array< index_t, NDimSpatial > & input_left_pads_
Definition device_column_to_image_impl.hpp:447
const std::array< index_t, NDimSpatial > & conv_filter_strides_
Definition device_column_to_image_impl.hpp:445
Definition device_column_to_image_impl.hpp:460
float Run(const BaseArgument *p_arg, const StreamConfig &stream_config=StreamConfig{}) override
Definition device_column_to_image_impl.hpp:501
float Run(const Argument &arg, const StreamConfig &stream_config=StreamConfig{})
Definition device_column_to_image_impl.hpp:461
Definition device_column_to_image_impl.hpp:48
std::unique_ptr< BaseArgument > MakeArgumentPointer(const void *p_in, void *p_out, const ck::index_t G, const ck::index_t N, const ck::index_t C, const std::array< index_t, NDimSpatial > &input_spatial_lengths, const std::array< index_t, NDimSpatial > &filter_spatial_lengths, const std::array< index_t, NDimSpatial > &output_spatial_lengths, const std::array< index_t, NDimSpatial+3 > &image_g_n_c_wis_strides, const std::array< index_t, 3 > &gemm_g_m_k_strides, const std::array< index_t, NDimSpatial > &conv_filter_strides, const std::array< index_t, NDimSpatial > &conv_filter_dilations, const std::array< index_t, NDimSpatial > &input_left_pads, const std::array< index_t, NDimSpatial > &input_right_pads) override
Make argument pointer for column to image.
Definition device_column_to_image_impl.hpp:591
std::string GetTypeString() const override
Definition device_column_to_image_impl.hpp:627
bool IsSupportedArgument(const Argument &arg)
Definition device_column_to_image_impl.hpp:508
static auto MakeOutDescriptor_M_K(const ck::index_t N, const ck::index_t C, const std::array< index_t, NDimSpatial > &input_spatial_lengths, const std::array< index_t, NDimSpatial > &filter_spatial_lengths, const std::array< index_t, NDimSpatial+3 > &image_g_n_c_wis_strides, const std::array< index_t, NDimSpatial > &conv_filter_strides, const std::array< index_t, NDimSpatial > &conv_filter_dilations, const std::array< index_t, NDimSpatial > &input_left_pads, const std::array< index_t, NDimSpatial > &input_right_pads, const std::array< index_t, NDimSpatial > &image_offsets, const std::array< index_t, NDimSpatial > &independent_filters, const std::array< index_t, NDimSpatial > &effs)
Definition device_column_to_image_impl.hpp:180
static constexpr auto matrix_padder
Definition device_column_to_image_impl.hpp:70
bool IsSupportedArgument(const BaseArgument *p_arg) override
Definition device_column_to_image_impl.hpp:552
static constexpr auto ZIdx
Definition device_column_to_image_impl.hpp:62
static auto MakeInputDescriptor_M_K(const ck::index_t N, const ck::index_t C, const std::array< index_t, NDimSpatial > &filter_spatial_lengths, const std::array< index_t, NDimSpatial > &output_spatial_lengths, const std::array< index_t, NDimSpatial > &conv_filter_strides, const std::array< index_t, 3 > &gemm_g_m_k_strides, const std::array< index_t, NDimSpatial > &independent_filters, const std::array< index_t, NDimSpatial > &effs)
Definition device_column_to_image_impl.hpp:99
static constexpr bool is_GNSpatialC
Definition device_column_to_image_impl.hpp:53
remove_cvref_t< decltype(MakeOutDescriptor_M_K( 1, 1, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}))> OutputGridDesc
Definition device_column_to_image_impl.hpp:260
static index_t GetNumberOfIndependentFilters(const index_t input_spatial_len, const index_t left_pad, const index_t right_pad, const index_t filter_len, const index_t filter_stride, const index_t filter_dilation, const index_t image_offset)
Definition device_column_to_image_impl.hpp:75
static constexpr auto I1
Definition device_column_to_image_impl.hpp:59
TransformConvFwdToGemm< NDimSpatial, ConvolutionForwardSpecialization::Default > ConvToGemmFwdTransformer
Definition device_column_to_image_impl.hpp:68
static constexpr auto I0
Definition device_column_to_image_impl.hpp:58
static constexpr auto XIdx
Definition device_column_to_image_impl.hpp:64
remove_cvref_t< decltype(BlockToCTileMap_M00_N0_M01Adapt< MPerBlock, KPerBlock, InputGridDesc >( InputGridDesc{}))> Block2ETileMap
Definition device_column_to_image_impl.hpp:263
static constexpr auto I2
Definition device_column_to_image_impl.hpp:60
GridwiseTensorRearrange< InputGridDesc, InputDataType, OutputGridDesc, OutputDataType, BlockSize, MPerBlock, KPerBlock, ThreadClusterLengths, ScalarPerVector, InMemoryDataOperationEnum::Add, Block2ETileMap, ComputePtrOffsetOfStridedBatch<> > GridwiseTensorRearrangeKernel
Definition device_column_to_image_impl.hpp:267
std::unique_ptr< BaseInvoker > MakeInvokerPointer() override
Definition device_column_to_image_impl.hpp:622
remove_cvref_t< decltype(MakeInputDescriptor_M_K(1, 1, {}, {}, {}, {}, {}, {}))> InputGridDesc
Definition device_column_to_image_impl.hpp:258
static constexpr auto spatial_offset
Definition device_column_to_image_impl.hpp:66
static auto MakeInvoker()
Definition device_column_to_image_impl.hpp:588
static auto MakeArgument(const void *p_in, void *p_out, const ck::index_t G, const ck::index_t N, const ck::index_t C, const std::array< index_t, NDimSpatial > &input_spatial_lengths, const std::array< index_t, NDimSpatial > &filter_spatial_lengths, const std::array< index_t, NDimSpatial > &output_spatial_lengths, const std::array< index_t, NDimSpatial+3 > &image_g_n_c_wis_strides, const std::array< index_t, 3 > &gemm_g_m_k_strides, const std::array< index_t, NDimSpatial > &conv_filter_strides, const std::array< index_t, NDimSpatial > &conv_filter_dilations, const std::array< index_t, NDimSpatial > &input_left_pads, const std::array< index_t, NDimSpatial > &input_right_pads)
Definition device_column_to_image_impl.hpp:557
static constexpr auto YIdx
Definition device_column_to_image_impl.hpp:63
static constexpr bool is_NSpatialGC
Definition device_column_to_image_impl.hpp:49
Convolution Tensor Rearrange.
Definition device_conv_tensor_rearrange.hpp:36
Definition matrix_padder.hpp:180