BlockwiseWelford< T, BlockSize, ThreadClusterLengths_M_K, ThreadClusterArrangeOrder, GetActualVariance > Struct Template Reference

BlockwiseWelford< T, BlockSize, ThreadClusterLengths_M_K, ThreadClusterArrangeOrder, GetActualVariance > Struct Template Reference

Composable Kernel: ck::BlockwiseWelford< T, BlockSize, ThreadClusterLengths_M_K, ThreadClusterArrangeOrder, GetActualVariance > Struct Template Reference
ck::BlockwiseWelford< T, BlockSize, ThreadClusterLengths_M_K, ThreadClusterArrangeOrder, GetActualVariance > Struct Template Reference

#include <blockwise_welford.hpp>

Static Public Member Functions

template<typename CountDataType>
static __device__ void Merge (T &mean_a, T &var_a, CountDataType &count_a, T mean_b, T var_b, CountDataType count_b)
template<typename CountDataType>
static __device__ void Run (T &mean_value, T &var_value, CountDataType &count)

Static Public Attributes

static constexpr auto BufferLength_M = ThreadClusterLengths_M_K::At(0)
static constexpr auto BufferLength_K = ThreadClusterLengths_M_K::At(1)
static constexpr auto block_buf_desc_m_k
static constexpr auto thread_cluster_desc

Member Function Documentation

◆ Merge()

template<typename T, index_t BlockSize, typename ThreadClusterLengths_M_K, typename ThreadClusterArrangeOrder, bool GetActualVariance = true>
template<typename CountDataType>
__device__ void ck::BlockwiseWelford< T, BlockSize, ThreadClusterLengths_M_K, ThreadClusterArrangeOrder, GetActualVariance >::Merge ( T & mean_a,
T & var_a,
CountDataType & count_a,
T mean_b,
T var_b,
CountDataType count_b )
inline static

◆ Run()

template<typename T, index_t BlockSize, typename ThreadClusterLengths_M_K, typename ThreadClusterArrangeOrder, bool GetActualVariance = true>
template<typename CountDataType>
__device__ void ck::BlockwiseWelford< T, BlockSize, ThreadClusterLengths_M_K, ThreadClusterArrangeOrder, GetActualVariance >::Run ( T & mean_value,
T & var_value,
CountDataType & count )
inline static

Member Data Documentation

◆ block_buf_desc_m_k

template<typename T, index_t BlockSize, typename ThreadClusterLengths_M_K, typename ThreadClusterArrangeOrder, bool GetActualVariance = true>
auto ck::BlockwiseWelford< T, BlockSize, ThreadClusterLengths_M_K, ThreadClusterArrangeOrder, GetActualVariance >::block_buf_desc_m_k
static constexpr
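Initial value:
=
make_naive_tensor_descriptor_packed(make_tuple(Number<BufferLength_M>{}, Number<BufferLength_K>{}))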
integral_constant< index_t, N > Number
Definition number.hpp:12
__host__ __device__ constexpr auto make_naive_tensor_descriptor_packed(const Tuple< Lengths... > &lengths)
Definition tensor_descriptor_helper.hpp:101
__host__ __device__ constexpr auto make_tuple(Xs &&... xs)
Definition utility/tuple.hpp:211

◆ BufferLength_K

template<typename T, index_t BlockSize, typename ThreadClusterLengths_M_K, typename ThreadClusterArrangeOrder, bool GetActualVariance = true>
auto ck::BlockwiseWelford< T, BlockSize, ThreadClusterLengths_M_K, ThreadClusterArrangeOrder, GetActualVariance >::BufferLength_K = ThreadClusterLengths_M_K::At(1)
static constexpr

◆ BufferLength_M

template<typename T, index_t BlockSize, typename ThreadClusterLengths_M_K, typename ThreadClusterArrangeOrder, bool GetActualVariance = true>
auto ck::BlockwiseWelford< T, BlockSize, ThreadClusterLengths_M_K, ThreadClusterArrangeOrder, GetActualVariance >::BufferLength_M = ThreadClusterLengths_M_K::At(0)
static constexpr

◆ thread_cluster_desc

template<typename T, index_t BlockSize, typename ThreadClusterLengths_M_K, typename ThreadClusterArrangeOrder, bool GetActualVariance = true>
auto ck::BlockwiseWelford< T, BlockSize, ThreadClusterLengths_M_K, ThreadClusterArrangeOrder, GetActualVariance >::thread_cluster_desc
static constexpr
Initial value:
=
make_cluster_descriptor(ThreadClusterLengths_M_K{}, ThreadClusterArrangeOrder{})
__host__ __device__ constexpr auto make_cluster_descriptor(const Lengths &lengths, ArrangeOrder order=typename arithmetic_sequence_gen< 0, Lengths::Size(), 1 >::type{})
Definition tensor_description/cluster_descriptor.hpp:13

The documentation for this struct was generated from the following file: