WarpGemmSmfmacImpl< WarpGemmAttribute_ > Struct Template Reference

WarpGemmSmfmacImpl< WarpGemmAttribute_ > Struct Template Reference

Composable Kernel: ck_tile::WarpGemmSmfmacImpl< WarpGemmAttribute_ > Struct Template Reference
ck_tile::WarpGemmSmfmacImpl< WarpGemmAttribute_ > Struct Template Reference

#include <warp_gemm_smfmac_impl.hpp>

Public Types

using WarpGemmAttribute = remove_cvref_t<WarpGemmAttribute_>
using ADataType = typename WarpGemmAttribute::ADataType
using BDataType = typename WarpGemmAttribute::BDataType
using CDataType = typename WarpGemmAttribute::CDataType
using AWarpDstrEncoding = typename WarpGemmAttribute::AWarpDstrEncoding
using BWarpDstrEncoding = typename WarpGemmAttribute::BWarpDstrEncoding
using CWarpDstrEncoding = typename WarpGemmAttribute::CWarpDstrEncoding
using AWarpDstr = remove_cvref_t<decltype(make_static_tile_distribution(AWarpDstrEncoding{}))>
using BWarpDstr = remove_cvref_t<decltype(make_static_tile_distribution(BWarpDstrEncoding{}))>
using CWarpDstr = remove_cvref_t<decltype(make_static_tile_distribution(CWarpDstrEncoding{}))>
using AWarpTensor = static_distributed_tensor<ADataType, AWarpDstr>
using BWarpTensor = static_distributed_tensor<BDataType, BWarpDstr>
using CWarpTensor = static_distributed_tensor<CDataType, CWarpDstr>

Public Member Functions

template<typename AVec>
CK_TILE_DEVICE int32_t compress_a (AVec &a_vec) const
 Compresses the A vector for a 2:4 structured-sparsity instruction by moving all non-zero elements into the lower part of a_vec, halving its effective size.
template<typename CTensor, typename ATensor, typename BTensor, bool post_nop_ = false>
CK_TILE_DEVICE void operator() (CTensor &c, const ATensor &a, const BTensor &b, bool_constant< post_nop_ >={}) const

Static Public Member Functions

static CK_TILE_HOST_DEVICE constexpr auto get_num_of_access ()

Static Public Attributes

static constexpr index_t kM = WarpGemmAttribute::kM
static constexpr index_t kN = WarpGemmAttribute::kN
static constexpr index_t kK = WarpGemmAttribute::kK
static constexpr index_t kKPerThread = WarpGemmAttribute::kKPerThread
 The number of elements in the K dimension processed by a single thread in a wavefront.

Member Typedef Documentation

◆ ADataType

template<typename WarpGemmAttribute_>
using ck_tile::WarpGemmSmfmacImpl< WarpGemmAttribute_ >::ADataType = typename WarpGemmAttribute::ADataType

◆ AWarpDstr

template<typename WarpGemmAttribute_>
using ck_tile::WarpGemmSmfmacImpl< WarpGemmAttribute_ >::AWarpDstr = remove_cvref_t<decltype(make_static_tile_distribution(AWarpDstrEncoding{}))>

◆ AWarpDstrEncoding

template<typename WarpGemmAttribute_>
using ck_tile::WarpGemmSmfmacImpl< WarpGemmAttribute_ >::AWarpDstrEncoding = typename WarpGemmAttribute::AWarpDstrEncoding

◆ AWarpTensor

template<typename WarpGemmAttribute_>
using ck_tile::WarpGemmSmfmacImpl< WarpGemmAttribute_ >::AWarpTensor = static_distributed_tensor<ADataType, AWarpDstr>

◆ BDataType

template<typename WarpGemmAttribute_>
using ck_tile::WarpGemmSmfmacImpl< WarpGemmAttribute_ >::BDataType = typename WarpGemmAttribute::BDataType

◆ BWarpDstr

template<typename WarpGemmAttribute_>
using ck_tile::WarpGemmSmfmacImpl< WarpGemmAttribute_ >::BWarpDstr = remove_cvref_t<decltype(make_static_tile_distribution(BWarpDstrEncoding{}))>

◆ BWarpDstrEncoding

template<typename WarpGemmAttribute_>
using ck_tile::WarpGemmSmfmacImpl< WarpGemmAttribute_ >::BWarpDstrEncoding = typename WarpGemmAttribute::BWarpDstrEncoding

◆ BWarpTensor

template<typename WarpGemmAttribute_>
using ck_tile::WarpGemmSmfmacImpl< WarpGemmAttribute_ >::BWarpTensor = static_distributed_tensor<BDataType, BWarpDstr>

◆ CDataType

template<typename WarpGemmAttribute_>
using ck_tile::WarpGemmSmfmacImpl< WarpGemmAttribute_ >::CDataType = typename WarpGemmAttribute::CDataType

◆ CWarpDstr

template<typename WarpGemmAttribute_>
using ck_tile::WarpGemmSmfmacImpl< WarpGemmAttribute_ >::CWarpDstr = remove_cvref_t<decltype(make_static_tile_distribution(CWarpDstrEncoding{}))>

◆ CWarpDstrEncoding

template<typename WarpGemmAttribute_>
using ck_tile::WarpGemmSmfmacImpl< WarpGemmAttribute_ >::CWarpDstrEncoding = typename WarpGemmAttribute::CWarpDstrEncoding

◆ CWarpTensor

template<typename WarpGemmAttribute_>
using ck_tile::WarpGemmSmfmacImpl< WarpGemmAttribute_ >::CWarpTensor = static_distributed_tensor<CDataType, CWarpDstr>

◆ WarpGemmAttribute

template<typename WarpGemmAttribute_>
using ck_tile::WarpGemmSmfmacImpl< WarpGemmAttribute_ >::WarpGemmAttribute = remove_cvref_t<WarpGemmAttribute_>

Member Function Documentation

◆ compress_a()

template<typename WarpGemmAttribute_>
template<typename AVec>
CK_TILE_DEVICE int32_t ck_tile::WarpGemmSmfmacImpl< WarpGemmAttribute_ >::compress_a ( AVec & a_vec) const
inline

Compresses the A vector for a 2:4 structured-sparsity instruction by moving all non-zero elements into the lower part of a_vec, halving its effective size.

Parameters
a_vec: Vector to be compressed.
Returns
Four 2-bit indices of the non-zero element locations.

◆ get_num_of_access()

template<typename WarpGemmAttribute_>
CK_TILE_HOST_DEVICE constexpr auto ck_tile::WarpGemmSmfmacImpl< WarpGemmAttribute_ >::get_num_of_access ( )
inline static constexpr

◆ operator()()

template<typename WarpGemmAttribute_>
template<typename CTensor, typename ATensor, typename BTensor, bool post_nop_ = false>
CK_TILE_DEVICE void ck_tile::WarpGemmSmfmacImpl< WarpGemmAttribute_ >::operator() ( CTensor & c,
const ATensor & a,
const BTensor & b,
bool_constant< post_nop_ > = {} ) const
inline

Member Data Documentation

◆ kK

template<typename WarpGemmAttribute_>
index_t ck_tile::WarpGemmSmfmacImpl< WarpGemmAttribute_ >::kK = WarpGemmAttribute::kK
static constexpr

◆ kKPerThread

template<typename WarpGemmAttribute_>
index_t ck_tile::WarpGemmSmfmacImpl< WarpGemmAttribute_ >::kKPerThread = WarpGemmAttribute::kKPerThread
static constexpr

The number of elements in the K dimension processed by a single thread in a wavefront.

Note
WarpGemm may run the MFMA instruction multiple times (on different K). In such a situation, this value reflects that fact.

◆ kM

template<typename WarpGemmAttribute_>
index_t ck_tile::WarpGemmSmfmacImpl< WarpGemmAttribute_ >::kM = WarpGemmAttribute::kM
static constexpr

◆ kN

template<typename WarpGemmAttribute_>
index_t ck_tile::WarpGemmSmfmacImpl< WarpGemmAttribute_ >::kN = WarpGemmAttribute::kN
static constexpr

The documentation for this struct was generated from the following file: