#include <warp_gemm_impl.hpp>
|
| template<typename CTensor, typename ATensor, typename BTensor, bool post_nop_ = false> |
| CK_TILE_DEVICE void | operator() (CTensor &c, const ATensor &a, const BTensor &b, bool_constant< post_nop_ >={}) const |
| template<typename CTensor, typename ATensor, typename BTensor, index_t i_subk, bool post_nop_ = false> |
| CK_TILE_DEVICE void | operator() (CTensor &c, const ATensor &a, const BTensor &b, number< i_subk >, bool_constant< post_nop_ >={}) const |
| template<index_t opselA, index_t opselB, typename CTensor, typename ATensor, typename BTensor, bool post_nop_ = false> |
| CK_TILE_DEVICE void | operator() (CTensor &c, const ATensor &a, const BTensor &b, const int32_t &a_scale, const int32_t &b_scale, bool_constant< post_nop_ >={}) const |
| template<typename ATensor, typename BTensor> |
| CK_TILE_DEVICE auto | operator() (const ATensor &a, const BTensor &b) const |
| template<index_t opselA, index_t opselB, typename ATensor, typename BTensor> |
| CK_TILE_DEVICE auto | operator() (const ATensor &a, const BTensor &b, const int32_t &a_scale, const int32_t &b_scale) const |
|
| static constexpr index_t | kM = WarpGemmAttribute::kM |
| static constexpr index_t | kN = WarpGemmAttribute::kN |
| static constexpr index_t | kK = WarpGemmAttribute::kK |
| static constexpr index_t | kCMLane = WarpGemmAttribute::kCMLane |
| static constexpr index_t | kKPerThread = WarpGemmAttribute::kKPerThread |
| | The number of elements in the K dimension processed by a single thread in a wavefront.
|
◆ ADataType
template<typename WarpGemmAttribute_>
◆ AWarpDstr
template<typename WarpGemmAttribute_>
◆ AWarpDstrEncoding
template<typename WarpGemmAttribute_>
| using ck_tile::WarpGemmImpl< WarpGemmAttribute_ >::AWarpDstrEncoding = typename WarpGemmAttribute::AWarpDstrEncoding |
◆ AWarpTensor
template<typename WarpGemmAttribute_>
◆ BDataType
template<typename WarpGemmAttribute_>
◆ BWarpDstr
template<typename WarpGemmAttribute_>
◆ BWarpDstrEncoding
template<typename WarpGemmAttribute_>
| using ck_tile::WarpGemmImpl< WarpGemmAttribute_ >::BWarpDstrEncoding = typename WarpGemmAttribute::BWarpDstrEncoding |
◆ BWarpTensor
template<typename WarpGemmAttribute_>
◆ CDataType
template<typename WarpGemmAttribute_>
◆ CWarpDstr
template<typename WarpGemmAttribute_>
◆ CWarpDstrEncoding
template<typename WarpGemmAttribute_>
| using ck_tile::WarpGemmImpl< WarpGemmAttribute_ >::CWarpDstrEncoding = typename WarpGemmAttribute::CWarpDstrEncoding |
◆ CWarpTensor
template<typename WarpGemmAttribute_>
◆ WarpGemmAttribute
template<typename WarpGemmAttribute_>
◆ get_num_of_access()
template<typename WarpGemmAttribute_>
◆ operator()() [1/5]
template<typename WarpGemmAttribute_>
template<typename ATensor, typename BTensor>
◆ operator()() [2/5]
template<typename WarpGemmAttribute_>
template<
index_t opselA,
index_t opselB, typename ATensor, typename BTensor>
◆ operator()() [3/5]
template<typename WarpGemmAttribute_>
template<typename CTensor, typename ATensor, typename BTensor, bool post_nop_ = false>
◆ operator()() [4/5]
template<typename WarpGemmAttribute_>
template<
index_t opselA,
index_t opselB, typename CTensor, typename ATensor, typename BTensor, bool post_nop_ = false>
◆ operator()() [5/5]
template<typename WarpGemmAttribute_>
template<typename CTensor, typename ATensor, typename BTensor,
index_t i_subk, bool post_nop_ = false>
◆ kCMLane
template<typename WarpGemmAttribute_>
◆ kK
template<typename WarpGemmAttribute_>
◆ kKPerThread
template<typename WarpGemmAttribute_>
The number of elements in the K dimension processed by a single thread in a wavefront.
- Note
- WarpGemm may run the MFMA instruction multiple times (on different K). In that case, this value accounts for the repeated runs.
◆ kM
template<typename WarpGemmAttribute_>
◆ kN
template<typename WarpGemmAttribute_>
The documentation for this struct was generated from the following file: