StreamKKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ > Struct Template Reference
Classes |
Public Types |
Public Member Functions |
Static Public Member Functions |
Static Public Attributes |
List of all members
ck_tile::StreamKKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ > Struct Template Reference
#include <streamk_gemm_kernel.hpp>
Classes | |
| struct | StreamKKernelArgs |
| ALayout and ADataType are expected to be scalars, not tuples. More... | |
Public Types | |
| using | UniversalGemmKernel |
| Inject the UniversalGemmKernel base class to support execution of all necessary functions. | |
| using | TilePartitioner = remove_cvref_t<TilePartitioner_> |
| using | GemmPipeline = remove_cvref_t<GemmPipeline_> |
| using | EpiloguePipeline = remove_cvref_t<EpiloguePipeline_> |
| using | ALayout = remove_cvref_t<typename GemmPipeline::ALayout> |
| Specify the layout configurations for A, B, and C. | |
| using | BLayout = remove_cvref_t<typename GemmPipeline::BLayout> |
| using | CLayout = remove_cvref_t<typename GemmPipeline::CLayout> |
| using | ADataType = remove_cvref_t<typename GemmPipeline::ADataType> |
| Specify the data type configurations for A, B, and C. | |
| using | BDataType = remove_cvref_t<typename GemmPipeline::BDataType> |
| using | CDataType = remove_cvref_t<typename EpiloguePipeline::ODataType> |
| using | KernelArgs = StreamKKernelArgs |
| using | Kernel = StreamKKernel<TilePartitioner, GemmPipeline, EpiloguePipeline> |
Public Member Functions | |
| CK_TILE_DEVICE void | operator() (StreamKKernelArgs kargs) const |
| Entry point for the Stream-K Kernel, performing the main Stream-K loop. | |
Static Public Member Functions | |
| static CK_TILE_HOST const std::string | GetName () |
| static CK_TILE_HOST auto | GridSize (const TilePartitioner &tile_partitioner) -> dim3 |
| Compute the grid size for the Stream K kernel using the tile_partitioner. | |
| static CK_TILE_HOST auto | MaxOccupancyGridSize (const stream_config &s) -> dim3 |
| Get the maximum occupancy grid size for the persistent kernel on the current device. | |
| static CK_TILE_HOST constexpr auto | BlockSize () -> dim3 |
| static CK_TILE_HOST StreamKKernelArgs | MakeKernelArgs (const StreamKHostArgs &host_args, int num_cu=NumCU(), int occupancy=Occupancy()) |
| Constructs kernel arguments for the Stream-K kernel. | |
| template<bool UseDefaultScheduler = true> | |
| static CK_TILE_DEVICE void | RunGemm (const std::array< const ADataType *, UniversalGemmKernel::NumATensor > &as_ptr, const std::array< const BDataType *, UniversalGemmKernel::NumBTensor > &bs_ptr, const std::array< const void *, UniversalGemmKernel::NumDTensor > &ds_ptr, CDataType *c_ptr, void *smem_ptr_0, const typename UniversalGemmKernel::KernelArgs &kargs, const index_t num_loop, const index_t block_idx_m, const index_t block_idx_n, const index_t k_size) |
| static CK_TILE_HOST bool | IsSupportedArgument (const StreamKKernelArgs &kargs) |
| static CK_TILE_HOST uint32_t | GetWorkSpaceSize (const StreamKKernelArgs &kargs) |
| Computes the buffer size needed to store accumulation results for Stream K. | |
| static CK_TILE_HOST void | SetWorkSpacePointer (StreamKKernelArgs &kargs, void *workspace_ptr) |
| Sets the kargs' current workspace_ptr to the given workspace_ptr. | |
Static Public Attributes | |
| static constexpr index_t | kBlockSize = UniversalGemmKernel::kBlockSize |
Member Typedef Documentation
◆ ADataType
template<typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
| using ck_tile::StreamKKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::ADataType = remove_cvref_t<typename GemmPipeline::ADataType> |
Specify the data type configurations for A, B, and C.
◆ ALayout
template<typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
| using ck_tile::StreamKKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::ALayout = remove_cvref_t<typename GemmPipeline::ALayout> |
Specify the layout configurations for A, B, and C.
◆ BDataType
template<typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
| using ck_tile::StreamKKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::BDataType = remove_cvref_t<typename GemmPipeline::BDataType> |
◆ BLayout
template<typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
| using ck_tile::StreamKKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::BLayout = remove_cvref_t<typename GemmPipeline::BLayout> |
◆ CDataType
template<typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
| using ck_tile::StreamKKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::CDataType = remove_cvref_t<typename EpiloguePipeline::ODataType> |
◆ CLayout
template<typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
| using ck_tile::StreamKKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::CLayout = remove_cvref_t<typename GemmPipeline::CLayout> |
◆ EpiloguePipeline
template<typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
| using ck_tile::StreamKKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::EpiloguePipeline = remove_cvref_t<EpiloguePipeline_> |
◆ GemmPipeline
template<typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
| using ck_tile::StreamKKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::GemmPipeline = remove_cvref_t<GemmPipeline_> |
◆ Kernel
template<typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
| using ck_tile::StreamKKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::Kernel = StreamKKernel<TilePartitioner, GemmPipeline, EpiloguePipeline> |
◆ KernelArgs
template<typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
| using ck_tile::StreamKKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::KernelArgs = StreamKKernelArgs |
◆ TilePartitioner
template<typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
| using ck_tile::StreamKKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::TilePartitioner = remove_cvref_t<TilePartitioner_> |
◆ UniversalGemmKernel
template<typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
| using ck_tile::StreamKKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >::UniversalGemmKernel |
Initial value:
UniversalGemmKernel< TilePartitioner_, GemmPipeline_, EpiloguePipeline_ >
Definition batched_gemm_kernel.hpp:65
Inject the UniversalGemmKernel base class to support execution of all necessary functions.
Member Function Documentation
◆ BlockSize()
template<typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
|
inline static constexpr |
◆ GetName()
template<typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
|
inline static nodiscard |
◆ GetWorkSpaceSize()
template<typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
|
inline static |
Computes the buffer size needed to store accumulation results for Stream K.
- Returns
- The buffer size needed.
◆ GridSize()
template<typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
|
inline static |
Compute the grid size for the Stream K kernel using the tile_partitioner.
- Returns
- The grid size.
◆ IsSupportedArgument()
template<typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
|
inline static |
◆ MakeKernelArgs()
template<typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
|
inline static |
Constructs kernel arguments for the Stream-K kernel.
- Parameters
-
  - host_args: Stream-K host arguments.
  - num_cu: Number of compute units (CUs). The default is the number of CUs on the device. The caller may select their own to assist with test reproducibility, etc.
  - occupancy: The maximum number of active blocks per CU for this kernel. The caller may select their own to assist with test reproducibility, etc.
- Returns
- The kernel arguments for Stream-K.
◆ MaxOccupancyGridSize()
template<typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
|
inline static |
Get the maximum occupancy grid size for the persistent kernel on the current device.
- Returns
- The maximum occupancy grid size.
- Note
- This function queries the maximum occupancy of the kernel using hipOccupancyMaxActiveBlocksPerMultiprocessor.
◆ operator()()
template<typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
|
inline |
Entry point for the Stream-K Kernel, performing the main Stream-K loop.
◆ RunGemm()
template<typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
template<bool UseDefaultScheduler = true>
|
inline static |
◆ SetWorkSpacePointer()
template<typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
|
inline static |
Sets the kargs' current workspace_ptr to the given workspace_ptr.
- Note
- Assumes that the given workspace_ptr points to allocated device memory.
Member Data Documentation
◆ kBlockSize
template<typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
|
static constexpr |
The documentation for this struct was generated from the following file: